# Process Tetris Videos

Tetris videos are found in the Data folder in each user's session file.  Their names include the timestamp of their initial recording.

This Script includes two processing steps: you can pass in the participant and session ('P1', 'sess1') and it will run through each frame of the video, OCR relevant sections looking for text (i.e. 'RESUME', 'PAUSED', 'START', 'GAME OVER'), and write a metadata file that allows us to just have a simple dataframe that captures when the user is playing and when they succumb to the game.

We might develop a prior of focus state based on this game play structure.



In [7]:
import cv2
import matplotlib.pyplot as plt
import numpy as np
import os
import re
import easyocr
import pandas as pd
from multiprocessing import Pool
from IPython.display import display, clear_output
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
from matplotlib.figure import Figure
from tqdm import tqdm
from concurrent.futures import ProcessPoolExecutor, as_completed

%matplotlib inline

In [10]:
class VideoAnnotator:
    def __init__(self, folder, user, sess):
        
        full_path = os.path.join(folder + '/' + user, user + '_' + sess)
        # Find video file and extract timestamp
        for file_name in os.listdir(full_path):
            if file_name.endswith(".MP4"):
                video_file = os.path.join(full_path, file_name)
                # Assuming timestamp is the number after "Final" in the filename
                timestamp_match = re.search(r"Final(\d+)", file_name)
                if timestamp_match:
                    self.video_timestamp = int(timestamp_match.group(1))*1000
                break
        self.status = user + ', ' + sess
        self.cap = cv2.VideoCapture(video_file)
        self.reader = easyocr.Reader(['en'],gpu = False)
        self.df = pd.DataFrame(columns=['timestamp', 'status', 'ocr_string'])
    
    def progress(self, count, total, status=''):
        bar_len = 60
        filled_len = int(round(bar_len * count / float(total)))
        bar = '=' * filled_len + '-' * (bar_len - filled_len)
        clear_output(wait=True)
        display('{} [{}] {}'.format(status, bar, count))
        
    def run(self, speed=1):
        
        prev_ocr = ''
        prev_ocr_gover = ''
        
        #ROIs where we see pause/resume status and game over text
        x1, y1, x2, y2 = 750, 600, 1200, 750 
        x3, y3, x4, y4 = 725, 900, 1225, 1150
        
        total_frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
        frame_count = 0
        
        while(self.cap.isOpened()):
            ret, frame = self.cap.read()
            if not ret:
                break
        
            if frame_count % speed == 0:
            
                # Extract the ROI from the image
                roi = frame[y1:y2, x1:x2]
                roi_gameover = frame[y3:y4, x3:x4]

                #OCR it
                ocr_output = self.reader.readtext(roi)
                if len(ocr_output):
                    ocr_result = ocr_output[0][1].lower()
                else:
                    ocr_result = ''

                ocr_output_gover = self.reader.readtext(roi_gameover)
                if len(ocr_output_gover):
                    ocr_result_gover = ocr_output_gover[0][1].lower()
                else:
                    ocr_result_gover = ''

                #check if OCR has changed, indicating an update in game status
                if (prev_ocr != ocr_result or prev_ocr_gover != ocr_result_gover):

                    prev_ocr = ocr_result
                    prev_ocr_gover = ocr_result_gover

                    status = 'NOTIFICATION'

                    if (ocr_result == '' and ocr_result_gover == ''):
                        status='PLAYING'
                    elif('game' in ocr_result_gover or 'over' in ocr_result_gover):
                        status='GAME_OVER'
                    elif('start' in ocr_result or 'resume' in ocr_result):
                        status='PAUSED'
                    elif('falling' in ocr_result or 'blocks' in ocr_result or 'player' in ocr_result_gover):
                        status='BOOT_MENU'
                    elif('latest' in ocr_result or 'public' in ocr_result or 'score' in ocr_result):
                        status='SCOREBOARD'
                    elif(ocr_result != ''):
                        status='UNKNOWN'

                    new_row = {
                    'timestamp': self.cap.get(cv2.CAP_PROP_POS_MSEC) + self.video_timestamp,
                    'status': status,          # An example status
                    'ocr_string': ocr_result + ' | ' + ocr_result_gover
                    }

                    self.df = pd.concat([self.df, pd.DataFrame([new_row])], ignore_index=True)

            #uncomment for progress bar for one video file
            #self.progress(frame_count, total_frames, status='Processing video ' + self.status)
            
            frame_count += 1

        self.cap.release()
        
        return self.df

## Single File Process and Save

this will overwrite existing `video_meta.pkl` in the session folder

In [11]:
folder = '/Volumes/Secondary/PhDStudy_Results'
user = 9
session = 1

va = VideoAnnotator(folder, 'P' + str(user), 'sess' + str(session))
video_metadata = va.run(speed=8)
video_metadata.to_pickle(folder + '/P' + str(user) + '/P' + str(user) + '_sess' + str(session) + '/video_meta.pkl')

Using CPU. Note: This module is much faster with a GPU.


AttributeError: 'DataFrame' object has no attribute 'append'

## Process All Videos and Save

this will *not* overwrite existing `video_meta.pkl` in the session folder.  If one exists and is incomplete, you must delete it or it will skip it.  Also skips over nonexistant participants/folders.

In [9]:
def process_video(args):
    folder, user, session = args
    try:
        print(f'Starting P{user} Session #{session}')
        va = VideoAnnotator(folder, 'P' + str(user), 'sess' + str(session))
        video_metadata = va.run(speed=8)
        video_metadata.to_pickle(os.path.join(folder, f'P{user}', f'P{user}_sess{session}', 'video_meta.pkl'))
        return f"Completed user {user}, session {session}"
    except Exception as e:
        return f"Error processing video for user {user} session {session}: {e}"

# Define your folder and list of users and sessions
folder = '/Volumes/Secondary/PhDStudy_Results'
users = list(range(1, 26))  # Users 1 to 25
sessions = [1, 2]  # Sessions 1 and 2

# Create a list of all combinations of users and sessions
tasks = [(folder, user, session) for user in users for session in sessions]

# Create a multiprocessing pool and process all videos in parallel
with ProcessPoolExecutor(max_workers=4) as executor:  # Change the number 4 to control how many videos are processed at once
    futures = {executor.submit(process_video, task): task for task in tasks}
    
    for future in tqdm(as_completed(futures), total=len(futures), desc="Processing videos"):
        print(future.result())

Processing videos:   0%|                                                                                                                 | 0/50 [00:00<?, ?it/s]Process SpawnProcess-1:
Traceback (most recent call last):
  File "/usr/local/Cellar/python@3.9/3.9.16/Frameworks/Python.framework/Versions/3.9/lib/python3.9/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/usr/local/Cellar/python@3.9/3.9.16/Frameworks/Python.framework/Versions/3.9/lib/python3.9/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/local/Cellar/python@3.9/3.9.16/Frameworks/Python.framework/Versions/3.9/lib/python3.9/concurrent/futures/process.py", line 240, in _process_worker
    call_item = call_queue.get(block=True)
  File "/usr/local/Cellar/python@3.9/3.9.16/Frameworks/Python.framework/Versions/3.9/lib/python3.9/multiprocessing/queues.py", line 122, in get
    return _ForkingPickler.loads(res)
AttributeError: Can't get attribute 'p

BrokenProcessPool: A process in the process pool was terminated abruptly while the future was running or pending.

In [12]:
pd.__version__

'2.0.1'

## Read DF Back

In [6]:
folder = 'Data'
user = 15
session = 2

metadata_read = pd.read_pickle(folder + '/P' + str(user) + '/P' + str(user) + '_sess' + str(session) + '/video_meta.pkl')
metadata_read.head(50)

Unnamed: 0,timestamp,status,ocr_string
0,1682190516410.0,PAUSED,resume |
1,1682191415565.0,PAUSED,resume | 1
2,1682191415698.3333,PAUSED,resume | l
3,1682191415831.6667,PAUSED,resume |
4,1682191415965.0,UNKNOWN,4ong 43 ou cam |
5,1682191416098.3333,UNKNOWN,as you can: |
6,1682191416231.6667,UNKNOWN,2 | 00
7,1682191416366.6667,UNKNOWN,ony 4> 9ou cam |
8,1682191416500.0,PAUSED,resume |
9,1682191417833.3333,PLAYING,|
