Screen Time Post Processing
---

The purpose of this notebook is to select rows from the eye tracking data corresponding to times during which the game matrix screen were displayed. 

This is carried out in the following phases:
1. Screen Time Data Reshaping
    * In this phase the screen timing file will be loaded and reshaped to facilitate filtering of the eye tracking data in phase 2.
2. Eye Tracker Data Filtration
    * In this phase the eye tracking data file will be loaded, reformated, and finally filtered.

In [69]:
import pandas as pd
import numpy as np
from pandas import DataFrame, Series, ExcelFile, ExcelWriter
import datetime
import time

### Phase 1: Load Screen Timing for Subject 1 and Subject 2

In [115]:
phase_0_rows = 3*2
phase_1_rows = 12*2
phase_2_rows = 2*2
phase_3_rows = 11*2

total_rows = phase_0_rows + phase_1_rows + phase_2_rows + phase_3_rows
total_rows

56

In [116]:
subject_1_timing = pd.read_csv('session_data/session2/timing_subject_1.csv', skipinitialspace=True, parse_dates=['time'])
assert len(subject_1_timing) == total_rows

In [117]:
subject_2_timing = pd.read_csv('session_data/session2/timing_subject_2.csv', skipinitialspace=True, parse_dates=['time'])
assert len(subject_2_timing) == total_rows

In [118]:
screen_timing = subject_1_timing.append(subject_2_timing)
screen_timing = screen_timing.reset_index(drop=True)

In [119]:
screen_timing.head()

Unnamed: 0,subject,state,phase,time
0,1,start,0,2018-01-16 01:24:01.051059
1,1,stop,0,2018-01-16 01:24:13.684080
2,1,start,0,2018-01-16 01:24:18.646309
3,1,stop,0,2018-01-16 01:24:30.906896
4,1,start,0,2018-01-16 01:24:34.730275


In [120]:
screen_timing.time = screen_timing.time.dt.tz_localize('utc')
screen_timing.time = screen_timing.time.dt.tz_convert('America/New_York')
screen_timing.head()

Unnamed: 0,subject,state,phase,time
0,1,start,0,2018-01-15 20:24:01.051059-05:00
1,1,stop,0,2018-01-15 20:24:13.684080-05:00
2,1,start,0,2018-01-15 20:24:18.646309-05:00
3,1,stop,0,2018-01-15 20:24:30.906896-05:00
4,1,start,0,2018-01-15 20:24:34.730275-05:00


In [121]:
num_screens = len(screen_timing)
num_screens

112

In [122]:
start_times = screen_timing.loc[[i for i in range(0, num_screens, 2)], 'time'].reset_index(drop=True)
stop_times = screen_timing.loc[[i for i in range(1, num_screens, 2)], 'time'].reset_index(drop=True)

In [123]:
screen_timing.drop('time', axis=1, inplace=True)
screen_timing.drop('state', axis=1, inplace=True)

In [124]:
screen_timing = screen_timing.loc[[i for i in range(0, num_screens, 2)]]
screen_timing = screen_timing.reset_index(drop=True)

In [125]:
screen_timing['start'] = start_times
screen_timing['stop'] = stop_times
len(screen_timing)

56

In [126]:
screen_timing.dtypes

subject                               int64
phase                                 int64
start      datetime64[ns, America/New_York]
stop       datetime64[ns, America/New_York]
dtype: object

In [127]:
screen_timing.head()

Unnamed: 0,subject,phase,start,stop
0,1,0,2018-01-15 20:24:01.051059-05:00,2018-01-15 20:24:13.684080-05:00
1,1,0,2018-01-15 20:24:18.646309-05:00,2018-01-15 20:24:30.906896-05:00
2,1,0,2018-01-15 20:24:34.730275-05:00,2018-01-15 20:24:47.415498-05:00
3,1,1,2018-01-15 20:25:24.747986-05:00,2018-01-15 20:25:37.706615-05:00
4,1,1,2018-01-15 20:25:48.365327-05:00,2018-01-15 20:25:59.978396-05:00


## Phase 1: Load Game Pair Order

In [13]:
pair_order = pd.read_csv('session_data/session2/PairOrder.xls', sep='\t')
pair_order

Unnamed: 0,GamePairOrder,Period,SubjectID,Phase,Order,GamePairID
0,GamePairOrder,1,1,2,1,1
1,GamePairOrder,1,1,3,2,4
2,GamePairOrder,1,1,3,3,2
3,GamePairOrder,1,1,3,4,3
4,GamePairOrder,1,1,3,5,5
5,GamePairOrder,1,1,3,6,9
6,GamePairOrder,1,1,3,7,8
7,GamePairOrder,1,1,3,8,7
8,GamePairOrder,1,1,3,9,10
9,GamePairOrder,1,1,3,10,11


#### Merge the Game Pair Order and Screen Timing

In [825]:
screen_timing['pair_id'] = pair_order
screen_timing.head()

Unnamed: 0,subject,action,screen,timestamp,pair_id
0,1,start,1,1513359000.0,1
1,1,stop,1,1513359000.0,1
2,1,start,1,1513359000.0,2
3,1,stop,1,1513359000.0,2
4,1,start,1,1513359000.0,3


#### Convert Timestamp Column Type to DateTime 

Note: Still timezone unaware

In [826]:
screen_timing.timestamp = pd.to_datetime(screen_timing.timestamp, unit='s')
screen_timing.dtypes

subject               int64
action               object
screen                int64
timestamp    datetime64[ns]
pair_id               int64
dtype: object

#### Localize Timestamp Column to America/New_York

In [827]:
screen_timing.timestamp = screen_timing.timestamp.dt.tz_localize('utc')
screen_timing.timestamp = screen_timing.timestamp.dt.tz_convert('America/New_York')
screen_timing.head()

Unnamed: 0,subject,action,screen,timestamp,pair_id
0,1,start,1,2017-12-15 12:26:58.614861-05:00,1
1,1,stop,1,2017-12-15 12:27:10.834945-05:00,1
2,1,start,1,2017-12-15 12:27:21.726729-05:00,2
3,1,stop,1,2017-12-15 12:27:33.899904-05:00,2
4,1,start,1,2017-12-15 12:27:44.057208-05:00,3


In [828]:
screen_timing.dtypes

subject                                 int64
action                                 object
screen                                  int64
timestamp    datetime64[ns, America/New_York]
pair_id                                 int64
dtype: object

#### Extract Screen Start & Stop Times

In [829]:
num_screens = len(screen_timing)//2
start_times = screen_timing.loc[[i*2 for i in range(num_screens)], 'timestamp'].reset_index(drop=True)
stop_times = screen_timing.loc[[i*2+1 for i in range(num_screens)], 'timestamp'].reset_index(drop=True)

#### Drop the Timestamp and Action from the Screen Timing Data Frame

In [830]:
screen_timing.drop('timestamp', axis=1, inplace=True)
screen_timing.drop('action', axis=1, inplace=True)

#### Remove Duplicate Rows from the Screen Timing DF

In [831]:
screen_timing = screen_timing.loc[[i*2 for i in range(num_screens)]]
screen_timing = screen_timing.reset_index(drop=True)

#### Add the Start and Stop Time Columns to the Screen Timing DF

In [832]:
screen_timing['start'] = start_times
screen_timing['stop'] = stop_times
screen_timing.head()

Unnamed: 0,subject,screen,pair_id,start,stop
0,1,1,1,2017-12-15 12:26:58.614861-05:00,2017-12-15 12:27:10.834945-05:00
1,1,1,2,2017-12-15 12:27:21.726729-05:00,2017-12-15 12:27:33.899904-05:00
2,1,1,3,2017-12-15 12:27:44.057208-05:00,2017-12-15 12:27:56.136652-05:00
3,1,1,4,2017-12-15 12:28:07.231554-05:00,2017-12-15 12:28:19.498497-05:00
4,1,1,5,2017-12-15 12:28:27.905633-05:00,2017-12-15 12:28:40.797662-05:00


In [833]:
screen_timing.dtypes

subject                               int64
screen                                int64
pair_id                               int64
start      datetime64[ns, America/New_York]
stop       datetime64[ns, America/New_York]
dtype: object

## Phase 2: Eye Tracker Data Filtration

#### Load the Eye Tracking Data File

* Note: TIME is the time elapsed in seconds since the last system initalization. Since the camera frame rate is 60 Hz (i.e. 60 cycles per second) the time should be in increments of 0.0167.
* Todo: Confirm that time is not reset after calibration.

In [834]:
gaze = pd.read_csv('session_data/subject_1_all_gaze.csv')
gaze.head()

Unnamed: 0,MEDIA_ID,MEDIA_NAME,CNT,TIME(2017/12/15 12:26:37.409),TIMETICK(f=3328125),FPOGX,FPOGY,FPOGS,FPOGD,FPOGID,...,RPCX,RPCY,RPD,RPS,RPV,BKID,BKDUR,BKPMIN,AOI,Unnamed: 32
0,0,NewMedia0,0,0.0,2773055138,0.21463,0.28561,0.0,0.0,1,...,0.76706,0.60058,15.80494,0.99245,1,0,0.0,12,,
1,0,NewMedia0,1,0.01642,2773109769,0.21458,0.28474,0.0,0.01642,1,...,0.76705,0.60008,15.80904,0.98184,1,0,0.0,12,,
2,0,NewMedia0,2,0.03235,2773162829,0.21569,0.28609,0.0,0.03235,1,...,0.7671,0.59952,15.44995,0.97123,1,0,0.0,12,,
3,0,NewMedia0,3,0.04883,2773217551,0.21602,0.28719,0.0,0.04883,1,...,0.76737,0.59973,15.2754,0.96063,1,0,0.0,12,,
4,0,NewMedia0,4,0.06531,2773272481,0.21605,0.28745,0.0,0.06531,1,...,0.76763,0.60002,15.75126,0.99752,1,0,0.0,12,,


#### Extract the Initialization Time from the Column Name

In [835]:
time_column_name = list(gaze)[3]
time_column_name

'TIME(2017/12/15 12:26:37.409)'

#### Rename Time Column

In [836]:
gaze.rename(columns={time_column_name: 'Time'}, inplace=True)
gaze.head()

Unnamed: 0,MEDIA_ID,MEDIA_NAME,CNT,Time,TIMETICK(f=3328125),FPOGX,FPOGY,FPOGS,FPOGD,FPOGID,...,RPCX,RPCY,RPD,RPS,RPV,BKID,BKDUR,BKPMIN,AOI,Unnamed: 32
0,0,NewMedia0,0,0.0,2773055138,0.21463,0.28561,0.0,0.0,1,...,0.76706,0.60058,15.80494,0.99245,1,0,0.0,12,,
1,0,NewMedia0,1,0.01642,2773109769,0.21458,0.28474,0.0,0.01642,1,...,0.76705,0.60008,15.80904,0.98184,1,0,0.0,12,,
2,0,NewMedia0,2,0.03235,2773162829,0.21569,0.28609,0.0,0.03235,1,...,0.7671,0.59952,15.44995,0.97123,1,0,0.0,12,,
3,0,NewMedia0,3,0.04883,2773217551,0.21602,0.28719,0.0,0.04883,1,...,0.76737,0.59973,15.2754,0.96063,1,0,0.0,12,,
4,0,NewMedia0,4,0.06531,2773272481,0.21605,0.28745,0.0,0.06531,1,...,0.76763,0.60002,15.75126,0.99752,1,0,0.0,12,,


#### Convert the Start Time to a Datetime Object

In [837]:
date_time_text = time_column_name[5:-1]
date = datetime.datetime.strptime(date_time_text, "%Y/%m/%d %H:%M:%S.%f")
date

datetime.datetime(2017, 12, 15, 12, 26, 37, 409000)

#### Create Series from Start Time

In [838]:
time_col = pd.Series(np.repeat(date, len(gaze)))

#### Convert Time in Seconds to Microseconds

In [839]:
gaze.Time = gaze.Time*10**6
gaze.Time = time_micro.astype(int)
gaze.head()

Unnamed: 0,MEDIA_ID,MEDIA_NAME,CNT,Time,TIMETICK(f=3328125),FPOGX,FPOGY,FPOGS,FPOGD,FPOGID,...,RPCX,RPCY,RPD,RPS,RPV,BKID,BKDUR,BKPMIN,AOI,Unnamed: 32
0,0,NewMedia0,0,0,2773055138,0.21463,0.28561,0.0,0.0,1,...,0.76706,0.60058,15.80494,0.99245,1,0,0.0,12,,
1,0,NewMedia0,1,16420,2773109769,0.21458,0.28474,0.0,0.01642,1,...,0.76705,0.60008,15.80904,0.98184,1,0,0.0,12,,
2,0,NewMedia0,2,32349,2773162829,0.21569,0.28609,0.0,0.03235,1,...,0.7671,0.59952,15.44995,0.97123,1,0,0.0,12,,
3,0,NewMedia0,3,48830,2773217551,0.21602,0.28719,0.0,0.04883,1,...,0.76737,0.59973,15.2754,0.96063,1,0,0.0,12,,
4,0,NewMedia0,4,65310,2773272481,0.21605,0.28745,0.0,0.06531,1,...,0.76763,0.60002,15.75126,0.99752,1,0,0.0,12,,


#### Add Start Time to Time Elapsed Since Observation

In [840]:
gaze.Time = pd.to_timedelta(gaze.Time, unit='us') + time_col

In [841]:
gaze.head()

Unnamed: 0,MEDIA_ID,MEDIA_NAME,CNT,Time,TIMETICK(f=3328125),FPOGX,FPOGY,FPOGS,FPOGD,FPOGID,...,RPCX,RPCY,RPD,RPS,RPV,BKID,BKDUR,BKPMIN,AOI,Unnamed: 32
0,0,NewMedia0,0,2017-12-15 12:26:37.409000,2773055138,0.21463,0.28561,0.0,0.0,1,...,0.76706,0.60058,15.80494,0.99245,1,0,0.0,12,,
1,0,NewMedia0,1,2017-12-15 12:26:37.425420,2773109769,0.21458,0.28474,0.0,0.01642,1,...,0.76705,0.60008,15.80904,0.98184,1,0,0.0,12,,
2,0,NewMedia0,2,2017-12-15 12:26:37.441349,2773162829,0.21569,0.28609,0.0,0.03235,1,...,0.7671,0.59952,15.44995,0.97123,1,0,0.0,12,,
3,0,NewMedia0,3,2017-12-15 12:26:37.457830,2773217551,0.21602,0.28719,0.0,0.04883,1,...,0.76737,0.59973,15.2754,0.96063,1,0,0.0,12,,
4,0,NewMedia0,4,2017-12-15 12:26:37.474310,2773272481,0.21605,0.28745,0.0,0.06531,1,...,0.76763,0.60002,15.75126,0.99752,1,0,0.0,12,,


In [842]:
gaze.dtypes

MEDIA_ID                        int64
MEDIA_NAME                     object
CNT                             int64
Time                   datetime64[ns]
TIMETICK(f=3328125)             int64
FPOGX                         float64
FPOGY                         float64
FPOGS                         float64
FPOGD                         float64
FPOGID                          int64
FPOGV                           int64
BPOGX                         float64
BPOGY                         float64
BPOGV                           int64
CX                            float64
CY                            float64
CS                              int64
USER                          float64
LPCX                          float64
LPCY                          float64
LPD                           float64
LPS                           float64
LPV                             int64
RPCX                          float64
RPCY                          float64
RPD                           float64
RPS         

In [843]:
gaze.shape

(52218, 33)

#### Localize Time to America/New_York

In [844]:
gaze.Time = gaze.Time.dt.tz_localize('America/New_York')

#### Select Rows Corresponding to Times During Which a Game Matrix Screen Was Displayed

In [846]:
filtered_data = pd.DataFrame()
for index,row in screen_timing.iterrows():
    filtered_data = pd.concat([filtered_data, gaze.loc[(gaze.Time >= row.start) & (gaze.Time <= row.stop)]])

In [847]:
filtered_data.head()

Unnamed: 0,MEDIA_ID,MEDIA_NAME,CNT,Time,TIMETICK(f=3328125),FPOGX,FPOGY,FPOGS,FPOGD,FPOGID,...,RPCX,RPCY,RPD,RPS,RPV,BKID,BKDUR,BKPMIN,AOI,Unnamed: 32
1291,0,NewMedia0,1291,2017-12-15 12:26:58.618720-05:00,2843643810,0.92092,1.01077,19.41907,1.74127,48,...,0.81645,0.62788,16.22904,0.97162,1,0,0.0,14,,
1292,0,NewMedia0,1292,2017-12-15 12:26:58.635140-05:00,2843698278,0.92845,1.00923,21.17682,0.04932,49,...,0.81668,0.62835,15.98209,0.99321,1,0,0.0,14,,
1293,0,NewMedia0,1293,2017-12-15 12:26:58.651430-05:00,2843752565,0.93464,1.00519,21.17682,0.06561,49,...,0.81677,0.62782,16.00013,1.0148,1,0,0.0,14,,
1294,0,NewMedia0,1294,2017-12-15 12:26:58.667910-05:00,2843807485,0.9402,1.00162,21.17682,0.08209,49,...,0.81677,0.62793,15.89787,1.03639,1,0,0.0,14,,
1295,0,NewMedia0,1295,2017-12-15 12:26:58.684700-05:00,2843863338,0.94283,0.99631,21.17682,0.09888,49,...,0.81663,0.62798,15.92943,1.03639,1,0,0.0,14,,


In [848]:
filtered_data.shape

(24042, 33)

#### Number of Rows Removed

In [852]:
gaze.shape[0] - filtered_data.shape[0]

28176