In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import confusion_matrix, roc_auc_score
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

In [2]:
video_data = pd.read_excel('data/EEG Video Timings.xlsx')
type_col_name = list(filter(lambda s: s.startswith('Type'), video_data.columns))[0]
generalized_sz = video_data[video_data[type_col_name] == 0]
fnames = 'data/videos/' + generalized_sz['Filename']+'.mp4'

In [3]:
generalized_sz['ID'].nunique()

30

### Transform keypoints to distances and angles

#### Calculate Angles 3D

In [4]:

FRAME_WIDTH = 960
FRAME_HEIGHT = 540
def angle_between_3d(landmarks, a_img, b_img, c_img, prefix=''):
    '''Given a row of landmarks with columns x0, y0, z0 ... x32, y32, z32; and 3
    point numbers a, b, and c, this function finds the measure of angle ABC,
    with point B at the vertex. Returns angle in radians'''

    # the reference image is 1-indexed
    a = a_img - 1
    b = b_img - 1
    c = c_img - 1

    xa = landmarks[prefix+'x'+str(a)]
    ya = landmarks[prefix+'y'+str(a)]
    za = landmarks[prefix+'z'+str(a)] * FRAME_WIDTH

    xb = landmarks[prefix+'x'+str(b)]
    yb = landmarks[prefix+'y'+str(b)]
    zb = landmarks[prefix+'z'+str(b)] * FRAME_WIDTH

    xc = landmarks[prefix+'x'+str(c)]
    yc = landmarks[prefix+'y'+str(c)]
    zc = landmarks[prefix+'z'+str(c)] * FRAME_WIDTH

    side_a = np.sqrt(np.power(xb - xc, 2) + np.power(yb - yc, 2) + np.power(zb - zc, 2))
    side_b = np.sqrt(np.power(xa - xc, 2) + np.power(ya - yc, 2) + np.power(za - zc, 2))
    side_c = np.sqrt(np.power(xb - xa, 2) + np.power(yb - ya, 2) + np.power(zb - za, 2))

    numer = np.power(side_a, 2) + np.power(side_c, 2) - np.power(side_b, 2)
    denom = 2 * side_a * side_c
    try:
        return np.arccos(numer/denom)
        # return np.abs(np.arctan((yc - yb) / (xc - xb)) - np.arctan((ya - yb) / (xa - xb)))
    except ZeroDivisionError:
        return np.nan


def distance_between_3d(landmarks, a_img, b_img, prefix=''):
    a = a_img - 1
    b = b_img - 1

    xa = landmarks[prefix+'x'+str(a)]
    ya = landmarks[prefix+'y'+str(a)]
    za = landmarks[prefix+'z'+str(a)] * FRAME_WIDTH

    xb = landmarks[prefix+'x'+str(b)]
    yb = landmarks[prefix+'y'+str(b)]
    zb = landmarks[prefix+'z'+str(b)] * FRAME_WIDTH

    return np.sqrt((xa - xb) **2 + (ya - yb) **2 + (za - zb) ** 2)

#### Calculate Angles 2D

In [5]:
import numpy as np
def angle_between(landmarks, a_img, b_img, c_img, prefix=''):
    '''Given a row of landmarks with columns x0, y0, z0 ... x32, y32, z32; and 3
    point numbers a, b, and c, this function finds the measure of angle ABC,
    with point B at the vertex. Returns angle in radians'''

    # the reference image is 1-indexed
    a = a_img - 1
    b = b_img - 1
    c = c_img - 1

    xa = landmarks[prefix+'x'+str(a)]
    ya = landmarks[prefix+'y'+str(a)]
    # za = landmarks[prefix+'z'+str(a)] * FRAME_WIDTH

    xb = landmarks[prefix+'x'+str(b)]
    yb = landmarks[prefix+'y'+str(b)]
    # zb = landmarks[prefix+'z'+str(b)] * FRAME_WIDTH

    xc = landmarks[prefix+'x'+str(c)]
    yc = landmarks[prefix+'y'+str(c)]
    # zc = landmarks[prefix+'z'+str(c)] * FRAME_WIDTH

    side_a = np.sqrt(np.power(xb - xc, 2) + np.power(yb - yc, 2))
    side_b = np.sqrt(np.power(xa - xc, 2) + np.power(ya - yc, 2))
    side_c = np.sqrt(np.power(xb - xa, 2) + np.power(yb - ya, 2))

    numer = np.power(side_a, 2) + np.power(side_c, 2) - np.power(side_b, 2)
    denom = 2 * side_a * side_c
    try:
        return np.arccos(numer/denom)
        # return np.abs(np.arctan((yc - yb) / (xc - xb)) - np.arctan((ya - yb) / (xa - xb)))
    except ZeroDivisionError:
        return np.nan


def distance_between(landmarks, a_img, b_img, prefix=''):
    a = a_img - 1
    b = b_img - 1

    xa = landmarks[prefix+'x'+str(a)]
    ya = landmarks[prefix+'y'+str(a)]
    # za = landmarks[prefix+'z'+str(a)] * FRAME_WIDTH

    xb = landmarks[prefix+'x'+str(b)]
    yb = landmarks[prefix+'y'+str(b)]
    # zb = landmarks[prefix+'z'+str(b)] * FRAME_WIDTH

    return np.sqrt((xa - xb) **2 + (ya - yb) **2)

In [6]:
angles = ['R_axilla', 'L_axilla', 'R_elbow', 'L_elbow', 'R_ulnar_wrist', 
          'L_ulnar_wrist', 'R_thumb', 'L_thumb', 'R_hip', 'L_hip', 'R_knee',
          'L_knee', 'R_neck', 'L_neck', 'R_arm', 'L_arm', 'R_forearm',
          'L_forearm', 'R_thigh', 'L_thigh', 'R_leg', 'L_leg', 
          
          'R_mouth_angle',
          'Upper_mouth_0', 'Upper_mouth_1', 
          'Upper_mouth_2', 'Upper_mouth_3', 'Upper_mouth_4', 
          'L_mouth_angle', 
          'Lower_mouth_0', 'Lower_mouth_1', 
          'Lower_mouth_2', 'Lower_mouth_3', 'Lower_mouth_4',
          
          'R_lateral_canthus', 
          'R_medial_canthus',
          'L_lateral_canthus', 
          'L_medial_canthus',

          'R_eye_height',
          'L_eye_height',
          'Mouth_height',

          # *['v'+str(i) for i in range(33)]
          ]
# add column labels for confidences at the end
conf_labels = ['conf'+str(i) for i in range(133)]
for col in conf_labels:
    angles.append(col)

def all_the_angles(df) -> pd.DataFrame:
    '''
    Given a df of pose landmarks, returns an array of important angles to
    remove location-dependency of data

    see MediaPipe docs for the pose mapping: 
    https://google.github.io/mediapipe/solutions/pose.html'''

    column_data = [
        ### POSE
        angle_between_3d(df, 9, 7, 13), # R axilla
        angle_between_3d(df, 8, 6, 12), # L axilla
        angle_between_3d(df, 7, 9, 11), # R elbow
        angle_between_3d(df, 6, 8, 10), # L elbow
        angle_between(df, 9, 11, 130), # R ulnar wrist
        angle_between(df, 8, 10, 109), # L ulnar wrist
        angle_between(df, 9, 11, 118), # R thumb
        angle_between(df, 8, 10, 97), # L thumb
        angle_between_3d(df, 7, 13, 15), # R hip
        angle_between_3d(df, 6, 12, 14), # L hip
        angle_between_3d(df, 13, 15, 17), # R knee
        angle_between_3d(df, 12, 14, 16), # L knee
        angle_between_3d(df, 1, 5, 7),   # R "neck"
        angle_between_3d(df, 1, 4, 6),   # L "neck"

        distance_between_3d(df, 7, 9),  # R arm
        distance_between_3d(df, 6, 8),  # L arm
        distance_between_3d(df, 9, 11),  # R forearm
        distance_between_3d(df, 8, 10),  # L forearm
        distance_between_3d(df, 13, 15),  # R thigh
        distance_between_3d(df, 12, 14),  # L thigh
        distance_between_3d(df, 15, 17),  # R leg
        distance_between_3d(df, 14, 16),  # L leg
        ### FACE
        angle_between(df, 83, 72, 73), # R mouth angle
        angle_between(df, 72, 73, 74),
        angle_between(df, 73, 74, 75),
        angle_between(df, 74, 75, 76),
        angle_between(df, 75, 76, 77),
        angle_between(df, 76, 77, 78),
        angle_between(df, 77, 78, 79), # L mouth angle
        angle_between(df, 78, 79, 80),
        angle_between(df, 79, 80, 81),
        angle_between(df, 80, 81, 82),
        angle_between(df, 81, 82, 83),
        angle_between(df, 82, 83, 72),

        ### EYES
        angle_between(df, 65, 60, 61),  # R lateral canthus
        angle_between(df, 62, 63, 64),  # R medial canthus
        angle_between(df, 70, 69, 68),  # L lateral canthus
        angle_between(df, 67, 66, 71),  # L medial canthus

        distance_between(df, 62, 64), # R eye height
        distance_between(df, 67, 71), # L eye height
        distance_between(df, 86, 90), # mouth height
    ]
    confs = df[conf_labels]
    column_data = np.append(np.array(column_data).transpose(), confs, axis=1) 

    new_df = pd.DataFrame(data=column_data)
    new_df.columns = angles
    return new_df

def compute_angles(df: pd.DataFrame):
    df_angles = all_the_angles(df)
    df_angles['class'] = df['class']
    if 'pt_id' in df.columns:
        df_angles['pt_id'] = df['pt_id']
    if 'vid_id' in df.columns:
        df_angles['vid_id'] = df['vid_id']
    # SKLearn's HistGradientBoostingClassifier cna handle NA values,
    # so I'd like to see how that works
    #return df_angles.drop('vid_id', axis=1)
    #return df_angles.groupby(df_angles['vid_id']).fillna(method='ffill')
    return df_angles.groupby(df_angles['vid_id']).ffill()

### Color Lookup

In [7]:
'''color_lookup = {
    6348578: 'tab:blue',
    2767430: 'tab:orange',
    5352576: 'tab:green',
    5514820: 'tab:red',
    5271491: 'tab:purple',
    5323733: 'tab:brown',
    6394294: 'tab:pink',
    5439586: 'tab:gray',
    2604950: 'tab:olive',
    5497695: 'tab:cyan',
    5447543: 'turquoise',
    5235825: 'gold',
    2940398: 'lightgreen',
    5512494: 'deeppink',
    #6381028: 'black',  #7953A100,7953A200,7953A300,7953A400,7953A501,7953A600,7953A700
    # new pts
    6338772: 'gray',
    6338772: 'gray',
    582992: 'gray', 
    5489744: 'gray', 
    5489744: 'gray'
}'''

"color_lookup = {\n    6348578: 'tab:blue',\n    2767430: 'tab:orange',\n    5352576: 'tab:green',\n    5514820: 'tab:red',\n    5271491: 'tab:purple',\n    5323733: 'tab:brown',\n    6394294: 'tab:pink',\n    5439586: 'tab:gray',\n    2604950: 'tab:olive',\n    5497695: 'tab:cyan',\n    5447543: 'turquoise',\n    5235825: 'gold',\n    2940398: 'lightgreen',\n    5512494: 'deeppink',\n    #6381028: 'black',  #7953A100,7953A200,7953A300,7953A400,7953A501,7953A600,7953A700\n    # new pts\n    6338772: 'gray',\n    6338772: 'gray',\n    582992: 'gray', \n    5489744: 'gray', \n    5489744: 'gray'\n}"

if I just change the color_lookup dictionary, I can get a test dataset to work on

### Calculate Velocity and Acceleration

In [None]:
from sklearn.metrics import confusion_matrix
from scipy.signal import welch
import numpy as np
import time

all_vid_df_times = []
for _idx, row in generalized_sz.iterrows():
    try:
        df = pd.read_csv('data/pose/' + row['Filename']+'_dbx.csv')
    except FileNotFoundError:
        print(f"File not found: {'data/pose/' + row['Filename']+'_dbx.csv'}")
        continue

    # only use the same patients that we've been using
    '''if row['ID'] not in color_lookup.keys():
        continue'''
    
    try:
        m, s = row['Video\nT0 to Start'].split(':')
    # when the Start time is listed as "-", don't use this video
    except ValueError:
        continue
    # 60 seconds per minute, 30 at the end is the fps
    start_frame = (int(m) * 60 + int(s)) * 30
    df['class'] = np.where(df.index < start_frame, 'nml', 'sz')
    df['class'] = pd.Categorical(df['class'])
    df['vid_id'] = row['Filename']
    df_angles = compute_angles(df)

    if 'vid_id' not in df_angles.columns:
        df_angles['vid_id'] = df['vid_id']
    df_angles['pt_id'] = row['ID']
    df_angles = pd.concat([df_angles, df.loc[:,'xmin':'ymax']], axis=1)
    v = df_angles.drop('pt_id', axis=1).drop('class', axis=1).groupby('vid_id').rolling(300).mean().diff(150)
    # v = df_angles.drop('class', axis=1).groupby('vid_id').rolling(10).mean().diff(30)
    v = v.rename(lambda x: 'v_'+x, axis=1)
    a = v.reset_index().drop('level_1', axis=1).groupby('vid_id').diff()
    a = a.rename(lambda x: 'a_'+x[2:], axis=1)

    v = v.reset_index(drop=True)
    a = a.reset_index(drop=True)
    df_time = df_angles.merge(v, left_index=True, right_index=True).merge(a, left_index=True, right_index=True)
    
    all_vid_df_times.append(df_time)

  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)


File not found: data/pose/79611O00_dbx.csv


  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)


In [None]:
type(all_vid_df_times)

In [None]:
df_time = pd.concat(all_vid_df_times)

In [None]:
df_time['pt_id'].nunique()

In [None]:
df_time['vid_id'].nunique()

In [None]:
print(df_time.columns)

In [None]:
# Drop columns that start with specific prefixes - velocity and acceleration calculations were done on conf
# copy conf after velocity and acceleration calculations - TODO
columns_to_drop = [col for col in df_time.columns if col.startswith(("v_conf", "a_conf", "v_x", "v_y", "a_x", "a_y"))]
df_time = df_time.drop(columns=columns_to_drop)

In [None]:
df_time

In [None]:
# Save the DataFrame to a pickle file using pandas
#df_time.to_pickle('data/df_time_30_60_without_detection.pkl')
df_time.to_pickle('data/df_time_find_best.pkl')
