In [None]:
import numpy as np
import pandas as pd
import scipy.stats

pd.options.mode.chained_assignment = None
from matplotlib import pyplot as plt, rcParams
# import cv2
import seaborn as sns

sns.set(style="white", context="paper")
from cycler import cycler
import os, sys
import glob
from datetime import datetime, timedelta
from itertools import combinations, product
import base64
from PIL import Image
from io import BytesIO as _BytesIO
import requests
import json
import pickle
from datetime import datetime
from IPython.display import display, Markdown, Latex
from sklearn.metrics import *
import collections
from copy import deepcopy
import traceback
from sympy import Point, Polygon
from decorators import *
from smartprint import smartprint as sprint
from scipy.spatial.distance import cdist
from sklearn.cluster import DBSCAN
# import plotly
# from pandas_profiling import ProfileReport

pd.options.display.max_columns = None
def printm(s):
    """Render the string ``s`` as Markdown in the notebook output area."""
    rendered = Markdown(s)
    return display(rendered)
    
# Root of every cache directory used by this notebook.
SERVER_CACHE_DIR = '/mnt/ci-nas-cache/edulyzeV2/cache_compute_4/fixed_face'
os.makedirs(SERVER_CACHE_DIR,exist_ok=True)

# Per-course frame metadata pickles (written by the frame-file scan cell).
# This directory was the only cache location never created, so the first
# pickle.dump into it failed on a fresh cache.
track_analysis_meta_cache = f'{SERVER_CACHE_DIR}/analysis_tracking/meta_info'
os.makedirs(track_analysis_meta_cache, exist_ok=True)

# Input tree that is globbed as <course>/<session>/<frame files>; read-only here.
base_dir = '/mnt/ci-nas-cache/edulyzeV2/pose_face_gaze_emb_fixed_face/'

# Per-session tracking tables (<session>.pb).
track_analysis_session_data = f'{SERVER_CACHE_DIR}/analysis_tracking/session_tracking_info'
os.makedirs(track_analysis_session_data,exist_ok=True)

# Old-id -> new-id maps produced by tracking post-processing (<session>.pb).
postprocessed_id_map_data_dir = f'{SERVER_CACHE_DIR}/analysis_tracking/processed_id_maps'
os.makedirs(postprocessed_id_map_data_dir, exist_ok=True)

# Per-session, per-frame embedding/gaze information (<session>-front.pb).
emb_analysis_session_data = f'{SERVER_CACHE_DIR}/analysis_emb/session_emb_info_new'
os.makedirs(emb_analysis_session_data,exist_ok=True)

embmatched_id_raw_data_dir = f'{SERVER_CACHE_DIR}/analysis_emb/embmatched_id_raw'
os.makedirs(embmatched_id_raw_data_dir,exist_ok=True)

# CSVs with an `id_pair` column of ids eligible for merging (<session>-front.csv).
embmatched_id_map_data_dir = f'{SERVER_CACHE_DIR}/analysis_tracking/embmatched_id_maps'
os.makedirs(embmatched_id_map_data_dir, exist_ok=True)

# Per-session inputs for cross-session matching (<session>-front.pb).
cross_session_input_data_dir = f'{SERVER_CACHE_DIR}/analysis_emb/cross_session_input'
os.makedirs(cross_session_input_data_dir, exist_ok=True)

id_viz_cache_root = f'{SERVER_CACHE_DIR}/analysis_emb/session_matching_info'
os.makedirs(id_viz_cache_root, exist_ok=True)


## Get frame file data for all sessions

In [None]:

# Collect, per course and session, which frame files exist on disk.
# Results are cached per course as a pickle under `track_analysis_meta_cache`.
# NOTE: pickle.load can execute arbitrary code; only point this at trusted caches.
os.makedirs(track_analysis_meta_cache, exist_ok=True)  # the dump below fails if this is missing

frame_file_data = {}
for course_idx, course_dir in enumerate(glob.glob(f"{base_dir}/*")):
    course_name = course_dir.split("/")[-1]
    course_cache_file = f"{track_analysis_meta_cache}/{course_name}"
    if os.path.exists(course_cache_file):
        # Cached course: reuse the stored metadata and skip the directory scan.
        with open(course_cache_file, "rb") as cache_fh:
            frame_file_data[course_name] = pickle.load(cache_fh)
        continue

    course_meta = {}
    for session_idx, session_dir in enumerate(glob.glob(f"{course_dir}/*")):
        session_name = session_dir.split("/")[-1]
        frame_file_names = [xr.split("/")[-1] for xr in glob.glob(f"{session_dir}/*")]
        # Frame files are named "<frame id>.<ext>"; the 'end.pb' marker and any
        # stray file without a numeric stem are not frames and are skipped.
        frame_ids = [
            int(xr.split(".")[0])
            for xr in frame_file_names
            if xr != 'end.pb' and xr.split(".")[0].isdigit()
        ]
        course_meta[session_name] = {
            'is_completed': 'end.pb' in frame_file_names,
            'frame_ids': sorted(frame_ids),
            'dir_location': session_dir,
        }
        print(f"Got metadata for course: {course_idx}-{course_name}, session:{session_idx}-{session_name}")

    frame_file_data[course_name] = course_meta
    with open(course_cache_file, "wb") as cache_fh:
        pickle.dump(course_meta, cache_fh)

frame_file_data.keys()


In [None]:
# Session ids considered by this notebook. Later cells select the sessions of one
# course by substring match on the course code and read the MMDD part of the
# trailing timestamp as a short session key.
session_filter_list = ['classinsight-cmu_05681A_ghc_4301_201905011630',
 'classinsight-cmu_05681A_ghc_4301_201904171630',
 'classinsight-cmu_05681A_ghc_4301_201902201630',
 'classinsight-cmu_05681A_ghc_4301_201904101630',
 'classinsight-cmu_05681A_ghc_4301_201901231630',
 'classinsight-cmu_05418A_ghc_4102_201902251200',
 'classinsight-cmu_05418A_ghc_4102_201904081200',
 'classinsight-cmu_05418A_ghc_4102_201905011200',
 'classinsight-cmu_05418A_ghc_4102_201904291200',
 'classinsight-cmu_05418A_ghc_4102_201904011200',
 'classinsight-cmu_05748A_ghc_4101_201902141630',
 'classinsight-cmu_05748A_ghc_4101_201904021630',
 'classinsight-cmu_05748A_ghc_4101_201902051630',
 'classinsight-cmu_05748A_ghc_4101_201902281630',
 'classinsight-cmu_05748A_ghc_4101_201903071630',
 'classinsight-cmu_21127J_ghc_4102_201904230930',
 'classinsight-cmu_21127J_ghc_4102_201903260930',
 'classinsight-cmu_21127J_ghc_4102_201904160930',
 'classinsight-cmu_21127J_ghc_4102_201904300930',
 'classinsight-cmu_21127J_ghc_4102_201903190930',
 'classinsight-cmu_05410A_ghc_4301_201904151500',
 'classinsight-cmu_05410A_ghc_4301_201902251500',
 'classinsight-cmu_05410A_ghc_4301_201904081500',
 'classinsight-cmu_05410A_ghc_4301_201904221500',
 'classinsight-cmu_05410A_ghc_4301_201902181500',
                       
 'classinsight-cmu_17214B_ph_a21_201902271030',
 'classinsight-cmu_17214B_ph_a21_201903061030',
 'classinsight-cmu_17214B_ph_a21_201904031030',
 'classinsight-cmu_17214B_ph_a21_201904101030',
 'classinsight-cmu_17214B_ph_a21_201904241030',
 'classinsight-cmu_17214C_ph_225b_201904031130',
 'classinsight-cmu_17214C_ph_225b_201904101130',
 'classinsight-cmu_17214C_ph_225b_201904171130',
 'classinsight-cmu_17214C_ph_225b_201904241130',
 'classinsight-cmu_17214C_ph_225b_201905011130',
 'classinsight-cmu_05410B_ghc_4211_201902111500',
 'classinsight-cmu_05410B_ghc_4211_201903181500',
 'classinsight-cmu_05410B_ghc_4211_201904081500',
 'classinsight-cmu_05410B_ghc_4211_201904151500',
 'classinsight-cmu_05410B_ghc_4211_201904221500',
 'classinsight-cmu_05410B_ghc_4211_201901281500'
]


In [None]:
# Course under analysis; a session belongs to it when its id contains the course code.
course = '05748A'
sessions = list(filter(lambda session_name: course in session_name, session_filter_list))
sessions


In [None]:
# Load the cross-session matching input of every selected session.
# Files are opened in a `with` block so handles are closed promptly.
# NOTE: pickle.load can execute arbitrary code; only load trusted cache files.
course_input_dict = {}
for session in sessions:
    session_input_file = f'{cross_session_input_data_dir}/{session}-front.pb'
    with open(session_input_file, "rb") as input_fh:
        course_input_dict[session] = pickle.load(input_fh)
course_input_dict.keys()


In [None]:
for session in sessions:
    sprint(session, course_input_dict[session].keys())


In [None]:
# NOTE(review): `session` is whatever value the preceding loop left behind, and the
# lookup assumes that session contains a track id 0 -- confirm both before relying on this.
course_input_dict[session][0].keys()


In [None]:
sorted(sessions)


# Experiments with networkX

In [None]:
# Candidate identity matches between sessions, based on gaze embeddings only.
# For every ordered pair of (distinct) sessions, each id of session A keeps up to
# three nearest ids of session B whose distance is below MATCH_THRESHOLD.
# Only the first two sessions (in sorted order) are compared here.
MATCH_THRESHOLD = 0.4
node_set = set()
session_matches = {}
sessions_under_test = sorted(sessions)[:2]
for sessionA, sessionB in product(sessions_under_test, sessions_under_test):
    # Short session key: characters 4..7 of the trailing timestamp field.
    sessionA_key = sessionA.split("_")[-1][4:8]
    sessionB_key = sessionB.split("_")[-1][4:8]
    if sessionA == sessionB:
        continue

    # match_scores[idA][idB] -> distance (np.inf when either embedding is missing)
    match_scores = {}
    idsA, idsB = course_input_dict[sessionA], course_input_dict[sessionB]
    for idA, idB in product(idsA.keys(), idsB.keys()):
        gaze_embA = idsA[idA]['gaze_emb']
        gaze_embB = idsB[idB]['gaze_emb']
        scores_for_idA = match_scores.setdefault(idA, {})
        if gaze_embA is None or gaze_embB is None:
            scores_for_idA[idB] = np.inf
        else:
            scores_for_idA[idB] = cdist(gaze_embA.reshape(1, -1), gaze_embB.reshape(1, -1))[0][0]

    # Columns are session-A ids, rows are session-B ids.
    df_match = pd.DataFrame(match_scores)
    final_matches = []
    for col in df_match.columns:
        nearest_ids = df_match[col].sort_values().head(3).index.values
        for match_id in nearest_ids:
            score = match_scores[col][match_id]
            if score < MATCH_THRESHOLD:
                final_matches.append((f'{sessionA_key}_{col}', f'{sessionB_key}_{match_id}', score))
                node_set.add((sessionA_key, f'{sessionA_key}_{col}'))
                node_set.add((sessionB_key, f'{sessionB_key}_{match_id}'))

    session_matches.setdefault(sessionA_key, {})[sessionB_key] = final_matches
                
        


In [None]:
# Sorting makes the node ids reproducible: the iteration order of a set of strings
# changes between kernel restarts, so `list(node_set)` gave every run different ids
# (and the hard-coded `node_ids[...]` lookups further down pointed at arbitrary nodes).
node_list = sorted(node_set)
color_map=  {
    '0205':"red",
    '0214':"blue",
    '0228':"green",
    '0307':"black",
    '0402':'brown'
}
node_color = [color_map[xr[0]] for xr in node_list]
node_name = [xr[1] for xr in node_list]
node_ids = np.arange(len(node_name))
# name -> node id lookup (replaces repeated O(n) list.index calls)
node_index = {name: idx for idx, name in enumerate(node_name)}

session_pos_val = ['0205', '0214', '0228', '0307', '0402']
# Flatten the per-pair match lists into one plain list of (nameA, nameB, distance).
# A plain list (instead of np.concatenate) keeps distances as floats and does not
# fail when some session pair has no matches at all.
edge_labels = [
    match
    for xr, yr in product(session_matches.keys(), session_matches.keys())
    if not (xr == yr)
    for match in session_matches[xr].get(yr, [])
]
# Edge weight is a similarity: 1 - distance, rounded to two decimals.
edge_weights = [round(1 - float(xr[2]), 2) for xr in edge_labels]
weighted_edge_ids = [(node_index[xr[0]], node_index[xr[1]], round(1 - float(xr[2]), 2)) for xr in edge_labels]
edge_ids = [(node_index[xr[0]], node_index[xr[1]]) for xr in edge_labels]

len(edge_labels)


In [None]:
# Layout: x is the position of the node's session in `session_pos_val`,
# y is twice the numeric id embedded in the node name ("<session>_<id>").
node_pos = {
    nid: (session_pos_val.index(session_key), int(name.split("_")[1]) * 2)
    for nid, (session_key, name) in zip(node_ids, node_list)
}
# node_pos


In [None]:
import networkx as nx

# Directed graph of candidate matches: one node per (session, id), one weighted
# edge per candidate match, drawn at the precomputed `node_pos` coordinates.
plt.figure(figsize=(40,40))
G = nx.DiGraph()
G.add_nodes_from(node_ids)
G.add_weighted_edges_from(weighted_edge_ids)

labels = dict(zip(node_ids, node_name))
edge_label_map = dict(zip(edge_ids, edge_weights))

nx.draw_networkx(
    G,
    pos=node_pos,
    labels=labels,
    arrows=True,
    node_shape="s",
    node_size=1000,
    node_color=node_color,
    edgecolors="gray",  # outline colour of the node boxes
)
nx.draw_networkx_edge_labels(
    G,
    pos=node_pos,
    edge_labels=edge_label_map,
    font_color='black',
)

plt.show()


In [None]:
session_matches.keys()


In [None]:
# get a filtered graph using only connections from one single edge
def drawnodegraph(graph, nodename, info=False, weightbar=0):
    """Draw ``nodename`` together with its direct neighbours in ``graph``.

    Layout, labels and colours come from the notebook-level ``node_pos``,
    ``node_name`` and ``node_color`` lookups, all indexed by node id.

    Args:
        graph: networkx graph; edge ``'weight'`` attributes drive both the
            filtering and the drawn line width.
        nodename: node to focus on; it is drawn larger than its neighbours.
        info: currently unused; kept so existing calls keep working.
        weightbar: edges whose weight is <= this value are removed before the
            neighbours are collected (assumes non-negative weights).

    Returns:
        Whatever ``plt.show()`` returns.
    """
    # Work on a copy so the caller's graph is never modified.
    temp = graph.copy(as_view=False)
    weak_edges = [e for e, w in nx.get_edge_attributes(temp, 'weight').items() if w <= weightbar]
    temp.remove_edges_from(weak_edges)

    # Target node plus everything it still points to (successors only for a DiGraph).
    nodelist = list(temp.neighbors(n=nodename))
    nodelist.append(nodename)
    print(nodelist)
    Sub = temp.subgraph(nodelist)

    # Edge widths scale with weight. The previous `zip(*...)` unpacking raised a
    # ValueError when the focus node had no remaining edges; fall back to the
    # default width so the lone node is still drawn.
    weights = list(nx.get_edge_attributes(Sub, 'weight').values())
    width = [w * 2 for w in weights] or 1.0

    # Enlarge the target node relative to its neighbours.
    node_map = {nodename: 7000}
    nodesize = [node_map.get(node, 3500) for node in Sub.nodes()]

    sub_pos = {xr: node_pos[xr] for xr in nodelist}
    print(sub_pos)
    nx.draw_networkx(Sub,
                     pos=sub_pos,
                     with_labels=True,
                     labels={xr: node_name[xr] for xr in nodelist},
                     node_color=[node_color[xr] for xr in Sub.nodes],
                     node_size=nodesize,
                     edge_color="black",
                     style="solid",
                     font_color='white',
                     font_size=12,
                     width=width,
                     connectionstyle="arc3,rad=0.05")
    plt.subplots_adjust(left=2, bottom=3.2, right=6, top=6)

    return plt.show()


In [None]:
drawnodegraph(G, node_ids[5])
drawnodegraph(G, node_ids[16])
drawnodegraph(G, node_ids[22])
drawnodegraph(G, node_ids[27])


In [None]:
drawnodegraph(G, node_ids[21])


# Write an algorithm for the densest subnet of a given size (5 in this case)

In [None]:
# create a graph with networkx
# collect all nodes


In [None]:
session_matches['0205']['0214']


In [None]:
session_matches['0228']['0214']


In [None]:
session_matches['0214']['0228']


# Method 1: Sort by face size, then select the best of three candidates based on the smallest face-size difference.

In [None]:
# Candidate matches for every ordered pair of distinct sessions.
# An (idA, idB) pair is kept when idB is among the three nearest ids of idA by
# gaze embedding or by cluster embedding, and BOTH distances are below
# MATCH_THRESHOLD. Each row also records the median face areas of both ids.
MATCH_THRESHOLD=0.3
final_matches = []
for (sessionA, sessionB) in product(sorted(sessions), sorted(sessions)):
    sessionA_key, sessionB_key = sessionA.split("_")[-1][4:8], sessionB.split("_")[-1][4:8]
    if sessionA == sessionB:
        continue

    infoA, infoB = course_input_dict[sessionA], course_input_dict[sessionB]
    # match_scores_*[idA][idB] -> distance (np.inf when an embedding is missing)
    match_scores_gaze = {}
    match_scores_clu  = {}
    for idA, idB in product(infoA.keys(), infoB.keys()):
        gaze_embA, gaze_embB = infoA[idA]['gaze_emb'], infoB[idB]['gaze_emb']
        clu_embA, clu_embB = infoA[idA]['cluster_emb'], infoB[idB]['cluster_emb']

        if gaze_embA is None or gaze_embB is None:
            gaze_distance = np.inf
        else:
            gaze_distance = cdist(gaze_embA.reshape(1,-1), gaze_embB.reshape(1,-1))[0][0]
        match_scores_gaze.setdefault(idA, {})[idB] = gaze_distance

        if clu_embA is None or clu_embB is None:
            clu_distance = np.inf
        else:
            clu_distance = cdist(clu_embA.reshape(1,-1), clu_embB.reshape(1,-1))[0][0]
        match_scores_clu.setdefault(idA, {})[idB] = clu_distance

    # Columns are session-A ids, rows are session-B ids.
    df_match_gaze = pd.DataFrame(match_scores_gaze)
    df_match_clu = pd.DataFrame(match_scores_clu)
    all_cols = np.unique(df_match_gaze.columns.values.tolist() + df_match_clu.columns.values.tolist())
    for col in all_cols:
        # Pad whichever table lacks this id. The second branch used to pad
        # df_match_clu again instead of df_match_gaze.
        if col not in df_match_clu.columns:
            df_match_clu[col] = np.inf
        if col not in df_match_gaze.columns:
            df_match_gaze[col] = np.inf

        candidate_ids = np.unique(
            df_match_clu[col].sort_values().head(3).index.values.tolist()
            + df_match_gaze[col].sort_values().head(3).index.values.tolist()
        )
        for match_id in candidate_ids:
            # Read from the (padded) tables so the padding above is what gets tested.
            gaze_score = df_match_gaze.at[match_id, col]
            clu_score = df_match_clu.at[match_id, col]
            if (clu_score < MATCH_THRESHOLD) and (gaze_score < MATCH_THRESHOLD):
                col_face_area = infoA[col]['face_width_med'] * infoA[col]['face_height_med']
                match_face_area = infoB[match_id]['face_width_med'] * infoB[match_id]['face_height_med']
                # The relative area difference is derived later from the stored
                # areas; the unused per-row computation was dropped here.
                final_matches.append((sessionA_key, sessionB_key,
                                      f'{sessionA_key}_{col}', f'{sessionB_key}_{match_id}',
                                      gaze_score, clu_score,
                                      col_face_area, match_face_area))


In [None]:
# One row per candidate match; the column order mirrors the tuples appended to `final_matches`.
match_columns = [
    'sessionA', 'sessionB',
    'idA', 'idB',
    'match_score_gaze', 'match_score_clu',
    'face_areaA', 'face_areaB',
]
df_final_matches = pd.DataFrame(final_matches, columns=match_columns)
df_final_matches


In [None]:
df_final_matches[df_final_matches.idA=='0205_7'].sort_values(by='match_score_gaze')


In [None]:
s1,s2 = '0205','0214'


In [None]:
# All candidate matches are used as-is.
# NOTE(review): `s1`/`s2` are defined just above but never applied as a filter here,
# so this is not restricted to one session pair -- confirm that is intended.
df_pair_matches =df_final_matches
# df_pair_matches


In [None]:
# Order candidates by face area (largest first) and add `area_diff`: the absolute
# area difference as a percentage of the smaller of the two areas.
pairs_by_area = df_pair_matches.sort_values(by=['face_areaA', 'face_areaB'], ascending=False)
area_gap = np.abs(pairs_by_area['face_areaA'] - pairs_by_area['face_areaB'])
smaller_area = np.minimum(pairs_by_area['face_areaA'], pairs_by_area['face_areaB'])
df_pair_matches = pairs_by_area.assign(area_diff=area_gap * 100 / smaller_area)
df_pair_matches.sort_values(by='idB')


In [None]:
df_pair_matches[(df_pair_matches.match_score_gaze<0.2) & (df_pair_matches.match_score_clu<0.2) & (df_pair_matches.area_diff<20) & (np.minimum(df_pair_matches.face_areaA, df_pair_matches.face_areaB)>np.median(df_pair_matches.face_areaA))]


In [None]:
id_matches = {}
matched_idA, matched_idB = [],[]


In [None]:
# Round 1: strict thresholds on both scores, similar face areas, and faces larger
# than the median face_areaA. Candidates are accepted greedily, best gaze score
# first; an id on either side can be used only once.
large_face_cutoff = np.median(df_pair_matches.face_areaA)
round1_mask = (
    (df_pair_matches.match_score_gaze < 0.2)
    & (df_pair_matches.match_score_clu < 0.2)
    & (df_pair_matches.area_diff < 20)
    & (np.minimum(df_pair_matches.face_areaA, df_pair_matches.face_areaB) > large_face_cutoff)
)
best_match_1 = df_pair_matches[round1_mask].sort_values(by='match_score_gaze')
sprint(best_match_1)
for idx, row in best_match_1.iterrows():
    if (row['idA'] in matched_idA) or (row['idB'] in matched_idB):
        continue
    id_matches[row['idA']] = row['idB']
    matched_idA.append(row['idA'])
    matched_idB.append(row['idB'])

# Candidates whose ids are both still unmatched go to the next round.
still_unmatched = (~df_pair_matches.idA.isin(matched_idA)) & (~df_pair_matches.idB.isin(matched_idB))
df_next_matches = df_pair_matches[still_unmatched]
id_matches


In [None]:
# Attach `idB_count`: how many candidate idB rows each idA still has.
idB_per_idA = df_next_matches.groupby('idA', as_index=False)['idB'].count()
df_next_matches = df_next_matches.merge(idB_per_idA, on='idA', suffixes=('', '_count'))
df_next_matches


In [None]:
# Round 2: only idA values with a single remaining candidate, with slightly
# relaxed score thresholds; greedy acceptance as in round 1.
df_single_matches = df_next_matches[df_next_matches.idB_count <= 1]
large_face_cutoff = np.median(df_pair_matches.face_areaA)
round2_mask = (
    (df_single_matches.match_score_gaze < 0.25)
    & (df_single_matches.match_score_clu < 0.25)
    & (df_single_matches.area_diff < 20)
    & (np.minimum(df_single_matches.face_areaA, df_single_matches.face_areaB) > large_face_cutoff)
)
best_match_2 = df_single_matches[round2_mask].sort_values(by='match_score_gaze')
sprint(best_match_2)
for idx, row in best_match_2.iterrows():
    if (row['idA'] in matched_idA) or (row['idB'] in matched_idB):
        continue
    id_matches[row['idA']] = row['idB']
    matched_idA.append(row['idA'])
    matched_idB.append(row['idB'])

still_unmatched = (~df_pair_matches.idA.isin(matched_idA)) & (~df_pair_matches.idB.isin(matched_idB))
df_next_matches = df_pair_matches[still_unmatched]
id_matches


In [None]:
# Attach `idA_count`: how many candidate idA rows each idB still has.
idA_per_idB = df_next_matches.groupby('idB', as_index=False)['idA'].count()
df_next_matches = df_next_matches.merge(idA_per_idB, on='idB', suffixes=('', '_count'))
df_next_matches


In [None]:
# Round 3: idB values claimed by a single idA, with similar face areas; greedy
# acceptance ordered by gaze score.
round3_mask = (df_next_matches.idA_count <= 1) & (df_next_matches.area_diff < 20)
best_match_3 = df_next_matches[round3_mask].sort_values(by='match_score_gaze')
sprint(best_match_3)
for idx, row in best_match_3.iterrows():
    if (row['idA'] in matched_idA) or (row['idB'] in matched_idB):
        continue
    id_matches[row['idA']] = row['idB']
    matched_idA.append(row['idA'])
    matched_idB.append(row['idB'])
        

In [None]:
df_next_matches = df_pair_matches[(~df_pair_matches.idA.isin(matched_idA)) & (~df_pair_matches.idB.isin(matched_idB))]
id_matches


In [None]:
# Attach both candidate counts: `idA_count` (per idB) first, then `idB_count` (per idA).
idA_per_idB = df_next_matches.groupby('idB', as_index=False)['idA'].count()
df_next_matches = df_next_matches.merge(idA_per_idB, on='idB', suffixes=('', '_count'))
idB_per_idA = df_next_matches.groupby('idA', as_index=False)['idB'].count()
df_next_matches = df_next_matches.merge(idB_per_idA, on='idA', suffixes=('', '_count'))
df_next_matches.sort_values(by='idA_count')


In [None]:
def _runner_up_gap(scores):
    """Gap between the two smallest scores; 1.0 when fewer than two scores exist."""
    ordered = sorted(scores)
    return 1.0 if len(ordered) < 2 else ordered[1] - ordered[0]


# Attach `match_score_gaze_best_diff`: per idA, how far the best gaze score is
# ahead of the second best.
gaze_gap_per_idA = df_next_matches.groupby('idA', as_index=False).agg({'match_score_gaze': _runner_up_gap})
df_next_matches = pd.merge(df_next_matches, gaze_gap_per_idA, on='idA', suffixes=('', '_best_diff'))


In [None]:
# Attach `match_score_gaze_min_match`: the smallest gaze score of each idA.
best_gaze_per_idA = df_next_matches.groupby('idA', as_index=False).agg(
    {'match_score_gaze': lambda scores: sorted(scores)[0]}
)
df_next_matches = pd.merge(df_next_matches, best_gaze_per_idA, on='idA', suffixes=('', '_min_match'))



In [None]:
# Round 4: keep, per idA, its best gaze match when that match is good (< 0.2) and
# clearly ahead of the runner-up (gap > 0.05); greedy acceptance as before.
is_good_score = df_next_matches.match_score_gaze < 0.2
is_clear_winner = df_next_matches.match_score_gaze_best_diff > 0.05
is_best_for_idA = df_next_matches.match_score_gaze == df_next_matches.match_score_gaze_min_match
best_match_3 = df_next_matches[is_good_score & is_clear_winner & is_best_for_idA].sort_values(by='match_score_gaze')
sprint(best_match_3)
for idx, row in best_match_3.iterrows():
    if (row['idA'] in matched_idA) or (row['idB'] in matched_idB):
        continue
    id_matches[row['idA']] = row['idB']
    matched_idA.append(row['idA'])
    matched_idB.append(row['idB'])
        


In [None]:
df_next_matches = df_pair_matches[(~df_pair_matches.idA.isin(matched_idA)) & (~df_pair_matches.idB.isin(matched_idB))]
id_matches


In [None]:
# Keep candidates whose gaze and cluster scores agree to within 0.05, then attach
# the per-idB (`idA_count`) and per-idA (`idB_count`) candidate counts.
score_gap = np.abs(df_next_matches['match_score_gaze'] - df_next_matches['match_score_clu'])
df_next_matches = df_next_matches[score_gap < 0.05]
idA_per_idB = df_next_matches.groupby('idB', as_index=False)['idA'].count()
df_next_matches = df_next_matches.merge(idA_per_idB, on='idB', suffixes=('', '_count'))
idB_per_idA = df_next_matches.groupby('idA', as_index=False)['idB'].count()
df_next_matches = df_next_matches.merge(idB_per_idA, on='idA', suffixes=('', '_count'))
df_next_matches


In [None]:
df_next_matches = df_next_matches.sort_values(by=['face_areaA'],ascending=False)


In [None]:
df_next_matches


In [None]:
# find matches where the best matches are not overlapping, and leave everything else
id_matches


In [None]:
sprint(s1,s2)
sprint(id_matches)


In [None]:
sprint(s1,s2)
sprint(id_matches)


In [None]:
sprint(s1,s2)
sprint(id_matches)


# Get visualization for all sessions

In [None]:
# Paths for one sample session: cached tracking table, post-processed id map,
# frame directory and the raw video recording.
sample_course = '05681A'
sample_session_id = 'classinsight-cmu_05681A_ghc_4301_201904101630-front'

session_tracking_cache_file = f"{track_analysis_session_data}/{sample_session_id}.pb"
session_preprocessed_id_map_file = f"{postprocessed_id_map_data_dir}/{sample_session_id}.pb"
session_frame_dir = f'{base_dir}/{sample_course}/{sample_session_id}'

# The video folder is named after the session id without its "-front" suffix.
video_folder = sample_session_id.split("-front")[0]
session_video_file = f'/mnt/ci-nas-classes/classinsight/2019S/video_backup/{video_folder}/{sample_session_id}.avi'
session_frame_dir


In [None]:
# Load the session tracking table (transposed on load) and the old-id -> new-id map,
# then fold every old-id column into its `N<new_id>` column.
# NOTE: pickle.load can execute arbitrary code; only load trusted cache files.
with open(session_tracking_cache_file, "rb") as tracking_fh:
    df_tracking_new = pickle.load(tracking_fh).transpose()
with open(session_preprocessed_id_map_file, "rb") as id_map_fh:
    old_to_new_id_map = pickle.load(id_map_fh)
total_idxs = df_tracking_new.index.max()
for old_id in old_to_new_id_map:
    new_id = old_to_new_id_map[old_id]
    # NOTE(review): 10000 looks like a "discard this id" sentinel -- such columns
    # are dropped without being copied anywhere. Confirm against the id-map producer.
    if not new_id==10000:
        new_id_col = f'N{new_id}'
        if new_id_col not in df_tracking_new:
            df_tracking_new[new_id_col] = None
        old_track = df_tracking_new[old_id]
        # Take the old id's value where it has one, otherwise keep what the new
        # column already holds. The fallback used to be the old column itself,
        # which made this a plain overwrite and lost earlier old ids that map to
        # the same new id.
        df_tracking_new[new_id_col] = old_track.where(old_track.notnull(), df_tracking_new[new_id_col])
    df_tracking_new = df_tracking_new.drop(old_id, axis=1)




# Run single session compilation for cross session id tracking input

In [None]:
session = 'classinsight-cmu_05681A_ghc_4301_201904171630'

# Load per-frame embedding/gaze info, the post-processed id map and the eligible
# id pairs for this session. Files are opened in `with` blocks so the handles are
# closed promptly.
# NOTE: pickle.load can execute arbitrary code; only load trusted cache files.
with open(f'{emb_analysis_session_data}/{session}-front.pb', 'rb') as emb_fh:
    session_emb_info = pickle.load(emb_fh)
with open(f"{postprocessed_id_map_data_dir}/{session}-front.pb", "rb") as id_map_fh:
    session_id_map = pickle.load(id_map_fh)
df_session_eligible_pairs = pd.read_csv(f"{embmatched_id_map_data_dir}/{session}-front.csv")
session_video_file = f'/mnt/ci-nas-classes/classinsight/2019S/video_backup/{session}/{session}-front.avi'



In [None]:
# Replace raw ids with mapped ids after postprocessing for both sessions
# NOTE: this rebinds `session_emb_info` in place, so the cell is not idempotent --
# running it twice looks the already-mapped ids up in `session_id_map` again.
# Ids mapped to 10000 are left out of the result.
session_emb_info = {
    xr:{
        session_id_map[yr]:session_emb_info[xr][yr] 
            for yr in session_emb_info[xr] if not (session_id_map[yr]==10000)} for xr in session_emb_info}

# Correct new ids with dict from eligible pairs
# Each pair (id1, id2) maps id2 onto id1, unless id2 already has a target, in
# which case id1 is pointed at that same target.
# NOTE(review): `eval` runs whatever text is in the CSV's `id_pair` column; only
# use this on trusted files.
eligible_id_map_dict = {}
for id_pair in df_session_eligible_pairs.id_pair.values:
    (id1, id2) = eval(id_pair)
    print(id1, id2)
    if id2 in eligible_id_map_dict:
        eligible_id_map_dict[id1] = eligible_id_map_dict[id2]
        print(f"{id1}--> {eligible_id_map_dict[id2]}")
    # elif id1 in eligible_id_map_dict:
    #     eligible_id_map_dict[id2] = eligible_id_map_dict[id1]
    #     print(f"{id2}--> {eligible_id_map_dict[id1]}")
    else:
        eligible_id_map_dict[id2] = id1
        print(f"{id2}--> {id1}")

sprint(eligible_id_map_dict)


In [None]:
# Correct new ids with dict from eligible pairs
# Each pair (id1, id2) maps id2 onto id1, unless id2 already has a target, in
# which case id1 is pointed at that same target.
# NOTE(review): `eval` runs whatever text is in the CSV's `id_pair` column; only
# use this on trusted files.
eligible_id_map_dict = {}
for id_pair in df_session_eligible_pairs.id_pair.values:
    (id1, id2) = eval(id_pair)
    print(id1, id2)
    if id2 in eligible_id_map_dict:
        eligible_id_map_dict[id1] = eligible_id_map_dict[id2]
    else:
        eligible_id_map_dict[id2] = id1
        print(f"{id2}--> {id1}")
sprint(eligible_id_map_dict)

# Resolve chains (a -> b, b -> c becomes a -> c) all the way to the final target.
# The previous single-hop pass left chains of three or more only partly resolved,
# depending on dict order. `seen` stops the walk if the pairs form a cycle.
for key in list(eligible_id_map_dict):
    target = eligible_id_map_dict[key]
    seen = {key}
    while target in eligible_id_map_dict and target not in seen:
        seen.add(target)
        target = eligible_id_map_dict[target]
    eligible_id_map_dict[key] = target
sprint(eligible_id_map_dict)


In [None]:
# Arrange per-frame information by (merged) tracking id.
# Every id seen in a frame is redirected through `eligible_id_map_dict`, and its
# bbox / face / gaze / embedding rows are collected under the merged id.

gaze_info = {}
emb_info = {}
bbox_info = {}
face_info = {}
for frame_number in session_emb_info:
    for trackId_old in session_emb_info[frame_number]:
        trackId = eligible_id_map_dict.get(trackId_old, trackId_old)
        if trackId not in gaze_info:
            gaze_info[trackId] = []
            emb_info[trackId]=[]
            bbox_info[trackId] = []
            face_info[trackId] = []
        # The frame data lives under the ORIGINAL id. Looking it up under the merged
        # id either failed (and was silently skipped) or copied the target id's data.
        track_data = session_emb_info[frame_number][trackId_old]
        try:
            id_bbox = track_data['bbox']
            bbox_info[trackId].append([frame_number]+list(id_bbox))

            id_face = track_data['face'][0]
            face_info[trackId].append([frame_number]+list(id_face))

            pitch, roll, yaw = track_data['rvec'][0]
            pitch, roll, yaw = np.rad2deg(pitch), np.rad2deg(roll), np.rad2deg(yaw)
            gaze_sx, gaze_sy, gaze_ex, gaze_ey = track_data['gaze_2d'][0].flatten()
            gaze_info[trackId].append([frame_number, pitch, roll, yaw, gaze_sx, gaze_sy, gaze_ex, gaze_ey])
            face_emb = track_data['face_embedding'].tolist()
            emb_info[trackId].append([frame_number]+face_emb)
        except Exception:
            # Best effort: a missing or malformed field ends this track's entry for
            # the frame; fields appended before the failure are kept. Unlike a bare
            # `except`, this no longer swallows KeyboardInterrupt/SystemExit.
            continue

# Turn the collected rows into frame-indexed tables (`track_id` instead of `id`,
# which shadowed the builtin).
for track_id in gaze_info:
    gaze_info[track_id] = pd.DataFrame(gaze_info[track_id], columns=['frame','pitch','roll','yaw','gaze_sx', 'gaze_sy', 'gaze_ex', 'gaze_ey']).set_index('frame')
    emb_info[track_id] = pd.DataFrame(emb_info[track_id], columns=['frame']+np.arange(512).tolist()).set_index('frame')
    bbox_info[track_id] = pd.DataFrame(bbox_info[track_id], columns=['frame']+np.arange(5).tolist()).set_index('frame')
    face_info[track_id] = pd.DataFrame(face_info[track_id], columns=['frame']+np.arange(15).tolist()).set_index('frame')

sprint({xr:(gaze_info[xr].shape[0],emb_info[xr].shape[0], face_info[xr].shape[0], bbox_info[xr].shape[0]) for xr in emb_info})


In [None]:
# Gaze-based embeddings: per id, the median face embedding over frames in which
# pitch, roll and yaw are all within MAX_GAZE_DEVIATION_DEG of zero.
MAX_GAZE_DEVIATION_DEG = 30
# MAX_EMBEDDING_FRAMES = 1000
MIN_EMBEDDING_FRAMES = 100

gaze_based_embeddings = {}
for sid in emb_info.keys():
    # frames with a near-frontal head pose
    gaze_df = gaze_info[sid]
    frames = gaze_df[
        (gaze_df.yaw.abs()<MAX_GAZE_DEVIATION_DEG) &
        (gaze_df.pitch.abs()<MAX_GAZE_DEVIATION_DEG) &
        (gaze_df.roll.abs()<MAX_GAZE_DEVIATION_DEG)].index.values

    num_frames = len(frames)
    if (num_frames<MIN_EMBEDDING_FRAMES):
        sprint(f"Not sufficient frames to match {sid}:{len(frames)}")
        continue
    # Select embedding rows by membership rather than `.loc[frames]`: a gaze frame
    # without an embedding row would raise a KeyError, and repeated frame labels
    # would be selected more than once.
    selected_emb = emb_info[sid][emb_info[sid].index.isin(frames)]
    if selected_emb.empty:
        sprint(f"No embeddings available for the selected frames of {sid}")
        continue
    median_emb = np.median(selected_emb,axis=0)
    sprint(f"Got gaze embedding for {sid}.")
    gaze_based_embeddings[sid]=median_emb


In [None]:
# Cluster-based embeddings: per id, run DBSCAN over its face embeddings and take
# the median embedding of the most populated cluster.
CLU_EPS = 0.4
CLU_MIN_PTS = 100
np.random.seed(42)
cluster_based_emb = {}
for sid in emb_info:
    # DBSCAN.fit raises on an empty sample matrix; ids without any embedding row
    # are skipped instead of aborting the whole loop.
    if emb_info[sid].empty:
        sprint(f"No embeddings recorded, not proceeding with id {sid}")
        continue
    emb_clu = DBSCAN(min_samples=CLU_MIN_PTS, eps=CLU_EPS)
    emb_clu.fit(emb_info[sid].values)
    if max(emb_clu.labels_)<0:
        sprint(f"All frames are outliers, not proceeding with id {sid}")
        continue
    # label -1 marks outliers; pick the largest real cluster
    best_cluster_id = pd.Series(emb_clu.labels_[emb_clu.labels_>=0]).value_counts().index[0]
    # Use the positional mask directly. Going through frame labels and `.loc`
    # would also pull in non-cluster rows whenever a frame label repeats.
    cluster_rows = emb_info[sid].iloc[emb_clu.labels_==best_cluster_id]
    cluster_based_emb[sid] = np.median(cluster_rows,axis=0)
    sprint(f"Got cluster embedding for {sid}")


In [None]:
def _median_and_iqd(values):
    """Return (median, interquartile distance) of a pandas Series."""
    return values.median(), values.quantile(0.75) - values.quantile(0.25)


# Per-id summary: median and interquartile distance of bbox size, face size and
# face centre, plus the cluster- and gaze-based embeddings (None when unavailable).
# Width/height/centre are derived from columns 0..3 of the bbox/face tables.
session_id_info = {}
for sid in face_info:
    bbox_df = bbox_info[sid]
    bbox_width_med, bbox_width_iqd = _median_and_iqd(bbox_df[2] - bbox_df[0])
    bbox_height_med, bbox_height_iqd = _median_and_iqd(bbox_df[3] - bbox_df[1])

    face_df = face_info[sid]
    face_width_med, face_width_iqd = _median_and_iqd(face_df[2] - face_df[0])
    face_height_med, face_height_iqd = _median_and_iqd(face_df[3] - face_df[1])
    face_x_med, face_x_iqd = _median_and_iqd((face_df[2] + face_df[0]) / 2)
    # face_y_med used to be computed from the x centre by mistake.
    face_y_med, face_y_iqd = _median_and_iqd((face_df[3] + face_df[1]) / 2)

    session_id_info[sid] = dict(bbox_width_med=bbox_width_med, bbox_height_med=bbox_height_med,bbox_width_iqd=bbox_width_iqd, bbox_height_iqd=bbox_height_iqd,
                         face_width_med=face_width_med, face_height_med=face_height_med, face_width_iqd=face_width_iqd, face_height_iqd=face_height_iqd,
                         face_x_med=face_x_med, face_y_med=face_y_med, face_x_iqd=face_x_iqd, face_y_iqd=face_y_iqd,
                         cluster_emb = cluster_based_emb.get(sid, None),
                         gaze_emb=gaze_based_embeddings.get(sid, None))

# Shows the statistics of the last id processed above.
sprint(face_width_med, face_width_iqd, face_height_med, face_height_iqd, face_x_med, face_x_iqd, face_y_med, face_y_iqd)
    

In [None]:
from matplotlib.patches import Rectangle  # explicit: not imported by name in the import cell

# Heatmap of pairwise match distances (columns: ids of session A, rows: ids of
# session B). Red boxes mark ground-truth pairs, blue boxes the three smallest
# distances of every column.
# NOTE(review): `matching_info_dictB` and `gt_map` are not defined in the visible
# cells, and `sessionA`/`sessionB`/`course` are whatever earlier cells left behind --
# confirm they are set before running this cell.
match_scores = {}
for idA in matching_info_dictB:
    match_scores[idA] = {}
    for idB in matching_info_dictB[idA]:
        match_matrix = matching_info_dictB[idA][idB]['match_matrix']
        # median over rows of the per-row medians
        match_distance = np.median(np.median(match_matrix,axis=1))
        match_scores[idA][idB] = match_distance

df_matching = pd.DataFrame(match_scores)

#--------
fig, axn = plt.subplots(1,1,figsize=(20,10))
sns.heatmap(df_matching.round(2), annot=True,ax=axn,cmap='bone_r')
for gtA in map(int, gt_map):
    for gtB in map(int, gt_map[str(gtA)]):
        if (gtA>=0) and (gtB>=0):
            if (gtA in df_matching.columns) and (gtB in df_matching.index):
                locA, locB = df_matching.columns.get_loc(gtA), df_matching.index.get_loc(gtB)
                axn.add_patch(Rectangle((locA, locB), 1, 1, fill=False, edgecolor='red', lw=4))

for locA in range(df_matching.shape[1]):
    # positions of the three smallest distances in this column
    locBs = df_matching.iloc[:,locA].argsort()[:3]
    for locB in locBs:
        axn.add_patch(Rectangle((locA, locB), 1, 1, fill=False, edgecolor='blue', lw=1))


axn.set_xlabel(f"Session A: {sessionA}",fontsize=16)
axn.set_ylabel(f"Session B: {sessionB}",fontsize=16)
os.makedirs('plots', exist_ok=True)  # savefig does not create missing directories
plt.savefig(f'plots/Method2b_{course}_{sessionA.split("_")[-1]}_{sessionB.split("_")[-1]}.png',dpi=400,bbox_inches='tight')
