# Correlation Comparison - T vs B


The correlation between AU06_r and AU12_r is higher in W-B (r = 0.725) than in W-T (r = 0.558). Both values are computed on segment S2 only, using rows with confidence >= 0.90.

In [2]:
# Input data file. Alternative inputs are kept below, commented out.
# A path containing 'pkl' is read with pd.read_pickle; anything else is read as CSV.
#datafile = 'all_frames.pkl.xz' # FG/UBICOMP data N=151
#datafile = '../data/all_frames_wclust.pkl.xz' # with AU6_AU12 clusters
datafile = '../data/all_frames.pkl.xz'

CONFIDENCE_TOL = 0.90 # keep only rows whose 'confidence' is >= this value

#-----------------

import pandas as pd
import numpy as np
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.options.display.float_format = '{:,.3f}'.format
from IPython.display import display
import matplotlib.pyplot as plt


# Load the data set, keep only high-confidence rows, and print a short summary.
print('...loading data')
if 'pkl' in datafile:
    # NOTE: unpickling can execute arbitrary code; only load pickle files
    # that come from a trusted source.
    df = pd.read_pickle(datafile)
else:
    df = pd.read_csv(datafile, skipinitialspace=True)
n_tot = df.shape[0]
df = df[df['confidence'] >= CONFIDENCE_TOL]

print('n:',df.shape[0])
# Guard the ratio so an empty input file does not raise ZeroDivisionError.
print('n%:',df.shape[0]/n_tot if n_tot else float('nan'))
print('# files = ', df['Filename'].nunique())

# Select the AU columns by suffix. A suffix match is stricter than a substring
# match, which would also pick up any column that merely contains '_c' or '_r'.
df_c = df[df.columns[df.columns.str.endswith('_c')]]
df_r = df[df.columns[df.columns.str.endswith('_r')]]

print('AU_r feature count:', df_r.shape[1])
print('AU_c feature count:', df_c.shape[1])
#print('\nAU_r features:' , list(df_r.columns))
#print('AU_c features:', list(df_c.columns))

print('\ncolumns: ')
for c in df.columns:
    print(c,end=',')
    

...loading data
n: 1246319
n%: 0.7480277767774617
# files =  298
AU_r feature count: 17
AU_c feature count: 18

columns: 
Filename,filetype,segment,timestamp,confidence,success,pose_Tx,pose_Ty,pose_Tz,pose_Rx,pose_Ry,pose_Rz,AU01_r,AU02_r,AU04_r,AU05_r,AU06_r,AU07_r,AU09_r,AU10_r,AU12_r,AU14_r,AU15_r,AU17_r,AU20_r,AU23_r,AU25_r,AU26_r,AU45_r,AU01_c,AU02_c,AU04_c,AU05_c,AU06_c,AU07_c,AU09_c,AU10_c,AU12_c,AU14_c,AU15_c,AU17_c,AU20_c,AU23_c,AU25_c,AU26_c,AU28_c,AU45_c,

## Correlations

In [3]:
def get_redundant_pairs(df):
    '''Return the diagonal and lower-triangular label pairs of a correlation matrix.

    For the columns of ``df`` taken in order, the result holds every tuple
    ``(columns[i], columns[j])`` with ``j <= i``. These are the self-pairs and
    the mirrored duplicates that appear when a square correlation matrix is
    flattened.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose column labels define the pairs.

    Returns
    -------
    set of tuple
        The ``(outer_label, inner_label)`` pairs to discard.
    '''
    names = df.columns
    return {
        (names[outer], names[inner])
        for outer in range(len(names))
        for inner in range(outer + 1)
    }

def get_top_abs_correlations(df, n=5):
    '''Return the ``n`` column pairs of ``df`` with the largest absolute correlation.

    The correlation matrix from ``df.corr()`` is flattened, the diagonal and
    mirrored pairs reported by ``get_redundant_pairs`` are removed, and the
    remaining pairs are ranked by ``abs(r)`` in descending order.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are correlated pairwise.
    n : int, default 5
        Number of top-ranked pairs to return.

    Returns
    -------
    pandas.DataFrame
        Columns ``'0'`` and ``'1'`` (the two column labels), ``'r'`` (the
        correlation) and ``'abs(r)'``. The row index is each pair's position
        in the flattened, de-duplicated pair list before ranking.
    '''
    redundant = get_redundant_pairs(df)
    pairwise = df.corr().unstack().drop(labels=redundant)

    table = pairwise.reset_index()
    table.columns = ['0','1','r']
    table['abs(r)'] = table['r'].abs()

    ranked = table.sort_values(by='abs(r)', ascending=False)
    return ranked.iloc[:n]
#============================================================
# Compare the strongest AU intensity (_r) correlations between the W-T and W-B
# file types, restricted to segment S2.
print("Top AU_r Correlations")

df_seg = df[(df['segment'] == 'S2')]
df_t = df_seg[df_seg['filetype']=='W-T']
df_b = df_seg[df_seg['filetype']=='W-B']
# Select the AU intensity columns by suffix. A suffix match is stricter than a
# substring match, which would also pick up any column that merely contains '_r'.
df_rt = df_t[df_t.columns[df_t.columns.str.endswith('_r')]]
df_rb = df_b[df_b.columns[df_b.columns.str.endswith('_r')]]
t_corr = get_top_abs_correlations(df_rt, 15)
b_corr = get_top_abs_correlations(df_rb, 15)

print('W-T correlations')
display(t_corr)

print('\nW-B correlations')
display(b_corr)

Top AU_r Correlations
W-T correlations


Unnamed: 0,0,1,r,abs(r)
0,AU01_r,AU02_r,0.684,0.684
58,AU06_r,AU07_r,0.563,0.563
61,AU06_r,AU12_r,0.558,0.558
91,AU10_r,AU12_r,0.537,0.537
60,AU06_r,AU10_r,0.534,0.534
133,AU25_r,AU26_r,0.504,0.504
100,AU12_r,AU14_r,0.423,0.423
92,AU10_r,AU14_r,0.371,0.371
130,AU23_r,AU25_r,-0.328,0.328
122,AU17_r,AU23_r,0.325,0.325



W-B correlations


Unnamed: 0,0,1,r,abs(r)
61,AU06_r,AU12_r,0.725,0.725
0,AU01_r,AU02_r,0.713,0.713
91,AU10_r,AU12_r,0.57,0.57
60,AU06_r,AU10_r,0.526,0.526
58,AU06_r,AU07_r,0.526,0.526
100,AU12_r,AU14_r,0.443,0.443
133,AU25_r,AU26_r,0.413,0.413
122,AU17_r,AU23_r,0.41,0.41
92,AU10_r,AU14_r,0.405,0.405
67,AU06_r,AU25_r,0.357,0.357
