## PCA 18 Model

In [1]:
import os
import pickle
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from dtw import *
from tqdm import tqdm_notebook as tqdm

Importing the dtw module. When using in academic works please cite:
  T. Giorgino. Computing and Visualizing Dynamic Time Warping Alignments in R: The dtw Package.
  J. Stat. Soft., doi:10.18637/jss.v031.i07.



In [2]:
def visualize_labels(y, alt=False):
    """Draw a one-dimensional sequence of values as a single-row colour strip.

    Parameters
    ----------
    y : array-like
        One-dimensional sequence of numeric values. It is converted with
        ``np.asarray`` first, so plain lists and pandas Series are accepted
        in addition to NumPy arrays.
    alt : bool, optional
        If True the 'cividis' colormap is used, otherwise 'plasma'.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    color = 'cividis' if alt else 'plasma'

    # Add a leading axis so imshow receives a (1, len(y)) image.
    strip = np.asarray(y)[np.newaxis, :]

    fig, ax1 = plt.subplots()

    im = ax1.imshow(strip, cmap=color, aspect="auto")
    ax1.set_yticks([])  # a single row has no meaningful y axis
    fig.colorbar(im, ax=ax1)

    plt.tight_layout()
    plt.show()

In [3]:
def visualize_matrix(Y, alt=False):
    """Show ``Y`` as a heat map with a colorbar.

    Parameters
    ----------
    Y : array-like
        Image data handed unchanged to ``Axes.imshow``.
    alt : bool, optional
        Selects the 'cividis' colormap when True and 'plasma' when False.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    fig, ax1 = plt.subplots()

    heatmap = ax1.imshow(
        Y,
        cmap='cividis' if alt else 'plasma',
        interpolation='nearest',
        aspect="auto",
    )
    fig.colorbar(heatmap, ax=ax1)

    plt.tight_layout()
    plt.show()

In [4]:
# Select the task to process by leaving exactly one assignment uncommented.
#task = 'task1_sandwich_openface'
#task = 'task2_bart_openface'
task = 'task3_jenga_openface'

# Every location below is derived from the selected task name.
model_path = './models/' + task + '/'
path = './data/openface/' + task + '/'
results_path = './data/final/' + task + '_pca.csv'

## Pairs

In [5]:
# NOTE(review): both values are used as *exclusive* upper bounds of range(1, N),
# so the generated IDs run 01..21 and Sona01..Sona16 (37 pairs in total).
# Confirm that these are meant as bounds rather than participant counts.
num_paid_participants = 22
num_sona_participants = 17


def _build_pairs(task_name):
    """Return the list of (file1, file2) CSV name pairs for one task.

    For every two-digit ID ``i`` the pair is
    ``('<i>01_<task_name>_cut.csv', '<i>02_<task_name>_cut.csv')``.
    The un-prefixed IDs come first, followed by the same pattern with a
    'Sona' prefix.
    """
    paid = [
        ('{0:02}01_{1}_cut.csv'.format(i, task_name),
         '{0:02}02_{1}_cut.csv'.format(i, task_name))
        for i in range(1, num_paid_participants)
    ]
    sona = [
        ('Sona{0:02}01_{1}_cut.csv'.format(i, task_name),
         'Sona{0:02}02_{1}_cut.csv'.format(i, task_name))
        for i in range(1, num_sona_participants)
    ]
    return paid + sona


task1_pairs = _build_pairs('sandwich')
task2_pairs = _build_pairs('bart')
task3_pairs = _build_pairs('jenga')

# Lookup from task identifier to its list of file-name pairs.
pairs = {
    'task1_sandwich_openface': task1_pairs,
    'task2_bart_openface': task2_pairs,
    'task3_jenga_openface': task3_pairs
}

## PCA

In [6]:
# Landmark column names: x_0..x_67 followed by y_0..y_67 (136 columns in total).
indicies = ['{0}_{1}'.format(axis, i) for axis in ('x', 'y') for i in range(68)]

In [7]:
import pickle
# scikit-learn v0.24.1
# (presumably the version the model was pickled with; loading it under a
# different scikit-learn version may warn or fail -- TODO confirm).
# NOTE: unpickling executes arbitrary code, so only load model files that come
# from a trusted source.
# The context manager closes the file handle as soon as the model is loaded.
with open(model_path + 'pca_n_18.p', 'rb') as model_file:
    PCA_n_18 = pickle.load(model_file)

In [8]:
def _read_landmark_csv(csv_path):
    """Read one CSV file, skipping malformed lines with a warning.

    ``error_bad_lines``/``warn_bad_lines`` were deprecated in pandas 1.3 and
    removed in 2.0 in favour of ``on_bad_lines``. The new spelling is tried
    first and the old one is used only when pandas rejects the new keyword.
    """
    try:
        return pd.read_csv(csv_path, on_bad_lines='warn')
    except TypeError:
        # Older pandas that does not know ``on_bad_lines``.
        return pd.read_csv(csv_path, error_bad_lines=False, warn_bad_lines=True)


def _load_landmarks(csv_path):
    """Load the landmark columns of one recording as a float32 array.

    Rows containing a NaN in any landmark column are dropped, with one warning
    per dropped row.

    Returns
    -------
    numpy.ndarray or None
        Array with one row per remaining CSV row and one column per entry of
        ``indicies``. None is returned (after a warning stating the reason)
        when the file cannot be read, lacks a landmark column, or has no rows
        left once NaN rows are removed.
    """
    try:
        landmarks = _read_landmark_csv(csv_path)[indicies]
    except Exception as err:
        # Best effort: a missing or unreadable file must not abort the whole
        # run, but the reason is reported instead of being swallowed silently.
        warnings.warn('Could not load landmarks from {0}: {1!r}'.format(csv_path, err))
        return None

    nan_rows = landmarks.index[landmarks.isnull().any(axis=1)]
    for r in nan_rows:
        warnings.warn('Dropping row due to NaN, {0}'.format(r))
    # Non-mutating drop: avoids an in-place edit of a column slice.
    landmarks = landmarks.drop(nan_rows)

    if landmarks.empty:
        warnings.warn('No usable landmark rows in {0}'.format(csv_path))
        return None

    return landmarks.to_numpy().astype('float32')


# One result row per pair:
# [found1, found2, file1, file2, aveCost, minDist, norDist]
data = []
for p1, p2 in tqdm(pairs[task]):
    print(p1,p2)

    # Both files are always attempted so that each flag is recorded even when
    # the other file of the pair is unusable.
    X1 = _load_landmarks(os.path.join(path, p1))
    X2 = _load_landmarks(os.path.join(path, p2))
    success1 = X1 is not None
    success2 = X2 is not None

    if success1 and success2:
        # Run model: project both landmark sequences with the fitted PCA.
        tl1 = PCA_n_18.transform(X1)
        tl2 = PCA_n_18.transform(X2)

        # Release the raw landmark arrays before the DTW computation.
        del X1
        del X2

        # dtw: file 2 is the query and file 1 the reference.
        alignment = dtw(tl2, tl1, keep_internals=True,
                        open_begin=True, open_end=True,
                        step_pattern='asymmetric')

        # results
        ave_cost = np.mean(alignment.costMatrix)
        min_dist = alignment.distance
        nor_dist = alignment.normalizedDistance

        # keep_internals=True retains the cost matrices; free them eagerly.
        del alignment

        data.append([True,True,p1,p2,ave_cost,min_dist,nor_dist])
    else:
        # Pad with NaN so every row has the same seven fields; rows of only
        # four fields make DataFrame construction fail when all pairs fail.
        data.append([success1,success2,p1,p2,np.nan,np.nan,np.nan])

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  


  0%|          | 0/37 [00:00<?, ?it/s]

0101_jenga_cut.csv 0102_jenga_cut.csv
0201_jenga_cut.csv 0202_jenga_cut.csv
0301_jenga_cut.csv 0302_jenga_cut.csv
0401_jenga_cut.csv 0402_jenga_cut.csv
0501_jenga_cut.csv 0502_jenga_cut.csv
0601_jenga_cut.csv 0602_jenga_cut.csv
0701_jenga_cut.csv 0702_jenga_cut.csv
0801_jenga_cut.csv 0802_jenga_cut.csv
0901_jenga_cut.csv 0902_jenga_cut.csv
1001_jenga_cut.csv 1002_jenga_cut.csv
1101_jenga_cut.csv 1102_jenga_cut.csv
1201_jenga_cut.csv 1202_jenga_cut.csv
1301_jenga_cut.csv 1302_jenga_cut.csv
1401_jenga_cut.csv 1402_jenga_cut.csv
1501_jenga_cut.csv 1502_jenga_cut.csv
1601_jenga_cut.csv 1602_jenga_cut.csv
1701_jenga_cut.csv 1702_jenga_cut.csv
1801_jenga_cut.csv 1802_jenga_cut.csv
1901_jenga_cut.csv 1902_jenga_cut.csv
2001_jenga_cut.csv 2002_jenga_cut.csv
2101_jenga_cut.csv 2102_jenga_cut.csv
Sona0101_jenga_cut.csv Sona0102_jenga_cut.csv
Sona0201_jenga_cut.csv Sona0202_jenga_cut.csv
Sona0301_jenga_cut.csv Sona0302_jenga_cut.csv
Sona0401_jenga_cut.csv Sona0402_jenga_cut.csv
Sona0501_jenga_cut

In [11]:
result_columns = ['found1','found2','file1','file2','aveCost','minDist','norDist']

# Rows recorded for failed pairs may hold only the first four fields. Pad them
# with NaN so the frame can be built even when every pair failed (pandas
# raises if no row supplies all seven columns).
padded_rows = [
    list(row) + [np.nan] * (len(result_columns) - len(row))
    for row in data
]
outDf = pd.DataFrame(padded_rows, columns=result_columns)

# Create the output directory first so the computed results are not lost to a
# missing folder.
results_dir = os.path.dirname(results_path)
if results_dir:
    os.makedirs(results_dir, exist_ok=True)
outDf.to_csv(results_path, index=False)

In [12]:
# Preview the first few rows of the results table.
outDf.head()

Unnamed: 0,found1,found2,file1,file2,aveCost,minDist,norDist
0,True,False,0101_jenga_cut.csv,0102_jenga_cut.csv,,,
1,True,True,0201_jenga_cut.csv,0202_jenga_cut.csv,10454530.0,18701700.0,2159.050874
2,True,True,0301_jenga_cut.csv,0302_jenga_cut.csv,4655169.0,6682540.0,694.001421
3,True,True,0401_jenga_cut.csv,0402_jenga_cut.csv,5572548.0,9991830.0,1017.912591
4,True,False,0501_jenga_cut.csv,0502_jenga_cut.csv,,,
