## UMAP PCA 32 Model

In [1]:
import os
import pickle
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from dtw import *
from tqdm import tqdm_notebook as tqdm

Importing the dtw module. When using in academic works please cite:
  T. Giorgino. Computing and Visualizing Dynamic Time Warping Alignments in R: The dtw Package.
  J. Stat. Soft., doi:10.18637/jss.v031.i07.



In [2]:
def visualize_labels(y, alt=False):
    color = 'cividis' if alt else 'plasma'
    
    idx = np.arange(len(y))

    fig, ax1 = plt.subplots()

    im = ax1.imshow(y[np.newaxis,:], cmap=color, aspect="auto")
    ax1.set_yticks([])
    fig.colorbar(im, ax=ax1)

    plt.tight_layout()
    plt.show()

In [3]:
def visualize_matrix(Y, alt=False):
    color = 'cividis' if alt else 'plasma'
    
    fig, ax1 = plt.subplots()
    
    im = ax1.imshow(Y, cmap=color, interpolation='nearest', aspect="auto")
    fig.colorbar(im, ax=ax1)
    
    plt.tight_layout()
    plt.show()

In [4]:
#task = 'task1_sandwich_openface'
task = 'task2_bart_openface'
#task = 'task3_jenga_openface'

model_path = './models/{0}/'.format(task)
path = './data/openface/{0}/'.format(task)
results_path = './data/final/{0}_umap_pca.csv'.format(task)

## Pairs

In [5]:
num_paid_participants = 22
num_sona_participants = 17

task1_pairs = [('{0:02}01_sandwich_cut.csv'.format(i),'{0:02}02_sandwich_cut.csv'.format(i)) for i in range(1,num_paid_participants)]
task1_pairs += [('Sona{0:02}01_sandwich_cut.csv'.format(i),'Sona{0:02}02_sandwich_cut.csv'.format(i)) for i in range(1,num_sona_participants)]

task2_pairs = [('{0:02}01_bart_cut.csv'.format(i),'{0:02}02_bart_cut.csv'.format(i)) for i in range(1,num_paid_participants)]
task2_pairs += [('Sona{0:02}01_bart_cut.csv'.format(i),'Sona{0:02}02_bart_cut.csv'.format(i)) for i in range(1,num_sona_participants)]

task3_pairs = [('{0:02}01_jenga_cut.csv'.format(i),'{0:02}02_jenga_cut.csv'.format(i)) for i in range(1,num_paid_participants)]
task3_pairs += [('Sona{0:02}01_jenga_cut.csv'.format(i),'Sona{0:02}02_jenga_cut.csv'.format(i)) for i in range(1,num_sona_participants)]

pairs = {
    'task1_sandwich_openface': task1_pairs,
    'task2_bart_openface': task2_pairs,
    'task3_jenga_openface': task3_pairs
}

## UMAP

In [6]:
indicies = ['x_{0}'.format(i) for i in range(0,68)] + ['y_{0}'.format(i) for i in range(0,68)]

In [7]:
import pickle

# scikit-learn v0.24.1
PCA_n_32 = pickle.load(open(model_path + 'pca_n_32.p', 'rb'))
umap_3 = pickle.load(open(model_path+'umap_3_pca_32.p','rb'))

Fri Apr 23 20:24:15 2021 Building and compiling search function


In [8]:
data = []
for i in tqdm(range(0,len(pairs[task]))):
    (p1, p2) = pairs[task][i]
    
    print(p1,p2)
    
    file1 = os.path.join(path,p1)
    file2 = os.path.join(path,p2)

    # load facial landmarks
    success1 = True
    try:
        df1 = pd.read_csv(file1, error_bad_lines=False, warn_bad_lines=True)   
        face_lmk_1 = df1[indicies]
    except:
        success1 = False
    
    if success1:
        nans = df1[df1[indicies].isnull().any(axis=1)]
        if not nans.empty:
            dropList = nans.index.tolist()
            for r in dropList:
                warnings.warn('Dropping row due to NaN, {0}'.format(r))
            face_lmk_1.drop(dropList, inplace=True)
            
        del df1
        
        X1 = np.asarray(face_lmk_1.to_numpy()).astype('float32')
        
        del face_lmk_1
    
    success2 = True
    try:
        df2 = pd.read_csv(file2, error_bad_lines=False, warn_bad_lines=True)
        face_lmk_2 = df2[indicies]
    except:
        success2 = False

    if success2:
        nans = df2[df2[indicies].isnull().any(axis=1)]
        if not nans.empty:
            dropList = nans.index.tolist()
            for r in dropList:
                warnings.warn('Dropping row due to NaN, {0}'.format(r))
            face_lmk_2.drop(dropList, inplace=True)
            
        del df2

        X2 = np.asarray(face_lmk_2.to_numpy()).astype('float32')
    
        del face_lmk_2
    
    if success1 and success2:
        # Run model
        tl1 = umap_3.transform(PCA_n_32.transform(X1))
        tl2 = umap_3.transform(PCA_n_32.transform(X2))
        
        del X1
        del X2

        # dtw
        alignment = dtw(tl2, tl1, keep_internals=True, 
                    open_begin=True, open_end=True, 
                    step_pattern='asymmetric')

        # results
        ave_cost = np.mean(alignment.costMatrix)
        min_dist = alignment.distance
        nor_dist = alignment.normalizedDistance
        
        del alignment

        data.append([True,True,p1,p2,ave_cost,min_dist,nor_dist])
    else:
        data.append([success1,success2,p1,p2])

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  


  0%|          | 0/37 [00:00<?, ?it/s]

0101_bart_cut.csv 0102_bart_cut.csv
	completed  0  /  30 epochs
	completed  3  /  30 epochs
	completed  6  /  30 epochs
	completed  9  /  30 epochs
	completed  12  /  30 epochs
	completed  15  /  30 epochs
	completed  18  /  30 epochs
	completed  21  /  30 epochs
	completed  24  /  30 epochs
	completed  27  /  30 epochs
	completed  0  /  30 epochs
	completed  3  /  30 epochs
	completed  6  /  30 epochs
	completed  9  /  30 epochs
	completed  12  /  30 epochs
	completed  15  /  30 epochs
	completed  18  /  30 epochs
	completed  21  /  30 epochs
	completed  24  /  30 epochs
	completed  27  /  30 epochs
0201_bart_cut.csv 0202_bart_cut.csv
	completed  0  /  30 epochs
	completed  3  /  30 epochs
	completed  6  /  30 epochs
	completed  9  /  30 epochs
	completed  12  /  30 epochs
	completed  15  /  30 epochs
	completed  18  /  30 epochs
	completed  21  /  30 epochs
	completed  24  /  30 epochs
	completed  27  /  30 epochs
	completed  0  /  30 epochs
	completed  3  /  30 epochs
	completed  6 

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


0401_bart_cut.csv 0402_bart_cut.csv
	completed  0  /  30 epochs
	completed  3  /  30 epochs
	completed  6  /  30 epochs
	completed  9  /  30 epochs
	completed  12  /  30 epochs
	completed  15  /  30 epochs
	completed  18  /  30 epochs
	completed  21  /  30 epochs
	completed  24  /  30 epochs
	completed  27  /  30 epochs
	completed  0  /  30 epochs
	completed  3  /  30 epochs
	completed  6  /  30 epochs
	completed  9  /  30 epochs
	completed  12  /  30 epochs
	completed  15  /  30 epochs
	completed  18  /  30 epochs
	completed  21  /  30 epochs
	completed  24  /  30 epochs
	completed  27  /  30 epochs
0501_bart_cut.csv 0502_bart_cut.csv
	completed  0  /  30 epochs
	completed  3  /  30 epochs
	completed  6  /  30 epochs
	completed  9  /  30 epochs
	completed  12  /  30 epochs
	completed  15  /  30 epochs
	completed  18  /  30 epochs
	completed  21  /  30 epochs
	completed  24  /  30 epochs
	completed  27  /  30 epochs
	completed  0  /  30 epochs
	completed  3  /  30 epochs
	completed  6 

Sona0401_bart_cut.csv Sona0402_bart_cut.csv
	completed  0  /  100 epochs
	completed  10  /  100 epochs
	completed  20  /  100 epochs
	completed  30  /  100 epochs
	completed  40  /  100 epochs
	completed  50  /  100 epochs
	completed  60  /  100 epochs
	completed  70  /  100 epochs
	completed  80  /  100 epochs
	completed  90  /  100 epochs
	completed  0  /  100 epochs
	completed  10  /  100 epochs
	completed  20  /  100 epochs
	completed  30  /  100 epochs
	completed  40  /  100 epochs
	completed  50  /  100 epochs
	completed  60  /  100 epochs
	completed  70  /  100 epochs
	completed  80  /  100 epochs
	completed  90  /  100 epochs
Sona0501_bart_cut.csv Sona0502_bart_cut.csv
Sona0601_bart_cut.csv Sona0602_bart_cut.csv
Sona0701_bart_cut.csv Sona0702_bart_cut.csv
Sona0801_bart_cut.csv Sona0802_bart_cut.csv
Sona0901_bart_cut.csv Sona0902_bart_cut.csv
Sona1001_bart_cut.csv Sona1002_bart_cut.csv
Sona1101_bart_cut.csv Sona1102_bart_cut.csv
Sona1201_bart_cut.csv Sona1202_bart_cut.csv
Sona13

In [9]:
outDf = pd.DataFrame(data, columns=['found1','found2','file1','file2','aveCost','minDist','norDist'])
outDf.to_csv(results_path, index=False)

In [10]:
outDf.head()

Unnamed: 0,found1,found2,file1,file2,aveCost,minDist,norDist
0,True,True,0101_bart_cut.csv,0102_bart_cut.csv,44926.432223,70093.376376,6.683835
1,True,True,0201_bart_cut.csv,0202_bart_cut.csv,67503.237652,129144.016939,9.846296
2,False,True,0301_bart_cut.csv,0302_bart_cut.csv,,,
3,True,True,0401_bart_cut.csv,0402_bart_cut.csv,47516.162515,72081.704151,3.560294
4,True,True,0501_bart_cut.csv,0502_bart_cut.csv,56046.181998,87750.633095,2.925606
