This notebook generates the training data for all models (PCA, KMeans, Autoencoder) used during analysis

# All Data

In [1]:
task_name = 'all_data'

In [2]:
model_path = './models/{0}/'.format(task_name)

In [3]:
import os
import numpy as np
import pandas as pd

import warnings
from tqdm import tqdm

In [4]:
path = './data/openface'
tasks = ['task1_sandwich_openface','task2_bart_openface','task3_jenga_openface']

files = []
for task in tasks:
    subpath = os.path.join(path,task)
    files += [os.path.join(task,f) for f in os.listdir(subpath) if os.path.isfile(os.path.join(subpath,f))]

print('Num Files:',len(files))

Num Files: 93


In [5]:
from sklearn.model_selection import train_test_split

indicies = ['frame'] + ['x_{0}'.format(i) for i in range(0,68)] + ['y_{0}'.format(i) for i in range(0,68)]

flag = True
Train = None
Test = None
Valid = None

for f in tqdm(list(files)):
    
    # Load CSV
    print(f)
    df = pd.read_csv(
            os.path.join(path,f), 
            error_bad_lines=False, 
            warn_bad_lines=True)
    
    face_lmk = df[indicies]
    
    # Filter NaN rows
    nans = df[df[indicies].isnull().any(axis=1)]
    if not nans.empty:
        dropList = nans.index.tolist()
        for r in dropList:
            warnings.warn('Dropping row due to NaN, {0}'.format(r))
        face_lmk.drop(dropList, inplace=True)
        
    face_lmk.insert(0,'filename',[f for i in range(0,face_lmk.shape[0])],True)
        
    print('Full:',face_lmk.shape)
    
    # Create Training, Test, Validation subsets
    train, tmp = train_test_split(face_lmk, test_size=0.33)
    test, val = train_test_split(tmp, test_size=0.5)
    print('Train:',train.shape,'Test:',test.shape,'Validation',val.shape)
    
    if flag:
        flag = False
        Train = train
        Test = test
        Valid = val
    else:
        Train = Train.append(train)
        Test = Test.append(test)
        Valid = Valid.append(val)

# Final Dataset (shuffled)
Train = Train.sample(frac=1).reset_index(drop=True)
Test = Test.sample(frac=1).reset_index(drop=True)
Valid = Valid.sample(frac=1).reset_index(drop=True)

print('\n\nFINAL')
print('Train:',Train.shape,'Test:',Test.shape,'Validation',Valid.shape)

  0%|          | 0/93 [00:00<?, ?it/s]

task1_sandwich_openface/1501_sandwich_cut.csv


  1%|          | 1/93 [00:01<01:46,  1.16s/it]

Full: (7224, 138)
Train: (4840, 138) Test: (1192, 138) Validation (1192, 138)
task1_sandwich_openface/0501_sandwich_cut.csv


  2%|▏         | 2/93 [00:02<01:49,  1.20s/it]

Full: (9426, 138)
Train: (6315, 138) Test: (1555, 138) Validation (1556, 138)
task1_sandwich_openface/0702_sandwich_cut.csv


  3%|▎         | 3/93 [00:03<01:46,  1.19s/it]

Full: (8501, 138)
Train: (5695, 138) Test: (1403, 138) Validation (1403, 138)
task1_sandwich_openface/1101_sandwich_cut.csv


  4%|▍         | 4/93 [00:04<01:53,  1.28s/it]

Full: (10804, 138)
Train: (7238, 138) Test: (1783, 138) Validation (1783, 138)
task1_sandwich_openface/Sona0402_sandwich_cut.csv


  5%|▌         | 5/93 [00:05<01:40,  1.14s/it]

Full: (6001, 138)
Train: (4020, 138) Test: (990, 138) Validation (991, 138)
task1_sandwich_openface/1702_Sandwich_Cut.csv


  6%|▋         | 6/93 [00:06<01:31,  1.06s/it]

Full: (6674, 138)
Train: (4471, 138) Test: (1101, 138) Validation (1102, 138)
task1_sandwich_openface/0701_sandwich_cut.csv


  8%|▊         | 7/93 [00:07<01:33,  1.09s/it]

Full: (8250, 138)
Train: (5527, 138) Test: (1361, 138) Validation (1362, 138)
task1_sandwich_openface/1301_sandwich_cut.csv


b'Skipping line 4053: expected 714 fields, saw 1233\nSkipping line 4064: expected 714 fields, saw 722\n'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
  9%|▊         | 8/93 [00:08<01:29,  1.06s/it]

Full: (6570, 138)
Train: (4401, 138) Test: (1084, 138) Validation (1085, 138)
task1_sandwich_openface/0402_sandwich_cut.csv


 10%|▉         | 9/93 [00:09<01:22,  1.02it/s]

Full: (4974, 138)
Train: (3332, 138) Test: (821, 138) Validation (821, 138)
task1_sandwich_openface/1502_sandwich_cut.csv


 11%|█         | 10/93 [00:10<01:24,  1.01s/it]

Full: (7219, 138)
Train: (4836, 138) Test: (1191, 138) Validation (1192, 138)
task1_sandwich_openface/1402_sandwich_cut.csv


 12%|█▏        | 11/93 [00:11<01:25,  1.05s/it]

Full: (7518, 138)
Train: (5037, 138) Test: (1240, 138) Validation (1241, 138)
task1_sandwich_openface/1001_sandwich_cut.csv


 13%|█▎        | 12/93 [00:13<01:29,  1.10s/it]

Full: (8028, 138)
Train: (5378, 138) Test: (1325, 138) Validation (1325, 138)
task1_sandwich_openface/0502_sandwich_cut.csv


 14%|█▍        | 13/93 [00:14<01:34,  1.18s/it]

Full: (9457, 138)
Train: (6336, 138) Test: (1560, 138) Validation (1561, 138)
task1_sandwich_openface/Sona0202_sandwich_cut.csv


 15%|█▌        | 14/93 [00:15<01:29,  1.13s/it]

Full: (6405, 138)
Train: (4291, 138) Test: (1057, 138) Validation (1057, 138)
task1_sandwich_openface/0901_sandwich_cut.csv


 16%|█▌        | 15/93 [00:17<01:41,  1.30s/it]

Full: (13190, 138)
Train: (8837, 138) Test: (2176, 138) Validation (2177, 138)
task1_sandwich_openface/Sona0401_sandwich_cut.csv


 17%|█▋        | 16/93 [00:18<01:31,  1.19s/it]

Full: (5606, 138)
Train: (3756, 138) Test: (925, 138) Validation (925, 138)
task1_sandwich_openface/0101_sandwich_cut.csv


 18%|█▊        | 17/93 [00:19<01:22,  1.09s/it]

Full: (5732, 138)
Train: (3840, 138) Test: (946, 138) Validation (946, 138)
task1_sandwich_openface/sona0101_sandwich_cut.csv


 19%|█▉        | 18/93 [00:19<01:18,  1.04s/it]

Full: (4700, 138)
Train: (3149, 138) Test: (775, 138) Validation (776, 138)
task1_sandwich_openface/0801_sandwich_cut.csv


 20%|██        | 19/93 [00:21<01:30,  1.23s/it]

Full: (11169, 138)
Train: (7483, 138) Test: (1843, 138) Validation (1843, 138)
task1_sandwich_openface/0201_sandwich_cut.csv
Full: (9953, 138)
Train: (6668, 138) Test: (1642, 138) Validation (1643, 138)


 22%|██▏       | 20/93 [00:22<01:31,  1.26s/it]

task1_sandwich_openface/0302_sandwich_cut.csv
Full: (8333, 138)
Train: (5583, 138) Test: (1375, 138) Validation (1375, 138)


 23%|██▎       | 21/93 [00:24<01:33,  1.30s/it]

task1_sandwich_openface/sona0102_sandwich_cut.csv
Full: (4735, 138)
Train: (3172, 138) Test: (781, 138) Validation (782, 138)


 24%|██▎       | 22/93 [00:25<01:23,  1.18s/it]

task1_sandwich_openface/1701_Sandwich_Cut.csv
Full: (6185, 138)
Train: (4143, 138) Test: (1021, 138) Validation (1021, 138)


 25%|██▍       | 23/93 [00:26<01:21,  1.16s/it]

task1_sandwich_openface/0102_sandwich_cut.csv
Full: (5692, 138)
Train: (3813, 138) Test: (939, 138) Validation (940, 138)


 26%|██▌       | 24/93 [00:27<01:13,  1.07s/it]

task1_sandwich_openface/0602_sandwich_cut.csv
Full: (10711, 138)
Train: (7176, 138) Test: (1767, 138) Validation (1768, 138)


 27%|██▋       | 25/93 [00:28<01:23,  1.22s/it]

task1_sandwich_openface/Sona0201_sandwich_cut.csv
Full: (6358, 138)
Train: (4259, 138) Test: (1049, 138) Validation (1050, 138)


 28%|██▊       | 26/93 [00:29<01:21,  1.22s/it]

task1_sandwich_openface/0802_sandwich_cut.csv
Full: (11239, 138)
Train: (7530, 138) Test: (1854, 138) Validation (1855, 138)


 29%|██▉       | 27/93 [00:31<01:30,  1.37s/it]

task1_sandwich_openface/0301_sandwich_cut.csv
Full: (8261, 138)
Train: (5534, 138) Test: (1363, 138) Validation (1364, 138)


 30%|███       | 28/93 [00:33<01:30,  1.39s/it]

task1_sandwich_openface/0601_sandwich_cut.csv
Full: (11538, 138)
Train: (7730, 138) Test: (1904, 138) Validation (1904, 138)


 31%|███       | 29/93 [00:34<01:36,  1.51s/it]

task1_sandwich_openface/Sona0302_sandwich_cut.csv
Full: (7300, 138)
Train: (4891, 138) Test: (1204, 138) Validation (1205, 138)


 32%|███▏      | 30/93 [00:36<01:31,  1.45s/it]

task1_sandwich_openface/1002_sandwich_cut.csv
Full: (8236, 138)
Train: (5518, 138) Test: (1359, 138) Validation (1359, 138)


 33%|███▎      | 31/93 [00:37<01:29,  1.45s/it]

task1_sandwich_openface/1401_sandwich_cut.csv
Full: (7476, 138)
Train: (5008, 138) Test: (1234, 138) Validation (1234, 138)


 34%|███▍      | 32/93 [00:39<01:28,  1.45s/it]

task2_bart_openface/Sona0201_bart_cut.csv
Full: (13937, 138)
Train: (9337, 138) Test: (2300, 138) Validation (2300, 138)


 35%|███▌      | 33/93 [00:41<01:35,  1.59s/it]

task2_bart_openface/0201_bart_cut.csv
Full: (13184, 138)
Train: (8833, 138) Test: (2175, 138) Validation (2176, 138)


 37%|███▋      | 34/93 [00:43<01:43,  1.75s/it]

task2_bart_openface/1101_bart_cut.csv
Full: (14445, 138)
Train: (9678, 138) Test: (2383, 138) Validation (2384, 138)


 38%|███▊      | 35/93 [00:45<01:45,  1.82s/it]

task2_bart_openface/Sona0101_bart_cut.csv
Full: (13946, 138)
Train: (9343, 138) Test: (2301, 138) Validation (2302, 138)


 39%|███▊      | 36/93 [00:47<01:45,  1.85s/it]

task2_bart_openface/1102_bart_cut.csv
Full: (14637, 138)
Train: (9806, 138) Test: (2415, 138) Validation (2416, 138)


 40%|███▉      | 37/93 [00:49<01:47,  1.91s/it]

task2_bart_openface/0701_bart_cut.csv
Full: (12787, 138)
Train: (8567, 138) Test: (2110, 138) Validation (2110, 138)


 41%|████      | 38/93 [00:51<01:46,  1.93s/it]

task2_bart_openface/1401_bart_cut.csv
Full: (17045, 138)
Train: (11420, 138) Test: (2812, 138) Validation (2813, 138)


 42%|████▏     | 39/93 [00:53<01:50,  2.05s/it]

task2_bart_openface/1201_bart_cut.csv
Full: (17923, 138)
Train: (12008, 138) Test: (2957, 138) Validation (2958, 138)


 43%|████▎     | 40/93 [00:55<01:55,  2.17s/it]

task2_bart_openface/0602_bart_cut.csv
Full: (16976, 138)
Train: (11373, 138) Test: (2801, 138) Validation (2802, 138)


 44%|████▍     | 41/93 [00:58<02:01,  2.34s/it]

task2_bart_openface/0902_bart_cut.csv
Full: (17520, 138)
Train: (11738, 138) Test: (2891, 138) Validation (2891, 138)


 45%|████▌     | 42/93 [01:01<02:01,  2.39s/it]

task2_bart_openface/Sona0401_bart_cut.csv
Full: (8084, 138)
Train: (5416, 138) Test: (1334, 138) Validation (1334, 138)


 46%|████▌     | 43/93 [01:02<01:45,  2.12s/it]

task2_bart_openface/0402_bart_cut.csv
Full: (20246, 138)
Train: (13564, 138) Test: (3341, 138) Validation (3341, 138)


 47%|████▋     | 44/93 [01:05<01:56,  2.37s/it]

task2_bart_openface/1202_bart_cut.csv
Full: (17951, 138)
Train: (12027, 138) Test: (2962, 138) Validation (2962, 138)


 48%|████▊     | 45/93 [01:08<01:56,  2.43s/it]

task2_bart_openface/0102_bart_cut.csv
Full: (10487, 138)
Train: (7026, 138) Test: (1730, 138) Validation (1731, 138)


 49%|████▉     | 46/93 [01:10<01:48,  2.31s/it]

task2_bart_openface/1002_bart_cut.csv
Full: (19483, 138)
Train: (13053, 138) Test: (3215, 138) Validation (3215, 138)


 51%|█████     | 47/93 [01:12<01:53,  2.46s/it]

task2_bart_openface/1702_Bart_Cut.csv
Full: (18037, 138)
Train: (12084, 138) Test: (2976, 138) Validation (2977, 138)


 52%|█████▏    | 48/93 [01:15<01:53,  2.52s/it]

task2_bart_openface/1001_bart_cut.csv
Full: (19442, 138)
Train: (13026, 138) Test: (3208, 138) Validation (3208, 138)


 53%|█████▎    | 49/93 [01:18<01:54,  2.60s/it]

task2_bart_openface/0802_bart_cut.csv
Full: (9285, 138)
Train: (6220, 138) Test: (1532, 138) Validation (1533, 138)


 54%|█████▍    | 50/93 [01:20<01:41,  2.37s/it]

task2_bart_openface/Sona0102_bart_cut.csv
Full: (13973, 138)
Train: (9361, 138) Test: (2306, 138) Validation (2306, 138)


 55%|█████▍    | 51/93 [01:22<01:38,  2.35s/it]

task2_bart_openface/0502_bart_cut.csv
Full: (29994, 138)
Train: (20095, 138) Test: (4949, 138) Validation (4950, 138)


 56%|█████▌    | 52/93 [01:27<02:12,  3.24s/it]

task2_bart_openface/0302_bart_cut.csv




Full: (9935, 138)
Train: (6656, 138) Test: (1639, 138) Validation (1640, 138)


 57%|█████▋    | 53/93 [01:30<01:58,  2.95s/it]

task2_bart_openface/0601_bart_cut.csv
Full: (17303, 138)
Train: (11593, 138) Test: (2855, 138) Validation (2855, 138)


 58%|█████▊    | 54/93 [01:33<01:57,  3.00s/it]

task2_bart_openface/1301_bart_cut.csv
Full: (12346, 138)
Train: (8271, 138) Test: (2037, 138) Validation (2038, 138)


 59%|█████▉    | 55/93 [01:35<01:45,  2.78s/it]

task2_bart_openface/Sona0302_bart_cut.csv
Full: (25085, 138)
Train: (16806, 138) Test: (4139, 138) Validation (4140, 138)


 60%|██████    | 56/93 [01:39<01:51,  3.01s/it]

task2_bart_openface/0401_bart_cut.csv
Full: (20110, 138)
Train: (13473, 138) Test: (3318, 138) Validation (3319, 138)


 61%|██████▏   | 57/93 [01:42<01:53,  3.14s/it]

task2_bart_openface/1701_Bart_Cut .csv
Full: (17671, 138)
Train: (11839, 138) Test: (2916, 138) Validation (2916, 138)


 62%|██████▏   | 58/93 [01:45<01:49,  3.13s/it]

task2_bart_openface/0101_bart_cut.csv
Full: (11665, 138)
Train: (7815, 138) Test: (1925, 138) Validation (1925, 138)


 63%|██████▎   | 59/93 [01:48<01:39,  2.94s/it]

task2_bart_openface/1302_bart_cut.csv
Full: (12961, 138)
Train: (8683, 138) Test: (2139, 138) Validation (2139, 138)


 65%|██████▍   | 60/93 [01:50<01:32,  2.80s/it]

task2_bart_openface/0202_bart_cut.csv
Full: (13116, 138)
Train: (8787, 138) Test: (2164, 138) Validation (2165, 138)


 66%|██████▌   | 61/93 [01:53<01:27,  2.75s/it]

task2_bart_openface/Sona0402_bart_cut.csv
Full: (7828, 138)
Train: (5244, 138) Test: (1292, 138) Validation (1292, 138)


 67%|██████▋   | 62/93 [01:55<01:18,  2.53s/it]

task2_bart_openface/0801_bart_cut.csv
Full: (9149, 138)
Train: (6129, 138) Test: (1510, 138) Validation (1510, 138)


 68%|██████▊   | 63/93 [01:57<01:12,  2.42s/it]

task2_bart_openface/0702_bart_cut.csv
Full: (13363, 138)
Train: (8953, 138) Test: (2205, 138) Validation (2205, 138)


 69%|██████▉   | 64/93 [02:00<01:12,  2.50s/it]

task2_bart_openface/1402_bart_cut.csv
Full: (17106, 138)
Train: (11461, 138) Test: (2822, 138) Validation (2823, 138)


 70%|██████▉   | 65/93 [02:03<01:13,  2.64s/it]

task2_bart_openface/0501_bart_cut.csv
Full: (29969, 138)
Train: (20079, 138) Test: (4945, 138) Validation (4945, 138)


 71%|███████   | 66/93 [02:07<01:28,  3.29s/it]

task2_bart_openface/Sona0202_bart_cut.csv
Full: (13904, 138)
Train: (9315, 138) Test: (2294, 138) Validation (2295, 138)


 72%|███████▏  | 67/93 [02:10<01:20,  3.11s/it]

task2_bart_openface/Sona0301_bart_cut.csv
Full: (25035, 138)
Train: (16773, 138) Test: (4131, 138) Validation (4131, 138)


 73%|███████▎  | 68/93 [02:14<01:23,  3.34s/it]

task3_jenga_openface/1301_jenga_cut.csv
Full: (9455, 138)
Train: (6334, 138) Test: (1560, 138) Validation (1561, 138)


 74%|███████▍  | 69/93 [02:16<01:12,  3.03s/it]

task3_jenga_openface/1302_jenga_cut.csv
Full: (9391, 138)
Train: (6291, 138) Test: (1550, 138) Validation (1550, 138)


 75%|███████▌  | 70/93 [02:19<01:04,  2.81s/it]

task3_jenga_openface/Sona0202_jenga_cut.csv
Full: (8981, 138)
Train: (6017, 138) Test: (1482, 138) Validation (1482, 138)


 76%|███████▋  | 71/93 [02:21<00:58,  2.64s/it]

task3_jenga_openface/Sona_jenga_0301.csv
Full: (9043, 138)
Train: (6058, 138) Test: (1492, 138) Validation (1493, 138)


 77%|███████▋  | 72/93 [02:23<00:53,  2.54s/it]

task3_jenga_openface/1702_Jenga_Cut .csv
Full: (7627, 138)
Train: (5110, 138) Test: (1258, 138) Validation (1259, 138)


 78%|███████▊  | 73/93 [02:25<00:48,  2.42s/it]

task3_jenga_openface/0202_Jenga_cut.csv
Full: (8662, 138)
Train: (5803, 138) Test: (1429, 138) Validation (1430, 138)


 80%|███████▉  | 74/93 [02:27<00:44,  2.36s/it]

task3_jenga_openface/1801_Jenga Cut .csv
Full: (10984, 138)
Train: (7359, 138) Test: (1812, 138) Validation (1813, 138)


 81%|████████  | 75/93 [02:30<00:43,  2.41s/it]

task3_jenga_openface/0402_Jenga_cut.csv
Full: (9816, 138)
Train: (6576, 138) Test: (1620, 138) Validation (1620, 138)


 82%|████████▏ | 76/93 [02:32<00:40,  2.41s/it]

task3_jenga_openface/Sona0302_jenga_cut.csv
Full: (9019, 138)
Train: (6042, 138) Test: (1488, 138) Validation (1489, 138)


 83%|████████▎ | 77/93 [02:35<00:38,  2.39s/it]

task3_jenga_openface/Sona0102_jenga_cut.csv
Full: (7790, 138)
Train: (5219, 138) Test: (1285, 138) Validation (1286, 138)


 84%|████████▍ | 78/93 [02:37<00:35,  2.34s/it]

task3_jenga_openface/1701_Jenga_Cut .csv
Full: (7904, 138)
Train: (5295, 138) Test: (1304, 138) Validation (1305, 138)


 85%|████████▍ | 79/93 [02:39<00:32,  2.34s/it]

task3_jenga_openface/Sona0201_jenga_cut.csv
Full: (8814, 138)
Train: (5905, 138) Test: (1454, 138) Validation (1455, 138)


 86%|████████▌ | 80/93 [02:42<00:30,  2.38s/it]

task3_jenga_openface/0401_Jenga_cut.csv
Full: (9411, 138)
Train: (6305, 138) Test: (1553, 138) Validation (1553, 138)


 87%|████████▋ | 81/93 [02:44<00:28,  2.42s/it]

task3_jenga_openface/0501_Jenga_cut.csv
Full: (9179, 138)
Train: (6149, 138) Test: (1515, 138) Validation (1515, 138)


 88%|████████▊ | 82/93 [02:47<00:26,  2.44s/it]

task3_jenga_openface/0201_Jenga_cut.csv
Full: (8636, 138)
Train: (5786, 138) Test: (1425, 138) Validation (1425, 138)


 89%|████████▉ | 83/93 [02:49<00:24,  2.44s/it]

task3_jenga_openface/1201_jenga_cut.csv
Full: (9290, 138)
Train: (6224, 138) Test: (1533, 138) Validation (1533, 138)


 90%|█████████ | 84/93 [02:52<00:22,  2.45s/it]

task3_jenga_openface/1402_jenga_cut.csv
Full: (9072, 138)
Train: (6078, 138) Test: (1497, 138) Validation (1497, 138)


 91%|█████████▏| 85/93 [02:54<00:19,  2.49s/it]

task3_jenga_openface/1502_jenga_cut.csv
Full: (9143, 138)
Train: (6125, 138) Test: (1509, 138) Validation (1509, 138)


 92%|█████████▏| 86/93 [02:57<00:17,  2.50s/it]

task3_jenga_openface/1501_jenga_cut.csv
Full: (9119, 138)
Train: (6109, 138) Test: (1505, 138) Validation (1505, 138)


 94%|█████████▎| 87/93 [02:59<00:15,  2.50s/it]

task3_jenga_openface/0302_Jenga_cut.csv
Full: (9629, 138)
Train: (6451, 138) Test: (1589, 138) Validation (1589, 138)


 95%|█████████▍| 88/93 [03:02<00:12,  2.51s/it]

task3_jenga_openface/1202_jenga_cut.csv
Full: (9282, 138)
Train: (6218, 138) Test: (1532, 138) Validation (1532, 138)


 96%|█████████▌| 89/93 [03:05<00:10,  2.56s/it]

task3_jenga_openface/0101_Jenga_cut.csv
Full: (8983, 138)
Train: (6018, 138) Test: (1482, 138) Validation (1483, 138)


 97%|█████████▋| 90/93 [03:07<00:07,  2.56s/it]

task3_jenga_openface/Sona0101_jenga_cut.csv
Full: (7591, 138)
Train: (5085, 138) Test: (1253, 138) Validation (1253, 138)


 98%|█████████▊| 91/93 [03:10<00:05,  2.54s/it]

task3_jenga_openface/1401_jenga_cut.csv
Full: (9065, 138)
Train: (6073, 138) Test: (1496, 138) Validation (1496, 138)


 99%|█████████▉| 92/93 [03:12<00:02,  2.56s/it]

task3_jenga_openface/0301_Jenga_cut.csv
Full: (9610, 138)
Train: (6438, 138) Test: (1586, 138) Validation (1586, 138)


100%|██████████| 93/93 [03:15<00:00,  2.10s/it]




FINAL
Train: (706727, 138) Test: (174058, 138) Validation (174104, 138)


In [6]:
Train.head()

Unnamed: 0,filename,frame,x_0,x_1,x_2,x_3,x_4,x_5,x_6,x_7,...,y_58,y_59,y_60,y_61,y_62,y_63,y_64,y_65,y_66,y_67
0,task3_jenga_openface/1801_Jenga Cut .csv,140.0,739.5,714.5,696.2,691.5,709.9,749.4,806.7,876.1,...,1140.2,1120.9,1057.3,1082.2,1080.2,1073.8,1039.4,1094.7,1102.8,1106.7
1,task2_bart_openface/1201_bart_cut.csv,14904.0,866.0,863.5,868.5,881.1,894.8,916.0,938.3,964.9,...,756.5,747.1,729.3,731.8,734.6,734.9,731.7,737.1,736.4,733.4
2,task3_jenga_openface/1801_Jenga Cut .csv,376.0,1344.7,1334.4,1323.0,1310.8,1296.2,1278.9,1260.4,1241.8,...,697.3,694.5,688.0,696.5,695.7,692.7,676.9,692.3,695.4,696.0
3,task3_jenga_openface/1401_jenga_cut.csv,1669.0,930.2,934.6,943.5,956.5,971.9,990.7,1008.9,1027.9,...,658.6,652.1,641.9,646.5,647.6,645.8,635.5,646.4,647.7,646.1
4,task2_bart_openface/0202_bart_cut.csv,3423.0,903.1,906.9,915.2,927.1,941.2,960.6,981.9,1006.1,...,735.8,733.5,723.4,721.5,719.8,715.0,704.9,715.0,719.5,721.2


In [7]:
Test.head()

Unnamed: 0,filename,frame,x_0,x_1,x_2,x_3,x_4,x_5,x_6,x_7,...,y_58,y_59,y_60,y_61,y_62,y_63,y_64,y_65,y_66,y_67
0,task2_bart_openface/1301_bart_cut.csv,9199.0,764.8,764.4,768.1,776.2,788.2,807.6,827.8,852.4,...,474.2,463.5,439.3,437.0,438.7,437.0,438.0,456.7,458.4,456.7
1,task3_jenga_openface/0301_Jenga_cut.csv,7285.0,784.4,784.0,791.9,803.2,813.9,826.6,842.0,862.7,...,959.3,956.4,947.9,940.3,940.3,939.7,947.3,940.3,940.8,939.9
2,task2_bart_openface/0302_bart_cut.csv,3323.0,804.9,800.1,798.9,799.9,803.2,811.6,823.4,839.9,...,752.9,746.7,739.8,741.0,744.5,745.2,750.5,743.7,742.6,739.2
3,task1_sandwich_openface/0201_sandwich_cut.csv,1938.0,918.6,921.9,930.0,941.7,953.2,965.5,975.7,986.3,...,402.6,399.4,393.2,391.2,392.4,392.3,390.0,392.6,392.5,390.9
4,task2_bart_openface/0502_bart_cut.csv,6892.0,671.2,665.9,666.7,674.7,687.2,708.6,734.8,766.5,...,752.2,742.9,722.3,723.3,725.6,724.7,727.5,726.7,726.8,723.8


In [8]:
Valid.head()

Unnamed: 0,filename,frame,x_0,x_1,x_2,x_3,x_4,x_5,x_6,x_7,...,y_58,y_59,y_60,y_61,y_62,y_63,y_64,y_65,y_66,y_67
0,task1_sandwich_openface/0501_sandwich_cut.csv,7834.0,585.4,584.7,591.3,606.6,618.5,628.0,634.5,644.8,...,870.3,860.0,841.5,846.6,849.1,848.0,844.1,854.7,855.1,851.6
1,task2_bart_openface/0701_bart_cut.csv,857.0,715.5,701.2,697.3,699.5,700.4,709.9,733.2,764.8,...,860.8,847.4,820.8,821.7,829.6,832.3,841.4,833.0,832.6,824.6
2,task2_bart_openface/0401_bart_cut.csv,12047.0,716.3,715.0,716.3,720.9,728.1,738.4,749.7,762.1,...,608.5,601.2,587.9,591.3,593.7,592.8,590.0,598.9,599.9,597.2
3,task3_jenga_openface/0501_Jenga_cut.csv,8368.0,740.2,766.2,794.1,821.6,847.7,872.1,892.2,909.0,...,1272.9,1262.8,1242.7,1259.2,1259.7,1255.3,1231.6,1259.0,1263.7,1263.1
4,task2_bart_openface/Sona0201_bart_cut.csv,6171.0,710.3,708.8,714.1,722.3,730.7,742.0,756.7,778.7,...,671.5,667.0,654.1,650.4,651.0,649.1,651.9,651.8,653.1,651.7


In [9]:
save_path = './data/tmp_analysis/{0}'.format(task_name)
save_train_path = save_path + '_train.csv'
save_test_path = save_path + '_test.csv'
save_valid_path = save_path + '_valid.csv'


Train.to_csv(save_train_path)
Test.to_csv(save_test_path)
Valid.to_csv(save_valid_path)

# Task 1 Only

In [10]:
task_name = 'task1_sandwich_openface'

In [11]:
model_path = './models/{0}/'.format(task_name)

In [12]:
import os
import numpy as np
import pandas as pd

import warnings
from tqdm import tqdm_notebook as tqdm

path = './data/openface/{0}'.format(task_name)

files = [f for f in os.listdir(path) if os.path.isfile(os.path.join(path,f))]
print('Num Files:',len(files))

Num Files: 32


In [13]:
from sklearn.model_selection import train_test_split

indicies = ['frame'] + ['x_{0}'.format(i) for i in range(0,68)] + ['y_{0}'.format(i) for i in range(0,68)]

flag = True
Train = None
Test = None
Valid = None

for f in tqdm(list(files)):
    
    # Load CSV
    print(f)
    df = pd.read_csv(
            os.path.join(path,f), 
            error_bad_lines=False, 
            warn_bad_lines=True)
    
    face_lmk = df[indicies]
    
    # Filter NaN rows
    nans = df[df[indicies].isnull().any(axis=1)]
    if not nans.empty:
        dropList = nans.index.tolist()
        for r in dropList:
            warnings.warn('Dropping row due to NaN, {0}'.format(r))
        face_lmk.drop(dropList, inplace=True)
        
    face_lmk.insert(0,'filename',[f for i in range(0,face_lmk.shape[0])],True)
        
    print('Full:',face_lmk.shape)
    
    # Create Training, Test, Validation subsets
    train, tmp = train_test_split(face_lmk, test_size=0.33)
    test, val = train_test_split(tmp, test_size=0.5)
    print('Train:',train.shape,'Test:',test.shape,'Validation',val.shape)
    
    if flag:
        flag = False
        Train = train
        Test = test
        Valid = val
    else:
        Train = Train.append(train)
        Test = Test.append(test)
        Valid = Valid.append(val)

# Final Dataset (shuffled)
Train = Train.sample(frac=1).reset_index(drop=True)
Test = Test.sample(frac=1).reset_index(drop=True)
Valid = Valid.sample(frac=1).reset_index(drop=True)

print('\n\nFINAL')
print('Train:',Train.shape,'Test:',Test.shape,'Validation',Valid.shape)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  # Remove the CWD from sys.path while we load stuff.


  0%|          | 0/32 [00:00<?, ?it/s]

1501_sandwich_cut.csv
Full: (7224, 138)
Train: (4840, 138) Test: (1192, 138) Validation (1192, 138)
0501_sandwich_cut.csv
Full: (9426, 138)
Train: (6315, 138) Test: (1555, 138) Validation (1556, 138)
0702_sandwich_cut.csv
Full: (8501, 138)
Train: (5695, 138) Test: (1403, 138) Validation (1403, 138)
1101_sandwich_cut.csv
Full: (10804, 138)
Train: (7238, 138) Test: (1783, 138) Validation (1783, 138)
Sona0402_sandwich_cut.csv
Full: (6001, 138)
Train: (4020, 138) Test: (990, 138) Validation (991, 138)
1702_Sandwich_Cut.csv
Full: (6674, 138)
Train: (4471, 138) Test: (1101, 138) Validation (1102, 138)
0701_sandwich_cut.csv
Full: (8250, 138)
Train: (5527, 138) Test: (1361, 138) Validation (1362, 138)
1301_sandwich_cut.csv


b'Skipping line 4053: expected 714 fields, saw 1233\nSkipping line 4064: expected 714 fields, saw 722\n'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


Full: (6570, 138)
Train: (4401, 138) Test: (1084, 138) Validation (1085, 138)
0402_sandwich_cut.csv
Full: (4974, 138)
Train: (3332, 138) Test: (821, 138) Validation (821, 138)
1502_sandwich_cut.csv
Full: (7219, 138)
Train: (4836, 138) Test: (1191, 138) Validation (1192, 138)
1402_sandwich_cut.csv
Full: (7518, 138)
Train: (5037, 138) Test: (1240, 138) Validation (1241, 138)
1001_sandwich_cut.csv
Full: (8028, 138)
Train: (5378, 138) Test: (1325, 138) Validation (1325, 138)
0502_sandwich_cut.csv
Full: (9457, 138)
Train: (6336, 138) Test: (1560, 138) Validation (1561, 138)
Sona0202_sandwich_cut.csv
Full: (6405, 138)
Train: (4291, 138) Test: (1057, 138) Validation (1057, 138)
0901_sandwich_cut.csv
Full: (13190, 138)
Train: (8837, 138) Test: (2176, 138) Validation (2177, 138)
Sona0401_sandwich_cut.csv
Full: (5606, 138)
Train: (3756, 138) Test: (925, 138) Validation (925, 138)
0101_sandwich_cut.csv
Full: (5732, 138)
Train: (3840, 138) Test: (946, 138) Validation (946, 138)
sona0101_sandwich_c

In [14]:
Train.head()

Unnamed: 0,filename,frame,x_0,x_1,x_2,x_3,x_4,x_5,x_6,x_7,...,y_58,y_59,y_60,y_61,y_62,y_63,y_64,y_65,y_66,y_67
0,0701_sandwich_cut.csv,863.0,828.4,805.4,791.8,782.1,783.8,797.9,808.2,825.1,...,870.7,846.0,811.5,839.1,851.6,859.1,873.5,863.7,856.1,843.9
1,0101_sandwich_cut.csv,201.0,722.1,722.1,726.9,735.5,745.0,757.4,772.2,789.5,...,332.7,330.4,319.5,310.3,309.1,306.8,308.4,316.6,318.9,319.7
2,0802_sandwich_cut.csv,1276.0,732.7,725.7,722.4,723.5,731.3,747.3,766.6,791.8,...,516.3,507.0,488.8,498.8,503.0,503.8,503.6,504.7,504.1,499.9
3,1701_Sandwich_Cut.csv,5531.0,271.4,265.9,267.7,283.1,298.5,317.2,336.4,363.0,...,803.7,792.1,780.5,777.8,777.7,771.8,755.9,775.9,780.7,778.6
4,0402_sandwich_cut.csv,2228.0,518.1,515.4,517.9,526.8,536.9,547.2,556.0,565.9,...,737.6,732.0,721.5,720.4,721.0,719.3,714.2,723.6,724.4,722.7


In [15]:
Test.head()

Unnamed: 0,filename,frame,x_0,x_1,x_2,x_3,x_4,x_5,x_6,x_7,...,y_58,y_59,y_60,y_61,y_62,y_63,y_64,y_65,y_66,y_67
0,Sona0401_sandwich_cut.csv,2392.0,711.6,715.7,721.6,731.7,745.2,761.0,778.4,795.4,...,512.4,499.8,478.4,493.4,497.5,495.6,483.8,500.0,501.9,497.3
1,0402_sandwich_cut.csv,2527.0,551.7,547.8,550.7,562.6,576.3,588.9,596.7,606.4,...,788.3,780.6,768.5,773.5,775.1,774.1,764.5,778.0,777.7,775.2
2,1101_sandwich_cut.csv,10581.0,590.2,628.2,664.4,697.6,727.7,760.5,789.2,816.4,...,919.8,919.4,909.8,901.6,897.0,883.9,873.0,887.0,901.8,905.4
3,Sona0401_sandwich_cut.csv,1432.0,677.5,682.0,688.3,698.2,710.9,726.9,745.5,762.3,...,519.4,508.3,489.4,498.6,502.5,499.8,489.5,503.9,506.5,501.8
4,0102_sandwich_cut.csv,31.0,926.5,929.3,934.9,942.9,952.4,965.4,980.4,996.8,...,632.5,631.8,625.3,621.7,620.6,617.8,612.8,619.4,622.0,623.1


In [16]:
Valid.head()

Unnamed: 0,filename,frame,x_0,x_1,x_2,x_3,x_4,x_5,x_6,x_7,...,y_58,y_59,y_60,y_61,y_62,y_63,y_64,y_65,y_66,y_67
0,Sona0302_sandwich_cut.csv,6249.0,792.2,789.9,791.9,799.6,811.4,825.8,839.9,854.4,...,729.9,722.3,711.6,711.5,714.9,715.3,716.5,718.1,716.7,711.8
1,0602_sandwich_cut.csv,7743.0,432.2,441.1,458.6,485.1,510.1,536.8,559.2,588.8,...,758.7,754.2,744.1,745.7,748.4,750.5,739.9,752.6,749.2,745.3
2,0301_sandwich_cut.csv,257.0,893.8,889.7,890.9,898.3,907.7,924.3,948.3,979.5,...,944.0,941.8,932.1,926.5,926.4,925.4,927.8,924.2,925.7,925.3
3,0502_sandwich_cut.csv,6180.0,416.8,414.6,416.7,431.4,451.4,475.3,503.9,534.3,...,766.7,756.8,733.5,736.6,738.0,735.7,722.9,740.8,741.7,738.7
4,1702_Sandwich_Cut.csv,5992.0,628.7,629.4,636.9,649.3,670.0,695.9,716.1,738.1,...,803.5,784.1,758.8,781.9,791.0,792.6,787.0,794.3,791.8,782.1


In [17]:
save_path = './data/tmp_analysis/{0}'.format(task_name)
save_train_path = save_path + '_train.csv'
save_test_path = save_path + '_test.csv'
save_valid_path = save_path + '_valid.csv'


Train.to_csv(save_train_path)
Test.to_csv(save_test_path)
Valid.to_csv(save_valid_path)

# Task 2 Only

In [18]:
task_name = 'task2_bart_openface'

In [19]:
model_path = './models/{0}/'.format(task_name)

In [20]:
import os
import numpy as np
import pandas as pd

import warnings
from tqdm import tqdm_notebook as tqdm

path = './data/openface/{0}'.format(task_name)

files = [f for f in os.listdir(path) if os.path.isfile(os.path.join(path,f))]
print('Num Files:',len(files))

Num Files: 36


In [21]:
from sklearn.model_selection import train_test_split

indicies = ['frame'] + ['x_{0}'.format(i) for i in range(0,68)] + ['y_{0}'.format(i) for i in range(0,68)]

flag = True
Train = None
Test = None
Valid = None

for f in tqdm(list(files)):
    
    # Load CSV
    print(f)
    df = pd.read_csv(
            os.path.join(path,f), 
            error_bad_lines=False, 
            warn_bad_lines=True)
    
    face_lmk = df[indicies]
    
    # Filter NaN rows
    nans = df[df[indicies].isnull().any(axis=1)]
    if not nans.empty:
        dropList = nans.index.tolist()
        for r in dropList:
            warnings.warn('Dropping row due to NaN, {0}'.format(r))
        face_lmk.drop(dropList, inplace=True)
        
    face_lmk.insert(0,'filename',[f for i in range(0,face_lmk.shape[0])],True)
        
    print('Full:',face_lmk.shape)
    
    # Create Training, Test, Validation subsets
    train, tmp = train_test_split(face_lmk, test_size=0.33)
    test, val = train_test_split(tmp, test_size=0.5)
    print('Train:',train.shape,'Test:',test.shape,'Validation',val.shape)
    
    if flag:
        flag = False
        Train = train
        Test = test
        Valid = val
    else:
        Train = Train.append(train)
        Test = Test.append(test)
        Valid = Valid.append(val)

# Final Dataset (shuffled)
Train = Train.sample(frac=1).reset_index(drop=True)
Test = Test.sample(frac=1).reset_index(drop=True)
Valid = Valid.sample(frac=1).reset_index(drop=True)

print('\n\nFINAL')
print('Train:',Train.shape,'Test:',Test.shape,'Validation',Valid.shape)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  # Remove the CWD from sys.path while we load stuff.


  0%|          | 0/36 [00:00<?, ?it/s]

Sona0201_bart_cut.csv
Full: (13937, 138)
Train: (9337, 138) Test: (2300, 138) Validation (2300, 138)
0201_bart_cut.csv
Full: (13184, 138)
Train: (8833, 138) Test: (2175, 138) Validation (2176, 138)
1101_bart_cut.csv
Full: (14445, 138)
Train: (9678, 138) Test: (2383, 138) Validation (2384, 138)
Sona0101_bart_cut.csv
Full: (13946, 138)
Train: (9343, 138) Test: (2301, 138) Validation (2302, 138)
1102_bart_cut.csv
Full: (14637, 138)
Train: (9806, 138) Test: (2415, 138) Validation (2416, 138)
0701_bart_cut.csv
Full: (12787, 138)
Train: (8567, 138) Test: (2110, 138) Validation (2110, 138)
1401_bart_cut.csv
Full: (17045, 138)
Train: (11420, 138) Test: (2812, 138) Validation (2813, 138)
1201_bart_cut.csv
Full: (17923, 138)
Train: (12008, 138) Test: (2957, 138) Validation (2958, 138)
0602_bart_cut.csv
Full: (16976, 138)
Train: (11373, 138) Test: (2801, 138) Validation (2802, 138)
0902_bart_cut.csv
Full: (17520, 138)
Train: (11738, 138) Test: (2891, 138) Validation (2891, 138)
Sona0401_bart_cut.

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


Full: (9935, 138)
Train: (6656, 138) Test: (1639, 138) Validation (1640, 138)
0601_bart_cut.csv
Full: (17303, 138)
Train: (11593, 138) Test: (2855, 138) Validation (2855, 138)
1301_bart_cut.csv
Full: (12346, 138)
Train: (8271, 138) Test: (2037, 138) Validation (2038, 138)
Sona0302_bart_cut.csv
Full: (25085, 138)
Train: (16806, 138) Test: (4139, 138) Validation (4140, 138)
0401_bart_cut.csv
Full: (20110, 138)
Train: (13473, 138) Test: (3318, 138) Validation (3319, 138)
1701_Bart_Cut .csv
Full: (17671, 138)
Train: (11839, 138) Test: (2916, 138) Validation (2916, 138)
0101_bart_cut.csv
Full: (11665, 138)
Train: (7815, 138) Test: (1925, 138) Validation (1925, 138)
1302_bart_cut.csv
Full: (12961, 138)
Train: (8683, 138) Test: (2139, 138) Validation (2139, 138)
0202_bart_cut.csv
Full: (13116, 138)
Train: (8787, 138) Test: (2164, 138) Validation (2165, 138)
Sona0402_bart_cut.csv
Full: (7828, 138)
Train: (5244, 138) Test: (1292, 138) Validation (1292, 138)
0801_bart_cut.csv
Full: (9149, 138)
T

In [22]:
Train.head()

Unnamed: 0,filename,frame,x_0,x_1,x_2,x_3,x_4,x_5,x_6,x_7,...,y_58,y_59,y_60,y_61,y_62,y_63,y_64,y_65,y_66,y_67
0,0401_bart_cut.csv,2830,754.8,756.5,761.1,768.5,778.6,790.7,801.5,812.6,...,610.8,604.7,594.6,598.4,600.7,599.4,594.3,600.0,601.1,598.5
1,Sona0202_bart_cut.csv,1747,702.1,697.8,696.5,699.2,704.1,713.1,725.0,739.1,...,738.4,731.6,720.4,725.7,728.0,728.3,730.0,728.3,727.9,725.6
2,0502_bart_cut.csv,18082,745.2,743.3,746.8,756.6,771.4,795.1,823.3,854.0,...,767.8,757.7,736.1,737.4,739.3,737.7,735.0,739.4,740.4,737.9
3,1702_Bart_Cut.csv,4712,434.8,448.7,471.8,504.1,535.5,571.2,603.2,639.7,...,735.2,745.0,738.7,710.4,701.8,689.9,669.2,690.4,701.6,709.8
4,Sona0302_bart_cut.csv,19673,822.8,817.9,819.3,830.0,841.4,858.3,874.7,899.1,...,748.8,740.7,728.9,727.5,730.0,728.2,726.7,732.3,732.5,728.0


In [23]:
Test.head()

Unnamed: 0,filename,frame,x_0,x_1,x_2,x_3,x_4,x_5,x_6,x_7,...,y_58,y_59,y_60,y_61,y_62,y_63,y_64,y_65,y_66,y_67
0,0501_bart_cut.csv,3885,784.1,784.9,793.0,814.4,831.1,848.6,871.0,906.1,...,883.7,877.3,858.6,856.5,856.1,853.5,845.5,857.9,859.5,857.6
1,1101_bart_cut.csv,13959,1107.0,1118.1,1131.2,1145.1,1160.4,1182.5,1208.3,1236.2,...,935.0,926.4,910.5,918.1,921.1,917.0,903.1,914.1,918.1,914.9
2,1402_bart_cut.csv,3436,760.3,758.2,761.5,770.3,779.7,789.7,800.4,811.3,...,465.5,462.0,452.4,449.9,449.8,446.7,441.5,448.9,451.5,451.2
3,Sona0202_bart_cut.csv,4670,725.2,722.3,723.0,728.4,737.5,748.9,763.7,781.1,...,848.7,842.7,831.4,836.3,837.4,836.3,834.0,836.3,836.8,835.4
4,0202_bart_cut.csv,4693,892.6,898.6,910.0,925.4,942.5,961.9,980.3,1000.7,...,766.3,763.4,754.0,754.0,753.2,750.1,739.6,748.9,751.7,752.4


In [24]:
Valid.head()

Unnamed: 0,filename,frame,x_0,x_1,x_2,x_3,x_4,x_5,x_6,x_7,...,y_58,y_59,y_60,y_61,y_62,y_63,y_64,y_65,y_66,y_67
0,Sona0301_bart_cut.csv,24518,549.2,551.2,559.1,570.3,586.8,610.0,634.8,663.1,...,769.1,762.9,747.3,742.9,743.9,741.7,744.0,743.3,745.1,743.4
1,1702_Bart_Cut.csv,5822,587.0,599.3,618.1,641.2,669.4,706.1,744.3,782.5,...,734.2,726.0,709.1,709.6,709.4,702.7,680.2,703.1,708.5,707.9
2,0502_bart_cut.csv,4102,647.3,643.2,644.8,653.5,667.5,687.1,711.2,738.7,...,748.4,739.9,718.9,718.8,720.6,719.1,718.8,721.2,722.1,719.9
3,0102_bart_cut.csv,3934,1062.4,1061.1,1063.5,1070.3,1079.4,1091.0,1102.1,1114.5,...,726.7,721.6,712.2,715.6,717.2,717.0,713.8,718.1,718.1,716.3
4,0402_bart_cut.csv,10188,669.1,669.6,674.4,684.4,696.5,710.8,724.1,739.2,...,767.8,762.4,750.5,751.0,751.3,749.3,740.8,752.7,754.1,753.3


In [25]:
save_path = './data/tmp_analysis/{0}'.format(task_name)
save_train_path = save_path + '_train.csv'
save_test_path = save_path + '_test.csv'
save_valid_path = save_path + '_valid.csv'


Train.to_csv(save_train_path)
Test.to_csv(save_test_path)
Valid.to_csv(save_valid_path)

# Task 3 Only

In [26]:
task_name = 'task3_jenga_openface'

In [27]:
model_path = './models/{0}/'.format(task_name)

In [28]:
import os
import numpy as np
import pandas as pd

import warnings
from tqdm import tqdm_notebook as tqdm

path = './data/openface/{0}'.format(task_name)

files = [f for f in os.listdir(path) if os.path.isfile(os.path.join(path,f))]
print('Num Files:',len(files))

Num Files: 25


In [29]:
from sklearn.model_selection import train_test_split

indicies = ['frame'] + ['x_{0}'.format(i) for i in range(0,68)] + ['y_{0}'.format(i) for i in range(0,68)]

flag = True
Train = None
Test = None
Valid = None

for f in tqdm(list(files)):
    
    # Load CSV
    print(f)
    df = pd.read_csv(
            os.path.join(path,f), 
            error_bad_lines=False, 
            warn_bad_lines=True)
    
    face_lmk = df[indicies]
    
    # Filter NaN rows
    nans = df[df[indicies].isnull().any(axis=1)]
    if not nans.empty:
        dropList = nans.index.tolist()
        for r in dropList:
            warnings.warn('Dropping row due to NaN, {0}'.format(r))
        face_lmk.drop(dropList, inplace=True)
        
    face_lmk.insert(0,'filename',[f for i in range(0,face_lmk.shape[0])],True)
        
    print('Full:',face_lmk.shape)
    
    # Create Training, Test, Validation subsets
    train, tmp = train_test_split(face_lmk, test_size=0.33)
    test, val = train_test_split(tmp, test_size=0.5)
    print('Train:',train.shape,'Test:',test.shape,'Validation',val.shape)
    
    if flag:
        flag = False
        Train = train
        Test = test
        Valid = val
    else:
        Train = Train.append(train)
        Test = Test.append(test)
        Valid = Valid.append(val)

# Final Dataset (shuffled)
Train = Train.sample(frac=1).reset_index(drop=True)
Test = Test.sample(frac=1).reset_index(drop=True)
Valid = Valid.sample(frac=1).reset_index(drop=True)

print('\n\nFINAL')
print('Train:',Train.shape,'Test:',Test.shape,'Validation',Valid.shape)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  # Remove the CWD from sys.path while we load stuff.


  0%|          | 0/25 [00:00<?, ?it/s]

1301_jenga_cut.csv
Full: (9455, 138)
Train: (6334, 138) Test: (1560, 138) Validation (1561, 138)
1302_jenga_cut.csv
Full: (9391, 138)
Train: (6291, 138) Test: (1550, 138) Validation (1550, 138)
Sona0202_jenga_cut.csv
Full: (8981, 138)
Train: (6017, 138) Test: (1482, 138) Validation (1482, 138)
Sona_jenga_0301.csv
Full: (9043, 138)
Train: (6058, 138) Test: (1492, 138) Validation (1493, 138)
1702_Jenga_Cut .csv
Full: (7627, 138)
Train: (5110, 138) Test: (1258, 138) Validation (1259, 138)
0202_Jenga_cut.csv
Full: (8662, 138)
Train: (5803, 138) Test: (1429, 138) Validation (1430, 138)
1801_Jenga Cut .csv
Full: (10984, 138)
Train: (7359, 138) Test: (1812, 138) Validation (1813, 138)
0402_Jenga_cut.csv
Full: (9816, 138)
Train: (6576, 138) Test: (1620, 138) Validation (1620, 138)
Sona0302_jenga_cut.csv
Full: (9019, 138)
Train: (6042, 138) Test: (1488, 138) Validation (1489, 138)
Sona0102_jenga_cut.csv
Full: (7790, 138)
Train: (5219, 138) Test: (1285, 138) Validation (1286, 138)
1701_Jenga_Cut

In [30]:
Train.head()

Unnamed: 0,filename,frame,x_0,x_1,x_2,x_3,x_4,x_5,x_6,x_7,...,y_58,y_59,y_60,y_61,y_62,y_63,y_64,y_65,y_66,y_67
0,1202_jenga_cut.csv,5005,804.0,856.7,911.9,964.6,1016.4,1071.5,1120.4,1164.9,...,1001.8,1016.5,1010.4,952.4,942.5,935.2,951.1,967.6,974.8,984.5
1,Sona0201_jenga_cut.csv,121,480.4,483.7,494.4,510.0,523.8,540.2,555.6,581.3,...,522.2,517.1,501.9,498.3,498.3,495.5,495.3,498.4,500.2,499.4
2,0401_Jenga_cut.csv,5679,770.8,770.1,771.9,778.8,789.2,801.6,815.1,829.9,...,822.0,816.7,804.8,808.9,811.4,811.0,806.6,812.7,812.8,809.3
3,1401_jenga_cut.csv,583,765.6,767.1,772.9,783.6,797.0,812.2,829.2,848.5,...,601.1,595.9,584.9,588.3,588.3,582.7,568.5,582.6,587.6,586.8
4,1202_jenga_cut.csv,4904,910.6,910.3,909.1,911.5,922.1,944.7,972.0,999.8,...,1105.7,1083.3,1041.0,1064.5,1068.9,1066.8,1054.8,1084.5,1088.1,1084.9


In [31]:
Test.head()

Unnamed: 0,filename,frame,x_0,x_1,x_2,x_3,x_4,x_5,x_6,x_7,...,y_58,y_59,y_60,y_61,y_62,y_63,y_64,y_65,y_66,y_67
0,Sona0302_jenga_cut.csv,6824,329.3,320.5,318.7,329.9,343.9,359.3,375.4,394.3,...,917.7,907.7,896.5,893.8,898.2,900.2,900.8,903.4,899.9,892.7
1,1201_jenga_cut.csv,5684,594.3,594.0,605.7,630.1,655.3,679.3,696.8,718.8,...,853.1,846.7,833.9,832.3,833.9,834.3,825.8,834.7,833.7,831.2
2,Sona0101_jenga_cut.csv,557,926.9,929.6,936.0,945.6,958.0,977.8,998.3,1024.0,...,787.1,776.5,761.2,765.3,768.8,769.2,766.7,769.1,768.7,765.1
3,0402_Jenga_cut.csv,852,621.4,616.5,617.2,626.8,640.6,657.9,673.6,689.0,...,796.4,783.7,763.7,776.7,778.8,775.9,755.5,782.5,784.2,781.5
4,Sona0201_jenga_cut.csv,3117,491.6,496.0,506.0,520.7,531.8,545.3,561.5,580.6,...,536.6,530.1,517.3,515.1,515.4,513.0,508.9,514.0,516.6,515.6


In [32]:
Valid.head()

Unnamed: 0,filename,frame,x_0,x_1,x_2,x_3,x_4,x_5,x_6,x_7,...,y_58,y_59,y_60,y_61,y_62,y_63,y_64,y_65,y_66,y_67
0,0402_Jenga_cut.csv,4491,649.8,644.6,643.1,650.0,660.4,673.6,686.8,699.8,...,815.1,797.0,773.4,793.6,797.9,798.1,782.4,805.5,804.0,798.9
1,1302_jenga_cut.csv,2789,945.7,940.7,938.6,939.7,942.2,947.6,953.8,961.0,...,603.7,596.6,586.1,591.2,594.2,595.7,600.1,597.9,596.5,593.4
2,Sona0302_jenga_cut.csv,1434,370.5,359.7,353.5,359.9,372.6,386.0,396.8,410.5,...,855.6,846.9,836.7,835.1,837.6,839.4,845.5,841.3,837.7,832.9
3,0402_Jenga_cut.csv,1069,589.4,589.9,594.4,607.4,625.0,646.2,667.8,693.8,...,881.4,871.6,854.2,867.8,869.5,869.2,848.8,873.7,873.0,870.3
4,0302_Jenga_cut.csv,3020,935.7,934.9,937.9,945.8,954.6,966.2,976.1,988.5,...,856.7,847.5,835.3,841.4,844.2,845.0,843.3,844.9,844.4,841.6


In [33]:
save_path = './data/tmp_analysis/{0}'.format(task_name)
save_train_path = save_path + '_train.csv'
save_test_path = save_path + '_test.csv'
save_valid_path = save_path + '_valid.csv'


Train.to_csv(save_train_path)
Test.to_csv(save_test_path)
Valid.to_csv(save_valid_path)