In [1]:
import os
import gc
import shutil
if os.getenv("COLAB_RELEASE_TAG"):
    # Colab
    print("RUNING ON COLAB")
    from google.colab import drive
    drive.mount('/content/drive')
    BASE_DIR = "/content/drive/MyDrive/GaggleSignLang/asl-signs"
    WORKING_DIR = "/content/asl-work"
    ARCHIVE_DIR = "/content/drive/MyDrive/GaggleSignLang"
    MODEL_DIR = "/content/drive/MyDrive/GaggleSignLang/models"
elif os.environ.get('KAGGLE_KERNEL_RUN_TYPE'):
    # KAGGLE
    print("RUNNING ON KAGGLE")
    BASE_DIR = "/kaggle/input/asl-signs"
    WORKING_DIR = "/kaggle/working"
    ARCHIVE_DIR = "/kaggle/working"
    MODEL_DIR  = "/kaggle/working"
else: 
    # Jupiter  MacOS
    print("RUNING JUPITER LOCAL")
    BASE_DIR = "/Users/johnhanratty/ASLtest/asl-signs"  #"/Users/johnhanratty/ASLtest/asl-signs"
    WORKING_DIR = "/Users/johnhanratty/ASLtest"
    ARCHIVE_DIR = "/Users/johnhanratty/ASLtest"
    MODEL_DIR = "/Users/johnhanratty/ASLtest/models"

import time

import json
from tqdm import tqdm
import numpy as np
import pandas as pd
import pickle
from random import seed, sample

import torch
import torch.nn as nn
import torch.nn.functional as F

from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import normalize

!pip install tsai --quiet
from tsai.all import *
from fastai.callback.tracker import EarlyStoppingCallback


import warnings
warnings.filterwarnings(action='ignore')

LANDMARK_FILES_DIR = f'{BASE_DIR}/train_landmark_files'
TRAIN_FILE = f"{BASE_DIR}/train.csv"

FRAMES_OUT = 32 # 16
PTS_IN_FRAME = 345
DIMC = [0,1,2]
DIMS = len(DIMC)
WORKERS = 0   # dataoader work var  0 for MAC, 4 for online


print('done')

RUNING ON COLAB
Mounted at /content/drive
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m298.9/298.9 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m42.0 MB/s[0m eta [36m0:00:00[0m
[?25hdone


# Voting Selection

In [13]:
# with open(f"{MODEL_DIR}/pred_tsai.pkl", 'rb') as f2:
#         plist_tsai = pickle.load(f2)

with open(f"{MODEL_DIR}/pred_Trans96.pkl", 'rb') as f2:
        plist_trans = pickle.load(f2)

with open(f"{MODEL_DIR}/pred_incept96.pkl", 'rb') as f2:
        plist_incept = pickle.load(f2)

with open(f"{MODEL_DIR}/pred_cnn32.pkl", 'rb') as f2:
        plist_cnn = pickle.load(f2)



#plist_cnn = plist_cnn.drop(["truth"], axis=1)

ens = pd.concat([plist_cnn, plist_trans["Trans"], plist_incept["incept"]], axis=1)

ens['voteI'] = np.where(ens['incept'] == ens['Trans'], ens['incept'],
              np.where(ens['incept'] == ens['cnn'], ens['cnn'], 
              np.where(ens['cnn'] == ens["Trans"], ens["Trans"], ens['incept'])))
ens['voteT'] = np.where(ens['incept'] == ens['Trans'], ens['incept'],
              np.where(ens['incept'] == ens['cnn'], ens['cnn'], 
              np.where(ens['cnn'] == ens["Trans"], ens["Trans"], ens['Trans'])))
ens['voteC'] = np.where(ens['incept'] == ens['Trans'], ens['incept'],
              np.where(ens['incept'] == ens['cnn'], ens['cnn'], 
              np.where(ens['cnn'] == ens["Trans"], ens["Trans"], ens['cnn'])))

ens = ens.apply(pd.to_numeric)
print(ens.head())


   truth  cnn  Trans  incept  voteI  voteT  voteC
0  206.0   78    247     206    206    247     78
1   20.0   96     84      84     84     84     84
2  178.0   76    223      41     41    223     76
3  114.0  114    145     114    114    114    114
4  221.0  221    221     158    221    221    221


In [14]:
ens[100:150]

Unnamed: 0,truth,cnn,Trans,incept,voteI,voteT,voteC
100,206.0,28,206,206,206,206,206
101,222.0,222,191,191,191,191,191
102,194.0,194,194,194,194,194,194
103,6.0,6,226,76,76,226,6
104,103.0,103,103,103,103,103,103
105,214.0,214,214,214,214,214,214
106,50.0,50,50,50,50,50,50
107,128.0,129,129,129,129,129,129
108,101.0,101,101,101,101,101,101
109,64.0,182,182,197,182,182,182


In [16]:

print("VOTE Incept Default",
np.mean(np.where(ens["truth"] == ens["voteI"], True, False)))
print("VOTE Trans Default",
np.mean(np.where(ens["truth"] == ens["voteT"], True, False)))
print("VOTE CNN Default",
np.mean(np.where(ens["truth"] == ens["voteC"], True, False)))

print("CNN",
np.mean(np.where(ens["truth"] == ens["cnn"], True, False)))
print("Incept",
np.mean(np.where(ens["truth"] == ens["incept"], True, False)))
print("Trans",
np.mean(np.where(ens["truth"] == ens["Trans"], True, False)))





VOTE Incept Default 0.643663561953147
VOTE Trans Default 0.6254586508608524
VOTE CNN Default 0.6872001128986734
CNN 0.7015241320914479
Incept 0.5988569009314141
Trans 0.4834180073384138


In [None]:
# 96 frames, 120 pca
# VOTE Incept Default 0.7441433813152696
# VOTE Trans Default 0.7497177533163986
# VOTE CNN Default 0.7700395145357042
# CNN 0.7015241320914479
# Incept 0.7441433813152696
# Trans 0.7497177533163986


# 64 TSAI, 32 CNN  TST
# VOTE Incept Default 0.7204346598927462
# VOTE Trans Default 0.7105560259666949
# VOTE CNN Default 0.7488004515946938
# CNN 0.6898814563928873
# Incept 0.7204346598927462
# Trans 0.7105560259666949



# 64 TSAI, 32 CNN  TSTPlus
# VOTE Incept Default 0.7212813999435507
# VOTE Trans Default 0.721916454981654
# VOTE CNN Default 0.7523990968106125
# CNN 0.6898814563928873
# Incept 0.7212813999435507
# Trans 0.721916454981654

# Highest Prob Selection

In [None]:
import pickle

with open(f"{MODEL_DIR}/prob_trans.pkl", 'rb') as f2:
        probt = pickle.load(f2)

with open(f"{MODEL_DIR}/prob_incept.pkl", 'rb') as f3:
       probi = pickle.load(f3)

with open(f"{MODEL_DIR}/prob_cnn32.pkl", 'rb') as f3:
       probc = pickle.load(f3)

if isinstance(probt, Tensor):
  probt = probt.numpy()
if isinstance(probi, Tensor):
  probi = probi.numpy()
if isinstance(probc, Tensor):
  probc = probc.numpy()
print("PROBT type", type(probt), "softmax:", np.mean(np.sum(probt, axis=1)))
print("PROBI type", type(probi), "softmax:", np.mean(np.sum(probi, axis=1)))
print("PROBC type", type(probc), "softmax:", np.mean(np.sum(probc, axis=1)))

m = nn.Softmax(dim=1)
probc = m(torch.tensor(probc)).numpy()


# To get truth
with open(f"{MODEL_DIR}/pred_tsai.pkl", 'rb') as f2:
        plist_tsai = pickle.load(f2)

ens = pd.DataFrame(plist_tsai['truth'])
ens['t'] = probt.argmax(axis=1)
ens['tp'] = probt.max(axis=1)
ens['i'] = probi.argmax(axis=1)
ens['ip'] = probi.max(axis=1)
ens['c'] = probc.argmax(axis=1)
ens['cp'] = probc.max(axis=1)

ens

eprob = np.column_stack([probt.max(axis=1), probi.max(axis=1), probc.max(axis=1)])
epred = np.column_stack([probt.argmax(axis=1), probi.argmax(axis=1), probc.argmax(axis=1)])
eout  = np.argmax(eprob, axis=1)
eprob = eprob[0:10]
epred = epred[0:10]
eout = eout[0:10]
print("EPRED", epred.shape)
print(epred)
print("EPROB", eprob.shape)
print(eprob)
print("EOUT", eout.shape, eout[0:10])

pp = 

print(epred[:, eout])

#plist_cnn = plist_cnn.drop(["truth"], axis=1)

#ens = pd.concat([plist_tsai, plist_cnn["cnn"]], axis=1)





PROBT type <class 'numpy.ndarray'> softmax: 1.0
PROBI type <class 'numpy.ndarray'> softmax: 1.0
PROBC type <class 'numpy.ndarray'> softmax: -3713.34
EPRED (10, 3)
[[247 247  78]
 [ 46  85  96]
 [191 178  76]
 [114 114 114]
 [221 221 221]
 [230 230 230]
 [ 25 122  25]
 [ 97 196 231]
 [125 220 249]
 [ 43 191 191]]
EPROB (10, 3)
[[0.741533   0.53766155 0.7230887 ]
 [0.33487386 0.46151057 0.6304178 ]
 [0.2704267  0.9697276  0.05071425]
 [0.97291416 0.99965155 0.9970029 ]
 [0.9873121  0.9999882  0.95460415]
 [0.63865566 0.94743997 0.9545088 ]
 [0.8941998  0.486318   0.7862482 ]
 [0.44970638 0.34417856 0.17610334]
 [0.20417285 0.20785488 0.18814285]
 [0.69940597 0.9106271  0.47976372]]
EOUT (10,) [0 2 1 1 1 2 0 0 1 1]
[[247  78 247 247 247  78 247 247 247 247]
 [ 46  96  85  85  85  96  46  46  85  85]
 [191  76 178 178 178  76 191 191 178 178]
 [114 114 114 114 114 114 114 114 114 114]
 [221 221 221 221 221 221 221 221 221 221]
 [230 230 230 230 230 230 230 230 230 230]
 [ 25  25 122 122 12

In [None]:
epred.shape


(14172, 3)

In [None]:
y_hat_incept = mod_incept(torch.tensor(Testx).long())
y_hat_trans = mod_trans(torch.tensor(Testx).long())

RuntimeError: ignored

In [None]:
# FIX TRUTH
preds['truth']=testy.astype(int)
preds.predi = preds.predi.astype(float).astype(int)
preds.predt = preds.predt.astype(float).astype(int)

sm = nn.Softmax(dim=1) 
prob_cnns = sm(torch.Tensor(prob_cnn)).numpy()
preds['predc'] = np.argmax(prob_cnns, axis=1)

preds["probi"] = prob_in.numpy().max(axis=1)
preds["probt"] = prob_tran.numpy().max(axis=1)
preds["probc"] = prob_cnns.max(axis=1)


In [None]:
preds.head()


Unnamed: 0,truth,nans,idx,predt,predi,predc,probi,probt,probc
80305,206,20,80305,247,206,78,0.627538,0.711359,0.384106
80306,20,42,80306,20,96,96,0.426852,0.784956,0.486097
80307,178,16,80307,26,178,200,0.641837,0.267397,0.072054
80308,114,50,80308,114,114,114,0.888171,0.965296,0.977423
80309,221,64,80309,221,221,221,0.954264,0.982313,0.877944


In [None]:
print("SINGLE MODEL ACCURACY")
print("Tran", np.mean(preds['truth'] == preds['predt']), 
      "In", np.mean(preds['truth'] == preds['predi']),
      "CNN",  np.mean(preds['truth'] == preds['predc']))


SINGLE MODEL ACCURACY
Tran 0.735182049110923 In 0.7398391193903472 CNN 0.6808495625176404


In [None]:
# Choose highest prob prediction from ALL models
def highest_prob(row):
  if (row['probi'] > row['probt']) & (row['probi'] > row['probc']):
    return row['predi']
  elif row['probt'] > row['probc']:
    return row['predt']
  else:
    return row['predc']

print("ALL MODELS")
preds["hpall"] = preds.apply(lambda row: highest_prob(row), axis=1)

print(np.mean(preds.truth==preds.hpall))

ALL MODELS
0.749435506632797


In [None]:
# Choose highest prob prediction from PROBI & PROBT
def highest_prob(row):
  if (row['probi'] > row['probt']):
    return row['predi']
  else:
    return row['predt']

preds["hpit"] = preds.apply(lambda row: highest_prob(row), axis=1)
print("I & T Models")
print(np.mean(preds.truth==preds.hpit))

I & T Models
0.7452018063787751


In [None]:
# Choose highest prob prediction from PROBI & PROBC
def highest_prob(row):
  if (row['probi'] > row['probc']):
    return row['predi']
  else:
    return row['predc']

preds["hpic"] = preds.apply(lambda row: highest_prob(row), axis=1)
print("i & c Models")
print(np.mean(preds.truth==preds.hpic))

i & c Models
0.7444961896697714


In [None]:
# Choose highest prob prediction from PROBT & PROBC
def highest_prob(row):
  if (row['probt'] > row['probc']):
    return row['predt']
  else:
    return row['predc']

preds["hptc"] = preds.apply(lambda row: highest_prob(row), axis=1)

print(np.mean(preds.truth==preds.hptc))

0.7353231724527237


In [None]:
preds['incep'] = preds.truth == preds.predi
preds['trans'] = preds.truth == preds.predt
preds['cnnag'] = preds.truth == preds.predc

In [None]:
preds.head()

Unnamed: 0,truth,nans,idx,predt,predi,predc,probi,probt,probc,hpall,hpit,hpic,hptc,incep,trans,cnnag
80305,206,20,80305,247,206,247,0.627538,0.711359,0.735845,247.0,247.0,247.0,247.0,True,False,False
80306,20,42,80306,20,96,96,0.426852,0.784956,0.357823,20.0,20.0,96.0,20.0,False,True,False
80307,178,16,80307,26,178,151,0.641837,0.267397,0.04281,178.0,178.0,178.0,26.0,True,False,False
80308,114,50,80308,114,114,114,0.888171,0.965296,0.973645,114.0,114.0,114.0,114.0,True,True,True
80309,221,64,80309,221,221,221,0.954264,0.982313,0.869086,221.0,221.0,221.0,221.0,True,True,True


In [None]:
fcnt = preds[preds.nans < 6].groupby('nans').agg(iacc = ("incep","mean"),
                                                 cnt = ("idx","count"))

fcnt

Unnamed: 0_level_0,iacc,cnt
nans,Unnamed: 1_level_1,Unnamed: 2_level_1
0,0.833333,6
1,0.75,8
2,0.863636,22
3,0.666667,21
4,0.689655,29
5,0.775,40


In [None]:
preds[preds.nans < 4].groupby('truth').agg(iacc = ("incep","mean"),
                                cnt = ("idx","count"))



Unnamed: 0_level_0,iacc,cnt
truth,Unnamed: 1_level_1,Unnamed: 2_level_1
9,1.0,1
19,1.0,1
22,1.0,1
37,1.0,1
38,1.0,1
44,1.0,1
48,1.0,1
51,1.0,1
62,1.0,2
69,1.0,1


In [None]:
# ENSEMBLE VOTING
print("2 out of 3",)