# The ensemble approach is to take the data and train each of the N classifiers on a different random subset of hand landmarks (NUM_SAMPLE landmarks per classifier, each contributing 6 feature columns).

In [90]:
# first get all X and y data from the all_points_folds
import os
import pickle, random 

import cv2
import numpy as np 

# Per-fold features and labels, one list entry per pickle file.
X, y = [], []

# os.listdir returns names in arbitrary order; sorting pins the fold order so
# that any later seeded shuffle yields the same split on every machine.
# NOTE: pickle.load can execute arbitrary code -- only load fold files that
# were produced by a trusted source.
for file in sorted(os.listdir("all_points_folds")):
    with open(f"all_points_folds/{file}", 'rb') as f:
        X_y = pickle.load(f)
    # Each pickle holds a pair: element 0 is the features, element 1 the labels.
    X.append(X_y[0])
    y.append(X_y[1])

In [91]:
import random 
def shuffle(X, y, seed=None, n_folds=5, sample_shape=(90, 126)):
    """Pool every fold, permute the samples, and split them back into equal folds.

    Parameters
    ----------
    X : sequence of arrays
        One feature array per fold; the arrays are concatenated along axis 0.
    y : sequence of arrays
        One label array per fold, aligned with ``X``.
    seed : int or None
        Seed passed to ``np.random.seed`` (this reseeds NumPy's global RNG).
        When ``None`` a seed in ``[0, 100)`` is drawn and printed so the run
        can be repeated.
    n_folds : int
        Number of equally sized folds to return. Defaults to 5.
    sample_shape : tuple of int
        Shape of one sample in the returned feature array. Defaults to
        ``(90, 126)``.

    Returns
    -------
    new_X : ndarray of shape ``(n_folds, samples_per_fold, *sample_shape)``
    new_y : ndarray of shape ``(n_folds, samples_per_fold)``

    Raises
    ------
    ValueError
        If features and labels disagree on the number of samples, or the
        samples cannot be divided evenly into ``n_folds`` folds.
    """
    if seed is None:
        seed = random.randrange(0, 100)
        print(f"using seed {seed}")
    np.random.seed(seed)

    new_X = np.concatenate(list(X))
    new_y = np.concatenate(list(y))

    n_samples = new_X.shape[0]
    if new_y.shape[0] != n_samples:
        raise ValueError(
            f"X has {n_samples} samples but y has {new_y.shape[0]} labels"
        )
    if n_folds <= 0 or n_samples % n_folds != 0:
        raise ValueError(
            f"cannot split {n_samples} samples into {n_folds} equal folds"
        )

    # The same permutation is applied to both arrays so labels stay aligned.
    order = np.random.permutation(n_samples)
    new_X = new_X[order].reshape(n_folds, -1, *sample_shape)
    new_y = new_y[order].reshape(n_folds, -1)
    return new_X, new_y
# Shuffle with a fixed seed; note this rebinds X and y from the loaded
# per-file lists to the stacked, re-folded arrays returned by shuffle().
X, y = shuffle(X, y, seed = 68)

In [92]:
# Draw the random landmark subsets, one per ensemble member.

NUM_ARRANGEMENTS = 50   # number of landmark subsets (one model is trained per subset)
NUM_SAMPLE = 3          # landmarks drawn, without replacement, for each subset
NUM_LANDMARKS = 21      # landmark indices are drawn from 0 .. NUM_LANDMARKS - 1
ARRANGEMENT_SEED = 0    # fixed seed so the same subsets are drawn on every run

# A dedicated generator keeps the draw reproducible without touching the
# state of the global `random` module.
_arrangement_rng = random.Random(ARRANGEMENT_SEED)

# NUM_ARRANGEMENTS lists, each holding NUM_SAMPLE distinct landmark indices.
hand_arrangements = [
    _arrangement_rng.sample(range(NUM_LANDMARKS), NUM_SAMPLE)
    for _ in range(NUM_ARRANGEMENTS)
]

def generate_sub_X(hand_arrangement, X):
    """Return only the feature columns that belong to the chosen landmarks.

    The 126 features of a frame are treated as six consecutive blocks of 21
    values (block starts 0, 21, 42, 63, 84, 105). For every landmark index in
    ``hand_arrangement`` the column at that offset is taken from each block,
    so each landmark contributes 6 columns.
    NOTE(review): presumably each block holds one coordinate of one hand for
    the 21 landmarks -- confirm against the feature-extraction code.

    Parameters
    ----------
    hand_arrangement : iterable of int
        Landmark offsets within a block.
    X : ndarray
        Feature array whose size is a multiple of ``90 * 126``; it is viewed
        as ``(n_samples, 90, 126)`` regardless of any leading fold axis.

    Returns
    -------
    ndarray of shape ``(n_samples, 90, 6 * len(hand_arrangement))``
        Columns are ordered landmark by landmark, and block by block within
        each landmark.
    """
    block_starts = (0, 21, 42, 63, 84, 105)
    columns = [
        start + landmark
        for landmark in hand_arrangement
        for start in block_starts
    ]
    # -1 lets the sample count follow the data instead of being pinned to 100.
    return X.reshape(-1, 90, 126)[:, :, columns]

In [93]:
# One reduced feature array per arrangement, in the same order as hand_arrangements.
Xs = [generate_sub_X(landmarks, X) for landmarks in hand_arrangements]

In [96]:
import tensorflow as tf 
def make_model():
    """Build and compile a fresh, untrained binary sequence classifier.

    Architecture: LSTM(64) returning only its final output, Dropout(0.3),
    then a single sigmoid unit. Compiled with binary cross-entropy, Adam at a
    learning rate of 0.0005, and accuracy / precision / recall metrics.

    Returns
    -------
    tf.keras.models.Sequential
        The compiled model.
    """
    stack = [
        tf.keras.layers.LSTM(64, return_sequences=False),
        tf.keras.layers.Dropout(0.3),
        tf.keras.layers.Dense(1, activation="sigmoid"),
    ]
    classifier = tf.keras.models.Sequential(stack)

    adam = tf.keras.optimizers.Adam(learning_rate=0.0005)
    tracked = ['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall()]
    classifier.compile(
        loss="binary_crossentropy",
        optimizer=adam,
        metrics=tracked,
    )

    return classifier

def train_models():
    """Train one model per landmark arrangement and record its validation accuracy.

    Reads the module-level ``hand_arrangements``, ``Xs``, ``y`` and
    ``NUM_SAMPLE``. Fold 4 is held out; the other four folds form the
    training set.

    NOTE(review): fold 4 drives early stopping, checkpoint selection and the
    reported accuracy, so the returned accuracies are optimistic estimates.

    Returns
    -------
    models : list
        The trained models, each restored to its best checkpointed weights.
    val_accs : list of float
        Accuracy of each model on fold 4, in the same order as ``models``.
    """
    test_fold = 4
    models = []
    val_accs = []
    for i in range(len(hand_arrangements)):
        # Callbacks are rebuilt for every model. ModelCheckpoint fixes its
        # "best so far" score at construction and keeps it across fit() calls,
        # so a shared instance would skip saving whenever a later model failed
        # to beat an earlier one -- and load_weights() below would then restore
        # the earlier model's weights into this model.
        checkpoint = tf.keras.callbacks.ModelCheckpoint(
            "best_ensemble.h5", save_best_only=True, monitor="val_accuracy"
        )
        early_stopping = tf.keras.callbacks.EarlyStopping(
            monitor="val_accuracy", patience=10
        )
        callbacks = [checkpoint, early_stopping]

        model = make_model()

        # Restore the fold axis so one fold can be held out.
        temp_X = Xs[i].reshape(5, 20, 90, 6 * NUM_SAMPLE)
        X_test, y_test = temp_X[test_fold], y[test_fold]
        X_train = np.concatenate(
            [X_j for j, X_j in enumerate(temp_X) if j != test_fold]
        )
        y_train = np.concatenate(
            [y_j for j, y_j in enumerate(y) if j != test_fold]
        )

        print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)
        model.fit(
            X_train,
            y_train,
            validation_data=(X_test, y_test),
            epochs=75,
            callbacks=callbacks,
        )
        # Roll back to the epoch with the best validation accuracy.
        model.load_weights("best_ensemble.h5")

        # evaluate() returns loss followed by the three compiled metrics.
        _, accuracy, _, _ = model.evaluate(X_test, y_test)
        val_accs.append(accuracy)
        models.append(model)
    return models, val_accs

In [97]:
def aggregate_accuracy(Xs, models):
    """Accuracy of the ensemble's averaged prediction on held-out fold 4.

    Each model predicts on fold 4 of its own feature array; the predictions
    are averaged, rounded to 0/1, and compared with the module-level labels
    ``y[4]``. Also reads the module-level ``NUM_SAMPLE``.

    Parameters
    ----------
    Xs : sequence of ndarray
        One feature array per model, each reshapeable to
        ``(5, 20, 90, 6 * NUM_SAMPLE)``.
    models : sequence
        Objects exposing ``predict(X_test)`` that return one score per sample
        as a column vector.

    Returns
    -------
    float
        Fraction of fold-4 samples whose rounded mean prediction equals the label.

    Raises
    ------
    ValueError
        If ``models`` is empty (the mean prediction would be undefined).
    """
    if len(models) == 0:
        raise ValueError("aggregate_accuracy needs at least one model")

    y_test = y[4]
    preds = np.zeros((len(y_test), 1))
    for X_sub, model in zip(Xs, models):
        X_test = X_sub.reshape(5, 20, 90, 6 * NUM_SAMPLE)[4]
        preds += model.predict(X_test)
    preds = preds / len(models)
    # np.round replaces np.round_, which was removed in NumPy 2.0.
    return (np.round(preds.flatten()) == y_test).mean()

In [98]:
# Train the whole ensemble: one fit of up to 75 epochs per landmark arrangement.
models, val_accs = train_models() 

(80, 90, 18) (20, 90, 18) (80,) (20,)
Epoch 1/75
Epoch 2/75
Epoch 3/75
Epoch 4/75
Epoch 5/75
Epoch 6/75
Epoch 7/75
Epoch 8/75
Epoch 9/75
Epoch 10/75
Epoch 11/75
Epoch 12/75
Epoch 13/75
Epoch 14/75
Epoch 15/75
Epoch 16/75
Epoch 17/75
Epoch 18/75
Epoch 19/75
Epoch 20/75
Epoch 21/75
Epoch 22/75
Epoch 23/75
(80, 90, 18) (20, 90, 18) (80,) (20,)
Epoch 1/75
Epoch 2/75
Epoch 3/75
Epoch 4/75
Epoch 5/75
Epoch 6/75
Epoch 7/75
Epoch 8/75
Epoch 9/75
Epoch 10/75
Epoch 11/75
Epoch 12/75
Epoch 13/75
Epoch 14/75
Epoch 15/75
Epoch 16/75
Epoch 17/75
Epoch 18/75
Epoch 19/75
Epoch 20/75
Epoch 21/75
Epoch 22/75
Epoch 23/75
Epoch 24/75
(80, 90, 18) (20, 90, 18) (80,) (20,)
Epoch 1/75
Epoch 2/75
Epoch 3/75
Epoch 4/75
Epoch 5/75
Epoch 6/75
Epoch 7/75
Epoch 8/75
Epoch 9/75
Epoch 10/75
Epoch 11/75
Epoch 12/75
Epoch 13/75
Epoch 14/75
Epoch 15/75
Epoch 16/75
Epoch 17/75
Epoch 18/75
Epoch 19/75
Epoch 20/75
Epoch 21/75
Epoch 22/75
Epoch 23/75
(80, 90, 18) (20, 90, 18) (80,) (20,)
Epoch 1/75
Epoch 2/75
Epoch 3/75
Ep

In [99]:
# Accuracy of the averaged ensemble prediction on held-out fold 4.
aggregate_accuracy(Xs, models) 

0.85

In [101]:
# How should these models be evaluated?
# Is it even possible to run cross-validation here?

[0.800000011920929,
 0.800000011920929,
 0.800000011920929,
 0.800000011920929,
 0.800000011920929,
 0.8500000238418579,
 0.800000011920929,
 0.800000011920929,
 0.800000011920929,
 0.800000011920929,
 0.8500000238418579,
 0.800000011920929,
 0.8500000238418579,
 0.8500000238418579,
 0.800000011920929,
 0.800000011920929,
 0.8500000238418579,
 0.8500000238418579,
 0.800000011920929,
 0.800000011920929,
 0.800000011920929,
 0.800000011920929,
 0.800000011920929,
 0.800000011920929,
 0.800000011920929,
 0.800000011920929,
 0.8500000238418579,
 0.8500000238418579,
 0.8500000238418579,
 0.8500000238418579,
 0.800000011920929,
 0.8500000238418579,
 0.8500000238418579,
 0.8500000238418579,
 0.800000011920929,
 0.800000011920929,
 0.8500000238418579,
 0.8500000238418579,
 0.8500000238418579,
 0.8500000238418579,
 0.800000011920929,
 0.8500000238418579,
 0.800000011920929,
 0.800000011920929,
 0.8500000238418579,
 0.8500000238418579,
 0.8500000238418579,
 0.800000011920929,
 0.800000011920929,