In [81]:
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.metrics import mean_squared_error
from sklearn.multioutput import MultiOutputRegressor
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from enum import IntEnum
import time

class Algorithms(IntEnum):
    LINEAR_REGRESSION = 0
    CNN = 1
    ANN = 2
    ANN_NOPCA = 3
    ALL = 4

WEBCAM_ID = 2
RESOLUTION = 32
ALGORITHM = Algorithms.LINEAR_REGRESSION
TARGET_COUNT = 6
TARGET_NAMES = ['smile', 'mouth_open', 'puff', 'frown', 'left', 'right']
FEATURE_COUNT = RESOLUTION * RESOLUTION  
BATCH_SIZE = 32  # Batch size for model training


FEATURE_COLUMNS = slice(0, FEATURE_COUNT)
LABEL_COLUMNS = slice(-2, None)


In [82]:
train_data = pd.read_csv('./training_set.csv')
X_train = train_data.iloc[:, :-TARGET_COUNT]
y_train = train_data.iloc[:, -TARGET_COUNT:]
test_data = pd.read_csv('./test_set.csv')
X_test = test_data.iloc[:, :-TARGET_COUNT]
y_test = test_data.iloc[:, -TARGET_COUNT:]
train_data_pca = pd.read_csv('./training_set_pca.csv')
X_train_pca = train_data_pca.iloc[:, :-TARGET_COUNT]
test_data_pca = pd.read_csv('./test_set_pca.csv')
X_test_pca = test_data_pca.iloc[:, :-TARGET_COUNT]


In [83]:
# Linear regression

# Train a linear regression model to predict the multiple outputs
start_time = time.time()
regression_model = MultiOutputRegressor(LinearRegression()).fit(X_train_pca, y_train)
end_time = time.time()
lr_train_time = end_time - start_time
print(f"Time to train: {round(lr_train_time, 3)} s")
# Find training error
y_pred = regression_model.predict(X_train_pca)
lr_train_mse = mean_squared_error(y_train, y_pred)
print(f'Training error (MSE): {lr_train_mse}')

# Find test error
start_time = time.time()
y_pred = regression_model.predict(X_test_pca)
end_time = time.time()
lr_predict_time = end_time - start_time
lr_test_mse = mean_squared_error(y_test, y_pred)
print(f'Test error (MSE): {lr_test_mse}')
print(f'Time to predict test set: {round(lr_predict_time, 3)} s')

Time to train: 0.0 s
Training error (MSE): 0.01674216131187532
Test error (MSE): 0.016164379887499372
Time to predict test set: 0.0 s


In [84]:
# Convolutional neural network
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical

# Ensure that data contains the correct number of columns expected as per RESOLUTION and TARGET_COUNT
if len(train_data.columns) != RESOLUTION * RESOLUTION + TARGET_COUNT:
    print("Error: Data does not match the expected format.")

# Split the data
cnn_X = train_data.iloc[:, :-TARGET_COUNT].values
cnn_y = train_data.iloc[:, -TARGET_COUNT:].values

# Check if y contains more than one column and it's not already in the correct categorical format
if cnn_y.ndim == 1 or cnn_y.shape[1] == 1:
    cnn_y = to_categorical(cnn_y)  # This assumes y contains class indices as integers from 0 to num_classes-1

num_classes = cnn_y.shape[1]

# Reshape X to fit the model's input requirements: (num_samples, RESOLUTION, RESOLUTION, 1)
cnn_X = cnn_X.reshape(-1, RESOLUTION, RESOLUTION, 1)

# Split the data
cnn_X_train, cnn_X_test, cnn_y_train, cnn_y_test = train_test_split(cnn_X, cnn_y, test_size=0.2, random_state=42)
# Define the CNN model architecture
#works best with 1 conv layer
cnn_model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(RESOLUTION, RESOLUTION, 1)),
    # MaxPooling2D(2, 2),# Max pooling layer
    # Conv2D(64, (3, 3), activation='relu', input_shape=(RESOLUTION, RESOLUTION, 1)),
    # MaxPooling2D(2, 2),# Max pooling layer
    Flatten(),# flatten the 3D output to 1D
    Dropout(0.5),
    Dense(num_classes, activation='softmax')  # number of classes here
    #softmax activation function instead of sigmoid
])

# Compile the cnn_model
cnn_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the cnn_model
start_time = time.time()
history = cnn_model.fit(cnn_X_train, cnn_y_train, epochs=18, batch_size=42, validation_data=(cnn_X_test, cnn_y_test))
end_time = time.time()
cnn_train_time = end_time - start_time
print(f'Time to train: {cnn_train_time} s')


# Find training error
y_pred = cnn_model.predict(X_train.values.reshape(-1, RESOLUTION, RESOLUTION, 1))
cnn_train_mse = mean_squared_error(y_train, y_pred)
print(f'Training error (MSE): {cnn_train_mse}')

# Find test error
start_time = time.time()
y_pred = cnn_model.predict(X_test.values.reshape(-1, RESOLUTION, RESOLUTION, 1))
end_time = time.time()
cnn_predict_time = end_time - start_time
cnn_test_mse = mean_squared_error(y_test, y_pred)
print(f'Test error (MSE): {cnn_test_mse}')
print(f'Time to predict test set: {round(cnn_predict_time, 3)} s')

Epoch 1/18



Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - accuracy: 0.2881 - loss: 1.1265 - val_accuracy: 0.4583 - val_loss: 0.8200
Epoch 2/18
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.5080 - loss: 0.7334 - val_accuracy: 0.6528 - val_loss: 0.6080
Epoch 3/18
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.6211 - loss: 0.5279 - val_accuracy: 0.7778 - val_loss: 0.4348
Epoch 4/18
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.7035 - loss: 0.4112 - val_accuracy: 0.6111 - val_loss: 0.4564
Epoch 5/18
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - accuracy: 0.6404 - loss: 0.4287 - val_accuracy: 0.7917 - val_loss: 0.3500
Epoch 6/18
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.6394 - loss: 0.4546 - val_accuracy: 0.8194 - val_loss: 0.3747
Epoch 7/18
[1m14/14[0m [32m━━━━━━━━━━━━━━━

In [85]:
# Artificial neural network
from keras.models import Sequential
from keras.layers import Dense

# number of input and outputs respectively
n_features = X_train_pca.shape[1]
n_targets = y_train.shape[1]

ann_model = Sequential()
ann_model.add(Dense(20, input_dim=n_features, activation='relu'))
ann_model.add(Dense(n_targets, activation='linear')) # Output layer

# Compile
ann_model.compile(loss='mean_squared_error', optimizer='adam')
# Train
start_time = time.time()
ann_model.fit(X_train_pca, y_train, epochs=50, batch_size=10)
end_time = time.time()
ann_train_time = end_time - start_time
print(f'Time to train: {ann_train_time} s')

# Find training error
y_pred = ann_model.predict(X_train_pca)
ann_train_mse = mean_squared_error(y_train, y_pred)
print(f'Training error (MSE): {ann_train_mse}')

# Find test error
start_time = time.time()
y_pred = ann_model.predict(X_test_pca)
end_time = time.time()
ann_predict_time = end_time - start_time
ann_test_mse = mean_squared_error(y_test, y_pred)
print(f'Test error (MSE): {ann_test_mse}')
print(f'Time to predict test set: {round(ann_predict_time, 3)} s')


Epoch 1/50



Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 932us/step - loss: 0.4290
Epoch 2/50
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 751us/step - loss: 0.1254
Epoch 3/50
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 466us/step - loss: 0.0515
Epoch 4/50
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 761us/step - loss: 0.0299
Epoch 5/50
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 862us/step - loss: 0.0251
Epoch 6/50
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 801us/step - loss: 0.0183
Epoch 7/50
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 844us/step - loss: 0.0153
Epoch 8/50
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 734us/step - loss: 0.0146
Epoch 9/50
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 666us/step - loss: 0.0139
Epoch 10/50
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 678us/step - loss: 0.0123


In [91]:
# Artificial neural network (No PCA)
from keras.models import Sequential
from keras.layers import Dense

# number of input and outputs respectively
n_features = X_train.shape[1]
n_targets = y_train.shape[1]

annnopca_model = Sequential()
annnopca_model.add(Dense(FEATURE_COUNT//8, input_dim=n_features, activation='relu'))
annnopca_model.add(Dense(n_targets, activation='linear')) # Output layer

# Compile
annnopca_model.compile(loss='mean_squared_error', optimizer='adam')
# Train
start_time = time.time()
annnopca_model.fit(X_train, y_train, epochs=50, batch_size=10)
end_time = time.time()
annnopca_train_time = end_time - start_time
print(f'Time to train: {annnopca_train_time} s')

# Find training error
y_pred = annnopca_model.predict(X_train)
annnopca_train_mse = mean_squared_error(y_train, y_pred)
print(f'Training error (MSE): {annnopca_train_mse}')

# Find test error
start_time = time.time()
y_pred = annnopca_model.predict(X_test)
end_time = time.time()
annnopca_predict_time = end_time - start_time
annnopca_test_mse = mean_squared_error(y_test, y_pred)
print(f'Test error (MSE): {annnopca_test_mse}')
print(f'Time to predict test set: {round(annnopca_predict_time, 3)} s')


Epoch 1/50



Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.1511  
Epoch 2/50
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 937us/step - loss: 0.0150
Epoch 3/50
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.0096
Epoch 4/50
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.0075
Epoch 5/50
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 966us/step - loss: 0.0063
Epoch 6/50
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0048
Epoch 7/50
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.0045
Epoch 8/50
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.0046
Epoch 9/50
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.0042
Epoch 10/50
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.0027
Epoch 11/50
[

In [92]:
import plotly.graph_objects as go

# Define the models and their corresponding training times and MSEs
models = ['LR', 'CNN', 'ANN', 'ANN_NoPCA']
train_times = [lr_train_time, cnn_train_time, ann_train_time, annnopca_train_time]
predict_times = [lr_predict_time, cnn_predict_time, ann_predict_time, annnopca_predict_time]
train_mses = [lr_train_mse, cnn_train_mse, ann_train_mse, annnopca_train_mse]
test_mses = [lr_test_mse, cnn_test_mse, ann_test_mse, annnopca_test_mse]

# Plot training times
fig = go.Figure(data=[go.Bar(x=models, y=train_times, text=['{:.2f}'.format(val) for val in train_times], textposition='auto')])
fig.update_layout(title_text='Training Times for Different Models (Lower is better)', xaxis_title="Models", yaxis_title="Training Time (s)")
fig.show()

# Calculate time to predict one datapoint for each model by dividing the total prediction time by the number of datapoints
predict_times = [time * 1000 / len(y_test) for time in predict_times]  # convert to ms by multiplying by 1000

# Plot prediction times
fig = go.Figure(data=[go.Bar(x=models, y=predict_times, text=['{:.2f}'.format(val) for val in predict_times], textposition='auto')])
fig.update_layout(title_text='Prediction Times for Different Models (Lower is better)', xaxis_title="Models", yaxis_title="Average Time to Predict One Frame (ms)")
fig.show()

# Plot training and test MSEs
fig = go.Figure(data=[
    go.Bar(name='Training MSE', x=models, y=train_mses),
    go.Bar(name='Test MSE', x=models, y=test_mses)
])
# Change the bar mode
fig.update_layout(barmode='group', title_text='Training and Test MSE for Different Models (Lower is better)', xaxis_title="Models", yaxis_title="Mean Squared Error")
fig.show()