# General aim of the notebook

This notebook implements the different machine learning methods tested for the classification of the emotions. The parameters of the models have already been optimized, their optimization process can be found in other notebooks.

The notebook trains and tests the models.

# How to use this notebook

1. Change the datapath and load the data


2. Run the preprocessing


3. Train one of the models, skip the others


4. Run the two cells of the section Analysis of Prediction, they will return a table with precision per emotion

In [8]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import metrics

## Load the data

In [9]:
#--------data paths------------------------------------------------------------------------------------------
data_train_path = 'C:/Users/manon/Desktop/Projet_2 _ML/code/datas/Split_48_neutre/Combined_All_Train_80.csv'
data_test_path = 'C:/Users/manon/Desktop/Projet_2 _ML/code/datas/Split_48_neutre/Combined_All_Test_80.csv'

#--------loading the data and spliting between features and predictions--------------------------------------
tx_train = pd.read_csv(data_train_path,sep=",",squeeze=True)
X_train=tx_train.iloc[:, :48]
ytr = pd.read_csv(data_train_path,sep=",",usecols=[48],squeeze=True)

tx_test = pd.read_csv(data_test_path,sep=",",squeeze=True)
X_test=tx_test.iloc[:, :48]
yte = pd.read_csv(data_test_path,sep=",",usecols=[48],squeeze=True)

#---------Adding features name (i.e. number of the brain region represented by the feature)------------------
def add_column_names(data):
    
    liste=[]
    for i in range(48):
        liste.append(str(i+1))
    data.columns=liste
    return data,liste

X_train,liste = add_column_names(X_train)
X_test,liste = add_column_names(X_test)

#---------Convert y to int to be compatible with future prediction--------------------------------------------
ytr = ytr.astype(np.int64)
yte = yte.astype(np.int64)

# Preprocessing
###  1. Data standardization

In [10]:
def standardize(x):
    """Standardize the original data set."""
    mean_x = np.mean(x)
    x = x - mean_x
    std_x = np.std(x)
    x = x / std_x
    return x

In [11]:
Xtrain=standardize(X_train)
X_test=standardize(X_test)

# Training
### 1. Random Forest

In [13]:
classifier = RandomForestClassifier(n_estimators = 500, criterion = 'gini', max_depth=9,random_state = 42)
classifier.fit(X_train, ytr)
Y_pred = classifier.predict(X_test)
print("Accuracy:",metrics.accuracy_score(yte, Y_pred))

Accuracy: 0.330603889457523


### 2. Decision Tree Classifier

In [7]:
from sklearn.tree import DecisionTreeClassifier
dtree_model = DecisionTreeClassifier(max_depth = 4, criterion='gini').fit(X_train, ytr)
Y_pred = dtree_model.predict(X_test)
print("Accuracy:",metrics.accuracy_score(yte, Y_pred))

Accuracy: 0.283179802115319


### 3. SVM

In [14]:
from sklearn.svm import SVC
svm_model_linear = SVC(kernel = 'linear', C = 0.025).fit(X_train, ytr) #c=0.02 for linear,0.33 for poly, 0.01 for sigmoid
Y_pred = svm_model_linear.predict(X_test)
accuracy = svm_model_linear.score(X_test, yte)
print("Accuracy:",accuracy)

Accuracy: 0.2811327192084613


### 4. KNN

In [8]:
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors = 873, weights='distance').fit(X_train, ytr)
Y_pred = knn.predict(X_test)
accuracy = knn.score(X_test, yte)
print("Accuracy:",accuracy)

Accuracy: 0.2739679290344592


### 5. Convolutional NN

In [27]:
X_train1=tf.expand_dims(X_train, axis=-1)
X_test1=tf.expand_dims(X_test, axis=-1)
y_train1 = tf.keras.utils.to_categorical(ytr,num_classes=14)
y_test1 = tf.keras.utils.to_categorical(yte,num_classes=14)

n_samples, n_features = X_train1.shape[0], X_train1.shape[1]
n_outputs=14

In [31]:
from tensorflow.keras.layers import Conv1D, Dropout, MaxPooling1D, Flatten, Dense

model = tf.keras.Sequential()
model.add(Conv1D(filters=12, kernel_size=5, activation='tanh',kernel_initializer='he_uniform',input_shape=(n_features, 1)))
model.add(Conv1D(filters=64, kernel_size=5, activation='tanh',kernel_initializer='he_uniform'))
model.add(Dropout(0.4))
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())
model.add(Dense(60, input_dim=199, activation='tanh'))
model.add(Dense(14, activation='softmax'))
    
    # Compile model
optimizer = tf.keras.optimizers.Nadam(lr=0.001)
model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

In [32]:
# fit and evaluate a model
def evaluate_model(trainX, trainy, testX, testy, model):
    epochs, batch_size = 10, 1
    n_samples, n_features = X_train.shape[0], X_train.shape[1]
    n_outputs=14
    # fit network
    model.fit(trainX, trainy, epochs=epochs, batch_size=batch_size)
    # evaluate model
    _, accuracy = model.evaluate(testX, testy, batch_size=batch_size)
    return accuracy

In [33]:
evaluate_model(X_train1, y_train1, X_test1, y_test1, model)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


0.2787444591522217

In [34]:
Y_pred = model.predict(X_test)
Y_pred = np.argmax(Y_pred,axis=1)

### 6. Fully Connected NN

In [58]:
X_train2=tf.expand_dims(X_train, axis=-1)
X_test2=tf.expand_dims(X_test, axis=-1)
y_train2 = tf.keras.utils.to_categorical(ytr,num_classes=14)
y_test2 = tf.keras.utils.to_categorical(yte,num_classes=14)

n_samples, n_features = X_train2.shape[0], X_train2.shape[1]
n_outputs=14

In [59]:
from tensorflow.keras.layers import Conv1D, Dropout, MaxPooling1D, Flatten, Dense

model = tf.keras.Sequential()
model.add(Dense(6, input_dim=48, activation='relu',kernel_initializer='zero'))
model.add(Dense(14, activation='softmax'))
    
# Compile model
optimizer = tf.keras.optimizers.Adagrad(lr=0.001)
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

  super(Adagrad, self).__init__(name, **kwargs)


In [60]:
# fit and evaluate a model
def evaluate_model(trainX, trainy, testX, testy, model):
    epochs, batch_size = 10, 100
    n_samples, n_features = X_train.shape[0], X_train.shape[1]
    n_outputs=14
    # fit network
    model.fit(trainX, trainy, epochs=epochs, batch_size=batch_size)
    # evaluate model
    _, accuracy = model.evaluate(testX, testy, batch_size=batch_size)
    return accuracy

In [61]:
evaluate_model(X_train2, y_train2, X_test2, y_test2, model)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


0.26475605368614197

In [62]:
Y_pred = model.predict(X_test2)
Y_pred = np.argmax(Y_pred,axis=1)

### 7. Gaussian Naive Bayes

In [None]:
from sklearn.naive_bayes import GaussianNB
gnb = GaussianNB()
gnb.fit(X_train,ytr)
#y_pred_test = gnb.predict(X_test)
accuracy = gnb.score(X_test, yte)
print("Accuracy:",accuracy)

### 8. Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression
logreg = LogisticRegression(solver = 'saga',multi_class='auto')
logreg.fit(X_train,ytr)
accuracy = logreg.score(X_test, yte)
print("Accuracy:",accuracy)

# Analysis of predictions

In [35]:
Yte = np.expand_dims(yte, axis=1)
Yte=pd.DataFrame(Yte,columns = ['Emotions'])
Y_pred=pd.DataFrame(Y_pred,columns = ['Emotions'])

dict = pd.DataFrame({0:'Anger',1:'Sad',2:'Guilt',3:'Shame',4:'Disgust',5:'Anxiety',6:'Fear',7:'Surprise',8:'Contempt',9:'Satisfaction',
            10:'WarmHeart.',11:'Happiness',12:'Love',13:'Neutral'}, index=[0])

Yte=Yte.replace({"Emotions": dict})
Y_pred=Y_pred.replace({"Emotions": dict})


In [36]:
from sklearn import metrics
print(metrics.classification_report(Yte, Y_pred))

              precision    recall  f1-score   support

       Anger       0.30      0.73      0.42        60
     Anxiety       0.21      0.35      0.26       148
    Contempt       0.29      0.28      0.29       240
     Disgust       0.34      0.27      0.30       192
        Fear       0.20      0.15      0.17       156
   Happiness       0.39      0.34      0.36       776
        Love       0.18      0.13      0.15       104
     Neutral       0.23      0.24      0.23       208
         Sad       0.43      0.56      0.49       168
Satisfaction       0.19      0.13      0.15       315
       Shame       0.19      0.25      0.22       304
    Surprise       0.29      0.31      0.30        72
  WarmHeart.       0.11      0.10      0.10       188

    accuracy                           0.28      2931
   macro avg       0.26      0.30      0.27      2931
weighted avg       0.28      0.28      0.27      2931

