# Machine Learning Models Evaluation

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from utils.evaluation import print_metrics
from utils.cross_validation import avg_spearman_over_k_folds

Load train and test datasets.

In [2]:
# Load the pre-computed feature tables; the first CSV column is used as the
# row index. Both files carry a "Text" and a "Level" column next to the
# feature columns.
train_df = pd.read_csv("../features/weebit_train_with_features.csv", index_col=0)
test_df = pd.read_csv("../features/weebit_test_with_features.csv", index_col=0)

# Targets: the "Level" column of each split.
y_train = train_df["Level"]
y_test = test_df["Level"]

# Features: every column except "Text" and the target. drop() returns a new
# frame, so the loaded frames are not mutated and this cell can be edited and
# re-run piecewise without hidden state.
NON_FEATURE_COLUMNS = ["Text", "Level"]
X_train = train_df.drop(columns=NON_FEATURE_COLUMNS)
X_test = test_df.drop(columns=NON_FEATURE_COLUMNS)

# pd.concat aligns on column names and fills any mismatch with NaN without
# complaint, so check up front that both splits expose the same features in
# the same order.
assert list(X_train.columns) == list(X_test.columns), (
    "train and test feature columns differ"
)

# Whole set (train rows followed by test rows, re-indexed from 0); used in
# cross-validation.
X = pd.concat([X_train, X_test], ignore_index=True)
y = pd.concat([y_train, y_test], ignore_index=True)

## 1. Random Forest Classifier

In [3]:
from models.random_forest import RandomForest

In [4]:
# Build the project's Random Forest wrapper. save_model=True presumably
# persists the fitted model — TODO confirm in models.random_forest.
rf = RandomForest(save_model=True)

# Fit on the training split, then predict for the test split; y_pred is
# consumed by the metrics cell that follows.
rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)

In [5]:
# Compare the Random Forest predictions (y_pred) with the true test labels.
# The report printed below contains Spearman's correlation, R^2, the
# confusion matrix, accuracy and micro/macro F1.
print_metrics(y_test, y_pred, classification=True)

Spearman's correlation coef: 0.756070591196144
-----------
R^2 = 0.5195215307278667
-----------
Confusion matrix:
[[ 80  28   7   5   2]
 [ 19 101  24   8   6]
 [  0  20 119   4  17]
 [  6   9   6  82  25]
 [  3   8  14  27 108]]
-----------
Accuracy: 0.6730769230769231
F1(micro)0.6730769230769231
F1(macro)0.6730471319212145
-----------


Compute the mean Spearman correlation over k = 5 cross-validation folds of the whole dataset (train and test combined).

In [6]:
# New Random Forest instance with default constructor arguments (no
# save_model=True this time) — presumably so the cross-validation run does not
# touch the model saved earlier; TODO confirm.
rf = RandomForest()

# Runs on the whole set (X, y) with k=5. print() str()-converts each argument
# and joins them with a single space, so the emitted line is unchanged.
print("Mean Spearman correlation:", avg_spearman_over_k_folds(X, y, rf, k=5))

Fold 1/5
Fold 2/5
Fold 3/5
Fold 4/5
Fold 5/5
Mean Spearman correlation: 0.7177805825145648


## 2. XGBoost Classifier

In [7]:
from models.xgboost import XGBoost

In [8]:
# Build the project's XGBoost wrapper. save_model=True presumably persists
# the fitted model — TODO confirm in models.xgboost.
xgboost = XGBoost(save_model=True)

# Fit on the training split, then predict for the test split; y_pred is
# overwritten here and consumed by the metrics cell that follows.
xgboost.fit(X_train, y_train)
y_pred = xgboost.predict(X_test)

In [9]:
# Compare the XGBoost predictions (y_pred) with the true test labels.
# The report printed below contains Spearman's correlation, R^2, the
# confusion matrix, accuracy and micro/macro F1.
print_metrics(y_test, y_pred, classification=True)

Spearman's correlation coef: 0.7890308371249726
-----------
R^2 = 0.5811758475813705
-----------
Confusion matrix:
[[ 80  31   5   4   2]
 [ 17 112  18   5   6]
 [  3  27 114   3  13]
 [  5   7   6  82  28]
 [  2   7  10  27 114]]
-----------
Accuracy: 0.6895604395604396
F1(micro)0.6895604395604396
F1(macro)0.6893224224903831
-----------


Compute the mean Spearman correlation over k = 5 cross-validation folds of the whole dataset (train and test combined).

In [10]:
# New XGBoost instance with default constructor arguments (no
# save_model=True this time) — presumably so the cross-validation run does not
# touch the model saved earlier; TODO confirm.
xgboost = XGBoost()

# Runs on the whole set (X, y) with k=5. print() str()-converts each argument
# and joins them with a single space, so the emitted line is unchanged.
print("Mean Spearman correlation:", avg_spearman_over_k_folds(X, y, xgboost, k=5))

Fold 1/5
Fold 2/5
Fold 3/5
Fold 4/5
Fold 5/5
Mean Spearman correlation: 0.7508209121592035


## 3. Support Vector Classifier

In [11]:
from models.support_vector_machine import SupportVectorMachine

In [12]:
# Build the project's support-vector wrapper. save_model=True presumably
# persists the fitted model — TODO confirm in models.support_vector_machine.
svm = SupportVectorMachine(save_model=True)

# Fit on the training split, then predict for the test split; y_pred is
# overwritten here and consumed by the metrics cell that follows.
svm.fit(X_train, y_train)
y_pred = svm.predict(X_test)

In [13]:
# Compare the SVM predictions (y_pred) with the true test labels.
# The report printed below contains Spearman's correlation, R^2, the
# confusion matrix, accuracy and micro/macro F1.
print_metrics(y_test, y_pred, classification=True)

Spearman's correlation coef: 0.7779563580116718
-----------
R^2 = 0.552120364926271
-----------
Confusion matrix:
[[ 77  36   3   4   2]
 [ 23 103  21   8   3]
 [  3  27 101   7  22]
 [  7   8  10  76  27]
 [  2   3  15  28 112]]
-----------
Accuracy: 0.6442307692307693
F1(micro)0.6442307692307693
F1(macro)0.6434704376279996
-----------


In [14]:
# New SVM instance with default constructor arguments (no save_model=True
# this time) — presumably so the cross-validation run does not touch the
# model saved earlier; TODO confirm.
svm = SupportVectorMachine()

# Runs on the whole set (X, y) with k=5. print() str()-converts each argument
# and joins them with a single space, so the emitted line is unchanged.
print("Mean Spearman correlation:", avg_spearman_over_k_folds(X, y, svm, k=5))

Fold 1/5
Fold 2/5
Fold 3/5
Fold 4/5
Fold 5/5
Mean Spearman correlation: 0.7408351660777365


## 4. Multilayer Perceptron

In [15]:
from models.multilayer_perceptron import MultilayerPerceptron

Using TensorFlow backend.


In [16]:
# Build the project's MLP wrapper. input_dim is set to the number of feature
# columns in X_train. save_model=True presumably persists the fitted model and
# verbose=0 presumably silences training logs — TODO confirm in
# models.multilayer_perceptron.
mlp = MultilayerPerceptron(input_dim=X_train.shape[1], save_model=True, verbose=0)

# Fit on the training split, then predict for the test split; y_pred is
# overwritten here and consumed by the metrics cell that follows.
mlp.fit(X_train, y_train)
y_pred = mlp.predict(X_test)

In [17]:
# Compare the MLP predictions (y_pred) with the true test labels.
# The report printed below contains Spearman's correlation, R^2, the
# confusion matrix, accuracy and micro/macro F1.
print_metrics(y_test, y_pred, classification=True)

Spearman's correlation coef: 0.7808762832151066
-----------
R^2 = 0.552829035234932
-----------
Confusion matrix:
[[ 87  27   3   4   1]
 [ 30 104  17   4   3]
 [  2  32 101   4  21]
 [  6   8   9  82  23]
 [  4   8   9  32 107]]
-----------
Accuracy: 0.6607142857142857
F1(micro)0.6607142857142857
F1(macro)0.6622114858221293
-----------


In [19]:
# New MLP instance sized to the number of feature columns in the whole set;
# save_model and verbose are left at their defaults — presumably so the
# cross-validation run does not touch the model saved earlier; TODO confirm.
mlp = MultilayerPerceptron(input_dim=X.shape[1])

# Runs on the whole set (X, y) with k=5. print() str()-converts each argument
# and joins them with a single space, so the emitted line is unchanged.
print("Mean Spearman correlation:", avg_spearman_over_k_folds(X, y, mlp, k=5))

Fold 1/5
Fold 2/5
Fold 3/5
Fold 4/5
Fold 5/5
Mean Spearman correlation: 0.7623367972063997
