# Test Set Evaluation of Top Performing Models

## Libraries

In [24]:
import numpy as np
import matplotlib.pyplot as plt   
%matplotlib inline
from music21 import *
from IPython.display import Audio
from intervaltree import Interval,IntervalTree
import pandas as pd
import seaborn as sns

from sklearn.pipeline import make_pipeline
from sklearn.svm import SVC
import sklearn.model_selection as model_selection
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier

from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
from sklearn.metrics import classification_report
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import confusion_matrix
from sklearn.decomposition import PCA
import tensorflow as tf
from sklearn.metrics import f1_score

from collections import Counter
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
import warnings
warnings.simplefilter(action='ignore')

### Import csv features which were extracted from earlier Data PreProcessing notebook

In [25]:
X_train_w = pd.read_csv('../data/X_train_w.csv',index_col = 0)
X_train_m = pd.read_csv('../data/X_train_m.csv',index_col = 0)
X_train_c = pd.read_csv('../data/X_train_c.csv',index_col = 0)
X_test_w = pd.read_csv('../data/X_test_w.csv',index_col = 0)
X_test_m = pd.read_csv('../data/X_test_m.csv',index_col = 0)
X_test_c = pd.read_csv('../data/X_test_c.csv',index_col = 0)
y_train_w = pd.read_csv('../data/y_train_w.csv',index_col = 0)
y_train_m = pd.read_csv('../data/y_train_m.csv',index_col = 0)
y_train_c = pd.read_csv('../data/y_train_c.csv',index_col = 0)
y_test_w = pd.read_csv('../data/y_test_w.csv',index_col = 0)
y_test_m = pd.read_csv('../data/y_test_m.csv',index_col = 0)
y_test_c = pd.read_csv('../data/y_test_c.csv',index_col = 0)


## Cross Validation Set Accuracy Results
| Model               | Feature Set | Model Variant                                           | Highest CV Score | Mean CV Score |
|---------------------|-------------|---------------------------------------------------------|------------------|---------------|
| Logistic Regression | Spectral    | Baseline                                                | 94.23%           | 89.75%        |
| Logistic Regression | Spectral    | PCA                                                     | 71.15%           | 68.83%        |
| Logistic Regression | Midi        | Baseline                                                | 76.92%           | 71.10%        |
| Logistic Regression | Midi        | PCA                                                     | 48.08%           | 41.49%        |
| Logistic Regression | Combined    | Baseline                                                | 92.31%           | 85.56%        |
| Logistic Regression | Combined    | PCA                                                     | 48.08%           | 45.64%        |
| Naive Bayes         | Spectral    | Baseline                                                | 71.69%           | 68.41%        |
| Naive Bayes         | Midi        | Baseline                                                | 75.00%           | 59.00%        |
| Naive Bayes         | Combined    | Baseline                                                | 75.47%           | 73.00%        |
| Decision Tree       | Spectral    | Baseline                                                | 80.77%           | 75.68%        |
| Decision Tree       | Midi        | Baseline                                                | 86.79%           | 84.03%        |
| Decision Tree       | Combined    | Baseline                                                | 94.23%           | 84.83%        |
| Random Forest       | Spectral    | Baseline                                                | 86.54%           | 83.29%        |
| Random Forest       | Spectral    | w/SMOTE                                                 | 100%             | 99.56%        |
| Random Forest       | Spectral    | w/SMOTE +  Hyperparameter Tuning                        | ?                | ?             |
| Random Forest       | Midi        | Baseline                                                | 94.34%           | 89.75%        |
| Random Forest       | Midi        | w/SMOTE                                                 | 99.56%           | 99.34%        |
| Random Forest       | Midi        | w/SMOTE + Hyperparameter Tuning                         | ?                | ?             |
| Random Forest       | Combined    | Baseline                                                | 90.39%           | 86.72%        |
| Random Forest       | Combined    | w/SMOTE                                                 | 100%             | 99.29%        |
| Random Forest       | Combined    | w/SMOTE + Hyperparameter Tuning                         | ?                | ?             |
| SVM                 | Spectral    | Baseline                                                | 67.39%           | 64.97%        |
| SVM                 | Spectral    | w/Hyperparameter Tuning                                 | 95.65%           | 91.35%        |
| SVM                 | Spectral    | w/Hyperparamater Tuning + PCA                           | 94.38%           | 90.47%        |
| SVM                 | Spectral    | w/Hyperparameter Tuning + PCA + SMOTE                   | 100%             | 99.38%        |
| SVM                 | Spectral    | w/Hyperparameter Tuning +  PCA + SMOTE + MinMaxScaler   | 77.33%           | 72.98%        |
| SVM                 | Spectral    | w/Hyperparameter Tuning +  PCA + SMOTE + StandardScaler | 98.67%           | 97.24%        |
| SVM                 | Midi        | Baseline                                                | 73.91%           | 73.17%        |
| SVM                 | Midi        | w/Hyperparameter Tuning                                 | ?                | ?             |
| SVM                 | Midi        | w/SMOTE                                                 | ?                | ?             |
| SVM                 | Midi        | w/PCA                                                   | ?                | ?             |
| SVM                 | Combined    | Baseline                                                | 73.91%           | 73.17%        |
| SVM                 | Combined    | w/Hyperparameter Tuning                                 | ?                | ?             |
| SVM                 | Combined    | w/SMOTE                                                 | ?                | ?             |
| SVM                 | Combined    | w/PCA                                                   | ?                | ?             |
| Neural Network      | Spectral    | Baseline (ADAM, 5 Hidden Layers)                        | 100%             | 100%          |
| Neural Network      | Midi        | Baseline (ADAM, 5 Hidden Layers)                        | 84.85%           | 83.55%        |
| Neural Network      | Combined    | Baseline (ADAM, 5 Hidden Layers)                        | 86.15%           | 81.82%        |

## Top Performing Models

| Model               | Feature Set | Model Variant                                            | Max CV Score | Mean CV Score |
|---------------------|-------------|----------------------------------------------------------|--------------|---------------|
| Neural Network      | Spectral    | Baseline (ADAM, 5 Hidden Layers)                         | 100%         | 100%          |
| Random Forest       | Spectral    | w/SMOTE                                                  | 100%         | 99.56%        |
| SVM                 | Spectral    | w/Hyperparameter   Tuning + PCA + SMOTE                  | 100%         | 99.38%        |
| Random Forest       | Midi        | w/SMOTE                                                  | 99.56%       | 99.34%        |
| Random Forest       | Combined    | w/SMOTE                                                  | 100%         | 99.29%        |
| SVM                 | Spectral    | w/Hyperparameter   Tuning + PCA + SMOTE + StandardScaler | 98.67%       | 97.24%        |
| SVM                 | Spectral    | w/Hyperparameter   Tuning                                | 95.65%       | 91.35%        |
| SVM                 | Spectral    | w/Hyperparamater   Tuning + PCA                          | 94.38%       | 90.47%        |
| Logistic Regression | Spectral    | Baseline                                                 | 94.23%       | 89.75%        |
| Random Forest       | Midi        | Baseline                                                 | 94.34%       | 89.75%        |

### Neural Network - Spectral - Baseline

In [None]:
model = tf.keras.Sequential([
    tf.keras.layers.Dense(176, activation='relu'),
    tf.keras.layers.Dense(100, activation='relu'),
    tf.keras.layers.Dense(90, activation='relu'),
    tf.keras.layers.Dense(80, activation='relu'),
    tf.keras.layers.Dense(70, activation='relu'),
    tf.keras.layers.Dense(60, activation='relu'),
    tf.keras.layers.Dense(21)
])
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])
history = model.fit(X_train_w, y_train_w, epochs=200)
predictions = rf.predict(X_test_w)
f1 = f1_score(y_test_w, predictions)
print("F1 score = {:.5f}: {:.2%}".format(accuracy))

### Random Forest - Spectral - w/SMOTE

In [30]:
# todo: I think we need to re-import the data for this cell, if this is run multiple times then y_train_w gets overwritten
y_train_w = pd.read_csv('../data/y_train_w.csv',index_col = 0)

#Drop any ensemble types with counts below 6. This because Expected n_neighbors <= n_samples,  
#but n_samples = 3, n_neighbors = 6
counts = y_train_w['ensemble'].value_counts()
X_train_w = X_train_w[~y_train_w['ensemble'].isin(counts[counts < 6].index)]
y_train_w = y_train_w[~y_train_w['ensemble'].isin(counts[counts < 6].index)]

# Fit and apply the transform
X_SMOTE_w, y_SMOTE_w = SMOTE(random_state=101).fit_resample(X_train_w, y_train_w)

from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier(random_state=1)
rf.fit(X_SMOTE_w, y_SMOTE_w)
predictions = rf.predict(X_test_w)
f1 = f1_score(y_test_w, predictions)
print('Test Set F1 score = {:.5f}'.format(f1))

TypeError: 'numpy.float64' object is not callable

### SVM - Spectral - w/Hyperparameter Tuning + PCA + SMOTE

In [None]:
#Drop any ensemble types with counts below 6
counts = y_train_w['ensemble'].value_counts()
X_train_w_smote = X_train_w[~y_train_w['ensemble'].isin(counts[counts < 6].index)]
y_train_w_smote = y_train_w[~y_train_w['ensemble'].isin(counts[counts < 6].index)]

# fit and apply the transform
X_SMOTE, y_SMOTE = SMOTE().fit_resample(X_train_w_smote, y_train_w_smote.values.ravel())

#Perform PCA with 15 components
pca=PCA(n_components = 15)
pca.fit(X_SMOTE)
train_pca_w_smote = pca.transform(X_SMOTE)
X_test_w_pca = pca.transform(X_test_w)


from sklearn.model_selection import GridSearchCV
  
# defining parameter range
param_grid = {'C': np.logspace(-4, 4, 20), 
              'gamma': np.logspace(-3, 2, 6),
              'kernel': ['linear','rbf','poly']} 
  
grid = GridSearchCV(SVC(), param_grid , cv=5)
  
# fitting the model for grid search
grid.fit(train_pca_w_smote, y_SMOTE)

print(grid.best_params_)

#Create SVM model with best hyperparameters, PCA and SMOTE

svm = make_pipeline(SVC(kernel=grid.best_params_['kernel'], C = grid.best_params_['C'] , gamma=grid.best_params_['gamma']))
svm.fit(train_pca_w_smote, y_SMOTE)
predictions = svm.predict(X_test_w_pca)
f1_score = f1_score(y_test_w, predictions)
print("Test Set F1-score: {:.2%}".format(f1_score))


### Random Forest - Midi - w/SMOTE

In [27]:
# todo: I think we need to re-import the data for this cell, if this is run multiple times then y_train_w gets overwritten
y_train_m = pd.read_csv('../data/y_train_m.csv',index_col = 0)
y_test_m = pd.read_csv('../data/y_test_m.csv',index_col = 0)

#Drop any ensemble types with counts below 6. This because Expected n_neighbors <= n_samples,  
#but n_samples = 3, n_neighbors = 6
counts = y_train_m['ensemble'].value_counts()
X_train_m = X_train_m[~y_train_m['ensemble'].isin(counts[counts < 6].index)]
y_train_m = y_train_m[~y_train_m['ensemble'].isin(counts[counts < 6].index)]

# Fit and apply the transform
X_SMOTE_m, y_SMOTE_m = SMOTE(random_state=101).fit_resample(X_train_m, y_train_m)

from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier(random_state=1)
rf.fit(X_SMOTE_m, y_SMOTE_m)
predictions = rf.predict(X_test_m)
f1 = f1_score(y_test_m, predictions, average = 'weighted')

print("Test Set F1-score: {:.2%}".format(f1))


TypeError: 'numpy.float64' object is not callable

### Random Forest - Combined - w/SMOTE

In [None]:
# todo: I think we need to re-import the data for this cell, if this is run multiple times then y_train_w gets overwritten
y_train_c = pd.read_csv('../data/y_train_c.csv',index_col = 0)

#Drop any ensemble types with counts below 6. This because Expected n_neighbors <= n_samples,  
#but n_samples = 3, n_neighbors = 6
counts = y_train_c['ensemble'].value_counts()
X_train_c = X_train_c[~y_train_c['ensemble'].isin(counts[counts < 6].index)]
y_train_c = y_train_c[~y_train_c['ensemble'].isin(counts[counts < 6].index)]

X_SMOTE_c, y_SMOTE_c = SMOTE(random_state=101).fit_resample(X_train_c, y_train_c)

from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier(random_state=1)
rf.fit(X_SMOTE_c, y_SMOTE_c)
predictions = rf.predict(X_test_c)
f1_score = f1_score(y_test_w, predictions,average='weighted')
print("Test Set F1-score: {:.2%}".format(f1_score))

### SVM - Spectral - w/Hyperparameter Tuning + PCA + SMOTE + StandardScaler	

In [None]:
#### Standard Scaler Application

#Perform PCA with 15 components
pca=PCA(n_components = 15)
pca.fit(X_SMOTE)
train_pca_w_smote = pca.transform(X_SMOTE)
X_test_w_pca = pca.transform(X_test_w)


#Create SVM model with best hyperparameters, PCA and SMOTE and StandardScaler

svm = make_pipeline(StandardScaler() ,SVC(kernel=grid.best_params_['kernel'], C = grid.best_params_['C'] , gamma=grid.best_params_['gamma']))
svm.fit(train_pca_w_smote, y_SMOTE)


predictions = svm.predict(X_test_w_pca)
f1_score = f1_score(y_test_w, predictions,average='weighted')
print("Test Set F1-score: {:.2%}".format(f1_score))


### SVM - Spectral - w/Hyperparameter Tuning

In [None]:
#Find best hyperparameters
param_grid = {'C': np.logspace(-4, 4, 20), 
              'gamma': np.logspace(-3, 2, 6),                     
              'kernel': ['linear','rbf','poly']} 
  
grid = GridSearchCV(SVC(), param_grid, cv=5)
  
# fitting the model for grid search
grid.fit(X_train_w, y_train_w.values.ravel())

print(grid.best_params_)


#Create Model with best hyperparameters

svm = make_pipeline(SVC(kernel=grid.best_params_['kernel'], C = grid.best_params_['C'] , gamma=grid.best_params_['gamma']))
svm.fit(X_train_w, y_train_w.values.ravel())
predictions = svm.predict(X_test_w)
f1_score = f1_score(y_test_w, predictions,average='weighted')
print("Test Set F1-score: {:.2%}".format(f1_score))

### SVM - Spectral - W/Hyperparameter Tuning + PCA

In [None]:
#Perform PCA with 15 components
pca=PCA(n_components = 15)
pca.fit(X_train_w)
train_pca_w = pca.transform(X_train_w)
X_test_w_pca = pca.transform(X_test_w)

from sklearn.model_selection import GridSearchCV
  
# defining parameter range
param_grid = {'C': np.logspace(-4, 4, 20), 
              'gamma': np.logspace(-3, 2, 6),
              'kernel': ['linear','rbf','poly']} 
  
grid = GridSearchCV(SVC(), param_grid , cv=5)
  
# fitting the model for grid search
grid.fit(train_pca_w, y_train_w.values.ravel())

print(grid.best_params_)

#Create Model with best hyperparameters and PCA
svm = make_pipeline(SVC(kernel=grid.best_params_['kernel'], C = grid.best_params_['C'] , gamma=grid.best_params_['gamma']))
svm.fit(train_pca_w, y_train_w.values.ravel())
predictions = svm.predict(X_test_w_pca)
f1_score = f1_score(y_test_w, predictions,average='weighted')
print("Test Set F1-score: {:.2%}".format(f1_score))


### Logistic Regression - Spectral - Baseline

In [None]:
#Create Baseline Logistic Model
lr = LogisticRegression(C = 0.5, solver='liblinear', multi_class='auto')
lr.fit(np.array(X_train_w), np.array(y_train_w))
predictions = lr.predict(X_test_w)
f1 = f1_score(y_test_w, predictions,average='weighted')
print("Test Set F1-score: {:.2%}".format(f1))


### Random Forest - Midi - Baseline

In [29]:
rf = RandomForestClassifier()
rf.fit(X_train_m, y_train_m)
predictions = rf.predict(X_test_m)
f1 = f1_score(y_test_w, predictions,average='weighted')
print("Test Set F1-score: {:.2%}".format(f1))

TypeError: 'numpy.float64' object is not callable

## Final Test Results

| Model               | Feature Set | Model Variant                                            | Mean CV Score | Test Score |
|---------------------|-------------|----------------------------------------------------------|---------------|------------|
| Neural Network      | Spectral    | Baseline (ADAM, 5 Hidden Layers)                         | 100%          | ?          |
| Random Forest       | Spectral    | w/SMOTE                                                  | 99.56%        | 75.76%     |
| SVM                 | Spectral    | w/Hyperparameter   Tuning + PCA + SMOTE                  | 99.38%        | 78.79%     |
| Random Forest       | Midi        | w/SMOTE                                                  | 99.34%        | 83.33%     |
| Random Forest       | Combined    | w/SMOTE                                                  | 99.29%        | 80.30%     |
| SVM                 | Spectral    | w/Hyperparameter   Tuning + PCA + SMOTE + StandardScaler | 97.24%        | 74.24%     |
| SVM                 | Spectral    | w/Hyperparameter   Tuning                                | 91.35%        | 77.27%     |
| SVM                 | Spectral    | w/Hyperparamater   Tuning + PCA                          | 90.47%        | 74.24%     |
| Logistic Regression | Spectral    | Baseline                                                 | 89.75%        | 78.79%     |
| Random Forest       | Midi        | Baseline                                                 | 89.75%        | 83.33%     |