###  Dataset
---
We used this [face dataset](https://www.kaggle.com/c/challenges-in-representation-learning-facial-expression-recognition-challenge/data) which consists of 48x48 grayscale images of faces with labels in one of 7 categories: 0=Angry, 1=Disgust, 2=Fear, 3=Happy, 4=Sad, 5=Surprise, 6=Neutral.

An example of a face in this dataset:
<img src="Images/ExampleFace.jpg" alt="Example Face" width="200">


In [40]:
#imports
import random
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA, NMF, KernelPCA
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import GridSearchCV
from sklearn.svm import LinearSVC
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from PIL import Image
import matplotlib.pyplot as plt
import pickle

In [2]:
# Seed the global random number generators of both NumPy and Python's
# `random` module with one shared value so repeated runs are reproducible.
seed = 42
np.random.seed(seed)
random.seed(seed)

In [75]:
# call this function to load all models and data from disk
def load_everything(datapath):
    """Unpickle all saved datasets, decomposition models and classifiers.

    Parameters
    ----------
    datapath : str
        Prefix that is prepended verbatim to every "<name>.pkl" file name.
        To read from a directory it must end with a path separator,
        e.g. "../data/processed/".

    Returns
    -------
    tuple
        The 27 unpickled objects, in this fixed order:
        x_train_sc, x_test_sc, y_test,
        pca, x_train_pca, x_test_pca,
        nmf, x_train_nmf, x_test_nmf,
        kpca, x_train_kpca, x_test_kpca,
        lrc_pca, lrc_nmf, lrc_kpca,
        svc_pca, svc_nmf, svc_kpca,
        mlp_pca, mlp_nmf, mlp_kpca,
        rfc_pca, rfc_nmf, rfc_kpca,
        knc_pca, knc_nmf, knc_kpca.

    Raises
    ------
    OSError
        If one of the files cannot be opened (e.g. FileNotFoundError).
    pickle.UnpicklingError
        If a file does not contain a valid pickle.
    """
    # SECURITY: pickle.load can execute arbitrary code while deserialising.
    # Only point `datapath` at files you created yourself.

    # Order matters: it defines the order of the returned tuple.
    artifact_names = (
        # scaled data and labels
        "x_train_sc", "x_test_sc", "y_test",
        # fitted decompositions, each followed by its train/test projections
        "pca", "x_train_pca", "x_test_pca",
        "nmf", "x_train_nmf", "x_test_nmf",
        "kpca", "x_train_kpca", "x_test_kpca",
        # load models
        "lrc_pca", "lrc_nmf", "lrc_kpca",
        "svc_pca", "svc_nmf", "svc_kpca",
        "mlp_pca", "mlp_nmf", "mlp_kpca",
        "rfc_pca", "rfc_nmf", "rfc_kpca",
        "knc_pca", "knc_nmf", "knc_kpca",
    )

    def _load(name):
        # The context manager closes the file handle even if unpickling fails.
        with open(datapath + name + ".pkl", 'rb') as handle:
            return pickle.load(handle)

    return tuple(_load(name) for name in artifact_names)

In [76]:
# Uncomment the code below to load the saved models and data from disk instead of recomputing them.
# Pass the directory that holds the .pkl files to load_everything, including the trailing slash.
# x_train_sc, x_test_sc, y_test, pca, x_train_pca, x_test_pca, nmf, x_train_nmf, x_test_nmf, \
# kpca, x_train_kpca, x_test_kpca, lrc_pca, lrc_nmf, lrc_kpca, svc_pca, svc_nmf, svc_kpca, \
# mlp_pca, mlp_nmf, mlp_kpca, rfc_pca, rfc_nmf, rfc_kpca, knc_pca, knc_nmf, knc_kpca = load_everything("../data/processed/")

In [3]:
# Read the raw CSV, expand the space-separated pixel strings into one uint8
# column per pixel, and hold out 25% of the rows as a test set.
total = pd.read_csv('../data/raw/icml_face_data.csv')
# The ' Usage' column is discarded; the split below is made by
# train_test_split instead. Note that the code addresses ' Usage' and
# ' pixels' with a leading space in the column name.
total = total.drop(columns = [' Usage'])

total_y = total['emotion']
total_x = total[' pixels'].str.split(' ',expand=True).astype(dtype = 'uint8')

# Use the notebook-wide `seed` rather than a second hard-coded 42, so the
# split stays in sync with every other seeded step if the seed is changed.
x_train, x_test, y_train, y_test  = train_test_split(total_x, total_y, test_size=0.25, random_state=seed)

print("Training Components: "+ str(y_train.size))
print("Test Components: "+ str(y_test.size))

Training Components: 26915
Test Components: 8972


In [4]:
# Min-max scale the pixel columns. The scaler is fitted on the training split
# only and then applied unchanged to both splits.
sc = MinMaxScaler().fit(x_train)

x_train_sc = sc.transform(x_train)
x_test_sc = sc.transform(x_test)


###  PCA

In [5]:
# applying pca
# Fit PCA on the scaled training data only, then project both splits.
# random_state is passed explicitly (as the NMF and KernelPCA cells do) so the
# result does not depend on the global NumPy RNG state or on cell execution
# order. NOTE(review): this only matters when svd_solver='auto' selects a
# randomized solver, which is expected for data of this size; confirm for the
# installed scikit-learn version.

pca = PCA(n_components=255, random_state=seed).fit(x_train_sc) #n_components = (255 explains 95%) (881 explains 99%)
x_train_pca = pca.transform(x_train_sc)
x_test_pca = pca.transform(x_test_sc)

# Fraction of the total variance captured by each retained component.
explained_variance = pca.explained_variance_ratio_

### NMF

In [6]:
# Non-negative matrix factorisation with 255 components and NNDSVD
# initialisation. It is fitted on the scaled training data; the test data is
# only transformed.
nmf = NMF(
    n_components=255,
    init='nndsvd',
    random_state=seed,
)
x_train_nmf = nmf.fit_transform(x_train_sc)
x_test_nmf = nmf.transform(x_test_sc)

# Keep a handle on the learned component matrix.
H = nmf.components_



### Kernel PCA

In [7]:
# Kernel PCA with an RBF kernel, reduced to 7 components. It is fitted on the
# scaled training data; the test data is only transformed.
kpca = KernelPCA(
    n_components=7,
    kernel='rbf',
    random_state=seed,
)
x_train_kpca = kpca.fit_transform(x_train_sc)
x_test_kpca = kpca.transform(x_test_sc)

### Logistic Regression Results

In [41]:
# L2-penalised logistic regression (SAGA solver) trained on the PCA features
# and evaluated on the held-out PCA features.
lrc_pca = LogisticRegression(
    penalty='l2',
    C=1.0,
    solver='saga',
    tol=0.0001,
    max_iter=800,
    random_state=seed,
).fit(x_train_pca, y_train)
predictions_lrc_pca = lrc_pca.predict(x_test_pca)

# Confusion matrix first, then the per-class precision/recall/F1 report.
print(
    confusion_matrix(y_test, predictions_lrc_pca),
    classification_report(y_test, predictions_lrc_pca, zero_division=0),
    sep="\n",
)

[[ 193    4  105  411  247   80  195]
 [  16    6   27   37   18    9   17]
 [ 116    3  187  401  221  175  201]
 [ 106    1   93 1543  240  102  163]
 [ 149    2  109  448  428   80  283]
 [  38    2  112  212   74  446  106]
 [  89    2   86  482  271  109  527]]
              precision    recall  f1-score   support

           0       0.27      0.16      0.20      1235
           1       0.30      0.05      0.08       130
           2       0.26      0.14      0.18      1304
           3       0.44      0.69      0.53      2248
           4       0.29      0.29      0.29      1499
           5       0.45      0.45      0.45       990
           6       0.35      0.34      0.34      1566

    accuracy                           0.37      8972
   macro avg       0.34      0.30      0.30      8972
weighted avg       0.35      0.37      0.35      8972



In [44]:
# L2-penalised logistic regression (SAGA solver) trained on the NMF features
# and evaluated on the held-out NMF features.
lrc_nmf = LogisticRegression(
    penalty='l2',
    C=1.0,
    solver='saga',
    tol=0.0001,
    max_iter=800,
    random_state=seed,
).fit(x_train_nmf, y_train)
predictions_lrc_nmf = lrc_nmf.predict(x_test_nmf)

# Confusion matrix first, then the per-class precision/recall/F1 report.
print(
    confusion_matrix(y_test, predictions_lrc_nmf),
    classification_report(y_test, predictions_lrc_nmf, zero_division=0),
    sep="\n",
)

[[ 123    0   64  507  269   91  181]
 [   9    0   15   56   23   11   16]
 [  75    0  119  492  255  171  192]
 [  51    0   71 1652  252   76  146]
 [  84    0   67  523  482   71  272]
 [  17    0   86  254  101  420  112]
 [  54    0   63  545  290   86  528]]
              precision    recall  f1-score   support

           0       0.30      0.10      0.15      1235
           1       0.00      0.00      0.00       130
           2       0.25      0.09      0.13      1304
           3       0.41      0.73      0.53      2248
           4       0.29      0.32      0.30      1499
           5       0.45      0.42      0.44       990
           6       0.36      0.34      0.35      1566

    accuracy                           0.37      8972
   macro avg       0.29      0.29      0.27      8972
weighted avg       0.34      0.37      0.33      8972



In [46]:
# L2-penalised logistic regression (SAGA solver) trained on the kernel PCA
# features and evaluated on the held-out kernel PCA features.
lrc_kpca = LogisticRegression(
    penalty='l2',
    C=1.0,
    solver='saga',
    tol=0.0001,
    max_iter=800,
    random_state=seed,
).fit(x_train_kpca, y_train)
predictions_lrc_kpca = lrc_kpca.predict(x_test_kpca)

# Confusion matrix first, then the per-class precision/recall/F1 report.
print(
    confusion_matrix(y_test, predictions_lrc_kpca),
    classification_report(y_test, predictions_lrc_kpca, zero_division=0),
    sep="\n",
)

[[   4    0   11  932  182   65   41]
 [   2    0    4   93   12   13    6]
 [   7    0   15  966  155   97   64]
 [   5    0   24 1787  232  103   97]
 [   6    0    9 1097  273   44   70]
 [   1    0   14  744   40  152   39]
 [   1    0    7 1134  235   80  109]]
              precision    recall  f1-score   support

           0       0.15      0.00      0.01      1235
           1       0.00      0.00      0.00       130
           2       0.18      0.01      0.02      1304
           3       0.26      0.79      0.40      2248
           4       0.24      0.18      0.21      1499
           5       0.27      0.15      0.20       990
           6       0.26      0.07      0.11      1566

    accuracy                           0.26      8972
   macro avg       0.20      0.17      0.13      8972
weighted avg       0.23      0.26      0.18      8972



### Support Vector Machine Classifier Results

In [47]:
# Linear support vector classifier (only random_state is set explicitly)
# trained on the PCA features and evaluated on the held-out PCA features.
svc_pca = LinearSVC(random_state=seed).fit(x_train_pca, y_train)
predictions_svc_pca = svc_pca.predict(x_test_pca)

# Confusion matrix first, then the per-class precision/recall/F1 report.
print(
    confusion_matrix(y_test, predictions_svc_pca),
    classification_report(y_test, predictions_svc_pca, zero_division=0),
    sep="\n",
)

[[ 132    0   56  533  184  134  196]
 [  12    0   19   54   11   15   19]
 [  79    0  109  492  185  226  213]
 [  64    0   46 1696  183  116  143]
 [  95    0   56  577  342  122  307]
 [  18    0   54  247   72  501   98]
 [  61    0   41  593  210  155  506]]
              precision    recall  f1-score   support

           0       0.29      0.11      0.16      1235
           1       0.00      0.00      0.00       130
           2       0.29      0.08      0.13      1304
           3       0.40      0.75      0.53      2248
           4       0.29      0.23      0.25      1499
           5       0.39      0.51      0.44       990
           6       0.34      0.32      0.33      1566

    accuracy                           0.37      8972
   macro avg       0.29      0.29      0.26      8972
weighted avg       0.33      0.37      0.32      8972





In [48]:
# Linear support vector classifier (only random_state is set explicitly)
# trained on the NMF features and evaluated on the held-out NMF features.
svc_nmf = LinearSVC(random_state=seed).fit(x_train_nmf, y_train)
predictions_svc_nmf = svc_nmf.predict(x_test_nmf)

# Confusion matrix first, then the per-class precision/recall/F1 report.
print(
    confusion_matrix(y_test, predictions_svc_nmf),
    classification_report(y_test, predictions_svc_nmf, zero_division=0),
    sep="\n",
)

[[ 110    0   36  547  229  133  180]
 [   7    0   14   58   19   14   18]
 [  66    0   97  502  238  204  197]
 [  51    0   41 1717  215   96  128]
 [  80    0   47  562  443  111  256]
 [  14    0   52  265   87  479   93]
 [  55    0   34  605  261  121  490]]
              precision    recall  f1-score   support

           0       0.29      0.09      0.14      1235
           1       0.00      0.00      0.00       130
           2       0.30      0.07      0.12      1304
           3       0.40      0.76      0.53      2248
           4       0.30      0.30      0.30      1499
           5       0.41      0.48      0.45       990
           6       0.36      0.31      0.33      1566

    accuracy                           0.37      8972
   macro avg       0.29      0.29      0.27      8972
weighted avg       0.34      0.37      0.33      8972



In [49]:
# Linear support vector classifier (only random_state is set explicitly)
# trained on the kernel PCA features and evaluated on the held-out kernel PCA
# features.
svc_kpca = LinearSVC(random_state=seed).fit(x_train_kpca, y_train)
predictions_svc_kpca = svc_kpca.predict(x_test_kpca)

# Confusion matrix first, then the per-class precision/recall/F1 report.
print(
    confusion_matrix(y_test, predictions_svc_kpca),
    classification_report(y_test, predictions_svc_kpca, zero_division=0),
    sep="\n",
)

[[   1    0   12  965  180   39   38]
 [   0    0    3   94   15   11    7]
 [   3    0   12 1035  159   34   61]
 [   3    0   24 1861  233   51   76]
 [   3    0    9 1131  278   12   66]
 [   0    0   12  810   45   88   35]
 [   0    0    7 1198  236   25  100]]
              precision    recall  f1-score   support

           0       0.10      0.00      0.00      1235
           1       0.00      0.00      0.00       130
           2       0.15      0.01      0.02      1304
           3       0.26      0.83      0.40      2248
           4       0.24      0.19      0.21      1499
           5       0.34      0.09      0.14       990
           6       0.26      0.06      0.10      1566

    accuracy                           0.26      8972
   macro avg       0.19      0.17      0.12      8972
weighted avg       0.23      0.26      0.17      8972



### MLP Results

In [50]:
# multilayer perceptron on pca representation
# One hidden layer of 500 units. hidden_layer_sizes is written as the
# one-element tuple (500,): the previous `(500)` was just the integer 500 in
# parentheses, which reads like a tuple but is not one.
mlp_pca = MLPClassifier(random_state=seed, hidden_layer_sizes=(500,), max_iter=300)
mlp_pca.fit(x_train_pca, y_train)
predictions_mlp_pca = mlp_pca.predict(x_test_pca)

# Confusion matrix first, then the per-class precision/recall/F1 report.
print(confusion_matrix(y_test, predictions_mlp_pca))
print(classification_report(y_test, predictions_mlp_pca, zero_division=0))

[[ 377    9  166  212  216   78  177]
 [  19   54   12   16   16   10    3]
 [ 145   15  456  203  222  126  137]
 [ 177    5  159 1346  215   97  249]
 [ 196   12  201  248  518   83  241]
 [  69    2  107   91   59  589   73]
 [ 179    9  174  270  278   98  558]]
              precision    recall  f1-score   support

           0       0.32      0.31      0.31      1235
           1       0.51      0.42      0.46       130
           2       0.36      0.35      0.35      1304
           3       0.56      0.60      0.58      2248
           4       0.34      0.35      0.34      1499
           5       0.54      0.59      0.57       990
           6       0.39      0.36      0.37      1566

    accuracy                           0.43      8972
   macro avg       0.43      0.42      0.43      8972
weighted avg       0.43      0.43      0.43      8972



In [51]:
# multilayer perceptron on nmf representation
# One hidden layer of 500 units. hidden_layer_sizes is written as the
# one-element tuple (500,): the previous `(500)` was just the integer 500 in
# parentheses, which reads like a tuple but is not one.
mlp_nmf = MLPClassifier(random_state=seed, hidden_layer_sizes=(500,), max_iter=300)
mlp_nmf.fit(x_train_nmf, y_train)
predictions_mlp_nmf = mlp_nmf.predict(x_test_nmf)

# Confusion matrix first, then the per-class precision/recall/F1 report.
print(confusion_matrix(y_test, predictions_mlp_nmf))
print(classification_report(y_test, predictions_mlp_nmf, zero_division=0))



[[ 375    9  175  212  173   56  235]
 [  24   43   14   16    8    9   16]
 [ 179   11  393  227  190  108  196]
 [ 189    9  139 1331  179   72  329]
 [ 231   13  186  269  429   66  305]
 [  62   10  111  115   57  526  109]
 [ 171   11  151  323  201   84  625]]
              precision    recall  f1-score   support

           0       0.30      0.30      0.30      1235
           1       0.41      0.33      0.36       130
           2       0.34      0.30      0.32      1304
           3       0.53      0.59      0.56      2248
           4       0.35      0.29      0.31      1499
           5       0.57      0.53      0.55       990
           6       0.34      0.40      0.37      1566

    accuracy                           0.41      8972
   macro avg       0.41      0.39      0.40      8972
weighted avg       0.41      0.41      0.41      8972



In [52]:
# multilayer perceptron on kernel pca representation
# One hidden layer of 500 units. hidden_layer_sizes is written as the
# one-element tuple (500,): the previous `(500)` was just the integer 500 in
# parentheses, which reads like a tuple but is not one.
mlp_kpca = MLPClassifier(random_state=seed, hidden_layer_sizes=(500,), max_iter=300)
mlp_kpca.fit(x_train_kpca, y_train)
predictions_mlp_kpca = mlp_kpca.predict(x_test_kpca)

# Confusion matrix first, then the per-class precision/recall/F1 report.
print(confusion_matrix(y_test, predictions_mlp_kpca))
print(classification_report(y_test, predictions_mlp_kpca, zero_division=0))

[[ 115    0  100  405  307   96  212]
 [   8    0    7   59   18   19   19]
 [  74    0  171  450  297  136  176]
 [  92    0  142 1093  403  146  372]
 [  95    0   86  488  485   53  292]
 [  42    0  101  364  101  240  142]
 [  79    0   88  586  274  105  434]]
              precision    recall  f1-score   support

           0       0.23      0.09      0.13      1235
           1       0.00      0.00      0.00       130
           2       0.25      0.13      0.17      1304
           3       0.32      0.49      0.38      2248
           4       0.26      0.32      0.29      1499
           5       0.30      0.24      0.27       990
           6       0.26      0.28      0.27      1566

    accuracy                           0.28      8972
   macro avg       0.23      0.22      0.22      8972
weighted avg       0.27      0.28      0.26      8972





### Random Forest Classifier Results

In [53]:
# Random forest (only random_state is set explicitly) trained on the PCA
# features and evaluated on the held-out PCA features.
rfc_pca = RandomForestClassifier(random_state=seed).fit(x_train_pca, y_train)
predictions_rfc_pca = rfc_pca.predict(x_test_pca)

# Confusion matrix first, then the per-class precision/recall/F1 report.
print(
    confusion_matrix(y_test, predictions_rfc_pca),
    classification_report(y_test, predictions_rfc_pca, zero_division=0),
    sep="\n",
)

[[ 200    0   50  663  170   21  131]
 [   6   33    7   54   14    2   14]
 [  50    0  290  642  167   41  114]
 [  33    0   56 1855  156   20  128]
 [  57    0   70  820  360    8  184]
 [  26    0   52  345   61  424   82]
 [  44    0   61  862  187   15  397]]
              precision    recall  f1-score   support

           0       0.48      0.16      0.24      1235
           1       1.00      0.25      0.40       130
           2       0.49      0.22      0.31      1304
           3       0.35      0.83      0.50      2248
           4       0.32      0.24      0.28      1499
           5       0.80      0.43      0.56       990
           6       0.38      0.25      0.30      1566

    accuracy                           0.40      8972
   macro avg       0.55      0.34      0.37      8972
weighted avg       0.45      0.40      0.37      8972



In [54]:
# Random forest (only random_state is set explicitly) trained on the NMF
# features and evaluated on the held-out NMF features.
rfc_nmf = RandomForestClassifier(random_state=seed).fit(x_train_nmf, y_train)
predictions_rfc_nmf = rfc_nmf.predict(x_test_nmf)

# Confusion matrix first, then the per-class precision/recall/F1 report.
print(
    confusion_matrix(y_test, predictions_rfc_nmf),
    classification_report(y_test, predictions_rfc_nmf, zero_division=0),
    sep="\n",
)

[[ 186    0   63  601  192   30  163]
 [   4   26   10   58   18    2   12]
 [  75    0  268  559  199   67  136]
 [  42    0   66 1788  175   32  145]
 [  57    0   71  711  440   17  203]
 [  32    0   58  326   56  451   67]
 [  57    0   64  784  207   23  431]]
              precision    recall  f1-score   support

           0       0.41      0.15      0.22      1235
           1       1.00      0.20      0.33       130
           2       0.45      0.21      0.28      1304
           3       0.37      0.80      0.51      2248
           4       0.34      0.29      0.32      1499
           5       0.73      0.46      0.56       990
           6       0.37      0.28      0.32      1566

    accuracy                           0.40      8972
   macro avg       0.52      0.34      0.36      8972
weighted avg       0.43      0.40      0.37      8972



In [55]:
# Random forest (only random_state is set explicitly) trained on the kernel
# PCA features and evaluated on the held-out kernel PCA features.
rfc_kpca = RandomForestClassifier(random_state=seed).fit(x_train_kpca, y_train)
predictions_rfc_kpca = rfc_kpca.predict(x_test_kpca)

# Confusion matrix first, then the per-class precision/recall/F1 report.
print(
    confusion_matrix(y_test, predictions_rfc_kpca),
    classification_report(y_test, predictions_rfc_kpca, zero_division=0),
    sep="\n",
)

[[ 302    0   81  475  178   52  147]
 [   9   44   10   37   14    8    8]
 [  81    1  374  465  181   81  121]
 [ 131    0  133 1368  251   94  271]
 [ 121    0   93  605  430   35  215]
 [  37    0   79  298   61  448   67]
 [ 110    0   92  664  217   65  418]]
              precision    recall  f1-score   support

           0       0.38      0.24      0.30      1235
           1       0.98      0.34      0.50       130
           2       0.43      0.29      0.35      1304
           3       0.35      0.61      0.44      2248
           4       0.32      0.29      0.30      1499
           5       0.57      0.45      0.51       990
           6       0.34      0.27      0.30      1566

    accuracy                           0.38      8972
   macro avg       0.48      0.35      0.39      8972
weighted avg       0.39      0.38      0.37      8972



### K-Nearest Neighbors Classifier Results

In [56]:
# k-nearest-neighbours classifier with the library's default settings,
# trained on the PCA features and evaluated on the held-out PCA features.
knc_pca = KNeighborsClassifier().fit(x_train_pca, y_train)
predictions_knc_pca = knc_pca.predict(x_test_pca)

# Confusion matrix first, then the per-class precision/recall/F1 report.
print(
    confusion_matrix(y_test, predictions_knc_pca),
    classification_report(y_test, predictions_knc_pca, zero_division=0),
    sep="\n",
)

[[ 370   19  134  288  167   65  192]
 [  17   48    9   30   10    5   11]
 [ 167   26  378  281  169  100  183]
 [ 233   42  222 1065  201  114  371]
 [ 207   13  178  401  378   52  270]
 [ 105   19  118  183   79  368  118]
 [ 189   28  138  422  198   90  501]]
              precision    recall  f1-score   support

           0       0.29      0.30      0.29      1235
           1       0.25      0.37      0.30       130
           2       0.32      0.29      0.30      1304
           3       0.40      0.47      0.43      2248
           4       0.31      0.25      0.28      1499
           5       0.46      0.37      0.41       990
           6       0.30      0.32      0.31      1566

    accuracy                           0.35      8972
   macro avg       0.33      0.34      0.33      8972
weighted avg       0.35      0.35      0.34      8972



In [57]:
# k-nearest-neighbours classifier with the library's default settings,
# trained on the NMF features and evaluated on the held-out NMF features.
knc_nmf = KNeighborsClassifier().fit(x_train_nmf, y_train)
predictions_knc_nmf = knc_nmf.predict(x_test_nmf)

# Confusion matrix first, then the per-class precision/recall/F1 report.
print(
    confusion_matrix(y_test, predictions_knc_nmf),
    classification_report(y_test, predictions_knc_nmf, zero_division=0),
    sep="\n",
)

[[ 402   15  124  315  190   51  138]
 [  21   34    8   32   18    6   11]
 [ 243   19  294  336  207   70  135]
 [ 349   37  202 1028  290   83  259]
 [ 289   17  173  411  363   54  192]
 [ 143   11  132  240   99  259  106]
 [ 269   18  152  457  210   66  394]]
              precision    recall  f1-score   support

           0       0.23      0.33      0.27      1235
           1       0.23      0.26      0.24       130
           2       0.27      0.23      0.25      1304
           3       0.36      0.46      0.41      2248
           4       0.26      0.24      0.25      1499
           5       0.44      0.26      0.33       990
           6       0.32      0.25      0.28      1566

    accuracy                           0.31      8972
   macro avg       0.30      0.29      0.29      8972
weighted avg       0.31      0.31      0.31      8972



In [58]:
# k-nearest-neighbours classifier with the library's default settings,
# trained on the kernel PCA features and evaluated on the held-out kernel PCA
# features.
knc_kpca = KNeighborsClassifier().fit(x_train_kpca, y_train)
predictions_knc_kpca = knc_kpca.predict(x_test_kpca)

# Confusion matrix first, then the per-class precision/recall/F1 report.
print(
    confusion_matrix(y_test, predictions_knc_kpca),
    classification_report(y_test, predictions_knc_kpca, zero_division=0),
    sep="\n",
)

[[381  10 159 323 153  88 121]
 [ 16  40  13  29  15  10   7]
 [235  19 361 315 158 109 107]
 [389  25 307 851 253 177 246]
 [279   9 201 436 334  83 157]
 [131  22 137 222  88 318  72]
 [265  16 192 480 211 129 273]]
              precision    recall  f1-score   support

           0       0.22      0.31      0.26      1235
           1       0.28      0.31      0.30       130
           2       0.26      0.28      0.27      1304
           3       0.32      0.38      0.35      2248
           4       0.28      0.22      0.25      1499
           5       0.35      0.32      0.33       990
           6       0.28      0.17      0.21      1566

    accuracy                           0.29      8972
   macro avg       0.28      0.28      0.28      8972
weighted avg       0.29      0.29      0.28      8972



### Save data and models

In [61]:
# save data and representation methods
# pickle.dump(x_train_sc, open('../data/processed/x_train_sc.pkl', 'wb'))
# pickle.dump(x_test_sc, open('../data/processed/x_test_sc.pkl', 'wb'))
# pickle.dump(y_test, open('../data/processed/y_test.pkl', 'wb'))
# pickle.dump(pca, open('../data/processed/pca.pkl', 'wb'))
# pickle.dump(x_train_pca, open('../data/processed/x_train_pca.pkl', 'wb'))
# pickle.dump(x_test_pca, open('../data/processed/x_test_pca.pkl', 'wb'))
# pickle.dump(nmf, open('../data/processed/nmf.pkl', 'wb'))
# pickle.dump(x_train_nmf, open('../data/processed/x_train_nmf.pkl', 'wb'))
# pickle.dump(x_test_nmf, open('../data/processed/x_test_nmf.pkl', 'wb'))
# pickle.dump(kpca, open('../data/processed/kpca.pkl', 'wb'))
# pickle.dump(x_train_kpca, open('../data/processed/x_train_kpca.pkl', 'wb'))
# pickle.dump(x_test_kpca, open('../data/processed/x_test_kpca.pkl', 'wb'))
# save classification models
# pickle.dump(lrc_pca, open('../data/processed/lrc_pca.pkl', 'wb'))
# pickle.dump(lrc_nmf, open('../data/processed/lrc_nmf.pkl', 'wb'))
# pickle.dump(lrc_kpca, open('../data/processed/lrc_kpca.pkl', 'wb'))
# pickle.dump(svc_pca, open('../data/processed/svc_pca.pkl', 'wb'))
# pickle.dump(svc_nmf, open('../data/processed/svc_nmf.pkl', 'wb'))
# pickle.dump(svc_kpca, open('../data/processed/svc_kpca.pkl', 'wb'))
# pickle.dump(mlp_pca, open('../data/processed/mlp_pca.pkl', 'wb'))
# pickle.dump(mlp_nmf, open('../data/processed/mlp_nmf.pkl', 'wb'))
# pickle.dump(mlp_kpca, open('../data/processed/mlp_kpca.pkl', 'wb'))
# pickle.dump(rfc_pca, open('../data/processed/rfc_pca.pkl', 'wb'))
# pickle.dump(rfc_nmf, open('../data/processed/rfc_nmf.pkl', 'wb'))
# pickle.dump(rfc_kpca, open('../data/processed/rfc_kpca.pkl', 'wb'))
# pickle.dump(knc_pca, open('../data/processed/knc_pca.pkl', 'wb'))
# pickle.dump(knc_nmf, open('../data/processed/knc_nmf.pkl', 'wb'))
# pickle.dump(knc_kpca, open('../data/processed/knc_kpca.pkl', 'wb'))