In [18]:
import os
import cv2
import numpy as np
import pandas as pd
from numpy.random import shuffle
from sklearn.decomposition import KernelPCA
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, cross_val_score

In [19]:
def get_data(path_data='../ATT images', test_size=0.2, is_shuffle=True):
    folders = [f's{i+1}' for i in range(40)]
    X, y = [], []

    for i in range(1, 41):
        path = path_data + f'/s{i}'
        files = os.listdir(path)
        num_files = len(files)

        if is_shuffle:
            files = shuffle(files)

        for file in os.listdir(path):
            path = path_data + f'/s{i}/' + file
            image = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
            pixels = np.reshape(image, [1, image.shape[0] * image.shape[1]])
            pixels = np.asarray(pixels)

            if len(X) == 0:
                X = pixels
            else:
                X = np.vstack([X, pixels])
            y.append(f's{i}')

    return X, np.asarray(y)

In [20]:
X, y = get_data()

n_components_pca = 50
n_splits = 5
kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)

In [21]:
kernel = {'linear', 'poly', 'rbf', 'sigmoid', 'cosine', 'precomputed'}

****
****
***

# kernel = 'linear'

## Logistic Regression

In [38]:
accuracy_scores, precision_scores, recall_scores, f1_scores = [], [], [], []
for train_index, test_index in kf.split(X, y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    pca = KernelPCA(n_components=n_components_pca, kernel = 'linear')
    X_train_reduced = pca.fit_transform(X_train)
    X_test_reduced = pca.transform(X_test)

    # Logistic
    lr = LogisticRegression()
    lr.fit(X_train_reduced, y_train)
    y_pred = lr.predict(X_test_reduced)

    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='macro')
    recall = recall_score(y_test, y_pred, average='macro')
    f1 = f1_score(y_test, y_pred, average='macro')

    # Evaluate the model
    accuracy_scores.append(accuracy)
    precision_scores.append(precision)
    recall_scores.append(recall)
    f1_scores.append(f1)

    
print("accuracy:", np.mean(accuracy_scores))
print("recall:", np.mean(recall_scores))
print("precision:", np.mean(precision_scores))
print("f1-score:", np.mean(f1_scores))


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


accuracy: 0.9724999999999999
recall: 0.9658129558129558
precision: 0.9673581438581438
f1-score: 0.9624018916876059


## Random Forest

In [23]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

accuracy_scores, precision_scores, recall_scores, f1_scores = [], [], [], []
for train_index, test_index in kf.split(X, y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    pca = KernelPCA(n_components=n_components_pca, kernel = 'linear')
    X_train_reduced = pca.fit_transform(X_train)
    X_test_reduced = pca.transform(X_test)
 
    # Logistic
    rf = RandomForestClassifier(n_estimators = 50)
    rf.fit(X_train_reduced, y_train)
    y_pred = rf.predict(X_test_reduced)

    # Evaluate the model
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='macro')
    recall = recall_score(y_test, y_pred, average='macro')
    f1 = f1_score(y_test, y_pred, average='macro')
    
    accuracy_scores.append(accuracy)
    precision_scores.append(precision)
    recall_scores.append(recall)
    f1_scores.append(f1)

print("accuracy:", np.mean(accuracy_scores),list(accuracy_scores))
print("recall:", np.mean(recall_scores))
print("precision:", np.mean(precision_scores))
print("f1-score:", np.mean(f1_scores))

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


accuracy: 0.89 [0.925, 0.8875, 0.9125, 0.9375, 0.7875]
recall: 0.8934511523907809
precision: 0.8823538011695906
f1-score: 0.8688628843350206


  _warn_prf(average, modifier, msg_start, len(result))


## Naives Bayes

In [24]:
from sklearn.naive_bayes import GaussianNB

accuracy_scores, precision_scores, recall_scores, f1_scores = [], [], [], []
for train_index, test_index in kf.split(X, y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    pca = KernelPCA(n_components=n_components_pca, kernel = 'linear')
    X_train_reduced = pca.fit_transform(X_train)
    X_test_reduced = pca.transform(X_test)
 
    # Logistic
    gnb = GaussianNB()
    gnb.fit(X_train_reduced, y_train)
    y_pred = gnb.predict(X_test_reduced)

    # Evaluate the model
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='macro')
    recall = recall_score(y_test, y_pred, average='macro')
    f1 = f1_score(y_test, y_pred, average='macro')
    
    accuracy_scores.append(accuracy)
    precision_scores.append(precision)
    recall_scores.append(recall)
    f1_scores.append(f1)

print("accuracy:", np.mean(accuracy_scores),list(accuracy_scores))
print("recall:", np.mean(recall_scores))
print("precision:", np.mean(precision_scores))
print("f1-score:", np.mean(f1_scores))

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


accuracy: 0.8775000000000001 [0.925, 0.8875, 0.8875, 0.875, 0.8125]
recall: 0.8830157866845173
precision: 0.8826273354291929
f1-score: 0.8600808785948104


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


******
******
******

# kernel = 'rbf'

## Logistic Regression

In [25]:
accuracy_scores, precision_scores, recall_scores, f1_scores = [], [], [], []
for train_index, test_index in kf.split(X, y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    pca = KernelPCA(n_components=n_components_pca, kernel = 'rbf')
    X_train_reduced = pca.fit_transform(X_train)
    X_test_reduced = pca.transform(X_test)

    # Logistic
    lr = LogisticRegression(multi_class='ovr', solver='liblinear')
    lr.fit(X_train_reduced, y_train)
    y_pred = lr.predict(X_test_reduced)

    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='macro')
    recall = recall_score(y_test, y_pred, average='macro')
    f1 = f1_score(y_test, y_pred, average='macro')

    # Evaluate the model
    accuracy_scores.append(accuracy)
    precision_scores.append(precision)
    recall_scores.append(recall)
    f1_scores.append(f1)

    
print("accuracy:", np.mean(accuracy_scores))
print("recall:", np.mean(recall_scores))
print("precision:", np.mean(precision_scores))
print("f1-score:", np.mean(f1_scores))


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


accuracy: 0.0
recall: 0.0
precision: 0.0
f1-score: 0.0


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## Random Forest

In [26]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

accuracy_scores, precision_scores, recall_scores, f1_scores = [], [], [], []
for train_index, test_index in kf.split(X, y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    pca = KernelPCA(n_components=n_components_pca, kernel = 'rbf')
    X_train_reduced = pca.fit_transform(X_train)
    X_test_reduced = pca.transform(X_test)
 
    # Logistic
    rf = RandomForestClassifier(n_estimators = 50)
    rf.fit(X_train_reduced, y_train)
    y_pred = rf.predict(X_test_reduced)

    # Evaluate the model
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='macro')
    recall = recall_score(y_test, y_pred, average='macro')
    f1 = f1_score(y_test, y_pred, average='macro')
    
    accuracy_scores.append(accuracy)
    precision_scores.append(precision)
    recall_scores.append(recall)
    f1_scores.append(f1)

print("accuracy:", np.mean(accuracy_scores),list(accuracy_scores))
print("recall:", np.mean(recall_scores))
print("precision:", np.mean(precision_scores))
print("f1-score:", np.mean(f1_scores))

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


accuracy: 0.024999999999999998 [0.0375, 0.0375, 0.0125, 0.025, 0.0125]
recall: 0.027804274863098388
precision: 0.0006891450273803215
f1-score: 0.0013384544298891952


  _warn_prf(average, modifier, msg_start, len(result))


## Naives Bayes

In [27]:
from sklearn.naive_bayes import GaussianNB

accuracy_scores, precision_scores, recall_scores, f1_scores = [], [], [], []
for train_index, test_index in kf.split(X, y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    pca = KernelPCA(n_components=n_components_pca, kernel = 'rbf')
    X_train_reduced = pca.fit_transform(X_train)
    X_test_reduced = pca.transform(X_test)
 
    # Logistic
    gnb = GaussianNB()
    gnb.fit(X_train_reduced, y_train)
    y_pred = gnb.predict(X_test_reduced)

    # Evaluate the model
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='macro')
    recall = recall_score(y_test, y_pred, average='macro')
    f1 = f1_score(y_test, y_pred, average='macro')
    
    accuracy_scores.append(accuracy)
    precision_scores.append(precision)
    recall_scores.append(recall)
    f1_scores.append(f1)

print("accuracy:", np.mean(accuracy_scores),list(accuracy_scores))
print("recall:", np.mean(recall_scores))
print("precision:", np.mean(precision_scores))
print("f1-score:", np.mean(f1_scores))

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


accuracy: 0.032499999999999994 [0.0375, 0.0375, 0.0375, 0.025, 0.025]
recall: 0.027804274863098388
precision: 0.0009056482953541776
f1-score: 0.0017522699792669196


  _warn_prf(average, modifier, msg_start, len(result))


******
******
******

# kernel = 'poly'

## Logistic Regression

In [28]:
accuracy_scores, precision_scores, recall_scores, f1_scores = [], [], [], []
for train_index, test_index in kf.split(X, y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    pca = KernelPCA(n_components=n_components_pca, kernel = 'poly')
    X_train_reduced = pca.fit_transform(X_train)
    X_test_reduced = pca.transform(X_test)

    # Logistic
    lr = LogisticRegression(multi_class='ovr', solver='liblinear')
    lr.fit(X_train_reduced, y_train)
    y_pred = lr.predict(X_test_reduced)

    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='macro')
    recall = recall_score(y_test, y_pred, average='macro')
    f1 = f1_score(y_test, y_pred, average='macro')

    # Evaluate the model
    accuracy_scores.append(accuracy)
    precision_scores.append(precision)
    recall_scores.append(recall)
    f1_scores.append(f1)

    
print("accuracy:", np.mean(accuracy_scores))
print("recall:", np.mean(recall_scores))
print("precision:", np.mean(precision_scores))
print("f1-score:", np.mean(f1_scores))


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


accuracy: 0.7
recall: 0.6976102418207681
precision: 0.6934169319695636
f1-score: 0.6575568591358064


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## Random Forest

In [29]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

accuracy_scores, precision_scores, recall_scores, f1_scores = [], [], [], []
for train_index, test_index in kf.split(X, y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    pca = KernelPCA(n_components=n_components_pca, kernel = 'poly')
    X_train_reduced = pca.fit_transform(X_train)
    X_test_reduced = pca.transform(X_test)
 
    # Logistic
    rf = RandomForestClassifier(n_estimators = 50)
    rf.fit(X_train_reduced, y_train)
    y_pred = rf.predict(X_test_reduced)

    # Evaluate the model
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='macro')
    recall = recall_score(y_test, y_pred, average='macro')
    f1 = f1_score(y_test, y_pred, average='macro')
    
    accuracy_scores.append(accuracy)
    precision_scores.append(precision)
    recall_scores.append(recall)
    f1_scores.append(f1)

print("accuracy:", np.mean(accuracy_scores),list(accuracy_scores))
print("recall:", np.mean(recall_scores))
print("precision:", np.mean(precision_scores))
print("f1-score:", np.mean(f1_scores))

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


accuracy: 0.9 [0.9375, 0.9375, 0.8625, 0.9, 0.8625]
recall: 0.8989436955752744
precision: 0.8911899681636524
f1-score: 0.8790590122883355


## Naives Bayes

In [30]:
from sklearn.naive_bayes import GaussianNB

accuracy_scores, precision_scores, recall_scores, f1_scores = [], [], [], []
for train_index, test_index in kf.split(X, y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    pca = KernelPCA(n_components=n_components_pca, kernel = 'poly')
    X_train_reduced = pca.fit_transform(X_train)
    X_test_reduced = pca.transform(X_test)
 
    # Logistic
    gnb = GaussianNB()
    gnb.fit(X_train_reduced, y_train)
    y_pred = gnb.predict(X_test_reduced)

    # Evaluate the model
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='macro')
    recall = recall_score(y_test, y_pred, average='macro')
    f1 = f1_score(y_test, y_pred, average='macro')
    
    accuracy_scores.append(accuracy)
    precision_scores.append(precision)
    recall_scores.append(recall)
    f1_scores.append(f1)

print("accuracy:", np.mean(accuracy_scores),list(accuracy_scores))
print("recall:", np.mean(recall_scores))
print("precision:", np.mean(precision_scores))
print("f1-score:", np.mean(f1_scores))

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


accuracy: 0.865 [0.9125, 0.85, 0.875, 0.875, 0.8125]
recall: 0.8772537010694906
precision: 0.8889712483697444
f1-score: 0.8578755058805185


  _warn_prf(average, modifier, msg_start, len(result))


****
****
****

# kernel = 'sigmoid'

## Logistic Regression

In [31]:
accuracy_scores, precision_scores, recall_scores, f1_scores = [], [], [], []
for train_index, test_index in kf.split(X, y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    pca = KernelPCA(n_components=n_components_pca, kernel = 'sigmoid')
    X_train_reduced = pca.fit_transform(X_train)
    X_test_reduced = pca.transform(X_test)

    # Logistic
    lr = LogisticRegression(multi_class='ovr', solver='liblinear')
    lr.fit(X_train_reduced, y_train)
    y_pred = lr.predict(X_test_reduced)

    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='macro')
    recall = recall_score(y_test, y_pred, average='macro')
    f1 = f1_score(y_test, y_pred, average='macro')

    # Evaluate the model
    accuracy_scores.append(accuracy)
    precision_scores.append(precision)
    recall_scores.append(recall)
    f1_scores.append(f1)

    
print("accuracy:", np.mean(accuracy_scores))
print("recall:", np.mean(recall_scores))
print("precision:", np.mean(precision_scores))
print("f1-score:", np.mean(f1_scores))


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


accuracy: 0.0
recall: 0.0
precision: 0.0
f1-score: 0.0


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## Random Forest

In [32]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

accuracy_scores, precision_scores, recall_scores, f1_scores = [], [], [], []
for train_index, test_index in kf.split(X, y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    pca = KernelPCA(n_components=n_components_pca, kernel = 'sigmoid')
    X_train_reduced = pca.fit_transform(X_train)
    X_test_reduced = pca.transform(X_test)
 
    # Logistic
    rf = RandomForestClassifier(n_estimators = 50)
    rf.fit(X_train_reduced, y_train)
    y_pred = rf.predict(X_test_reduced)

    # Evaluate the model
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='macro')
    recall = recall_score(y_test, y_pred, average='macro')
    f1 = f1_score(y_test, y_pred, average='macro')
    
    accuracy_scores.append(accuracy)
    precision_scores.append(precision)
    recall_scores.append(recall)
    f1_scores.append(f1)

print("accuracy:", np.mean(accuracy_scores),list(accuracy_scores))
print("recall:", np.mean(recall_scores))
print("precision:", np.mean(precision_scores))
print("f1-score:", np.mean(f1_scores))

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


accuracy: 0.0 [0.0, 0.0, 0.0, 0.0, 0.0]
recall: 0.0
precision: 0.0
f1-score: 0.0


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## Naives Bayes

In [33]:
from sklearn.naive_bayes import GaussianNB

accuracy_scores, precision_scores, recall_scores, f1_scores = [], [], [], []
for train_index, test_index in kf.split(X, y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    pca = KernelPCA(n_components=n_components_pca, kernel = 'sigmoid')
    X_train_reduced = pca.fit_transform(X_train)
    X_test_reduced = pca.transform(X_test)
 
    # Logistic
    gnb = GaussianNB()
    gnb.fit(X_train_reduced, y_train)
    y_pred = gnb.predict(X_test_reduced)

    # Evaluate the model
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='macro')
    recall = recall_score(y_test, y_pred, average='macro')
    f1 = f1_score(y_test, y_pred, average='macro')
    
    accuracy_scores.append(accuracy)
    precision_scores.append(precision)
    recall_scores.append(recall)
    f1_scores.append(f1)

print("accuracy:", np.mean(accuracy_scores),list(accuracy_scores))
print("recall:", np.mean(recall_scores))
print("precision:", np.mean(precision_scores))
print("f1-score:", np.mean(f1_scores))

  n_ij = -0.5 * np.sum(np.log(2.0 * np.pi * self.var_[i, :]))
  n_ij -= 0.5 * np.sum(((X - self.theta_[i, :]) ** 2) / (self.var_[i, :]), 1)
  _warn_prf(average, modifier, msg_start, len(result))
  n_ij = -0.5 * np.sum(np.log(2.0 * np.pi * self.var_[i, :]))
  n_ij -= 0.5 * np.sum(((X - self.theta_[i, :]) ** 2) / (self.var_[i, :]), 1)
  _warn_prf(average, modifier, msg_start, len(result))
  n_ij = -0.5 * np.sum(np.log(2.0 * np.pi * self.var_[i, :]))
  n_ij -= 0.5 * np.sum(((X - self.theta_[i, :]) ** 2) / (self.var_[i, :]), 1)
  _warn_prf(average, modifier, msg_start, len(result))
  n_ij = -0.5 * np.sum(np.log(2.0 * np.pi * self.var_[i, :]))
  n_ij -= 0.5 * np.sum(((X - self.theta_[i, :]) ** 2) / (self.var_[i, :]), 1)
  _warn_prf(average, modifier, msg_start, len(result))


accuracy: 0.024999999999999998 [0.0375, 0.0375, 0.0125, 0.025, 0.0125]
recall: 0.027804274863098388
precision: 0.0006891450273803215
f1-score: 0.0013384544298891952


  n_ij = -0.5 * np.sum(np.log(2.0 * np.pi * self.var_[i, :]))
  n_ij -= 0.5 * np.sum(((X - self.theta_[i, :]) ** 2) / (self.var_[i, :]), 1)
  _warn_prf(average, modifier, msg_start, len(result))


****
****
****

# kernel = 'cosine'

## Logistic Regression

In [34]:
accuracy_scores, precision_scores, recall_scores, f1_scores = [], [], [], []
for train_index, test_index in kf.split(X, y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    pca = KernelPCA(n_components=n_components_pca, kernel = 'cosine')
    X_train_reduced = pca.fit_transform(X_train)
    X_test_reduced = pca.transform(X_test)

    # Logistic
    lr = LogisticRegression(multi_class='ovr', solver='liblinear')
    lr.fit(X_train_reduced, y_train)
    y_pred = lr.predict(X_test_reduced)

    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='macro')
    recall = recall_score(y_test, y_pred, average='macro')
    f1 = f1_score(y_test, y_pred, average='macro')

    # Evaluate the model
    accuracy_scores.append(accuracy)
    precision_scores.append(precision)
    recall_scores.append(recall)
    f1_scores.append(f1)

    
print("accuracy:", np.mean(accuracy_scores))
print("recall:", np.mean(recall_scores))
print("precision:", np.mean(precision_scores))
print("f1-score:", np.mean(f1_scores))


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


accuracy: 0.31500000000000006
recall: 0.4108333333333333
precision: 0.34990674603174604
f1-score: 0.3368253968253968


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## Random Forest

In [35]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

accuracy_scores, precision_scores, recall_scores, f1_scores = [], [], [], []
for train_index, test_index in kf.split(X, y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    pca = KernelPCA(n_components=n_components_pca, kernel = 'cosine')
    X_train_reduced = pca.fit_transform(X_train)
    X_test_reduced = pca.transform(X_test)
 
    # Logistic
    rf = RandomForestClassifier(n_estimators = 50)
    rf.fit(X_train_reduced, y_train)
    y_pred = rf.predict(X_test_reduced)

    # Evaluate the model
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='macro')
    recall = recall_score(y_test, y_pred, average='macro')
    f1 = f1_score(y_test, y_pred, average='macro')
    
    accuracy_scores.append(accuracy)
    precision_scores.append(precision)
    recall_scores.append(recall)
    f1_scores.append(f1)

print("accuracy:", np.mean(accuracy_scores),list(accuracy_scores))
print("recall:", np.mean(recall_scores))
print("precision:", np.mean(precision_scores))
print("f1-score:", np.mean(f1_scores))

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


accuracy: 0.9025000000000001 [0.8875, 0.925, 0.9, 0.9375, 0.8625]
recall: 0.8846244929139665
precision: 0.8763712396607133
f1-score: 0.8620897463879919


  _warn_prf(average, modifier, msg_start, len(result))


## Naives Bayes

In [36]:
from sklearn.naive_bayes import GaussianNB

accuracy_scores, precision_scores, recall_scores, f1_scores = [], [], [], []
for train_index, test_index in kf.split(X, y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    pca = KernelPCA(n_components=n_components_pca, kernel = 'cosine')
    X_train_reduced = pca.fit_transform(X_train)
    X_test_reduced = pca.transform(X_test)
 
    # Logistic
    gnb = GaussianNB()
    gnb.fit(X_train_reduced, y_train)
    y_pred = gnb.predict(X_test_reduced)

    # Evaluate the model
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='macro')
    recall = recall_score(y_test, y_pred, average='macro')
    f1 = f1_score(y_test, y_pred, average='macro')
    
    accuracy_scores.append(accuracy)
    precision_scores.append(precision)
    recall_scores.append(recall)
    f1_scores.append(f1)

print("accuracy:", np.mean(accuracy_scores),list(accuracy_scores))
print("recall:", np.mean(recall_scores))
print("precision:", np.mean(precision_scores))
print("f1-score:", np.mean(f1_scores))

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


accuracy: 0.8600000000000001 [0.8625, 0.9, 0.8875, 0.8375, 0.8125]
recall: 0.8765743607384474
precision: 0.8806689927432962
f1-score: 0.8565178915798113


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


****
****
****

# kernel = 'precomputed'

## Logistic Regression

In [37]:
accuracy_scores, precision_scores, recall_scores, f1_scores = [], [], [], []
for train_index, test_index in kf.split(X, y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    pca = KernelPCA(n_components=n_components_pca, kernel = 'precomputed')
    X_train_reduced = pca.fit_transform(X_train)
    X_test_reduced = pca.transform(X_test)

    # Logistic
    lr = LogisticRegression(multi_class='ovr', solver='liblinear')
    lr.fit(X_train_reduced, y_train)
    y_pred = lr.predict(X_test_reduced)

    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='macro')
    recall = recall_score(y_test, y_pred, average='macro')
    f1 = f1_score(y_test, y_pred, average='macro')

    # Evaluate the model
    accuracy_scores.append(accuracy)
    precision_scores.append(precision)
    recall_scores.append(recall)
    f1_scores.append(f1)

    
print("accuracy:", np.mean(accuracy_scores))
print("recall:", np.mean(recall_scores))
print("precision:", np.mean(precision_scores))
print("f1-score:", np.mean(f1_scores))


ValueError: Precomputed metric requires shape (n_queries, n_indexed). Got (320, 10304) for 320 indexed.

## Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

accuracy_scores, precision_scores, recall_scores, f1_scores = [], [], [], []
for train_index, test_index in kf.split(X, y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    pca = KernelPCA(n_components=n_components_pca, kernel = 'precomputed')
    X_train_reduced = pca.fit_transform(X_train)
    X_test_reduced = pca.transform(X_test)
 
    # Logistic
    rf = RandomForestClassifier(n_estimators = 50)
    rf.fit(X_train_reduced, y_train)
    y_pred = rf.predict(X_test_reduced)

    # Evaluate the model
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='macro')
    recall = recall_score(y_test, y_pred, average='macro')
    f1 = f1_score(y_test, y_pred, average='macro')
    
    accuracy_scores.append(accuracy)
    precision_scores.append(precision)
    recall_scores.append(recall)
    f1_scores.append(f1)

print("accuracy:", np.mean(accuracy_scores),list(accuracy_scores))
print("recall:", np.mean(recall_scores))
print("precision:", np.mean(precision_scores))
print("f1-score:", np.mean(f1_scores))

## Naives Bayes

In [None]:
from sklearn.naive_bayes import GaussianNB

accuracy_scores, precision_scores, recall_scores, f1_scores = [], [], [], []
for train_index, test_index in kf.split(X, y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    pca = KernelPCA(n_components=n_components_pca, kernel = 'precomputed'v)
    X_train_reduced = pca.fit_transform(X_train)
    X_test_reduced = pca.transform(X_test)
 
    # Logistic
    gnb = GaussianNB()
    gnb.fit(X_train_reduced, y_train)
    y_pred = gnb.predict(X_test_reduced)

    # Evaluate the model
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='macro')
    recall = recall_score(y_test, y_pred, average='macro')
    f1 = f1_score(y_test, y_pred, average='macro')
    
    accuracy_scores.append(accuracy)
    precision_scores.append(precision)
    recall_scores.append(recall)
    f1_scores.append(f1)

print("accuracy:", np.mean(accuracy_scores),list(accuracy_scores))
print("recall:", np.mean(recall_scores))
print("precision:", np.mean(precision_scores))
print("f1-score:", np.mean(f1_scores))