In [None]:
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from os.path import join as p_join

import seaborn as sns


##################################
## GLOBAL SETTINGS ###############
##################################
# Default size for every matplotlib figure created below: (width, height) in inches.
plt.rcParams["figure.figsize"] = (12,8)

def seed_all(seed=42):
    """Seed Python's `random` module and NumPy's global RNG with one value.

    Args:
        seed: Value passed to both `random.seed` and `np.random.seed`
            (defaults to 42).

    Prints the seed that was applied; returns nothing.
    """
    # Same order as before: stdlib RNG first, then NumPy's global state.
    for seeder in (random.seed, np.random.seed):
        seeder(seed)
    print("[ Using Seed : ", seed, " ]")

####################################
#####   SEED ALL EXPERIMENTS   #####
####################################
# Seed both RNGs once, up front, with the default seed (42).
seed_all()

In [None]:
# Paths are relative to the current working directory:
# data/Input.txt (features) and data/Topology.txt (targets).
X_path = p_join('data', 'Input.txt')
Y_path = p_join('data', 'Topology.txt')

# NOTE(review): read_csv is called with its defaults, so the first line of each
# file is consumed as a header row -- confirm both files really have one.
X = pd.read_csv(X_path).values
# squeeze() drops length-1 axes; presumably Topology.txt holds a single column
# so Y ends up as a 1-D label vector -- TODO confirm.
Y = pd.read_csv(Y_path).values.squeeze()

In [None]:
###############################
##### IMPORT ML METHODS   #####
###############################
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn import svm
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression



#################################
####### GLOBAL CONFIG ###########
#################################
# Shared evaluation settings; calc_ml_method / greed_searc_cv read them with
# config.get(...) and forward them to scikit-learn as scoring=, cv= and n_jobs=.
CONFIG = dict(
    scoring='accuracy',  # metric name
    cv=5,                # cross-validation setting
    n_jobs=-1,           # parallelism setting
)

In [None]:
def select_n_center_features(data: np.ndarray, n_features: int) -> np.ndarray:
    """Return an independent copy of the `n_features` central columns of `data`.

    The window is centered along axis 1; when the number of leftover columns
    is odd, the extra column is left on the right-hand side (the start index
    is rounded down).

    Args:
        data: Array whose second axis holds the features.
        n_features: Number of adjacent columns to keep. Must satisfy
            ``0 <= n_features <= data.shape[1]``.

    Returns:
        A new array holding columns ``[start, start + n_features)``; writing
        to it does not affect `data`.

    Raises:
        ValueError: if `n_features` is negative or larger than the number of
            columns. Previously such requests produced a zero or negative
            start index, so the slice silently returned the wrong columns.
    """
    total_components = data.shape[1]
    if not 0 <= n_features <= total_components:
        raise ValueError(
            f'n_features must be between 0 and {total_components}, '
            f'got {n_features}'
        )

    start = (total_components - n_features) // 2
    stop = start + n_features
    # Copy only the selected window instead of duplicating the whole matrix.
    res = data[:, start:stop].copy()
    print(f'Selected features from indexes:  [{start}, {stop})')
    return res

In [None]:
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from copy import deepcopy

def calc_ml_method(model, config, X, Y):
    """Cross-validate `model` on (X, Y) and return the scores keyed by metric.

    Args:
        model: Estimator forwarded to `cross_val_score`.
        config: Mapping with optional keys ``'scoring'`` (default
            ``'accuracy'``), ``'cv'`` (default 5) and ``'n_jobs'`` (default 4).
        X: Feature matrix forwarded to `cross_val_score`.
        Y: Targets forwarded to `cross_val_score`.

    Returns:
        A one-entry dict ``{str(scoring): scores}`` where ``scores`` is
        whatever `cross_val_score` returned.
    """
    metric = config.get('scoring', 'accuracy')
    fold_scores = cross_val_score(
        model, X, Y,
        cv=config.get('cv', 5),
        scoring=metric,
        n_jobs=config.get('n_jobs', 4),
    )
    return {str(metric): fold_scores}

def greed_searc_cv(model_class, params, config, X=None, Y=None):
    """Grid-search hyper-parameters with `GridSearchCV` and summarise the result.

    Args:
        model_class: Estimator to tune. Despite the name, the notebook passes
            an instance (e.g. ``KNeighborsClassifier()``); it is forwarded
            unchanged as the first argument of `GridSearchCV`.
        params: Parameter grid forwarded to `GridSearchCV`.
        config: Mapping with optional keys ``'scoring'`` (default
            ``'accuracy'``), ``'cv'`` (default 5) and ``'n_jobs'`` (default 4).
        X: Feature matrix. When omitted, the notebook-level ``X`` is looked
            up at call time.
        Y: Targets. When omitted, the notebook-level ``Y`` is looked up at
            call time.

    Returns:
        dict with keys ``'best_<scoring>_score'``, ``'best_params'`` and
        ``'cv_results'``, copied from the fitted search object.

    Raises:
        NameError: if X or Y is omitted and no global of that name exists.
    """
    # The defaults used to be ``deepcopy(X)`` / ``deepcopy(Y)``. Python
    # evaluates default expressions once, when the ``def`` statement runs, so
    # they froze whatever X/Y held at that moment (going stale once X is
    # rebound later in the notebook) and made the definition itself fail when
    # X/Y did not exist yet. Resolve omitted arguments at call time instead.
    namespace = globals()
    if X is None:
        if 'X' not in namespace:
            raise NameError("greed_searc_cv: X was not passed and no global 'X' is defined")
        X = namespace['X']
    if Y is None:
        if 'Y' not in namespace:
            raise NameError("greed_searc_cv: Y was not passed and no global 'Y' is defined")
        Y = namespace['Y']

    scoring = config.get('scoring', 'accuracy')
    cv = config.get('cv', 5)
    n_jobs = config.get('n_jobs', 4)

    model = GridSearchCV(model_class,
                         params,
                         scoring=scoring,
                         cv=cv,
                         n_jobs=n_jobs)
    model.fit(X, Y)

    return {
        f'best_{str(scoring)}_score': model.best_score_,
        'best_params': model.best_params_,
        'cv_results': model.cv_results_,
    }

### Test `KNeighborsClassifier`

In [None]:
# Baseline: cross-validate a default KNeighborsClassifier on X, Y using the shared CONFIG.
calc_ml_method(KNeighborsClassifier(), CONFIG, X, Y)

### Try `KNeighborsClassifier` Grid Search

In [None]:
# Grid: 4 neighbor counts x 2 weighting schemes = 8 candidate settings.
# X and Y are passed explicitly rather than relying on the function's defaults.
greed_searc_cv(KNeighborsClassifier(),
               {'n_neighbors': [1,3,5,7], 'weights':['uniform', 'distance']},
               CONFIG,
               X, Y)

### Try 16 center features

In [None]:
# Select the 16 center features.
# NOTE: this rebinds X, so any earlier cell re-run after this one sees only
# these 16 columns instead of the full matrix loaded from disk.
X = select_n_center_features(X, 16)

In [None]:
### Logistic regression with default hyper-parameters
# Uses the current X (the 16 center features if the cells above ran in order).
accuracies_log_reg = calc_ml_method(LogisticRegression(), CONFIG, X, Y)
# Bare expression: display the returned {scoring: scores} dict.
accuracies_log_reg

In [None]:
### KNN with n_neighbors = 5
accuracies_knn = calc_ml_method(KNeighborsClassifier(n_neighbors=5), CONFIG, X, Y)
# Bare expression: display the returned {scoring: scores} dict.
accuracies_knn

In [None]:
### MLP
# MLPClassifier is stochastic, so pin its random_state explicitly. The global
# seeding done by seed_all() is not a reliable substitute here: CONFIG sets
# n_jobs=-1, so the fits may run in worker processes that do not share the
# notebook's seeded global RNG state. 42 matches seed_all's default seed.
accuracies_mlp = calc_ml_method(MLPClassifier(random_state=42), CONFIG, X, Y)
# Bare expression: display the returned {scoring: scores} dict.
accuracies_mlp

In [None]:
### Gaussian naive Bayes with default hyper-parameters
accuracies_GNB = calc_ml_method(GaussianNB(), CONFIG, X, Y)
# Bare expression: display the returned {scoring: scores} dict.
accuracies_GNB

In [None]:
### Support vector classifier (svm.SVC) with default hyper-parameters
accuracies_SVC = calc_ml_method(svm.SVC(), CONFIG, X, Y)
# Bare expression: display the returned {scoring: scores} dict.
accuracies_SVC