# Preprocessing

In [3]:
## Import models
from sklearn.neural_network import MLPClassifier
## Import other libraries
from sklearn.model_selection import train_test_split, cross_val_score
import torch
import numpy as np
import csv

In [9]:
## Load data
X = torch.from_numpy(np.load('data/original dataset/samples.npy'))
y = torch.from_numpy(np.load('data/original dataset/labels.npy'))
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.10, stratify=y)

In [8]:
%cd ..

e:\Lurie Research\cchs-prediction


In [None]:
## Points dictionary
test_points = {
    "Relevant Decision Tree Points": [],
    "Highest Logistic Regression Coefficients": [],
    "Highest Ridge Regression Coefficients": [],
    
}

# Build models

In [8]:
model = MLPClassifier(
    hidden_layer_sizes=(100, 60, 30),
    activation='relu',
    solver='adam',
    batch_size=32,
    learning_rate='adaptive',
    max_iter=20000, # epochs; change to 60k
    random_state=None,
    tol=1e-4,
    validation_fraction=0.1,
    verbose=True
)

In [36]:
model.fit(X_train, y_train)

Iteration 1, loss = 0.59702263
Iteration 2, loss = 0.58068618
Iteration 3, loss = 0.57392556
Iteration 4, loss = 0.56771287
Iteration 5, loss = 0.56184740
Iteration 6, loss = 0.55376442
Iteration 7, loss = 0.53837495
Iteration 8, loss = 0.53027265
Iteration 9, loss = 0.54569598
Iteration 10, loss = 0.52490763
Iteration 11, loss = 0.51178225
Iteration 12, loss = 0.49985052
Iteration 13, loss = 0.49845219
Iteration 14, loss = 0.48607392
Iteration 15, loss = 0.50136422
Iteration 16, loss = 0.48938184
Iteration 17, loss = 0.48350117
Iteration 18, loss = 0.47395875
Iteration 19, loss = 0.47780726
Iteration 20, loss = 0.45959790
Iteration 21, loss = 0.45972186
Iteration 22, loss = 0.45538842
Iteration 23, loss = 0.44675029
Iteration 24, loss = 0.44836672
Iteration 25, loss = 0.44514143
Iteration 26, loss = 0.44922777
Iteration 27, loss = 0.44907044
Iteration 28, loss = 0.43022223
Iteration 29, loss = 0.43585718
Iteration 30, loss = 0.43362443
Iteration 31, loss = 0.43849299
Iteration 32, los

MLPClassifier(batch_size=32, hidden_layer_sizes=(100, 60, 30),
              learning_rate='adaptive', max_iter=20000, tol=1e-10,
              verbose=True)

In [37]:
y_pred = model.predict(X_test)

In [38]:
model.score(X_test, y_test)

0.8406779661016949

# Grid search

In [5]:
## Hyperparameter grid
activation = ['identity', 'logistic', 'tanh', 'relu']
solver = ['lbfgs', 'sgd', 'adam']
alpha = [0.0001, 0.001]
learning_rate = ['constant', 'invscaling', 'adaptive']
learning_rate_init = [0.0001, 0.001, 0.01]
epsilon = [1e-6]

hyperparameters = [activation, solver, alpha, learning_rate, learning_rate_init, epsilon]

In [10]:
best_acc = -1
features = []
OUTPUT_FILE = "mlp_accuracy.csv"

with open(OUTPUT_FILE, "a", newline='') as csvfile:
    csvWriter = csv.writer(csvfile)
    header = ["MLP", "Accuracy"]
    header.append(hyperparameters)
    csvWriter.writerow(header)

    for act in activation:
        for s in solver:
            for a in alpha:
                for lr in learning_rate:
                    for lri in learning_rate_init:
                        for e in epsilon:
                            model = MLPClassifier(
                                hidden_layer_sizes=(138, 100, 60,),
                                activation=act,
                                solver=s,
                                alpha=a,
                                batch_size=32,
                                learning_rate=lr,
                                learning_rate_init=lri,
                                max_iter=20000, # epochs; change to 60k
                                random_state=42,
                                tol=1e-4,
                                validation_fraction=0.1,
                                epsilon=e
                            )
                            scores = cross_val_score(model, X, y, cv=10, n_jobs=-1) # 10-fold cross-validation
                            avg_accuracy = sum(scores) / len(scores) # evaluation metric
                            features = [act, s, a, lr, lri, e]
                            row = ["MLP", avg_accuracy]
                            row.append(features)
                            csvWriter.writerow(row)
                            print(avg_accuracy, features)

                            # update best accuracy score
                            # if avg_accuracy > best_acc: 
                            #     best_acc = avg_accuracy
                            #     features = [act, s, a, lr, lri, e]
                            #     print(best_acc, features)
                            #     row = ["MLP", best_acc]
                            #     row.append(features)
                            #     csvWriter.writerow(row)

0.8672627695145856 ['identity', 'lbfgs', 0.0001, 'constant', 0.0001, 1e-06]
0.8672627695145856 ['identity', 'lbfgs', 0.0001, 'constant', 0.001, 1e-06]
0.8672627695145856 ['identity', 'lbfgs', 0.0001, 'constant', 0.01, 1e-06]
0.8672627695145856 ['identity', 'lbfgs', 0.0001, 'invscaling', 0.0001, 1e-06]
0.8672627695145856 ['identity', 'lbfgs', 0.0001, 'invscaling', 0.001, 1e-06]
0.8672627695145856 ['identity', 'lbfgs', 0.0001, 'invscaling', 0.01, 1e-06]
0.8672627695145856 ['identity', 'lbfgs', 0.0001, 'adaptive', 0.0001, 1e-06]
0.8672627695145856 ['identity', 'lbfgs', 0.0001, 'adaptive', 0.001, 1e-06]
0.8672627695145856 ['identity', 'lbfgs', 0.0001, 'adaptive', 0.01, 1e-06]
0.86896229678312 ['identity', 'lbfgs', 0.001, 'constant', 0.0001, 1e-06]
0.86896229678312 ['identity', 'lbfgs', 0.001, 'constant', 0.001, 1e-06]
0.86896229678312 ['identity', 'lbfgs', 0.001, 'constant', 0.01, 1e-06]
0.86896229678312 ['identity', 'lbfgs', 0.001, 'invscaling', 0.0001, 1e-06]
0.86896229678312 ['identity'

In [None]:
#0-121 are cchs
#122-203 are fgnet
#204-1833 are utk

In [None]:
# FGNET organization
