# Best MLP model over ten random seeds after binarization

## Outline

The **MLAging - SVZ all-cell** workflow consists of sections:

`30 SVZpreprocessing.R` Data preprocessing and preparation in Seurat.

`311 SVZ All-cell ELN Tuning - Before Binarization` ML model tuning using *non-binarized* HVGs and hyperparameter selection using `GridSearchCV`.

`312 SVZ All-cell ELN Tuning - After Binarization` ML model tuning using *binarized* HVGs and hyperparameter selection using `GridSearchCV`.

`321 SVZ All-cell ELN 10x` Run the best ELN model for both binarized and non-binarized HVGs over 10 random seeds.

`322 SVZ All-cell MLP 10x - Before Binarization` Run the best MLP model for *non-binarized* HVGs over 10 random seeds.

`323 SVZ All-cell MLP 10x - After Binarization` Run the best MLP model for *binarized* HVGs over 10 random seeds -- **this notebook**.
 
`33 SVZ All-cell Model Result Viz` Result visualization.

`34 SVZ All-cell Stat` Stat test on whether exercise rejuvenates cells.

In [None]:
from src.keras_tuner_cv import *
from src.data_processing import *

import keras_tuner as kt
import pandas as pd
import numpy as np
import os
import click
import datetime
import matplotlib
from matplotlib import pyplot as plt
import seaborn as sns
# Embed fonts in PDF output as Type 42 (TrueType) rather than matplotlib's
# default Type 3, so text in saved figures stays editable in vector editors.
matplotlib.rcParams['pdf.fonttype'] = 42

In [None]:
from src.mlp import *

In [None]:
# Ask TensorFlow which GPU devices it can see; the returned list is shown as
# the cell output. `tf` is not imported explicitly in this notebook --
# presumably it is re-exported by one of the star imports above (confirm).
tf.config.experimental.list_physical_devices('GPU')

In [None]:
# Restrict CUDA to device 0.
# NOTE(review): this is set after the GPU query in the previous cell. CUDA
# normally reads CUDA_VISIBLE_DEVICES when it is first initialised, so the
# setting may have no effect at this point -- confirm, and consider moving
# it above the first TensorFlow GPU call.
os.environ["CUDA_VISIBLE_DEVICES"]="0"

In [None]:
# Preprocessed SVZ control-cell CSVs: the training file is the "batch1"
# export and the held-out test file is the "batch2" export.
input_train, input_test = (
    '../data/svz_processed/svz_ctl_train_cell_sep3integ_batch1.csv',
    '../data/svz_processed/svz_ctl_test_cell_sep3integ_batch2.csv',
)

In [None]:
# Load both CSVs through the project's data_prep helper with binarization
# switched on, and unpack the five values it returns. "All" is passed as the
# third positional argument (presumably the cell-type selector -- confirm).
# `custom_cv` is not used by any cell shown in this notebook.
# NOTE(review): the test path is passed before the train path; confirm that
# this matches data_prep's parameter order.
train_X, train_y, test_X, test_y, custom_cv = data_prep(
    input_test,
    input_train,
    "All",
    binarization=True,
)

In [None]:
# Training configuration shared by every run in this notebook.
# The learning rate is presumably the value selected during hyperparameter
# tuning (confirm against the tuning notebook).
parameters = Train_Params(
    learning_rate=5.156976971819789e-06,
    loss=tf.keras.losses.BinaryCrossentropy(),
    # Order matters: later cells index model.evaluate() output positionally,
    # reading [0] as the loss and [3] as the PR-curve AUC named 'AUPRC'.
    metrics=[
        tf.keras.metrics.BinaryAccuracy(),
        tf.keras.metrics.AUC(),
        tf.keras.metrics.AUC(num_thresholds=10000, name='AUPRC', curve='PR'),
    ],
    epochs=100,
    # NOTE(review): the model.fit calls visible in this notebook do not pass
    # batch_size, so this value is only used if a helper reads it.
    batch_size=32,
)

In [None]:
# Stop training once the 'AUPRC' metric (the PR-curve AUC declared in
# `parameters.metrics`) has not improved for 20 consecutive epochs. The fit
# calls in this notebook pass no validation data, so this is the training-set
# value.
# mode='max' is explicit because AUPRC is higher-is-better. With the default
# mode='auto' Keras infers the direction from the metric name, and a custom
# name such as 'AUPRC' is not reliably recognised; older tf.keras releases
# fall back to "lower is better", which would halt training while the model
# is still improving.
early_stopping_cb = tf.keras.callbacks.EarlyStopping(monitor='AUPRC', mode='max', patience=20)

In [None]:
# Output directory handed to mlp_multiple_trials in a later cell.
directory='../results/svz_int2/mlp_model_test_scores'

# NOTE(review): `model`, `train_X1` and `train_y1` are not assigned in any
# cell above this one; they are first bound inside the single-seed training
# loop further down. Run top-to-bottom, this call presumably raises NameError
# unless a star import happens to supply those names. It looks like a
# leftover from an interactive session; confirm, then move or drop it.
history = model.fit(
                train_X1, train_y1,
                epochs=parameters.epochs,
                verbose=2,
                callbacks=[early_stopping_cb]
            )

In [None]:
# Hand the prepared data, the training configuration and the output directory
# to the project's mlp_multiple_trials helper. What the helper does with
# `max_runs` and `feature_nums` is not visible here -- presumably it repeats
# training/evaluation `max_runs` times for each entry in `feature_nums`
# (confirm in src.mlp).
feature_nums = [608]
max_runs = 10

mlp_multiple_trials(
    max_runs,
    train_X, train_y,
    test_X, test_y,
    parameters,
    feature_nums,
    directory,
)

In [None]:
# Train and evaluate one MLP by hand so the fitted `model` stays available to
# the cells below. Only seed index 0 is run here; `max_runs` is assigned but
# not read inside this cell.
max_runs = 10
feature_nums = [608]
loss_test = []
AUPRC_test = []
models = []

# AUPRC is higher-is-better, so the direction is stated explicitly. With the
# default mode='auto' Keras infers it from the metric name, and the custom
# name 'AUPRC' is not reliably recognised; older tf.keras releases fall back
# to "lower is better" and would stop while the model is still improving.
# No validation data is passed to fit below, so this monitors the training
# metric.
early_stopping_cb = tf.keras.callbacks.EarlyStopping(monitor='AUPRC', mode='max', patience=20)

for i in [0]:
    print('randam state', i)
    # NOTE(review): this seeds Python's `random` module and the shuffles only;
    # no NumPy or TensorFlow seed is set in this cell, so weight
    # initialisation is presumably not reproducible -- confirm whether
    # Baseline_MLP handles seeding itself.
    random.seed(42*i)
    train_X1, train_y1 = shuffle(train_X, train_y, random_state=42*i)

    # change the number of neurons accordingly
    model = Baseline_MLP(feature_nums)
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=parameters.learning_rate),
                  loss=parameters.loss,
                  metrics=parameters.metrics)

    history = model.fit(
        train_X1, train_y1,
        epochs=parameters.epochs,
        verbose=2,
        callbacks=[early_stopping_cb],
    )

    models.append(model)

    # This rebinds the notebook-level test_X / test_y to the shuffled copies,
    # so later cells see the shuffled order.
    test_X, test_y = shuffle(test_X, test_y, random_state=42*i)
    test_result = model.evaluate(test_X, test_y, verbose=0)

    # evaluate() returns the loss followed by the compiled metrics in order:
    # index 0 is the loss and index 3 is the third metric, named 'AUPRC'.
    loss_test.append(test_result[0])
    AUPRC_test.append(test_result[3])

    print('test result:', test_result)

In [None]:
# Re-evaluate the most recently trained model on the test set; the cell output
# is the same list the training loop indexes ([0] = loss, [3] = AUPRC).
model.evaluate(test_X, test_y, verbose=0)

In [None]:
# for mlp
from sklearn.metrics import precision_recall_curve, auc, make_scorer

def calculate_NNPR(NN_model, test_X, test_y):
    """Compute the precision-recall curve of a fitted model on a test set.

    Args:
        NN_model: object exposing ``predict(test_X)``; its output is used
            directly as the score passed to ``precision_recall_curve``.
        test_X: test features forwarded to ``NN_model.predict``.
        test_y: true labels for ``test_X``.

    Returns:
        A ``(recall, precision, pr_auc)`` tuple, where ``pr_auc`` is the area
        under the curve computed by ``auc(recall, precision)``. Recall comes
        first, the reverse of the order sklearn returns.
    """
    scores = NN_model.predict(test_X)
    # The thresholds returned by sklearn are not needed downstream.
    precision, recall, _ = precision_recall_curve(test_y, scores)
    pr_auc = auc(recall, precision)
    return recall, precision, pr_auc

# Persist the (recall, precision, PR-AUC) tuple for the last trained model.
# The curve is computed before the file is opened, so a failure during
# prediction does not leave a truncated, empty file behind. The `with` block
# closes the handle even if pickling raises.
pr_result = calculate_NNPR(model, test_X, test_y)
with open('../results/svz_int2/mlp_pr.save', 'wb') as file:
    pickle.dump(pr_result, file)