In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import time
import itertools
import pathlib
from IPython.display import display
%matplotlib inline

# Model evaluation
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import cross_val_score
# Scalers
from sklearn.preprocessing import StandardScaler,PowerTransformer,MinMaxScaler,QuantileTransformer,normalize
# Features
from sklearn.feature_selection import VarianceThreshold, SelectKBest

# Models
from sklearn.ensemble import RandomForestClassifier as RFC
from sklearn.neighbors import KNeighborsClassifier as KNN
from sklearn.neural_network import MLPClassifier as MLP

# Required for importing the project modules below from the parent directory.
# The kernel's working directory is used instead of
# inspect.getfile(inspect.currentframe()): recent ipykernel versions compile each
# cell under a temporary file name, so the frame's file no longer points at the
# folder that holds this notebook.
# NOTE(review): assumes the kernel was started in the notebook's own folder (the
# relative "out/..." paths used further down rely on the same assumption).
import os,sys,inspect
currentdir = os.getcwd()
parentdir = os.path.dirname(currentdir)
if parentdir not in sys.path:  # re-running the cell must not stack duplicate entries
    sys.path.insert(0, parentdir)
from common import misc
from common.data_parser import *
from common.model_trainer import *
from common.misc import *
from config import *
from heart_helpers import *

# Matplotlib 3.6 renamed its bundled seaborn styles to "seaborn-v0_8*" and later
# dropped the old names; pick whichever spelling this installation provides.
plt.style.use("seaborn-v0_8" if "seaborn-v0_8" in plt.style.available else "seaborn")

In [None]:
# --- Experiment configuration ---
# Estimator class handed to ModelTrainer, and the tag used in output paths.
MODEL = MLP
MODEL_TYPE = "MLP"

# Candidate architectures: every pair (i, j) with i and j in 5..9.
hidden_layers = list(itertools.product(range(5, 10), range(5, 10)))
print("hidden_layers:", hidden_layers, sep="\n")

# Hyper-parameter grid; the first key becomes the x axis in plot_params.
params = {
    "hidden_layer_sizes": hidden_layers,
    "alpha": [0.001, 0.0001],
    # "solver": ["lbfgs", "sgd", "adam"],  # currently not part of the search
}

# Arguments forwarded to train_test_split.
TEST_SIZE = 0.25
RND_STATE = 42

# Folder prefix for the figures saved by the plotting cells.
OUT_DIR = f"out/{MODEL_TYPE}/"

# "binary" selects the "sma" dataset below; any other value (e.g. "multi") selects "big".
SET = "binary"

In [None]:
# Load the raw table that matches the configured SET: "sma" for the binary
# setting, "big" for everything else. What the two keys mean is defined by
# parse_heart_disease (project helper).
df_raw = parse_heart_disease("sma" if SET == "binary" else "big")
df_raw.info()

In [None]:
# Plot settings read by plot_params.
LOGX = False  # forwarded as the `logx` argument of DataFrame.plot

# Default y-axis window for the score plots, chosen per dataset setting.
if SET == "binary":
    YLIMS = (0.5, 1)
else:
    YLIMS = (0.4, 0.8)

def plot_params(results, scores="score", fileName=None, params=params, ylims=YLIMS):
    """Plot one score column of a result table against the first hyper-parameter.

    The first key of ``params`` is used as the x axis. One line is drawn for
    every combination of the values of the remaining keys; the rows for a line
    are selected with ``results.query`` using string-quoted values.

    Parameters
    ----------
    results : pandas.DataFrame
        Table with one column per key of ``params`` plus the ``scores`` column.
        NOTE(review): because the filter compares against quoted strings, the
        non-first parameter columns are presumably stored as strings - confirm
        against ModelTrainer.result.
    scores : str
        Name of the column to plot. For any non-string value nothing is drawn
        and ``None`` is returned (unchanged legacy behaviour).
    fileName : str or path-like, optional
        When given, the figure is saved there; missing parent folders are
        created first.
    params : dict
        Parameter grid (name -> list of candidate values).
    ylims : tuple or None
        ``(low, high)`` y-axis limits; ``None`` keeps matplotlib's autoscaling.

    Returns
    -------
    matplotlib.figure.Figure or None
        The figure that was drawn.
    """
    param_keys = list(params)
    first_key, rest = param_keys[0], param_keys[1:]

    # The bundled seaborn style was renamed to "seaborn-v0_8" in Matplotlib 3.6;
    # use whichever name this installation knows.
    plt.style.use("seaborn-v0_8" if "seaborn-v0_8" in plt.style.available else "seaborn")

    if not isinstance(scores, str):
        return None

    fig, ax = plt.subplots(figsize=(8, 6))
    for vals in itertools.product(*(params[key] for key in rest)):
        # With a single-key grid there is nothing to filter on: fall back to
        # the score name as label and plot the whole table (an empty query
        # string would raise).
        label = " / ".join(str(val) for val in vals) or scores
        filters = " & ".join(f'{key!s} == "{val!s}"' for key, val in zip(rest, vals))
        subset = results.query(filters) if filters else results
        subset.plot(x=first_key, y=scores, label=label,
                    ax=ax, marker="o", logx=LOGX)
    ax.legend()
    ax.set_title(scores, fontsize=18)

    # Honour the argument; the previous version always read the global YLIMS.
    if ylims is not None:
        ax.set_ylim(*ylims)

    if fileName:
        # savefig does not create missing folders on its own.
        pathlib.Path(fileName).parent.mkdir(parents=True, exist_ok=True)
        fig.savefig(fileName)
    plt.show()
    # Return the figure itself: once show() has released it, plt.gcf() would
    # create and hand back a new, empty figure.
    return fig

# Impute mode = 0

In [None]:
# Optional scaler object forwarded to process_heart(); a truthy value also makes
# the split cells append "_scaler" to the saved figure name.
# Set to e.g. StandardScaler() to enable it, or leave as None.
scaler = None

In [None]:
# Impute mode 0: pick the figure path, rebuild the processed table and split it.
impute_mode = 0
if scaler:
    fname = OUT_DIR + f"{SET}_{MODEL_TYPE}_{impute_mode}_scaler.pdf"
else:
    fname = OUT_DIR + f"{SET}_{MODEL_TYPE}_{impute_mode}.pdf"

df = process_heart(df_raw, impute_mode=impute_mode, scaler=scaler)
x = df[HEART_FEATS]
y = df[HEART_TARGET]
x_train, x_test, y_train, y_test = train_test_split(
    x, y,
    test_size=TEST_SIZE,
    random_state=RND_STATE,
)
df.info()

In [None]:
# Hand the estimator class, the parameter grid, the current split and the
# accuracy metric to ModelTrainer (project helper) and run it.
# NOTE(review): presumably one model is fitted per parameter combination -
# confirm in common.model_trainer.
modeltrainer = ModelTrainer(
    MODEL,
    params,
    x_train, y_train, x_test, y_test,
    accuracy_score,
    thread_cnt=8,
)
modeltrainer.train()

result = modeltrainer.result
# A bare `result.head()` in the middle of a cell is evaluated and discarded;
# display() actually renders the preview.
display(result.head())

# Plot the "accuracy" column and save the figure to `fname`.
SCORES = "accuracy"
plot_params(result, scores=SCORES, fileName=fname);

In [None]:
# Draw the accuracy plot for the latest `result` again, without saving a file.
SCORES = "accuracy"
plot_params(results=result, scores=SCORES, fileName=None);

# Impute mode = 1

In [None]:
# Impute mode 1: pick the figure path, rebuild the processed table and split it.
impute_mode = 1
if scaler:
    fname = OUT_DIR + f"{SET}_{MODEL_TYPE}_{impute_mode}_scaler.pdf"
else:
    fname = OUT_DIR + f"{SET}_{MODEL_TYPE}_{impute_mode}.pdf"

df = process_heart(df_raw, impute_mode=impute_mode, scaler=scaler)
x = df[HEART_FEATS]
y = df[HEART_TARGET]
x_train, x_test, y_train, y_test = train_test_split(
    x, y,
    test_size=TEST_SIZE,
    random_state=RND_STATE,
)
df.info()

In [None]:
# Hand the estimator class, the parameter grid, the current split and the
# accuracy metric to ModelTrainer (project helper) and run it.
# NOTE(review): presumably one model is fitted per parameter combination -
# confirm in common.model_trainer.
modeltrainer = ModelTrainer(
    MODEL,
    params,
    x_train, y_train, x_test, y_test,
    accuracy_score,
    thread_cnt=8,
)
modeltrainer.train()

result = modeltrainer.result
# A bare `result.head()` in the middle of a cell is evaluated and discarded;
# display() actually renders the preview.
display(result.head())

# Plot the "accuracy" column and save the figure to `fname`.
SCORES = "accuracy"
plot_params(result, scores=SCORES, fileName=fname);

In [None]:
# Draw the accuracy plot for the latest `result` again, without saving a file.
SCORES = "accuracy"
plot_params(results=result, scores=SCORES, fileName=None);

# Impute mode = 2

In [None]:
# Impute mode 2: pick the figure path, rebuild the processed table and split it.
impute_mode = 2
if scaler:
    fname = OUT_DIR + f"{SET}_{MODEL_TYPE}_{impute_mode}_scaler.pdf"
else:
    fname = OUT_DIR + f"{SET}_{MODEL_TYPE}_{impute_mode}.pdf"

df = process_heart(df_raw, impute_mode=impute_mode, scaler=scaler)
x = df[HEART_FEATS]
y = df[HEART_TARGET]
x_train, x_test, y_train, y_test = train_test_split(
    x, y,
    test_size=TEST_SIZE,
    random_state=RND_STATE,
)
df.info()

In [None]:
# Hand the estimator class, the parameter grid, the current split and the
# accuracy metric to ModelTrainer (project helper) and run it.
# NOTE(review): presumably one model is fitted per parameter combination -
# confirm in common.model_trainer.
modeltrainer = ModelTrainer(
    MODEL,
    params,
    x_train, y_train, x_test, y_test,
    accuracy_score,
    thread_cnt=8,
)
modeltrainer.train()

result = modeltrainer.result
# A bare `result.head()` in the middle of a cell is evaluated and discarded;
# display() actually renders the preview.
display(result.head())

# Plot the "accuracy" column and save the figure to `fname`.
SCORES = "accuracy"
plot_params(result, scores=SCORES, fileName=fname);

In [None]:
# Draw the accuracy plot for the latest `result` again, without saving a file.
SCORES = "accuracy"
plot_params(results=result, scores=SCORES, fileName=None);

# Confusion matrices

In [None]:
# Train once more with confusion-matrix collection switched on.
# x_train / x_test / y_train / y_test are whatever the most recently executed
# split cell produced, so run the wanted impute-mode section first.
modeltrainer = ModelTrainer(
    MODEL,
    params,
    x_train, y_train, x_test, y_test,
    accuracy_score,
    thread_cnt=4,
)
# NOTE(review): labels 0-4 are hard-coded; confirm they match the classes
# present in the target when SET == "binary".
modeltrainer.cm_setup([0, 1, 2, 3, 4])
modeltrainer.train()

# The results used to be written to "out/rf.csv". That name does not match
# MODEL_TYPE and would overwrite whatever another model stored there, so the
# table now goes next to this model's figures under OUT_DIR.
pathlib.Path(OUT_DIR).mkdir(parents=True, exist_ok=True)
modeltrainer.save_result(OUT_DIR + f"{SET}_{MODEL_TYPE}_cm.csv")

cms = modeltrainer.cms

df_results = modeltrainer.result

In [None]:
# Let the trainer plot a confusion matrix for argument 0; the trailing ";" hides
# the call's return value in the cell output.
# NOTE(review): 0 presumably selects the first trained parameter combination -
# confirm in ModelTrainer.plot_confusion_matrix.
modeltrainer.plot_confusion_matrix(0);

In [None]:
# Plot the entry cms[0][1] as a normalized confusion matrix in a red colour map.
# NOTE(review): the layout of `cms` is defined by ModelTrainer - confirm that
# element [0][1] is the matrix itself.
plot_confusion_matrix(
    cms[0][1],
    [0, 1, 2, 3, 4],
    cmap=plt.cm.Reds,
    title="Confusion matrix",
    normalize=True,
)