In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, accuracy_score, f1_score, precision_score, recall_score, classification_report, confusion_matrix,plot_confusion_matrix
from sklearn.neighbors import KNeighborsClassifier,KNeighborsRegressor
from sklearn.linear_model import LogisticRegression
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import PolynomialFeatures, MinMaxScaler, OneHotEncoder, StandardScaler
from  sklearn.neural_network import MLPClassifier
# Load the dataset from the CSV; the 'target' column is used as the label below.
df = pd.read_csv("archive/heart_statlog_cleveland_hungary_final.csv")

# One seed shared by the train/test split and the neural network, so that
# Restart & Run All reproduces the same scores. Previously only the split was
# seeded and the MLP's random weight initialisation changed the results per run.
SEED = 42

# Every non-object column except the label is sent through the numeric pipeline.
num_cols = list(df.dtypes[df.dtypes != 'object'].index.values)
num_cols.remove('target')

# Features are all columns but the label (Index.difference returns them sorted).
X = df[df.columns.difference(['target'])]
y = df['target']

# Hold out 10% of the rows for the final evaluation.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.10, random_state=SEED)

numerical = Pipeline(steps=[
    # Fill missing values with the column median.
    ('imputer', SimpleImputer(strategy='median')),
    # Standardise before the polynomial expansion and the network.
    ('scaler', StandardScaler()),
    # Degree-2 polynomial feature expansion.
    ('poly', PolynomialFeatures(2))])

# Only the columns listed in num_cols are passed on; ColumnTransformer drops
# any other column by default (remainder='drop').
column_preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical, num_cols)])

# Baseline model: preprocessing followed by an MLP with default architecture.
clf = Pipeline(steps=[('preprocessor', column_preprocessor),
                      ('classifier', MLPClassifier(max_iter=1200, random_state=SEED))])
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

# Hold-out metrics for the baseline model.
print(f1_score(y_test, y_pred, average='micro'))
print(precision_score(y_test, y_pred, average="macro"))
print(recall_score(y_test, y_pred, average="macro"))
print(classification_report(y_test,y_pred))

0.9327731092436976
0.9263865065751858
0.9407505910165485
              precision    recall  f1-score   support

           0       0.87      0.98      0.92        47
           1       0.98      0.90      0.94        72

    accuracy                           0.93       119
   macro avg       0.93      0.94      0.93       119
weighted avg       0.94      0.93      0.93       119



In [2]:

# Search space for the MLP step of `clf`; keys follow the '<step>__<param>'
# convention. Each list holds one value, so the grid has a single candidate.
param_dict = {
    'classifier__solver': ['sgd'],
    'classifier__learning_rate': ["adaptive"],
    'classifier__hidden_layer_sizes': [(800,) * 5],  # five hidden layers of 800 units
    'classifier__activation': ['relu'],
}

# 3-fold cross-validation over the training split, using all available cores.
grid = GridSearchCV(estimator=clf, param_grid=param_dict, cv=3, verbose=1, n_jobs=-1)
grid.fit(X_train, y_train)
# fit() returns the search object itself, so both names refer to the fitted search.
best_model = grid


Fitting 3 folds for each of 1 candidates, totalling 3 fits


In [10]:
# The grid search was created with the default refit=True, so best_estimator_
# has already been retrained on the whole training split with the winning
# parameters. Fitting it again here would only repeat that expensive training
# run (and, if the network is unseeded, replace the selected model with a
# differently initialised one), so the refit estimator is used as-is.
# The name `neigh` is kept because the model-export cell dumps it by that name.
neigh = best_model.best_estimator_
y_pred = neigh.predict(X_test)

# Hold-out metrics for the tuned model, same set as reported for the baseline.
print(f1_score(y_test, y_pred, average='micro'))
print(precision_score(y_test, y_pred, average="macro"))
print(recall_score(y_test, y_pred, average="macro"))
print(classification_report(y_test,y_pred))

0.9327731092436976
0.9264705882352942
0.9370567375886525
              precision    recall  f1-score   support

           0       0.88      0.96      0.92        47
           1       0.97      0.92      0.94        72

    accuracy                           0.93       119
   macro avg       0.93      0.94      0.93       119
weighted avg       0.94      0.93      0.93       119



In [4]:
# Show the full configuration of the pipeline selected by the grid search.
selected_pipeline = best_model.best_estimator_
print(selected_pipeline)

Pipeline(steps=[('preprocessor',
                 ColumnTransformer(transformers=[('num',
                                                  Pipeline(steps=[('imputer',
                                                                   SimpleImputer(strategy='median')),
                                                                  ('scaler',
                                                                   StandardScaler()),
                                                                  ('poly',
                                                                   PolynomialFeatures())]),
                                                  ['age', 'sex',
                                                   'chest pain type',
                                                   'resting bp s',
                                                   'cholesterol',
                                                   'fasting blood sugar',
                                                   'restin

In [7]:
import joblib

# Persist the fitted pipeline to disk at the highest compression level (9).
# The call is the last expression, so the list of written file names it
# returns is shown as the cell output.
joblib.dump(value=neigh, filename='my_model.pkl', compress=9)

['my_model.pkl']