In [1]:
import pandas as pd
import pickle
import numpy as np

from sklearn.preprocessing import OneHotEncoder, MinMaxScaler
from sklearn.compose import ColumnTransformer
from sklearn.neural_network import MLPClassifier

In [2]:
# Read the dataset into a DataFrame. With sep=None and the python engine,
# pandas sniffs the delimiter from the file instead of assuming a comma.
df = pd.read_csv(
    "processed.cleveland.csv",
    sep=None,
    engine="python",
)

# Preview the first rows as a sanity check on the parse.
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,num
0,63.0,1.0,1.0,145.0,233.0,1.0,2.0,150.0,0.0,2.3,3.0,0.0,6.0,0
1,67.0,1.0,4.0,160.0,286.0,0.0,2.0,108.0,1.0,1.5,2.0,3.0,3.0,2
2,67.0,1.0,4.0,120.0,229.0,0.0,2.0,129.0,1.0,2.6,2.0,2.0,7.0,1
3,37.0,1.0,3.0,130.0,250.0,0.0,0.0,187.0,0.0,3.5,3.0,0.0,3.0,0
4,41.0,0.0,2.0,130.0,204.0,0.0,2.0,172.0,0.0,1.4,1.0,0.0,3.0,0


In [3]:
# Build the binary target: values 2, 3 and 4 of "num" are collapsed to 1,
# every other value is left as-is.
# The result of replace() is assigned rather than using
# df["num"].replace(..., inplace=True): that form is a chained in-place call on
# a Series extracted from the frame, and under pandas Copy-on-Write it modifies
# a temporary object and leaves df (and therefore the target) unchanged.
df_y = df["num"].replace([2, 3, 4], 1)

# Feature frame: remove the unused "ca"/"thal" columns and the target column.
# Rebinding instead of inplace=True keeps the operation explicit.
df = df.drop(columns=["ca", "thal", "num"])

In [4]:
# Column groups used by the later cells to decide how each feature is treated.

# Columns converted with pd.to_numeric and scaled with MinMaxScaler.
num_cols = [
    "age",
    "trestbps",
    "chol",
    "thalach",
    "oldpeak",
]

# Columns cast to the pandas "category" dtype and one-hot encoded.
cat_cols = [
    "sex",
    "cp",
    "restecg",
    "slope",
]

# Columns cast to bool.
bool_cols = [
    "exang",
    "fbs",
]

In [5]:
# Give every feature column the dtype matching its group. Columns that belong
# to none of the three groups are left untouched.
# The deprecated `copy=False` argument to astype() is omitted: the result is
# assigned back to the column, so it had no observable effect.
for col in df.columns:
    if col in num_cols:
        # downcast="float" picks the smallest float dtype that holds the values.
        df[col] = pd.to_numeric(df[col], downcast="float")
    elif col in cat_cols:
        df[col] = df[col].astype("category")
    elif col in bool_cols:
        # NOTE(review): astype("bool") maps any non-zero value, including NaN,
        # to True -- assumes these columns hold only 0/1 with no missing values.
        df[col] = df[col].astype("bool")

# Display the resulting dtypes to verify the conversion.
df.dtypes

age          float32
sex         category
cp          category
trestbps     float32
chol         float32
fbs             bool
restecg     category
thalach      float32
exang           bool
oldpeak      float32
slope       category
dtype: object

In [6]:
# Feature pipeline:
#   * numeric columns     -> scaled with MinMaxScaler
#   * categorical columns -> one-hot encoded (binary categories collapse to a
#                            single column; unseen categories are ignored)
#   * boolean columns     -> passed through unchanged
# ColumnTransformer discards every column that is not listed (its default is
# remainder="drop"), so without the explicit "bools" entry the exang/fbs
# features would be silently excluded from training.
# NOTE(review): code that loads the pickled preprocessor must now provide the
# bool_cols columns at transform time -- confirm against the consumer.
preprocessor = ColumnTransformer([
    ("nums", MinMaxScaler(), num_cols),
    ("cats", OneHotEncoder(handle_unknown="ignore", drop="if_binary"), cat_cols),
    ("bools", "passthrough", bool_cols),
])

df_prepped = preprocessor.fit_transform(df)

# A fixed random_state makes weight initialisation and batch shuffling
# repeatable, so re-running the notebook reproduces the same fitted model.
mlp_clf = MLPClassifier(
    hidden_layer_sizes=(1000, 1000, 1000),
    max_iter=2500,
    random_state=42,
)

mlp_clf.fit(df_prepped, df_y)

In [7]:
# Serialise the fitted model and the fitted preprocessor to files in the
# working directory, one pickle file per object, classifier first.
for artifact_path, artifact in (
    ("classifier", mlp_clf),
    ("preprocessor", preprocessor),
):
    with open(artifact_path, "wb") as file:
        pickle.dump(artifact, file)