In [38]:
import pandas as pd
import numpy as np
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler

In [39]:
from sklearn.impute import SimpleImputer

data = pd.read_csv("../resource/asnlib/publicdata/cleveland.data.csv")

# Replace NaN cells with the constant 10000.
# NOTE(review): this sentinel is NOT ignored downstream -- only rows containing
# "?" are dropped below, so any cell filled here is kept and reaches
# StandardScaler and the classifiers as a literal 10000. Confirm this is intended.
# `np.nan` is used because the `np.NaN` alias was removed in NumPy 2.0.
imp = SimpleImputer(missing_values=np.nan, strategy='constant', fill_value=10000)
np_2 = imp.fit_transform(data)

# The imputer returns a plain array; rebuild a DataFrame with explicit column names.
column_values = ['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach', 'exang', 'oldpeak',
                 'slope', 'ca', 'thal', 'num']

df_2 = pd.DataFrame(data=np_2,
                    columns=column_values)

# Binarize the target: any 'num' value above 0 becomes 1, everything else 0.
df_2['num'] = (df_2['num'] > 0).astype(int)

# df_3 is an alias of df_2 (same object, not a copy).
df_3 = df_2

# Drop every row that contains a '?' placeholder.
df_4 = df_3.replace("?", np.nan).dropna()

# Separate the target from the features before scaling.
num = df_4['num'].to_numpy()
df_5 = df_4.drop(['num'], axis=1)

# Standardize the features.
# NOTE(review): the scaler is fit on ALL rows before the train/test split, so
# statistics of the test rows influence the scaling of the training data.
scaler = StandardScaler()
df_stand = scaler.fit_transform(df_5)

# Hold out 10% of the rows for testing; random_state fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(df_stand, num, test_size=0.1, random_state=0)

In [40]:
# One row per evaluated model, one column per score
# (accuracy, precision, recall, f1); rows are filled in further down.
all_models = np.empty(shape=(4, 4))

# Baseline MLP using the library's default architecture.
clf = MLPClassifier(random_state=0, max_iter=1000)
clf.fit(X_train, y_train)

# function for evaluating a fitted classifier on the test split
def metrics(model):
    """Score a fitted classifier against the module-level test split.

    Predictions are made on the global ``X_test`` and compared with the
    global ``y_test``.

    Parameters
    ----------
    model
        A fitted estimator exposing ``predict``.

    Returns
    -------
    numpy.ndarray
        ``[accuracy, precision, recall, f1]`` in that order. The same array
        is also printed.
    """
    predictions = model.predict(X_test)

    # Order matters: callers store the result as a row of scores.
    scorers = (accuracy_score, precision_score, recall_score, f1_score)
    matrix = np.array([score(y_test, predictions) for score in scorers])

    print(matrix)

    return matrix

# (hidden_layer_sizes, activation) for each row of `all_models`, in row order.
# A bare 5 means a single hidden layer of 5 units.
configs = [
    (5, 'logistic'),
    ((10, 10), 'logistic'),
    (5, 'relu'),
    ((10, 10), 'relu'),
]

# Fit each architecture on the training split and record its test scores.
for row, (layers, act) in enumerate(configs):
    model = MLPClassifier(hidden_layer_sizes=layers, activation=act, random_state=0, max_iter=1000)
    all_models[row] = metrics(model.fit(X_train, y_train))

all_models

[0.8        0.9        0.64285714 0.75      ]
[0.8        0.9        0.64285714 0.75      ]
[0.73333333 0.8        0.57142857 0.66666667]
[0.76666667 0.88888889 0.57142857 0.69565217]




array([[0.8       , 0.9       , 0.64285714, 0.75      ],
       [0.8       , 0.9       , 0.64285714, 0.75      ],
       [0.73333333, 0.8       , 0.57142857, 0.66666667],
       [0.76666667, 0.88888889, 0.57142857, 0.69565217]])

In [91]:
# Candidate best architecture: a single hidden layer of 9 ReLU units.
best_clf = MLPClassifier(hidden_layer_sizes=9, activation='relu', random_state=0, max_iter=1000)
top = metrics(best_clf.fit(X_train, y_train))

# Store the four scores as the only row of a 1x4 array.
top_model = np.empty(shape=(1, 4))
top_model[0] = top

# This architecture produces an accuracy score of about 0.83 on the test split,
# which is the highest of the configurations tried so far.

[0.83333333 0.90909091 0.71428571 0.8       ]


