In [78]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import confusion_matrix
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import RandomForestClassifier
import pickle

In [66]:
def metric(y_test, y_pred):
    tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
    specificity = tn /(tn + fp)
    sensitivity = tp/(tp + fn)
    precission = tp/(tp + fp)
    f1 = (precission * sensitivity * 2) / (precission + sensitivity)
    acc = accuracy_score(y_test, y_pred)
    npv = tn/(tn + fn)
    fpr = fp/(fp + tn)
    rmc = 1.0 - acc
    print("Accuracy    : ", acc)
    print("Specificity : ", specificity)
    print("Sensitivity : ", sensitivity)
    print("Precision   : ", precission)
    print("NPV         : ", npv)
    print("FPR         : ", fpr)
    print("RMC         : ", rmc)
    print("F1          : ", f1)
    

In [4]:
read_data = pd.read_csv('heart.csv')
data = read_data.iloc[:,0:13]
label = read_data[["target"]]

In [7]:
data.keys()

Index(['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach',
       'exang', 'oldpeak', 'slope', 'ca', 'thal'],
      dtype='object')

In [57]:
X_train, X_test, y_train, y_test = train_test_split(data, label, stratify=label, test_size=0.1)

In [58]:
clf = DecisionTreeClassifier(random_state=0)

In [59]:
cross_val_score(clf, X_train, y_train, cv=10)

array([0.75      , 0.71428571, 0.78571429, 0.78571429, 0.85185185,
       0.77777778, 0.66666667, 0.81481481, 0.61538462, 0.88461538])

In [60]:
clf.fit(X_train, y_train)

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=0,
            splitter='best')

In [61]:
y_pred = clf.predict(X_test)

In [67]:
metric(y_test, y_pred)

Accuracy    :  0.7741935483870968
Specificity :  0.7142857142857143
Sensitivity :  0.8235294117647058
Precision    :  0.7777777777777778
NPV :  0.7692307692307693
FPR :  0.2857142857142857
RMC :  0.22580645161290325
F1 :  0.7999999999999999


In [65]:
with open('my_tree_classifier.pkl', 'wb') as fid:
    pickle.dump(clf, fid)

In [None]:
#with open('my_tree_classifier.pkl', 'rb') as fid:
 #   clf = pickle.load(fid)

In [70]:
clf = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(3,2), random_state=1)

In [71]:
cross_val_score(clf, X_train, y_train, cv=10)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


array([0.85714286, 0.82142857, 0.78571429, 0.82142857, 0.74074074,
       0.74074074, 0.62962963, 0.85185185, 0.76923077, 0.84615385])

In [73]:
clf.fit(X_train, y_train)

  y = column_or_1d(y, warn=True)


MLPClassifier(activation='relu', alpha=1e-05, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(3, 2), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=1, shuffle=True, solver='lbfgs', tol=0.0001,
       validation_fraction=0.1, verbose=False, warm_start=False)

In [74]:
y_pred = clf.predict(X_test)

In [75]:
metric(y_test, y_pred)

Accuracy    :  0.8387096774193549
Specificity :  0.7857142857142857
Sensitivity :  0.8823529411764706
Precision    :  0.8333333333333334
NPV :  0.8461538461538461
FPR :  0.21428571428571427
RMC :  0.16129032258064513
F1 :  0.8571428571428571


In [76]:
with open('my_MLP_classifier.pkl', 'wb') as fid:
    pickle.dump(clf, fid)

In [109]:
clf = RandomForestClassifier(n_estimators = 20)

In [110]:
cross_val_score(clf, X_train, y_train, cv=10)

  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)


array([0.78571429, 0.75      , 0.82142857, 0.78571429, 0.77777778,
       0.81481481, 0.81481481, 0.96296296, 0.73076923, 0.84615385])

In [111]:
clf.fit(X_train, y_train)

  """Entry point for launching an IPython kernel.


RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=20, n_jobs=None,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

In [112]:
y_pred = clf.predict(X_test)

In [113]:
metric(y_test, y_pred)

Accuracy    :  0.8709677419354839
Specificity :  0.8571428571428571
Sensitivity :  0.8823529411764706
Precision    :  0.8823529411764706
NPV :  0.8571428571428571
FPR :  0.14285714285714285
RMC :  0.12903225806451613
F1 :  0.8823529411764706


In [114]:
with open('my_Random_Forest_classifier.pkl', 'wb') as fid:
    pickle.dump(clf, fid)