In [1]:
# python 3
"""
# File Name: Shuffle_Data_Ten_Times_and_Evaluate_Model.ipynb
# Zhihong (George) Li (zhihongli@bennington.edu)

This program takes the heart.csv, shuffle it and split the train and test data.
Then it trains with Logistic Regression Model. Using test data to predict the output(whether patient has heart disease or not).
Then it evaluates the predictions by calculating the mean precision and precision std.

Dataset comes from Kaggle https://www.kaggle.com/nyjoey/heart-disease
"""
# import all the modules for data analysis
# dataset comes from Kaggle https://www.kaggle.com/nyjoey/heart-disease
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.pipeline import Pipeline
import statistics

In [2]:
# read the file using pandas
df = pd.read_csv('heart.csv')

In [3]:
# let's check the basic info about this dataset
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


# Building the functions for shuffling data and evaluating model

In [4]:
# functions for extracting info from evaluation of the model

def save_precision_for_zero_to_dict(zero):
    # extract precision for zero
    if 'precision' in zero:
        zero['precision'].append(float(cfr_arr[5]))
    else:
        zero['precision'] = [float(cfr_arr[5])]

def save_recall_for_zero_to_dict(zero):
    # extract recall for zero
    if 'recall' in zero:
        zero['recall'].append(float(cfr_arr[6]))
    else:
        zero['recall'] = [float(cfr_arr[6])]

def save_f1_for_zero_to_dict(zero):
        # extract f1-score for zero
    if 'f1' in zero:
        zero['f1'].append(float(cfr_arr[7]))
    else:
        zero['f1'] = [float(cfr_arr[7])]

def save_support_for_zero_to_dict(zero):
    # extract support for zero
    if 'support' in zero:
        zero['support'].append(float(cfr_arr[8]))
    else:
        zero['support'] = [float(cfr_arr[8])]

def save_precision_for_one_to_dict(one):
        # extract precision for one
    if 'precision' in one:
        one['precision'].append(float(cfr_arr[10]))
    else:
        one['precision'] = [float(cfr_arr[10])]

def save_recall_for_one_to_dict(one):
        # extract recall for one
    if 'recall' in one:
        one['recall'].append(float(cfr_arr[11]))
    else:
        one['recall'] = [float(cfr_arr[11])]
        
def save_f1_for_one_to_dict(one):
        # extract f1-score for one
    if 'f1' in one:
        one['f1'].append(float(cfr_arr[12]))
    else:
        one['f1'] = [float(cfr_arr[12])]
        
def save_support_for_one_to_dict(one):
        # extract support for one
    if 'support' in one:
        one['support'].append(float(cfr_arr[13]))
    else:
        one['support'] = [float(cfr_arr[13])]

In [5]:
# save_eval_into_dict function
def save_eval_of_zero_to_dict(zero):
    save_precision_for_zero_to_dict(zero)
    save_recall_for_zero_to_dict(zero)
    save_f1_for_zero_to_dict(zero)
    save_support_for_zero_to_dict(zero)
    
def save_eval_of_one_to_dict(one):
    save_precision_for_one_to_dict(one)
    save_recall_for_one_to_dict(one)
    save_f1_for_one_to_dict(one)
    save_support_for_one_to_dict(one)

In [6]:
def get_precision_mean(dictionary):
    return sum(dictionary['precision']) / len(dictionary['precision'])

def get_recall_mean(dictionary):
    return sum(dictionary['recall']) / len(dictionary['recall'])
    
def get_f1_mean(dictionary):
    return sum(dictionary['f1']) / len(dictionary['f1'])

def get_support_mean(dictionary):
    return sum(dictionary['support']) / len(dictionary['support'])
    
def get_precision_std(dictionary):
    return statistics.stdev(dictionary['precision'])

def get_recall_std(dictionary):
    return statistics.stdev(dictionary['recall'])

def get_f1_std(dictionary):
    return statistics.stdev(dictionary['f1'])
    
def get_support_std(dictionary):
    return statistics.stdev(dictionary['support'])

In [7]:
def mean_std_evaluation_report(dictionary):
    print("Result: ")
    print("mean precision:", get_precision_mean(dictionary))
    print("mean recall :", get_recall_mean(dictionary))
    print("mean f1 :",get_f1_mean(dictionary))
    print("mean support :",get_support_mean(dictionary))
    print("precision std :",get_precision_std(dictionary))
    print("recall std :", get_recall_std(dictionary))
    print("f1-score std :", get_f1_std(dictionary))
    print("support std :", get_support_std(dictionary))

In [8]:
# create two new dict for storing the evaluation info
# becasue classification_report info looks like this
# the eval_zero and eval_one will be the dictionaries stores these info
"""
	precision	recall	f1-score	support
0	0.82	    0.8	    0.81	    41
1	0.84	    0.86	0.85	    50
"""

eval_zero = {}
eval_one = {}

In [9]:
# shuffle the data, train the model and store the evaluation data
# to the these two dictionaries.

for i in range(0,10):
    # shuffle the data
    ds = df.sample(frac=1) 
    
    # define X and y for data spliting
    X = df.drop('target',axis=1)
    y = df['target']
    
    # now, we split out dataset
    X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3)
    
    # create new instance of the logisticRegression Model
    logmodel = LogisticRegression()
    
    # train the model
    logmodel.fit(X_train,y_train)
    
    # use the model to predict and store the value as logmodel_pred
    logmodel_pred = logmodel.predict(X_test)
    
    # evaluate the model and save the evaluation as cfr
    cfr_arr = (classification_report(y_test, logmodel_pred)).split()

    # extract info from the evaluation as a dictionary for later
    save_eval_of_zero_to_dict(eval_zero)
    save_eval_of_one_to_dict(eval_one)



# Run functions to evaluate the potential model

In [10]:
print(eval_zero)

{'precision': [0.83, 0.86, 0.87, 0.89, 0.91, 0.86, 0.9, 0.87, 0.94, 0.88], 'recall': [0.83, 0.73, 0.73, 0.79, 0.74, 0.66, 0.67, 0.71, 0.77, 0.73], 'f1': [0.83, 0.79, 0.8, 0.84, 0.82, 0.75, 0.77, 0.78, 0.85, 0.8], 'support': [42.0, 41.0, 45.0, 43.0, 42.0, 47.0, 42.0, 38.0, 44.0, 41.0]}


In [11]:
print(eval_one)

{'precision': [0.86, 0.8, 0.77, 0.83, 0.81, 0.71, 0.77, 0.82, 0.82, 0.81], 'recall': [0.86, 0.9, 0.89, 0.92, 0.94, 0.89, 0.94, 0.92, 0.96, 0.92], 'f1': [0.86, 0.85, 0.83, 0.87, 0.87, 0.79, 0.84, 0.87, 0.88, 0.86], 'support': [49.0, 50.0, 46.0, 48.0, 49.0, 44.0, 49.0, 53.0, 47.0, 50.0]}


In [12]:
mean_std_evaluation_report(eval_zero)

Result: 
mean precision: 0.8810000000000002
mean recall : 0.7360000000000001
mean f1 : 0.8029999999999999
mean support : 42.5
precision std : 0.03071373199943852
recall std : 0.05146735750831674
f1-score std : 0.03198958163736019
support std : 2.4608038433722332


In [13]:
mean_std_evaluation_report(eval_one)

Result: 
mean precision: 0.8000000000000002
mean recall : 0.9139999999999999
mean f1 : 0.852
mean support : 48.5
precision std : 0.04136557881996952
recall std : 0.02951459149490486
f1-score std : 0.02658320271650251
support std : 2.4608038433722332


reference for getting std
https://stackoverflow.com/questions/15389768/standard-deviation-of-a-list