In [1]:
import numpy as np
from sqlalchemy import create_engine
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, precision_score, recall_score, f1_score
from sklearn.svm import SVC
from imblearn.over_sampling import RandomOverSampler
from collections import Counter

In [2]:
filepath = '../Resources/Glucose_Datasets/diabetic_glucose.csv'
medical_df = pd.read_csv(filepath)
medical_df.head(10)

Unnamed: 0,ID#,Gender,Age,Hypertension,Heart_Disease,Ever_Married,Work_Type,Residence_Type,Avg_Glucose_Lvl,BMI,Smoker,Stroke
0,1,Male,67,0,1,Yes,Private,Urban,228.69,36.6,Former,1
1,2,Female,61,0,0,Yes,Self-employed,Rural,202.21,,Never,1
2,4,Female,49,0,0,Yes,Private,Urban,171.23,34.4,Current,1
3,5,Female,79,1,0,Yes,Self-employed,Rural,174.12,24.0,Never,1
4,6,Male,81,0,0,Yes,Private,Urban,186.21,29.0,Former,1
5,14,Male,78,0,1,Yes,Private,Urban,219.84,,Unknown,1
6,15,Female,79,0,1,Yes,Private,Urban,214.09,28.2,Never,1
7,16,Female,50,1,0,Yes,Self-employed,Rural,167.41,30.9,Never,1
8,17,Male,64,0,1,Yes,Private,Urban,191.61,37.5,Current,1
9,18,Male,75,1,0,Yes,Private,Urban,221.29,25.8,Current,1


In [3]:
# Drop ID# column
medical_df.drop(columns=['ID#'], inplace=True)
medical_df.head()

Unnamed: 0,Gender,Age,Hypertension,Heart_Disease,Ever_Married,Work_Type,Residence_Type,Avg_Glucose_Lvl,BMI,Smoker,Stroke
0,Male,67,0,1,Yes,Private,Urban,228.69,36.6,Former,1
1,Female,61,0,0,Yes,Self-employed,Rural,202.21,,Never,1
2,Female,49,0,0,Yes,Private,Urban,171.23,34.4,Current,1
3,Female,79,1,0,Yes,Self-employed,Rural,174.12,24.0,Never,1
4,Male,81,0,0,Yes,Private,Urban,186.21,29.0,Former,1


In [4]:
medical_df['Stroke'].value_counts()

0    899
1    100
Name: Stroke, dtype: int64

In [31]:
print(medical_df['Avg_Glucose_Lvl'].min())
print(medical_df['Avg_Glucose_Lvl'].max())

125.03
271.74


In [5]:
# Get a list of categorical columns
categorical_columns = medical_df.dtypes[medical_df.dtypes=='object'].index.tolist()
categorical_columns

['Gender', 'Ever_Married', 'Work_Type', 'Residence_Type', 'Smoker']

In [6]:
# Create OneHotEncoder instance
enc = OneHotEncoder(sparse=False)

# Fit & transform OneHotEncoder using categorical columns
encode_df = pd.DataFrame(enc.fit_transform(medical_df[categorical_columns]))

# Add column names
encode_df.columns = enc.get_feature_names(categorical_columns)
print(encode_df.shape)
encode_df.head(10)

(999, 15)




Unnamed: 0,Gender_Female,Gender_Male,Ever_Married_No,Ever_Married_Yes,Work_Type_Govt_job,Work_Type_Never_worked,Work_Type_Private,Work_Type_Self-employed,Work_Type_children,Residence_Type_Rural,Residence_Type_Urban,Smoker_Current,Smoker_Former,Smoker_Never,Smoker_Unknown
0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
1,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
2,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0
3,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
4,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
5,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
6,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0
7,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
8,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0
9,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0


In [7]:
# Get encode_df info
encode_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 999 entries, 0 to 998
Data columns (total 15 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Gender_Female            999 non-null    float64
 1   Gender_Male              999 non-null    float64
 2   Ever_Married_No          999 non-null    float64
 3   Ever_Married_Yes         999 non-null    float64
 4   Work_Type_Govt_job       999 non-null    float64
 5   Work_Type_Never_worked   999 non-null    float64
 6   Work_Type_Private        999 non-null    float64
 7   Work_Type_Self-employed  999 non-null    float64
 8   Work_Type_children       999 non-null    float64
 9   Residence_Type_Rural     999 non-null    float64
 10  Residence_Type_Urban     999 non-null    float64
 11  Smoker_Current           999 non-null    float64
 12  Smoker_Former            999 non-null    float64
 13  Smoker_Never             999 non-null    float64
 14  Smoker_Unknown           9

In [8]:
# Drop redundant columns
encode_df.drop(columns=['Gender_Female', 'Ever_Married_No', 'Residence_Type_Rural'], inplace=True)
encode_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 999 entries, 0 to 998
Data columns (total 12 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Gender_Male              999 non-null    float64
 1   Ever_Married_Yes         999 non-null    float64
 2   Work_Type_Govt_job       999 non-null    float64
 3   Work_Type_Never_worked   999 non-null    float64
 4   Work_Type_Private        999 non-null    float64
 5   Work_Type_Self-employed  999 non-null    float64
 6   Work_Type_children       999 non-null    float64
 7   Residence_Type_Urban     999 non-null    float64
 8   Smoker_Current           999 non-null    float64
 9   Smoker_Former            999 non-null    float64
 10  Smoker_Never             999 non-null    float64
 11  Smoker_Unknown           999 non-null    float64
dtypes: float64(12)
memory usage: 93.8 KB


In [9]:
# Merge encoded df with medical df
medical_df = medical_df.merge(encode_df, left_index=True, right_index=True).drop(categorical_columns, axis=1)
print(medical_df.shape)
medical_df.head(10)

(999, 18)


Unnamed: 0,Age,Hypertension,Heart_Disease,Avg_Glucose_Lvl,BMI,Stroke,Gender_Male,Ever_Married_Yes,Work_Type_Govt_job,Work_Type_Never_worked,Work_Type_Private,Work_Type_Self-employed,Work_Type_children,Residence_Type_Urban,Smoker_Current,Smoker_Former,Smoker_Never,Smoker_Unknown
0,67,0,1,228.69,36.6,1,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
1,61,0,0,202.21,,1,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
2,49,0,0,171.23,34.4,1,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0
3,79,1,0,174.12,24.0,1,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
4,81,0,0,186.21,29.0,1,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
5,78,0,1,219.84,,1,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
6,79,0,1,214.09,28.2,1,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0
7,50,1,0,167.41,30.9,1,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
8,64,0,1,191.61,37.5,1,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0
9,75,1,0,221.29,25.8,1,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0


In [10]:
medical_df.isnull().sum()

Age                         0
Hypertension                0
Heart_Disease               0
Avg_Glucose_Lvl             0
BMI                        66
Stroke                      0
Gender_Male                 0
Ever_Married_Yes            0
Work_Type_Govt_job          0
Work_Type_Never_worked      0
Work_Type_Private           0
Work_Type_Self-employed     0
Work_Type_children          0
Residence_Type_Urban        0
Smoker_Current              0
Smoker_Former               0
Smoker_Never                0
Smoker_Unknown              0
dtype: int64

In [11]:
# Create SimpleImputer instance to replace missing BMI feature values with median BMI
imputer = SimpleImputer(strategy='mean')

# Fit SimpleImputer & transform data
med_transformed = imputer.fit_transform(medical_df)

In [12]:
# Add SimpleImputer outcome to dataframe
med_df_transformed = pd.DataFrame(med_transformed, columns=medical_df.columns)
print(med_df_transformed.shape)
med_df_transformed.head(10)

(999, 18)


Unnamed: 0,Age,Hypertension,Heart_Disease,Avg_Glucose_Lvl,BMI,Stroke,Gender_Male,Ever_Married_Yes,Work_Type_Govt_job,Work_Type_Never_worked,Work_Type_Private,Work_Type_Self-employed,Work_Type_children,Residence_Type_Urban,Smoker_Current,Smoker_Former,Smoker_Never,Smoker_Unknown
0,67.0,0.0,1.0,228.69,36.6,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
1,61.0,0.0,0.0,202.21,31.41329,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
2,49.0,0.0,0.0,171.23,34.4,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0
3,79.0,1.0,0.0,174.12,24.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
4,81.0,0.0,0.0,186.21,29.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
5,78.0,0.0,1.0,219.84,31.41329,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
6,79.0,0.0,1.0,214.09,28.2,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0
7,50.0,1.0,0.0,167.41,30.9,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
8,64.0,0.0,1.0,191.61,37.5,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0
9,75.0,1.0,0.0,221.29,25.8,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0


In [14]:
med_df_transformed.isnull().sum()

Age                        0
Hypertension               0
Heart_Disease              0
Avg_Glucose_Lvl            0
BMI                        0
Stroke                     0
Gender_Male                0
Ever_Married_Yes           0
Work_Type_Govt_job         0
Work_Type_Never_worked     0
Work_Type_Private          0
Work_Type_Self-employed    0
Work_Type_children         0
Residence_Type_Urban       0
Smoker_Current             0
Smoker_Former              0
Smoker_Never               0
Smoker_Unknown             0
dtype: int64

In [15]:
# Create feature & target datasets
X = med_df_transformed.drop(columns=['Stroke'])
y = med_df_transformed['Stroke']
print(X.shape)
print(y.shape)

(999, 17)
(999,)


In [16]:
# Create empty arrays to store ML results
recalls = []
accuracies = []
precisions = []

# This will take a while. Feel free to adjust range to sample code. Feel free to review exported spreadsheets in github.
for i in range(1000):
    
    # Create training + testing data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y)
    
    # Oversample positive stroke cases in training set
    ros = RandomOverSampler()
    X_train_resampled, y_train_resampled = ros.fit_resample(X_train, y_train)
    
    # Scale feature data
    scaler = StandardScaler()
    scaler.fit(X_train_resampled)
    X_train_scaled = scaler.transform(X_train_resampled)
    X_test_scaled = scaler.transform(X_test)
    
    # Create SVC model
    svc_model = SVC(kernel='linear')
    svc_model = svc_model.fit(X_train_scaled, y_train_resampled)
    
    # Evaluate SVC model
    y_pred = svc_model.predict(X_test_scaled)
    
    # Add recall results to list
    recalls.append(recall_score(y_test, y_pred))
    
    # Add precision results to list
    precisions.append(precision_score(y_test, y_pred))
    
    # Add accuracy results to list
    accuracies.append(accuracy_score(y_test, y_pred))
    
    print("Current progress:", i)

Current progress: 0
Current progress: 1
Current progress: 2
Current progress: 3
Current progress: 4
Current progress: 5
Current progress: 6
Current progress: 7
Current progress: 8
Current progress: 9
Current progress: 10
Current progress: 11
Current progress: 12
Current progress: 13
Current progress: 14
Current progress: 15
Current progress: 16
Current progress: 17
Current progress: 18
Current progress: 19
Current progress: 20
Current progress: 21
Current progress: 22
Current progress: 23
Current progress: 24
Current progress: 25
Current progress: 26
Current progress: 27
Current progress: 28
Current progress: 29
Current progress: 30
Current progress: 31
Current progress: 32
Current progress: 33
Current progress: 34
Current progress: 35
Current progress: 36
Current progress: 37
Current progress: 38
Current progress: 39
Current progress: 40
Current progress: 41
Current progress: 42
Current progress: 43
Current progress: 44
Current progress: 45
Current progress: 46
Current progress: 47
Cu

Current progress: 378
Current progress: 379
Current progress: 380
Current progress: 381
Current progress: 382
Current progress: 383
Current progress: 384
Current progress: 385
Current progress: 386
Current progress: 387
Current progress: 388
Current progress: 389
Current progress: 390
Current progress: 391
Current progress: 392
Current progress: 393
Current progress: 394
Current progress: 395
Current progress: 396
Current progress: 397
Current progress: 398
Current progress: 399
Current progress: 400
Current progress: 401
Current progress: 402
Current progress: 403
Current progress: 404
Current progress: 405
Current progress: 406
Current progress: 407
Current progress: 408
Current progress: 409
Current progress: 410
Current progress: 411
Current progress: 412
Current progress: 413
Current progress: 414
Current progress: 415
Current progress: 416
Current progress: 417
Current progress: 418
Current progress: 419
Current progress: 420
Current progress: 421
Current progress: 422
Current pr

Current progress: 751
Current progress: 752
Current progress: 753
Current progress: 754
Current progress: 755
Current progress: 756
Current progress: 757
Current progress: 758
Current progress: 759
Current progress: 760
Current progress: 761
Current progress: 762
Current progress: 763
Current progress: 764
Current progress: 765
Current progress: 766
Current progress: 767
Current progress: 768
Current progress: 769
Current progress: 770
Current progress: 771
Current progress: 772
Current progress: 773
Current progress: 774
Current progress: 775
Current progress: 776
Current progress: 777
Current progress: 778
Current progress: 779
Current progress: 780
Current progress: 781
Current progress: 782
Current progress: 783
Current progress: 784
Current progress: 785
Current progress: 786
Current progress: 787
Current progress: 788
Current progress: 789
Current progress: 790
Current progress: 791
Current progress: 792
Current progress: 793
Current progress: 794
Current progress: 795
Current pr

In [17]:
recalls

[0.7,
 0.85,
 0.7,
 0.85,
 0.8,
 0.6,
 0.7,
 0.9,
 0.8,
 0.75,
 0.7,
 0.6,
 0.8,
 0.8,
 0.95,
 0.9,
 0.6,
 0.7,
 0.85,
 0.9,
 0.7,
 0.65,
 0.5,
 0.75,
 0.65,
 0.8,
 0.7,
 0.65,
 0.8,
 0.8,
 0.8,
 0.85,
 0.7,
 0.65,
 0.75,
 0.75,
 0.7,
 0.75,
 0.65,
 0.65,
 0.75,
 0.65,
 0.95,
 0.8,
 0.8,
 0.6,
 0.75,
 0.75,
 0.75,
 0.75,
 0.75,
 0.85,
 0.75,
 0.7,
 0.8,
 0.65,
 0.65,
 0.65,
 0.7,
 0.7,
 0.7,
 0.65,
 0.65,
 0.75,
 0.7,
 0.75,
 0.9,
 0.75,
 0.95,
 0.8,
 0.8,
 0.75,
 0.75,
 0.75,
 0.8,
 0.85,
 0.7,
 0.7,
 0.85,
 0.7,
 0.7,
 0.9,
 0.8,
 0.75,
 0.75,
 0.6,
 0.9,
 0.85,
 0.75,
 0.65,
 0.7,
 0.65,
 0.8,
 0.75,
 0.7,
 0.7,
 0.75,
 0.75,
 0.85,
 0.85,
 0.6,
 0.7,
 0.7,
 0.45,
 0.8,
 0.8,
 0.7,
 0.6,
 0.85,
 0.8,
 0.75,
 0.65,
 0.8,
 0.65,
 0.7,
 0.45,
 0.85,
 0.55,
 0.7,
 0.65,
 0.9,
 0.8,
 0.85,
 0.8,
 0.75,
 0.8,
 0.8,
 0.85,
 0.45,
 0.75,
 0.55,
 0.5,
 0.75,
 0.75,
 0.7,
 0.65,
 0.55,
 0.75,
 0.55,
 0.9,
 0.85,
 0.75,
 0.8,
 0.85,
 0.7,
 0.9,
 0.75,
 0.65,
 0.7,
 0.5,
 0.9,
 0.85,
 0.55,
 0.

In [18]:
recalls_df = pd.DataFrame(recalls, columns=['Recall'])
print(recalls_df.shape)
recalls_df.head(10)

(1000, 1)


Unnamed: 0,Recall
0,0.7
1,0.85
2,0.7
3,0.85
4,0.8
5,0.6
6,0.7
7,0.9
8,0.8
9,0.75


In [19]:
accuracies

[0.585,
 0.655,
 0.64,
 0.68,
 0.685,
 0.67,
 0.605,
 0.575,
 0.645,
 0.675,
 0.615,
 0.7,
 0.635,
 0.65,
 0.67,
 0.605,
 0.665,
 0.66,
 0.635,
 0.645,
 0.65,
 0.67,
 0.59,
 0.67,
 0.645,
 0.65,
 0.61,
 0.64,
 0.67,
 0.695,
 0.635,
 0.675,
 0.62,
 0.65,
 0.645,
 0.635,
 0.63,
 0.625,
 0.66,
 0.605,
 0.685,
 0.625,
 0.61,
 0.6,
 0.59,
 0.62,
 0.685,
 0.675,
 0.73,
 0.625,
 0.69,
 0.66,
 0.65,
 0.625,
 0.625,
 0.67,
 0.66,
 0.635,
 0.685,
 0.685,
 0.68,
 0.65,
 0.705,
 0.595,
 0.705,
 0.67,
 0.665,
 0.615,
 0.675,
 0.61,
 0.59,
 0.645,
 0.68,
 0.655,
 0.65,
 0.625,
 0.645,
 0.71,
 0.6,
 0.63,
 0.585,
 0.66,
 0.685,
 0.695,
 0.555,
 0.665,
 0.65,
 0.675,
 0.675,
 0.625,
 0.62,
 0.715,
 0.69,
 0.675,
 0.695,
 0.625,
 0.695,
 0.705,
 0.715,
 0.63,
 0.65,
 0.65,
 0.64,
 0.66,
 0.66,
 0.63,
 0.67,
 0.645,
 0.64,
 0.625,
 0.69,
 0.64,
 0.57,
 0.635,
 0.63,
 0.665,
 0.68,
 0.675,
 0.635,
 0.66,
 0.61,
 0.635,
 0.665,
 0.665,
 0.62,
 0.67,
 0.705,
 0.64,
 0.67,
 0.585,
 0.62,
 0.645,
 0.62,
 0.6

In [20]:
accuracies_df = pd.DataFrame(accuracies, columns=['Accuracy'])
print(accuracies_df.shape)
accuracies_df.head(10)

(1000, 1)


Unnamed: 0,Accuracy
0,0.585
1,0.655
2,0.64
3,0.68
4,0.685
5,0.67
6,0.605
7,0.575
8,0.645
9,0.675


In [21]:
precisions

[0.15384615384615385,
 0.20481927710843373,
 0.175,
 0.21794871794871795,
 0.21333333333333335,
 0.17142857142857143,
 0.16091954022988506,
 0.1782178217821782,
 0.1927710843373494,
 0.2,
 0.16470588235294117,
 0.1875,
 0.18823529411764706,
 0.1951219512195122,
 0.2261904761904762,
 0.18947368421052632,
 0.16901408450704225,
 0.18421052631578946,
 0.19540229885057472,
 0.20689655172413793,
 0.1794871794871795,
 0.18055555555555555,
 0.12195121951219512,
 0.19736842105263158,
 0.16883116883116883,
 0.1951219512195122,
 0.16279069767441862,
 0.16666666666666666,
 0.20512820512820512,
 0.2191780821917808,
 0.18823529411764706,
 0.21518987341772153,
 0.16666666666666666,
 0.17105263157894737,
 0.18518518518518517,
 0.18072289156626506,
 0.17073170731707318,
 0.17647058823529413,
 0.17567567567567569,
 0.15294117647058825,
 0.2054794520547945,
 0.16049382716049382,
 0.19791666666666666,
 0.17391304347826086,
 0.1702127659574468,
 0.15,
 0.2054794520547945,
 0.2,
 0.234375,
 0.17647058823529

In [22]:
precisions_df = pd.DataFrame(precisions, columns=['Precision'])
print(precisions_df.shape)
precisions_df.head(10)

(1000, 1)


Unnamed: 0,Precision
0,0.153846
1,0.204819
2,0.175
3,0.217949
4,0.213333
5,0.171429
6,0.16092
7,0.178218
8,0.192771
9,0.2


In [23]:
results_df = accuracies_df.merge(precisions_df, left_index=True, right_index=True)
print(results_df.shape)
results_df.head(10)

(1000, 2)


Unnamed: 0,Accuracy,Precision
0,0.585,0.153846
1,0.655,0.204819
2,0.64,0.175
3,0.68,0.217949
4,0.685,0.213333
5,0.67,0.171429
6,0.605,0.16092
7,0.575,0.178218
8,0.645,0.192771
9,0.675,0.2


In [24]:
results_df = results_df.merge(recalls_df, left_index=True, right_index=True)
print(results_df.shape)
results_df.head(10)

(1000, 3)


Unnamed: 0,Accuracy,Precision,Recall
0,0.585,0.153846,0.7
1,0.655,0.204819,0.85
2,0.64,0.175,0.7
3,0.68,0.217949,0.85
4,0.685,0.213333,0.8
5,0.67,0.171429,0.6
6,0.605,0.16092,0.7
7,0.575,0.178218,0.9
8,0.645,0.192771,0.8
9,0.675,0.2,0.75


In [25]:
print("Recall mean: " + str(results_df['Recall'].mean()))
print("Recall mode: " + str(results_df['Recall'].mode()))
print("Recall median: " + str(results_df['Recall'].median()))
print("Recall max: " + str(results_df['Recall'].max()))
print("Recall min: " + str(results_df['Recall'].min()))
print("Recall standard deviation: " + str(results_df['Recall'].std()))

Recall mean: 0.738349999999999
Recall mode: 0    0.75
dtype: float64
Recall median: 0.75
Recall max: 1.0
Recall min: 0.4
Recall standard deviation: 0.10028376904731026


In [26]:
print("Accuracy mean: " + str(results_df['Accuracy'].mean()))
print("Accuracy mode: " + str(results_df['Accuracy'].mode()))
print("Accuracy median: " + str(results_df['Accuracy'].median()))
print("Accuracy max: " + str(results_df['Accuracy'].max()))
print("Accuracy min: " + str(results_df['Accuracy'].min()))
print("Accuracy standard deviation: " + str(results_df['Accuracy'].std()))

Accuracy mean: 0.6439849999999979
Accuracy mode: 0    0.65
dtype: float64
Accuracy median: 0.645
Accuracy max: 0.74
Accuracy min: 0.54
Accuracy standard deviation: 0.03241983684136641


In [27]:
print("Precision mean: " + str(results_df['Precision'].mean()))
print("Precision mode: " + str(results_df['Precision'].mode()))
print("Precision median: " + str(results_df['Precision'].median()))
print("Precision max: " + str(results_df['Precision'].max()))
print("Precision min: " + str(results_df['Precision'].min()))
print("Precision standard deviation: " + str(results_df['Precision'].std()))

Precision mean: 0.18327113209636783
Precision mode: 0    0.2
dtype: float64
Precision median: 0.18292682926829268
Precision max: 0.24615384615384617
Precision min: 0.1125
Precision standard deviation: 0.021710079685666423


In [28]:
results_df.isnull().sum().sum()

0

In [29]:
len(results_df)

1000

In [30]:
filepath = ('ML_result_files/SVC_DiabeticGlucose_SimpleImputer(mean)_RandomOverSampler.csv')
results_df.to_csv(filepath, index=False)