In [8]:
import pandas as pd
import matplotlib.pyplot as plt
import sklearn

from sklearn.ensemble import RandomForestClassifier 
from sklearn.tree import DecisionTreeClassifier 
from sklearn.ensemble import VotingClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import train_test_split

from sklearn.metrics import f1_score

# Module to save and load Python objects to and from files
import pickle  


In [2]:
# Read the fetal-health CSV (path is relative to the working directory) into a DataFrame
df = pd.read_csv(filepath_or_buffer='fetal_health.csv')
# Preview the first five rows as a quick sanity check
df.head(n=5)

Unnamed: 0,baseline_value,accelerations,fetal_movement,uterine_contractions,light_decelerations,severe_decelerations,prolongued_decelerations,abnormal_short_term_variability,mean_value_of_short_term_variability,percentage_of_time_with_abnormal_long_term_variability,...,histogram_min,histogram_max,histogram_number_of_peaks,histogram_number_of_zeroes,histogram_mode,histogram_mean,histogram_median,histogram_variance,histogram_tendency,fetal_health
0,120.0,0.0,0.0,0.0,0.0,0.0,0.0,73.0,0.5,43.0,...,62.0,126.0,2.0,0.0,120.0,137.0,121.0,73.0,1.0,2.0
1,132.0,0.006,0.0,0.006,0.003,0.0,0.0,17.0,2.1,0.0,...,68.0,198.0,6.0,1.0,141.0,136.0,140.0,12.0,0.0,1.0
2,133.0,0.003,0.0,0.008,0.003,0.0,0.0,16.0,2.1,0.0,...,68.0,198.0,5.0,1.0,141.0,135.0,138.0,13.0,0.0,1.0
3,134.0,0.003,0.0,0.008,0.003,0.0,0.0,16.0,2.4,0.0,...,53.0,170.0,11.0,0.0,137.0,134.0,137.0,13.0,1.0,1.0
4,132.0,0.007,0.0,0.008,0.0,0.0,0.0,16.0,2.4,0.0,...,53.0,170.0,9.0,0.0,137.0,136.0,138.0,11.0,1.0,1.0


In [3]:
# The target is the 'fetal_health' column; every remaining column is a predictor
y = df.loc[:, 'fetal_health']
X = df.drop('fetal_health', axis=1)

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
train_X, test_X, train_y, test_y = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [4]:
# Base learners that are scored individually and combined in the voting ensemble.
# Seeds are pinned so repeated runs build the same models.
dt_clf = DecisionTreeClassifier(random_state=42)
rf_clf = RandomForestClassifier(random_state=42)
Ada_clf = AdaBoostClassifier(
    n_estimators=100,
    random_state=0,  # NOTE(review): differs from the 42 used for the other models — confirm intended
)

In [5]:
# Combine the three base learners; with voting='soft' the ensemble predicts the
# class whose summed predicted probability across the members is highest.
# TODO: consider n_jobs=-1 to fit the members in parallel
softVote_clf = VotingClassifier(
    voting='soft',
    estimators=[
        ('rf', rf_clf),
        ('dt', dt_clf),
        ('adb', Ada_clf),
    ],
)
softVote_clf.fit(train_X, train_y)



In [6]:
# Score every model on the held-out split using the weighted F1 metric.
# The three base learners are fitted here because VotingClassifier.fit trains
# clones of its estimators and leaves the originals unfitted. The ensemble was
# already fitted in the previous cell, so it is only evaluated here rather than
# trained a second time on the same data.
# NOTE(review): in the recorded output the ensemble's F1 equals the decision
# tree's — worth checking whether the tree's probabilities dominate the soft vote.
for clf in (rf_clf, dt_clf, Ada_clf, softVote_clf):
    if clf is not softVote_clf:
        clf.fit(train_X, train_y)
    pred_y = clf.predict(test_X)
    f1 = f1_score(test_y, pred_y, average='weighted')
    print(f"{clf.__class__.__name__} F1 Score: {f1:.4f}")

    

RandomForestClassifier F1 Score: 0.9445
DecisionTreeClassifier F1 Score: 0.9235




AdaBoostClassifier F1 Score: 0.8923




VotingClassifier F1 Score: 0.9235


In [10]:
# Persist the fitted soft-voting ensemble so it can be reloaded without retraining.
# The context manager closes the file even if pickling raises.
with open('SoftVote_fh.pickle', 'wb') as sv_pickle:
    pickle.dump(softVote_clf, sv_pickle)
