# Model Development

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import AdaBoostClassifier

from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay
import matplotlib.pyplot as plt
import seaborn as sns

In [4]:
# Load the CTG dataset from the project-relative data folder.
CTG = pd.read_csv(filepath_or_buffer="data/CTG_clean.csv")

In [7]:
# Remove the column display limit so wide frames are rendered with every column.
pd.options.display.max_columns = None

In [9]:
# Summarise the loaded frame: row count, column names, non-null counts and dtypes.
CTG.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2115 entries, 0 to 2114
Data columns (total 23 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   FHR_Baseline          2115 non-null   float64
 1   Accelerations         2115 non-null   float64
 2   Fetal_Movements       2115 non-null   float64
 3   Uterine_Contractions  2115 non-null   float64
 4   Light_Decels          2115 non-null   float64
 5   Severe_Decels         2115 non-null   float64
 6   Prolonged_Decels      2115 non-null   float64
 7   Abnormal_STV_Percent  2115 non-null   float64
 8   STV_Mean              2115 non-null   float64
 9   Abnormal_LTV_Percent  2115 non-null   float64
 10  LTV_Mean              2115 non-null   float64
 11  Hist_Width            2115 non-null   float64
 12  Hist_Min              2115 non-null   float64
 13  Hist_Max              2115 non-null   float64
 14  Hist_Peaks            2115 non-null   float64
 15  Hist_Zeros           

In [37]:
# Columns to convert to int64.
ints = ["FHR_Baseline", "Hist_Peaks", "Hist_Zeros", "FHR_Class", "Fetal_State"]

# astype("int64") drops any fractional part without warning, so check that
# every value is a whole number before casting.
not_whole = [name for name in ints if not (CTG[name] % 1 == 0).all()]
assert not not_whole, f"Non-integer or missing values found in: {not_whole}"

# Cast all listed columns in a single call. This builds a new frame instead of
# mutating the existing one column by column, and re-running the cell is a no-op.
CTG = CTG.astype({name: "int64" for name in ints})

# Show the dtypes after the conversion.
CTG.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2115 entries, 0 to 2114
Data columns (total 23 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   FHR_Baseline          2115 non-null   int64  
 1   Accelerations         2115 non-null   float64
 2   Fetal_Movements       2115 non-null   float64
 3   Uterine_Contractions  2115 non-null   float64
 4   Light_Decels          2115 non-null   float64
 5   Severe_Decels         2115 non-null   float64
 6   Prolonged_Decels      2115 non-null   float64
 7   Abnormal_STV_Percent  2115 non-null   float64
 8   STV_Mean              2115 non-null   float64
 9   Abnormal_LTV_Percent  2115 non-null   float64
 10  LTV_Mean              2115 non-null   float64
 11  Hist_Width            2115 non-null   float64
 12  Hist_Min              2115 non-null   float64
 13  Hist_Max              2115 non-null   float64
 14  Hist_Peaks            2115 non-null   int64  
 15  Hist_Zeros           

In [29]:
# Feature matrix: every column except the two label columns.
X = CTG.drop(["FHR_Class", "Fetal_State"], axis="columns")

# Summary statistics, transposed so that each feature is one row.
X.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
FHR_Baseline,2115.0,133.301655,9.836399,106.0,126.0,133.0,140.0,160.0
Accelerations,2115.0,0.003186,0.00387,0.0,0.0,0.002,0.006,0.019
Fetal_Movements,2115.0,0.009508,0.046783,0.0,0.0,0.0,0.003,0.481
Uterine_Contractions,2115.0,0.004385,0.002941,0.0,0.002,0.005,0.007,0.015
Light_Decels,2115.0,0.001899,0.002965,0.0,0.0,0.0,0.003,0.015
Severe_Decels,2115.0,3e-06,5.7e-05,0.0,0.0,0.0,0.0,0.001
Prolonged_Decels,2115.0,0.000159,0.000591,0.0,0.0,0.0,0.0,0.005
Abnormal_STV_Percent,2115.0,46.977778,17.180933,12.0,32.0,49.0,61.0,87.0
STV_Mean,2115.0,1.335035,0.884111,0.2,0.7,1.2,1.7,7.0
Abnormal_LTV_Percent,2115.0,9.789598,18.329675,0.0,0.0,0.0,11.0,91.0


I want to explore building two models: one that predicts the 3-class label (`Fetal_State`) and one that predicts the 10-class label (`FHR_Class`). Therefore, we define two target variables, `y3` and `y10`.

In [43]:
# Targets for the two models: y3 comes from "Fetal_State", y10 from "FHR_Class".
y3, y10 = CTG["Fetal_State"], CTG["FHR_Class"]

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X,y3, test_size = 