In [None]:
# pandas is used below to load and clean the CTG table.
import pandas as pd
import numpy as np
import warnings
# Blanket filter: with no category given, every warning raised after this
# point is suppressed, including ones emitted by third-party libraries.
warnings.filterwarnings("ignore")

In [None]:
# Load the CTG examination records from the input directory.
ctg_csv_path = "../input/CTG.csv"
df = pd.read_csv(ctg_csv_path)

In [None]:
# Preview the first five rows of the freshly loaded table.
df.head(n=5)

## FileName:	of CTG examination	
## Date:	of the examination	
## b:	start instant	
## e:	end instant	
## LBE:	baseline value (medical expert)	
## LB:	baseline value (SisPorto)	
## AC:	accelerations (SisPorto)	
## FM:	foetal movement (SisPorto)	
## UC:	uterine contractions (SisPorto)	
## ASTV:	percentage of time with abnormal short term variability  (SisPorto)	
## mSTV:	mean value of short term variability  (SisPorto)	
## ALTV:	percentage of time with abnormal long term variability  (SisPorto)	
## mLTV:	mean value of long term variability  (SisPorto)	
## DL:	light decelerations	
## DS:	severe decelerations	
## DP:	prolonged decelerations	
## DR:	repetitive decelerations	
## Width:	histogram width	
## Min:	low freq. of the histogram	
## Max:	high freq. of the histogram	
## Nmax:	number of histogram peaks	
## Nzeros:	number of histogram zeros	
## Mode:	histogram mode	
## Mean:	histogram mean	
## Median:	histogram median	
## Variance:	histogram variance	
## Tendency:	histogram tendency: -1=left asymmetric; 0=symmetric; 1=right asymmetric	
## A:	calm sleep	
## B:	REM sleep	
## C:	calm vigilance	
## D:	active vigilance	
## SH:	shift pattern (A or Susp with shifts)	
## AD:	accelerative/decelerative pattern (stress situation)	
## DE:	decelerative pattern (vagal stimulation)	
## LD:	largely decelerative pattern	
## FS:	flat-sinusoidal pattern (pathological state)	
## SUSP:	suspect pattern	
## CLASS:	Class code (1 to 10) for classes A to SUSP	
## NSP:	Normal=1; Suspect=2; Pathologic=3	


In [None]:
# Remove the file/date identifiers, the segment-file column and the b/e
# (start/end instant) columns before modelling.
bookkeeping_columns = ["FileName", "Date", "SegFile", "b", "e"]
df = df.drop(columns=bookkeeping_columns)

In [None]:
# Preview the first five rows after the column drop.
df.head(n=5)

In [None]:
# List the column labels that remain in df.
df.columns

In [None]:
# (rows, columns) of df at this point.
df.shape

In [None]:
# Per-column count of missing values before any rows are removed.
df.isna().sum()

In [None]:
# Discard every row that has at least one missing value.
df = df.dropna(axis=0, how="any")

In [None]:
# Re-count missing values per column to confirm the current state of df.
df.isna().sum()

In [None]:
# Show the dtype pandas holds for each column.
df.dtypes

In [None]:
# Feature matrix: the baseline, acceleration, foetal-movement,
# uterine-contraction and deceleration columns.
X=df[['LBE', 'LB', 'AC', 'FM', 'UC', 'DL',
       'DS', 'DP', 'DR']]
# Target as a 1-D Series (single brackets). scikit-learn estimators expect y
# with shape (n_samples,); a one-column DataFrame only works through an
# implicit ravel that emits DataConversionWarning.
Y=df["NSP"]

## Performing the scaling

In [None]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler
Scaler=StandardScaler()
X=Scaler.fit_transform(X)


In [None]:

from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(X,Y,test_size=0.3,random_state=42)

## Classifying NSP into Normal=1, Suspect=2, Pathologic=3

### Checking for appropriate values of gamma

In [None]:
from sklearn.svm import SVC

# Degree-6 polynomial-kernel SVM with fixed coef0 and gamma: fit on the
# training split, then predict the held-out split.
svm_clf = SVC(kernel="poly", degree=6, coef0=5, gamma=0.1).fit(X_train, y_train)
y_pred = svm_clf.predict(X_test)


## Calculating different metrics

In [None]:
from sklearn.metrics import accuracy_score, precision_score, f1_score, recall_score, confusion_matrix

In [None]:
# Rows are true classes, columns are predicted classes.
svm_confusion = confusion_matrix(y_test, y_pred)
print(svm_confusion)

In [None]:
# Support-weighted F1 across the classes.
f1_score(y_true=y_test, y_pred=y_pred, average="weighted")

In [None]:

# Fraction of held-out samples predicted correctly.
accuracy_score(y_true=y_test, y_pred=y_pred)

In [None]:
# Support-weighted precision across the classes.
precision_score(y_true=y_test, y_pred=y_pred, average="weighted")

In [None]:
# Support-weighted recall across the classes.
recall_score(y_true=y_test, y_pred=y_pred, average="weighted")

# Decision Tree Classifier

In [None]:
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Shallow, regularised decision tree. random_state fixes the feature
# permutation used to break ties between equally good splits, so the fitted
# tree (and the metrics below) are the same on every run.
tree_clf=DecisionTreeClassifier(min_samples_split=6, min_samples_leaf=4, max_depth=6, random_state=42)
tree_clf=tree_clf.fit(X_train,y_train)
y_pred=tree_clf.predict(X_test)

In [None]:
# Fraction of held-out samples the tree predicts correctly.
accuracy_score(y_true=y_test, y_pred=y_pred)

In [None]:
# Support-weighted recall of the tree's predictions.
recall_score(y_true=y_test, y_pred=y_pred, average="weighted")

In [None]:
# Support-weighted precision of the tree's predictions.
precision_score(y_true=y_test, y_pred=y_pred, average="weighted")

In [None]:
from sklearn.tree import export_graphviz

# Write the fitted tree to Graphviz DOT format (tree.dot).
export_graphviz(
    tree_clf,
    out_file="tree.dot",
    feature_names=['LBE', 'LB', 'AC', 'FM', 'UC', 'DL',
                   'DS', 'DP', 'DR'],
    # class_names must be a sequence of labels in ascending class order
    # (NSP 1, 2, 3). A bare string is either indexed character by character
    # or rejected by parameter validation, depending on the sklearn version.
    # Assumes all three NSP classes occur in the training split.
    class_names=["Normal", "Suspect", "Pathologic"],
    rounded=True,
    filled=True,
)

In [None]:
from subprocess import check_call

# Render tree.dot to tree.png by invoking the `dot` executable;
# check_call raises CalledProcessError on a non-zero exit status.
dot_command = ['dot', '-Tpng', 'tree.dot', '-o', 'tree.png']
check_call(dot_command)

# Using the ensemble technique

In [None]:
from sklearn.ensemble import VotingClassifier, RandomForestClassifier

In [None]:
# Base learners for the ensemble. Each one has a stochastic component
# (SVC's internal cross-validation when probability=True, tie-breaking in
# the tree, bootstrapping and feature sampling in the forest), so every
# model gets a fixed random_state to keep the accuracy comparison
# reproducible.
svm_clf=SVC(kernel="poly",degree=6,coef0=5,gamma=0.1,probability=True,random_state=42)
decision_tree=DecisionTreeClassifier(min_samples_split=6, min_samples_leaf=4, max_depth=6, random_state=42)
rnd_clf=RandomForestClassifier(random_state=42)
# Hard voting: the ensemble predicts the majority class among the three models.
voting_clf=VotingClassifier(
    estimators=[("svm",svm_clf),("decision_tree",decision_tree),('rf',rnd_clf)],
    voting="hard",
)

In [None]:
# Train the voting ensemble on the training split.
voting_clf.fit(X=X_train, y=y_train)

In [None]:
# Fit each model on the training split and print its class name next to
# its accuracy on the held-out split.
for clf in (decision_tree, rnd_clf, svm_clf, voting_clf):
    y_pred = clf.fit(X_train, y_train).predict(X_test)
    model_name = type(clf).__name__
    print(model_name, accuracy_score(y_test, y_pred))

## As we can observe, there is a slight increase in the overall accuracy when using the ensemble model.