In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Read the maternal health CSV from the working directory and preview the
# first ten rows.
DATA_PATH = 'Maternal Health Risk Data Set.csv'
data = pd.read_csv(DATA_PATH)
data.head(n=10)

In [None]:
# Column names, non-null counts and dtypes -- a quick check for missing values.
data.info()

In [None]:
# Summary statistics (count, mean, std, quartiles) for the numeric columns.
data.describe()

# Checking the dataset distribution

In [None]:
# Distinct values of the target column.
data['RiskLevel'].unique()

In [None]:
# Share of rows per risk level: per-class counts divided by the total row count.
risk_counts = data['RiskLevel'].value_counts()
total_rows = len(data)
risk_counts / total_rows

In [None]:
# Pie chart of the class balance, with each wedge labelled by its percentage.
risk_counts = data['RiskLevel'].value_counts()
risk_counts.plot.pie(figsize=(8, 8), autopct='%.2f')

# Separating dependent and independent variables

In [None]:
# Target is the RiskLevel column; the features are every other column.
TARGET_COLUMN = 'RiskLevel'
Y = data[TARGET_COLUMN]
X = data.drop(columns=[TARGET_COLUMN])

In [None]:
# (rows, columns) of the feature frame.
X.shape

In [None]:
# Length of the target series; should match the row count of X.
Y.shape

In [None]:
# Pairwise relationships between the columns of `data`, coloured by risk level.
sns.pairplot(data,hue='RiskLevel')

In [None]:
# Number of records per age, split by risk level.
plt.figure(figsize=(15, 6))
# seaborn >= 0.12 accepts only `data` positionally, so the column to count
# must be passed as the `x` keyword.
sns.countplot(x='Age', hue='RiskLevel', data=data)

In [None]:
# Correlation matrix of the numeric features.
# RiskLevel holds strings; pandas >= 2.0 raises if non-numeric columns reach
# `corr()`, so restrict the frame to numeric dtypes first.
correlation = data.select_dtypes(include='number').corr()
plt.figure(figsize=(12, 8))
sns.heatmap(
    correlation,
    xticklabels=correlation.columns,
    yticklabels=correlation.columns,
    annot=True,
)

In [None]:
# Diastolic vs systolic blood pressure, coloured by risk level.
# x/y are keyword-only in seaborn >= 0.12.
sns.scatterplot(x='DiastolicBP', y='SystolicBP', hue='RiskLevel', data=data)

In [None]:
# BS values per risk level.
# x/y are keyword-only in seaborn >= 0.12.
sns.scatterplot(x='RiskLevel', y='BS', hue='RiskLevel', data=data)

In [None]:
# BS against age, coloured by risk level.
# x/y are keyword-only in seaborn >= 0.12.
sns.scatterplot(x='Age', y='BS', hue='RiskLevel', data=data)

In [None]:
# Box plots of each blood-pressure measurement grouped by risk level.
systolic_grid = sns.catplot(data=data, x="RiskLevel", y="SystolicBP", kind="box")
systolic_grid.set(title="Distribution based on SystolicBP")

diastolic_grid = sns.catplot(data=data, x="RiskLevel", y="DiastolicBP", kind="box")
diastolic_grid.set(title="Distribution based on DiastolicBP")

# Scaling the dataset

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardise every feature column; the scaler is fitted on all rows of X.
scaler = StandardScaler()
scaler.fit(X)
scaled_X = scaler.transform(X)

# Splitting the dataset

In [None]:
from sklearn.model_selection import train_test_split as tts
from sklearn.preprocessing import StandardScaler

# Split the RAW features first (75/25, stratified on the target so each split
# keeps the class balance). The row assignment depends only on the number of
# rows, the stratification labels and random_state, so it is the same as a
# split of the pre-scaled matrix would give.
x_train_raw, x_test_raw, y_train, y_test = tts(
    X, Y, train_size=0.75, stratify=Y, random_state=101
)

# Fit the scaler on the training rows only and reuse it for the test rows.
# Fitting on the full data set would let test-set means and variances leak
# into the features the models are trained on.
scaler = StandardScaler().fit(x_train_raw)
x_train = scaler.transform(x_train_raw)
x_test = scaler.transform(x_test_raw)

x_train.shape, x_test.shape, y_train.shape, y_test.shape

In [None]:
# Total number of elements in the training array (rows x columns), not the row count.
x_train.size

In [None]:
# Total number of elements in the test array (rows x columns), not the row count.
x_test.size

# Model Building, predictions and odds ratio

# Logistic Regression 

In [None]:
from sklearn.linear_model import LogisticRegression as LR

# class_weight='balanced' weights each class inversely to its frequency in
# y_train, so the less common risk levels are not under-weighted.
classifier = LR(class_weight='balanced')
classifier.fit(x_train, y_train)

predicted_values = classifier.predict(x_test)               # hard class labels
predicted_probabilities = classifier.predict_proba(x_test)  # one column per class

# Only a cell's last expression is rendered, so a bare `predicted_values`
# line in the middle of the cell shows nothing. Preview the first rows of both
# results, plus the probability-matrix shape, instead of dumping whole arrays.
predicted_values[:10], predicted_probabilities[:10], predicted_probabilities.shape

In [None]:
# Number of training labels.
y_train.shape

In [None]:
# Number of predictions; should equal the number of test labels.
predicted_values.shape

In [None]:
# Number of test labels.
y_test.shape

1. Confusion matrix (Logistic Regression)

In [None]:
from sklearn.metrics import confusion_matrix

# Rows are the true classes, columns the predicted classes.
cf = confusion_matrix(y_true=y_test, y_pred=predicted_values)
cf

1(a) Accuracy score

In [None]:
# Mean accuracy of the logistic regression on the held-out test split.
classifier.score(x_test,y_test)

1(b) Calculating precision, recall, F1 score and support

In [None]:
from sklearn.metrics import precision_recall_fscore_support as PRF_summary

# Per-class metrics: each of the four results is an array with one entry per class.
prf_result = PRF_summary(y_test, predicted_values)
precision, recall, f1, support = prf_result

In [None]:
# Per-class precision of the logistic regression.
precision

In [None]:
# Per-class recall of the logistic regression.
recall

In [None]:
# Per-class F1 score of the logistic regression.
f1

In [None]:
# Number of true test samples in each class.
support

In [None]:
# Confusion-matrix heatmap for the logistic regression.
# Passing `labels` pins the row/column order to classifier.classes_, so the
# tick labels are guaranteed to line up with the cells.
logreg_cm = confusion_matrix(y_test, predicted_values, labels=classifier.classes_)
# fmt='d' prints integer counts; the default '.2g' shows counts >= 100 in
# scientific notation (e.g. 1e+02).
ax = sns.heatmap(
    logreg_cm,
    annot=True,
    fmt='d',
    xticklabels=classifier.classes_,
    yticklabels=classifier.classes_,
)
ax.set(xlabel='Predicted label', ylabel='True label', title='Logistic Regression');

In [None]:
from sklearn.metrics import classification_report

# Text summary of per-class precision, recall, F1 and support for the
# logistic regression predictions.
k = classification_report(y_true=y_test, y_pred=predicted_values)
print(k)

# Decision Tree

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Baseline decision tree with default hyper-parameters.
# A fixed random_state makes tie-breaking between equally good splits
# deterministic, so the metrics are the same on every Restart & Run All.
model = DecisionTreeClassifier(random_state=101)

In [None]:
# Train the baseline decision tree on the training split.
model.fit(x_train, y_train)

In [None]:
# Predicted risk level for each test row.
pred= model.predict(x_test)

In [None]:
# Confusion-matrix heatmap for the baseline decision tree.
# Passing `labels` pins the row/column order to model.classes_, so the tick
# labels are guaranteed to line up with the cells.
tree_cm = confusion_matrix(y_test, pred, labels=model.classes_)
# fmt='d' prints integer counts; the default '.2g' shows counts >= 100 in
# scientific notation (e.g. 1e+02).
ax = sns.heatmap(
    tree_cm,
    annot=True,
    fmt='d',
    xticklabels=model.classes_,
    yticklabels=model.classes_,
)
ax.set(xlabel='Predicted label', ylabel='True label', title='Decision Tree');

In [None]:
# Per-class precision, recall, F1 and support for the baseline decision tree.
print(classification_report(y_test,pred))

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardise again using statistics learned from x_train only, then apply
# the same transform to x_test. Both arrays are overwritten in place, so
# every model fitted after this cell sees the re-standardised features.
scale = StandardScaler()
scale.fit(x_train)
x_train = scale.transform(x_train)
x_test = scale.transform(x_test)

In [None]:
# Preview the first rows only; dumping the whole training array floods the
# notebook output without adding information.
x_train[:5]

In [None]:
# Seeded decision tree using the Gini impurity split criterion.
modelDTC = DecisionTreeClassifier(random_state=101, criterion='gini')
modelDTC.fit(x_train, y_train)

In [None]:
# Test-set predictions of the seeded decision tree.
y_pred= modelDTC.predict(x_test)

In [None]:
# `plot_confusion_matrix` was deprecated in scikit-learn 1.0 and removed in
# 1.2. Import it when it exists; otherwise define a thin wrapper with the same
# call shape so the later `plot_confusion_matrix(...)` cells keep working.
try:
    from sklearn.metrics import plot_confusion_matrix
except ImportError:
    from sklearn.metrics import ConfusionMatrixDisplay

    def plot_confusion_matrix(estimator, X, y_true, **kwargs):
        """Plot the confusion matrix of a fitted `estimator` on (`X`, `y_true`).

        Forwards all arguments to `ConfusionMatrixDisplay.from_estimator` and
        returns the resulting display object.
        """
        return ConfusionMatrixDisplay.from_estimator(estimator, X, y_true, **kwargs)

In [None]:
# Confusion matrix of the seeded decision tree on the test split.
# ConfusionMatrixDisplay.from_estimator replaces plot_confusion_matrix, which
# was removed in scikit-learn 1.2.
from sklearn.metrics import ConfusionMatrixDisplay
ConfusionMatrixDisplay.from_estimator(modelDTC, x_test, y_test)

In [None]:
# Per-class precision, recall, F1 and support for the seeded decision tree.
print(classification_report(y_test,y_pred))

# Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Random forest with the Gini split criterion and default size/depth.
# Bootstrap sampling and feature sub-sampling are random; fixing random_state
# makes the reported metrics reproducible across runs.
RF = RandomForestClassifier(criterion='gini', random_state=101)
RF.fit(x_train, y_train)

In [None]:
# Test-set predictions of the default random forest.
predict=RF.predict(x_test)

In [None]:
# Confusion matrix of the default random forest on the test split.
# ConfusionMatrixDisplay.from_estimator replaces plot_confusion_matrix, which
# was removed in scikit-learn 1.2.
from sklearn.metrics import ConfusionMatrixDisplay
ConfusionMatrixDisplay.from_estimator(RF, x_test, y_test)

In [None]:
# Per-class precision, recall, F1 and support for the default random forest.
print(classification_report(y_test,predict))

In [None]:
# Smaller, depth-limited forest using the entropy split criterion.
# max_features='auto' was deprecated in scikit-learn 1.1 and removed in 1.3;
# for classifiers it meant 'sqrt', so 'sqrt' keeps the same behaviour.
# random_state fixes the bootstrap/feature sampling for reproducible metrics.
RF2 = RandomForestClassifier(
    criterion='entropy',
    max_depth=7,
    max_features='sqrt',
    n_estimators=50,
    random_state=101,
)

In [None]:
# Train the depth-limited forest on the training split.
RF2.fit(x_train,y_train)

In [None]:
# Test-set predictions of the depth-limited forest.
rf_pred=RF2.predict(x_test)

In [None]:
# Per-class precision, recall, F1 and support for the depth-limited forest.
print(classification_report(y_test,rf_pred))

In [None]:
# Confusion matrix of the depth-limited forest on the test split.
# ConfusionMatrixDisplay.from_estimator replaces plot_confusion_matrix, which
# was removed in scikit-learn 1.2.
from sklearn.metrics import ConfusionMatrixDisplay
ConfusionMatrixDisplay.from_estimator(RF2, x_test, y_test)

# Support Vector

In [None]:
from sklearn.svm import SVC

In [None]:
# Support vector classifier with a polynomial kernel (other settings left at
# their defaults), trained on the training split.
svc= SVC(kernel='poly')
svc.fit(x_train,y_train)

In [None]:
# Test-set predictions of the support vector classifier.
svc_pred=svc.predict(x_test)

In [None]:
# Per-class precision, recall, F1 and support for the support vector classifier.
print(classification_report(y_test,svc_pred))

In [None]:
# Confusion matrix of the support vector classifier on the test split.
# ConfusionMatrixDisplay.from_estimator replaces plot_confusion_matrix, which
# was removed in scikit-learn 1.2.
from sklearn.metrics import ConfusionMatrixDisplay
ConfusionMatrixDisplay.from_estimator(svc, x_test, y_test)