# Maternal Health Risk using EDA and machine learning


Many pregnant women die from pregnancy-related complications because of a lack of information about maternal health care during and after pregnancy. This is more common in rural regions and among lower-middle-class families in emerging countries. Throughout pregnancy, the mother's condition should be monitored closely to ensure the healthy growth of the baby and a safe delivery.

<img src="https://miro.medium.com/max/400/1*rmj7B0EumeMHL4SiW6tD5Q.gif" width="600px">



# Classification with 5 Algorithms

We use dataprep for exploratory data analysis (EDA) and compare 5 classification algorithms:



* Support Vector Machine (RBF kernel)
* XGBoost
* Decision Tree
* Random Forest
* Gaussian Naive Bayes



Data Set Information:


The data was collected from different hospitals, community clinics, and maternal health care centers through an IoT-based risk monitoring system.

* Age: Age in years when a woman is pregnant.
* SystolicBP: Upper value of blood pressure in mmHg, a significant attribute during pregnancy.
* DiastolicBP: Lower value of blood pressure in mmHg, another significant attribute during pregnancy.
* BS: Blood glucose level, expressed as a molar concentration in mmol/L.
* BodyTemp: Body temperature (this column is part of the data set and is used in the analysis below).
* HeartRate: A normal resting heart rate in beats per minute.
* RiskLevel: Predicted risk intensity level during pregnancy, considering the previous attributes.


In [None]:
pip install dataprep

In [None]:
import pandas as pd
import seaborn as sns
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.metrics import classification_report

In [None]:
# Load the maternal health risk CSV from the Kaggle input directory
# and preview the first rows.
df = pd.read_csv('../input/maternal-health-risk-data/Maternal Health Risk Data Set.csv')
df.head()

In [None]:
# Preview the last rows of the data set.
df.tail()

In [None]:
# Data type of each column.
df.dtypes

In [None]:
# Column summary: non-null counts, dtypes and memory usage.
df.info()

In [None]:
# (rows, columns) of the data as loaded, before duplicates are dropped.
df.shape

In [None]:
# Missing values: number of nulls in each column
df.isnull().sum()

In [None]:
# Duplicated values: True if at least one row is an exact copy of an earlier row
data_dup = df.duplicated().any()
data_dup

In [None]:
# Remove duplicated rows, keeping the first occurrence of each.
df = df.drop_duplicates()

In [None]:
# Re-run the duplicate check on the de-duplicated frame.
data_dup = df.duplicated().any()
data_dup

In [None]:
# Split the columns by cardinality: a column with at most 10 distinct
# values is treated as categorical (ca_val), every other column as
# continuous (co_val).
ca_val, co_val = [], []

for column in df.columns:
    bucket = ca_val if df[column].nunique() <= 10 else co_val
    bucket.append(column)

In [None]:
# Categorical data: names of the columns with at most 10 distinct values
ca_val

In [None]:
# Distinct values of the BodyTemp column.
df['BodyTemp'].unique()

In [None]:
# Distinct values of the target column.
df['RiskLevel'].unique()

In [None]:
# Continuous data: names of the columns with more than 10 distinct values
co_val

In [None]:
# Distinct values of the Age column.
df['Age'].unique()

In [None]:
# Distinct values of the SystolicBP column.
df['SystolicBP'].unique()

In [None]:
# Distinct values of the DiastolicBP column.
df['DiastolicBP'].unique()

In [None]:
# Distinct values of the BS (blood glucose) column.
df['BS'].unique()

In [None]:
# Distinct values of the HeartRate column.
df['HeartRate'].unique()

In [None]:
# Ordinal encoding of the target: a larger number means a higher risk.
RiskLevel = {'low risk': 1, 'mid risk': 2, 'high risk': 3}

# Replace each label by its code via map; a label that is not a key of the
# mapping becomes NaN. The encoded column is stored as float.
encoded_risk = df['RiskLevel'].map(RiskLevel)
df['RiskLevel'] = encoded_risk.astype(float)
df

In [None]:
# Number of rows for each RiskLevel value.
df['RiskLevel'].value_counts()

In [None]:
# Summary statistics (count, mean, std, quartiles, min/max) per column.
df.describe()

In [None]:
# Pairwise correlation between RiskLevel and Age.
df[['RiskLevel','Age']].corr()

In [None]:
# Pairwise correlation between RiskLevel and SystolicBP.
df[['RiskLevel','SystolicBP']].corr()

In [None]:
# Pairwise correlation between RiskLevel and DiastolicBP.
df[['RiskLevel','DiastolicBP']].corr()

In [None]:
# Pairwise correlation between RiskLevel and BS.
df[['RiskLevel','BS']].corr()

In [None]:
# Pairwise correlation between RiskLevel and HeartRate.
df[['RiskLevel','HeartRate']].corr()

In [None]:
# dataprep EDA plot of the whole frame (no column selected).
from dataprep.eda import plot
plot(df)

In [None]:
# dataprep EDA plot focused on the Age column.
plot(df,'Age')

In [None]:
# dataprep EDA plot focused on the SystolicBP column.
plot(df,'SystolicBP')

In [None]:
# dataprep EDA plot focused on the DiastolicBP column.
plot(df,'DiastolicBP')

In [None]:
# dataprep EDA plot focused on the BS column.
plot(df,'BS')

In [None]:
# dataprep EDA plot focused on the HeartRate column.
plot(df,'HeartRate')

In [None]:
# dataprep EDA plot focused on the RiskLevel (target) column.
plot(df,'RiskLevel')

In [None]:
# Display the current state of the frame.
df


In [None]:
# dataprep correlation plot across the columns of df.
from dataprep.eda import plot_correlation
plot_correlation(df)

In [None]:
# Histogram of every column (50 bins each) on one 20x15 inch figure.
import matplotlib.pyplot as plt
df.hist(bins=50, figsize=(20, 15))
plt.show()

In [None]:
# Class balance of the target, as absolute counts and as percentages of
# all rows. Only the last expression of a notebook cell is displayed, so
# the two tables are printed explicitly; as bare expressions they would be
# computed and then silently discarded.
risk_counts = df['RiskLevel'].value_counts()
print(risk_counts)
print(risk_counts * 100 / len(df))

# Bar chart of the same counts.
sns.countplot(x='RiskLevel', data=df, palette='viridis')

In [None]:
# Features: every column except the target.
X = df.drop(columns=['RiskLevel'])
# Target: the RiskLevel column on its own.
y = df.loc[:, 'RiskLevel']

In [None]:
# Display the feature matrix.
X

In [None]:
# Display the target vector.
y

In [None]:
# Splitting the dataset: 30% of the rows are held out for testing, and the
# fixed random_state makes the split identical on every run.
splits = train_test_split(X, y, test_size=0.3, random_state=42)
X_train, X_test, y_train, y_test = splits

y_test

In [None]:
# Report the sizes of the train/test partitions.
print('Training Shape x:', X_train.shape)
print('Testing Shape x:', X_test.shape)
print('*****___________*****___________*****')
# The y lines must report the target partitions (y_train / y_test),
# not the shapes of the unsplit X and y.
print('Training Shape y:', y_train.shape)
print('Testing Shape y:', y_test.shape)

In [None]:
# StandardScaler: the scaler is fitted on the training features only, and
# the statistics learned there are applied to both partitions, so nothing
# from the test set influences the scaling.
ss = StandardScaler().fit(X_train)

X_train, X_test = ss.transform(X_train), ss.transform(X_test)


In [None]:
# Support Vector Classification with an RBF kernel
from sklearn.svm import SVC

# Build and fit the classifier on the scaled training set.
svm = SVC(C=1.0, kernel='rbf', gamma=.10, random_state=0)
svm.fit(X_train, y_train)

print("Train accuracy:", svm.score(X_train, y_train))
print("Test accuracy:", svm.score(X_test, y_test))

# Predict the test set once and reuse the result in every report below.
y_pred = svm.predict(X_test)
print(y_pred)

cm = confusion_matrix(y_test, y_pred)
print('CM:', cm)
print('Accuracy:', accuracy_score(y_test, y_pred) * 100, '%')
print(classification_report(y_test, y_pred))

In [None]:
# Applying XGBoost
import xgboost as xgb
from sklearn.preprocessing import LabelEncoder

# Recent XGBoost releases reject class labels that are not the consecutive
# integers 0..n_classes-1, while RiskLevel is encoded as 1.0/2.0/3.0.
# The target is therefore re-encoded for this model only, and predictions
# are mapped back to the original codes before they are reported.
label_encoder = LabelEncoder()
y_train_xgb = label_encoder.fit_transform(y_train)
y_test_xgb = label_encoder.transform(y_test)

# Create and fit the XGBClassifier with its default hyper-parameters.
xgb_clf = xgb.XGBClassifier()
xgb_clf = xgb_clf.fit(X_train, y_train_xgb)

print("Train accuracy:", xgb_clf.score(X_train, y_train_xgb))
print("Test accuracy:", xgb_clf.score(X_test, y_test_xgb))

# Predict once, translate back to the original RiskLevel codes, and reuse
# the result in every report below.
y_pred = label_encoder.inverse_transform(xgb_clf.predict(X_test))
print(y_pred)
cm = confusion_matrix(y_test, y_pred)
print('CM:', cm)
print('Accuracy:', accuracy_score(y_test, y_pred) * 100, '%')
print(classification_report(y_test, y_pred))


In [None]:
# Applying Decision Tree
from sklearn import tree

# Create the tree. random_state is fixed so that tie-breaking between
# equally good splits is reproducible; without it the reported scores
# can change from run to run even though the train/test split is seeded.
decision_tree = tree.DecisionTreeClassifier(criterion='gini', random_state=42)

# Train DT based on scaled training set
decision_tree.fit(X_train, y_train)

print("Train accuracy:", decision_tree.score(X_train, y_train))
print("Test accuracy:", decision_tree.score(X_test, y_test))

# Predict the test set once and reuse the result in every report below.
y_pred = decision_tree.predict(X_test)
print(y_pred)
cm = confusion_matrix(y_test, y_pred)
print('CM:', cm)
print('Accuracy:', accuracy_score(y_test, y_pred) * 100, '%')
print(classification_report(y_test, y_pred))

In [None]:
# Applying RandomForest
from sklearn.ensemble import RandomForestClassifier

# Create the forest. Bootstrap sampling and feature sub-sampling are
# random, so random_state is fixed to make the reported scores
# reproducible, in line with the seeded train/test split.
random_forest = RandomForestClassifier(random_state=42)

# Train model
random_forest.fit(X_train, y_train)

print("Train accuracy:", random_forest.score(X_train, y_train))
print("Test accuracy:", random_forest.score(X_test, y_test))

# Predict the test set once and reuse the result in every report below.
y_pred = random_forest.predict(X_test)
print(y_pred)
cm = confusion_matrix(y_test, y_pred)
print('CM:', cm)
print('Accuracy:', accuracy_score(y_test, y_pred) * 100, '%')
print(classification_report(y_test, y_pred))

In [None]:
# Gaussian Naive Bayes
from sklearn.naive_bayes import GaussianNB

# Fit the classifier on the scaled training set.
nb = GaussianNB()
nb.fit(X_train, y_train)

print("Train accuracy:", nb.score(X_train, y_train))
print("Test accuracy:", nb.score(X_test, y_test))

# Predict the test set once and reuse the result in every report below.
y_pred = nb.predict(X_test)
print(y_pred)

cm = confusion_matrix(y_test, y_pred)
print('CM:', cm)
print('Accuracy:', accuracy_score(y_test, y_pred) * 100, '%')
print(classification_report(y_test, y_pred))