In [None]:
#Importing necessary libraries and loading data

import numpy as np
import pandas as pd
import tensorflow as tf
import xgboost as xgb
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.models import Sequential

# Load the dataset and separate the feature columns from the 'RiskLevel' target.
data = pd.read_csv(r"/content/Maternal Health Risk Data Set.csv")

X = data.drop('RiskLevel', axis=1)
y = data['RiskLevel']

# Encode the three risk labels as the integers 0, 1 and 2.
label_mapping = {'low risk': 0, 'mid risk': 1, 'high risk': 2}
y = y.map(label_mapping)

# Series.map turns every label that is missing from label_mapping into NaN.
# Fail fast here instead of letting NaN targets reach the classifiers.
if y.isna().any():
    unexpected = sorted(data.loc[y.isna(), 'RiskLevel'].astype(str).unique())
    raise ValueError(f"Unmapped RiskLevel values: {unexpected}")

# Standardise the features; X becomes a NumPy array from here on.
# NOTE(review): the scaler is fitted on all rows, before train_test_split is
# called, so the rows later used for validation contribute to the scaling
# statistics. Consider fitting the scaler on the training rows only.
scaler = StandardScaler()
X = scaler.fit_transform(X)
print(X)
print(y)

[[-0.36173812  0.91339632  0.25502279  1.90589019 -0.4852155   1.44695615]
 [ 0.38077697  1.45702716  0.97553854  1.29833966 -0.4852155  -0.53208757]
 [-0.06473208 -1.26112705 -0.46549297 -0.22053665  0.97388449  0.70481475]
 ...
 [ 0.38077697 -1.53294248 -1.18600873  3.12099124 -0.4852155   1.44695615]
 [ 0.97478904  0.36976548  0.97553854  2.81721597 -0.4852155  -0.53208757]
 [ 0.15802244  0.36976548 -0.82575085 -0.82808717  1.70343448  0.21005383]]
0       2
1       2
2       2
3       2
4       0
       ..
1009    2
1010    2
1011    2
1012    2
1013    1
Name: RiskLevel, Length: 1014, dtype: int64


In [None]:
# Quick exploratory overview of the raw DataFrame.
print("Shape of the DataFrame:", data.shape)

print("\nInformation about the DataFrame:")
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() appended a stray "None" line.
data.info()

print("\nDescriptive statistics:")
print(data.describe())

print("\nFirst 5 rows of the DataFrame:")
print(data.head())

print("\nLast 5 rows of the DataFrame:")
print(data.tail())

print("\nColumn labels of the DataFrame:")
print(data.columns)

print("\nNumber of null values in each column:")
print(data.isnull().sum())

# Class balance of the target column.
print("\nFrequency counts of unique values in the 'RiskLevel' column:")
print(data['RiskLevel'].value_counts())

Shape of the DataFrame: (1014, 7)

Information about the DataFrame:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1014 entries, 0 to 1013
Data columns (total 7 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Age          1014 non-null   int64  
 1   SystolicBP   1014 non-null   int64  
 2   DiastolicBP  1014 non-null   int64  
 3   BS           1014 non-null   float64
 4   BodyTemp     1014 non-null   float64
 5   HeartRate    1014 non-null   int64  
 6   RiskLevel    1014 non-null   object 
dtypes: float64(2), int64(4), object(1)
memory usage: 55.6+ KB
None

Descriptive statistics:
               Age   SystolicBP  DiastolicBP           BS     BodyTemp  \
count  1014.000000  1014.000000  1014.000000  1014.000000  1014.000000   
mean     29.871795   113.198225    76.460552     8.725986    98.665089   
std      13.474386    18.403913    13.885796     3.293532     1.371384   
min      10.000000    70.000000    49.000000     6.000000  

In [None]:
# Hold out 20% of the rows as a validation set; the fixed random_state keeps
# the partition identical across runs.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Define and train the three base models: XGBoost, Random Forest and a
# fully connected Keras network.

# Seed every source of randomness so a fresh "Run All" gives repeatable models.
# The value matches the random_state used for the train/validation split.
# set_random_seed seeds the Python, NumPy and TensorFlow generators, and must
# run before the network is built so that weight initialisation is covered.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

xgb_model = xgb.XGBClassifier(random_state=SEED)
rf_model = RandomForestClassifier(random_state=SEED)

# Three hidden layers of 128 ReLU units followed by a 3-unit softmax output.
tf_model = Sequential()
tf_model.add(Dense(128, input_dim=X_train.shape[1], activation='relu'))
tf_model.add(Dense(128, activation='relu'))
tf_model.add(Dense(128, activation='relu'))
tf_model.add(Dense(3, activation='softmax'))

# The sparse loss lets the targets stay as integer class ids instead of one-hot vectors.
tf_model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

xgb_model.fit(X_train, y_train)
rf_model.fit(X_train, y_train)
tf_model.fit(X_train, y_train, epochs=150, batch_size=32)

Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/150
Epoch 75/150
Epoch 76/150
Epoch 77/150
Epoch 78

<keras.src.callbacks.History at 0x7a6778711630>

In [None]:
# Predict on the validation data with each trained base model.

xgb_preds = xgb_model.predict(X_val)
rf_preds = rf_model.predict(X_val)

# The network ends in a 3-unit softmax layer, so predict() returns one
# probability per class for every row.
tf_probs = tf_model.predict(X_val)

# Convert the probabilities to a class id by taking the most probable class.
# Rounding the probabilities instead yields three separate 0/1 flags per row
# (all zeros whenever no class exceeds 0.5), not a class label.
# The result is kept as an (n_samples, 1) column so it can be concatenated
# column-wise with the other 2-D feature arrays.
tf_preds = np.argmax(tf_probs, axis=1).reshape(-1, 1)



In [None]:
# Build the meta-learner's feature matrix: the validation features followed by
# the XGBoost, Random Forest and TensorFlow predictions as extra columns.
# column_stack turns the 1-D prediction vectors into columns before joining.
meta_X = np.column_stack((X_val, xgb_preds, rf_preds, tf_preds))

In [None]:
# Tune and train the second-level (meta) model on the stacked feature set.

meta_model = xgb.XGBClassifier()

# Hyperparameter grid searched exhaustively: 3 * 2 * 3 = 18 combinations.
params = {
    'max_depth': [3, 5, 7],
    'learning_rate': [0.1, 0.01],
    'n_estimators': [100, 200, 300],
}

# 5-fold cross-validated search, ranking candidates by accuracy.
grid_search = GridSearchCV(
    estimator=meta_model,
    param_grid=params,
    cv=5,
    scoring='accuracy',
)
grid_search.fit(meta_X, y_val)

# Best-scoring configuration found by the search.
best_meta_model = grid_search.best_estimator_


In [None]:
# Out-of-fold predictions of the tuned meta-learner on the validation data.
#
# best_meta_model was fitted by the grid search on (meta_X, y_val), so calling
# best_meta_model.predict(meta_X) would score it on its own training rows and
# overstate its quality. cross_val_predict instead refits a clone on four
# folds and predicts the held-out fold, so every row is predicted by a model
# that never saw it.
# NOTE(review): the hyperparameters were still selected on these same rows, so
# the resulting scores remain somewhat optimistic; a separate test set would
# give a fully unbiased estimate.
meta_preds = cross_val_predict(best_meta_model, meta_X, y_val, cv=5)


In [None]:
# Per-class precision, recall and F1 of the meta-learner's predictions
# against the true validation labels.

print(classification_report(y_true=y_val, y_pred=meta_preds))


              precision    recall  f1-score   support

           0       0.99      0.93      0.95        80
           1       0.94      0.95      0.94        76
           2       0.92      1.00      0.96        47

    accuracy                           0.95       203
   macro avg       0.95      0.96      0.95       203
weighted avg       0.95      0.95      0.95       203

