# **Backward Propagation Classification - Healthcare Prediction**

## **Importing Libraries**

In [251]:
import pandas as pd
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
import tensorflow
import keras
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import KNNImputer, SimpleImputer
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder, MinMaxScaler
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
from sklearn.metrics import accuracy_score, classification_report

## **Reading Data and Header View**

In [252]:
# Load the raw healthcare records from a CSV file (path is relative to the
# notebook's working directory).
df = pd.read_csv('healthcare_dataset.csv')
# Preview the first rows as a quick sanity check on the parsed columns.
df.head()

Unnamed: 0,Name,Age,Gender,Blood Type,Medical Condition,Date of Admission,Doctor,Hospital,Insurance Provider,Billing Amount,Room Number,Admission Type,Discharge Date,Medication,Test Results
0,Bobby JacksOn,30,Male,B-,Cancer,2024-01-31,Matthew Smith,Sons and Miller,Blue Cross,18856.281306,328,Urgent,2024-02-02,Paracetamol,Normal
1,LesLie TErRy,62,Male,A+,Obesity,2019-08-20,Samantha Davies,Kim Inc,Medicare,33643.327287,265,Emergency,2019-08-26,Ibuprofen,Inconclusive
2,DaNnY sMitH,76,Female,A-,Obesity,2022-09-22,Tiffany Mitchell,Cook PLC,Aetna,27955.096079,205,Emergency,2022-10-07,Aspirin,Normal
3,andrEw waTtS,28,Female,O+,Diabetes,2020-11-18,Kevin Wells,"Hernandez Rogers and Vang,",Medicare,37909.78241,450,Elective,2020-12-18,Ibuprofen,Abnormal
4,adrIENNE bEll,43,Female,AB+,Cancer,2022-09-19,Kathleen Hanna,White-White,Aetna,14238.317814,458,Urgent,2022-10-09,Penicillin,Abnormal


## **Shape of Data**

In [253]:
# (rows, columns) of the loaded frame.
df.shape

(55500, 15)

## **Info about Data**

In [254]:
# Per-column dtype and non-null count, plus the frame's memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 55500 entries, 0 to 55499
Data columns (total 15 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Name                55500 non-null  object 
 1   Age                 55500 non-null  int64  
 2   Gender              55500 non-null  object 
 3   Blood Type          55500 non-null  object 
 4   Medical Condition   55500 non-null  object 
 5   Date of Admission   55500 non-null  object 
 6   Doctor              55500 non-null  object 
 7   Hospital            55500 non-null  object 
 8   Insurance Provider  55500 non-null  object 
 9   Billing Amount      55500 non-null  float64
 10  Room Number         55500 non-null  int64  
 11  Admission Type      55500 non-null  object 
 12  Discharge Date      55500 non-null  object 
 13  Medication          55500 non-null  object 
 14  Test Results        55500 non-null  object 
dtypes: float64(1), int64(2), object(12)
memory usage: 6.4

## **Null Values Distribution**

In [255]:
# Number of missing values in each column.
df.isnull().sum()

Name                  0
Age                   0
Gender                0
Blood Type            0
Medical Condition     0
Date of Admission     0
Doctor                0
Hospital              0
Insurance Provider    0
Billing Amount        0
Room Number           0
Admission Type        0
Discharge Date        0
Medication            0
Test Results          0
dtype: int64

## **Column Names**

In [256]:
# Column labels of the frame.
df.columns

Index(['Name', 'Age', 'Gender', 'Blood Type', 'Medical Condition',
       'Date of Admission', 'Doctor', 'Hospital', 'Insurance Provider',
       'Billing Amount', 'Room Number', 'Admission Type', 'Discharge Date',
       'Medication', 'Test Results'],
      dtype='object')

## **Target Column Value Counts**

In [257]:
# Frequency of each distinct 'Admission Type' value; this column is used as
# the prediction target in the train/test split further down.
df['Admission Type'].value_counts()

Admission Type
Elective     18655
Urgent       18576
Emergency    18269
Name: count, dtype: int64

## **Encoding Target Column**

In [258]:
# Replace each 'Admission Type' label with a numeric code. The encoder takes a
# one-column frame and returns a 2-D array, so the result is collapsed back to
# 1-D before it overwrites the original string column.
oe = OrdinalEncoder()
admission_codes = oe.fit_transform(df[['Admission Type']])
df['Admission Type'] = admission_codes.ravel()

In [259]:
# Re-check the class distribution now that the column holds numeric codes.
df['Admission Type'].value_counts()

Admission Type
0.0    18655
2.0    18576
1.0    18269
Name: count, dtype: int64

## **Train Test Split**

In [260]:
# Hold out 20% of the rows for testing. The name/date columns and the target
# itself are removed from the feature frame; the fixed random_state keeps the
# split reproducible between runs.
excluded_columns = ['Name', 'Date of Admission', 'Admission Type', 'Discharge Date']
features = df.drop(columns=excluded_columns)
target = df['Admission Type']

X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=42,
)

## **Separating Numerical and Categorical Columns**

In [261]:
# Columns are selected by name rather than by position, so each group stays
# correct even if the column order of the feature frame changes.
# ColumnTransformer accepts string labels when it is fitted on a DataFrame.
# The names below are the same columns, in the same order, that positions
# [0, 7, 8], [1..6] and [9, 10] referred to after the split's column drop.
numerical_columns = ['Age', 'Billing Amount', 'Room Number']
nominal_columns = [
    'Gender', 'Blood Type', 'Medical Condition',
    'Doctor', 'Hospital', 'Insurance Provider',
]
# NOTE(review): neither of these columns has an obvious inherent order; they
# stay in the "ordinal" group only to preserve the existing encoding choice.
ordinal_columns = ['Medication', 'Test Results']

## **Creating Pipelines**

In [262]:
# Numeric features: fill any gaps from the 5 nearest neighbours, then rescale
# every column with MinMaxScaler (default settings).
numeric_steps = [
    ('impute', KNNImputer(n_neighbors=5)),
    ('scale', MinMaxScaler()),
]
handle_numerical = Pipeline(numeric_steps)

In [263]:
# Nominal features: fill any gaps with the most frequent value, then one-hot
# encode.
# No category is dropped on purpose: with handle_unknown='ignore' a category
# that was not seen during fit is encoded as all zeros, and dropping the first
# category would give that baseline category the very same all-zero code,
# making "unknown" and "baseline" indistinguishable at transform time.
handle_nominal = Pipeline(steps=[
    ('impute', SimpleImputer(strategy='most_frequent')),
    ('encode', OneHotEncoder(handle_unknown='ignore'))
])

In [264]:
# Remaining categorical features: fill any gaps with the most frequent value,
# then replace each category with a numeric code.
# NOTE: the variable name is misspelled ("hanlde"); it is kept as-is because
# the ColumnTransformer cell refers to it by this exact name.
ordinal_steps = [
    ('impute', SimpleImputer(strategy='most_frequent')),
    ('encode', OrdinalEncoder()),
]
hanlde_ordinal = Pipeline(ordinal_steps)

## **Combining Pipelines into Transformers**

In [265]:
# Route each column group through its own pipeline; every entry is
# (step name, transformer, columns it applies to).
column_routes = [
    ('numerical', handle_numerical, numerical_columns),
    ('nominal', handle_nominal, nominal_columns),
    ('ordinal', hanlde_ordinal, ordinal_columns),
]
preprocessing = ColumnTransformer(column_routes)

## **Applying Transformation on Train and Test Data**

In [266]:
# Fit the preprocessing on the training features only, then apply the same
# fitted transformation to the test features.
# NOTE(review): both names are rebound to the transformed output, so this cell
# is not safe to re-run on its own -- re-run the train/test split cell first.
X_train = preprocessing.fit_transform(X_train)
X_test = preprocessing.transform(X_test)

## **Applying Model**

In [267]:
# Create an empty sequential model; its layers are appended one at a time below.
model = Sequential()

In [268]:
model.add(Dense(128,activation='sigmoid'))
model.add(Dense(64, activation='sigmoid'))
model.add(Dense(32, activation='sigmoid'))
model.add(Dense(16, activation='sigmoid'))
model.add(Dense(8, activation='sigmoid'))
model.add(Dense(1, activation='sigmoid')) 

In [269]:
model.compile(optimizer='Adam', loss='binary_crossentropy', metrics=['accuracy'])

In [270]:
history = model.fit(X_train, y_train, epochs=100, batch_size=1000, validation_split=0.2)

Epoch 1/100
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 111ms/step - accuracy: 0.3308 - loss: 0.7420 - val_accuracy: 0.3241 - val_loss: 0.5801
Epoch 2/100
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 98ms/step - accuracy: 0.3334 - loss: 0.5398 - val_accuracy: 0.3241 - val_loss: 0.4409
Epoch 3/100
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 98ms/step - accuracy: 0.3281 - loss: 0.4081 - val_accuracy: 0.3241 - val_loss: 0.3522
Epoch 4/100
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 98ms/step - accuracy: 0.3297 - loss: 0.3260 - val_accuracy: 0.3241 - val_loss: 0.2916
Epoch 5/100
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 99ms/step - accuracy: 0.3307 - loss: 0.2692 - val_accuracy: 0.3241 - val_loss: 0.2481
Epoch 6/100
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 104ms/step - accuracy: 0.3294 - loss: 0.2300 - val_accuracy: 0.3241 - val_loss: 0.2152
Epoch 7/100
[1m36/36[0m 

## **Prediction Through Model**

In [271]:
y_pred = model.predict(X_test)

[1m347/347[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step


## **Evaluating Model's Performance**

In [272]:
y_pred = (y_pred > 0.5).astype(int)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

         0.0       0.00      0.00      0.00      3664
         1.0       0.33      1.00      0.50      3656
         2.0       0.00      0.00      0.00      3780

    accuracy                           0.33     11100
   macro avg       0.11      0.33      0.17     11100
weighted avg       0.11      0.33      0.16     11100

