PREDICTING WHETHER A PATIENT WILL FACE COMPLICATIONS, USING THE TRAINED MODEL ON NEW RECORDS ENTERED AS INPUT

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score
# Seed NumPy's global random number generator for reproducibility
np.random.seed(42)
# Load the dataset; the relative path is resolved against the current working directory
combined_df = pd.read_csv("combined_synthetic_dataset.csv")
# Bare last expression: the notebook renders the frame as this cell's output
combined_df

Unnamed: 0,patient_id,age,gender,diagnosis,medications,treatment_plan,heart_rate,blood_pressure_systolic,blood_pressure_diastolic,oxygen_saturation,...,cholesterol_level,hemoglobin,white_blood_cell_count,ventilator_setting,dialysis_machine,cardiac_monitor,level_of_consciousness,breathing_rate,pulse_rate,outcome
0,1,69,1,1,0,2,98,135,78,88,...,201,12.484757,8.765534,0,0,1,1,22,90,1
1,2,32,1,0,3,1,91,158,76,90,...,212,13.756085,5.239948,2,1,1,0,16,86,0
2,3,89,0,3,0,2,76,168,92,92,...,223,16.186200,6.545394,1,0,0,2,18,76,0
3,4,78,1,1,3,1,94,187,71,89,...,212,12.333985,8.614250,1,0,0,2,19,82,1
4,5,38,1,2,1,0,96,140,62,74,...,166,16.711718,12.952196,3,1,0,1,12,72,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,996,27,0,4,0,0,79,148,84,86,...,211,14.351245,6.895513,0,1,0,1,19,70,0
996,997,51,0,0,0,1,77,172,75,85,...,203,16.360149,12.666945,0,1,0,2,18,93,1
997,998,72,0,2,1,1,97,102,78,80,...,209,13.125202,9.766583,3,1,0,0,16,75,0
998,999,49,1,2,0,0,62,165,60,91,...,195,14.383976,7.367358,3,1,0,2,14,113,0


In [2]:
# Check the first 5 rows
combined_df.head()


Unnamed: 0,patient_id,age,gender,diagnosis,medications,treatment_plan,heart_rate,blood_pressure_systolic,blood_pressure_diastolic,oxygen_saturation,...,cholesterol_level,hemoglobin,white_blood_cell_count,ventilator_setting,dialysis_machine,cardiac_monitor,level_of_consciousness,breathing_rate,pulse_rate,outcome
0,1,69,1,1,0,2,98,135,78,88,...,201,12.484757,8.765534,0,0,1,1,22,90,1
1,2,32,1,0,3,1,91,158,76,90,...,212,13.756085,5.239948,2,1,1,0,16,86,0
2,3,89,0,3,0,2,76,168,92,92,...,223,16.1862,6.545394,1,0,0,2,18,76,0
3,4,78,1,1,3,1,94,187,71,89,...,212,12.333985,8.61425,1,0,0,2,19,82,1
4,5,38,1,2,1,0,96,140,62,74,...,166,16.711718,12.952196,3,1,0,1,12,72,1


In [3]:
# PREPROCESSING

# Step 1: label-encode every object-dtype column in place. Each fitted encoder
# is kept in `label_encoders` (keyed by column name) so the same mapping can be
# re-applied to records entered later.
label_encoders = {}
object_columns = combined_df.select_dtypes(include=['object']).columns
for column in object_columns:
    encoder = LabelEncoder()
    label_encoders[column] = encoder
    combined_df[column] = encoder.fit_transform(combined_df[column])

In [4]:
# STEP 2: standardise the feature columns (everything except the identifier
# and the target) and keep the result as a DataFrame with the original names.
# NOTE(review): the scaler is fitted on all rows before the train/test split
# performed later, so test-set statistics influence the scaling -- confirm
# this is acceptable.
scaler = StandardScaler()
feature_frame = combined_df.drop(columns=['patient_id', 'outcome'])
scaled_features = scaler.fit_transform(feature_frame)
X = pd.DataFrame(scaled_features, columns=feature_frame.columns)

In [5]:
# STEP 3: TARGET VARIABLE -- the 'outcome' column is the label to predict
y = combined_df['outcome']

In [6]:
# Hold out 20% of the rows as a test set; the fixed random_state makes the
# split repeatable from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [7]:
# Train a random forest classifier: 100 trees, fixed random_state so the fit is repeatable
clf = RandomForestClassifier(n_estimators=100, random_state=42)
# Fit on the training split only; the test split is kept aside for evaluation
clf.fit(X_train, y_train)

In [8]:
# Predict the outcome class for every row of the held-out test set
y_pred = clf.predict(X_test)

In [9]:
# Evaluate the model: fraction of test rows whose predicted class matches the label
accuracy = accuracy_score(y_test, y_pred)
accuracy_pct = accuracy * 100
print(f"Model accuracy : {accuracy_pct:.2f}%")


# Optional sanity check: the accuracy is expected to land in the 80-95% band
within_desired_range = 80 <= accuracy_pct <= 95
print(
    "The model meets the accuracy requirements"
    if within_desired_range
    else "Model accuracy is outside the desired requirements"
)

Model accuracy : 91.50%
The model meets the accuracy requirements


In [11]:
# Every value requested from the user: (feature name, prompt text, parser).
# The order here is the order in which the prompts are shown.
_PATIENT_FIELDS = (
    ('age', "Enter patient's age: ", int),
    ('gender', "Enter patient's gender (0 for female, 1 for male): ", int),
    ('diagnosis', "Enter patient's diagnosis (categorical): ", int),
    ('medications', "Enter patient's medication code: ", int),
    ('treatment_plan', "Enter patient's treatment plan: ", int),
    ('heart_rate', "Enter patient's heart rate: ", int),
    ('blood_pressure_systolic', "Enter patient's systolic blood pressure: ", int),
    ('blood_pressure_diastolic', "Enter patient's diastolic blood pressure: ", int),
    ('oxygen_saturation', "Enter patient's oxygen saturation level: ", int),
    ('respiratory_rate', "Enter patient's respiratory rate: ", int),
    ('xray_findings', "Enter patient's X-ray findings (categorical): ", int),
    ('ct_findings', "Enter patient's CT findings (categorical): ", int),
    ('mri_findings', "Enter patient's MRI findings (categorical): ", int),
    ('blood_glucose', "Enter patient's blood glucose level: ", int),
    ('cholesterol_level', "Enter patient's cholesterol level: ", int),
    ('hemoglobin', "Enter patient's hemoglobin level: ", float),
    ('white_blood_cell_count', "Enter patient's white blood cell count: ", float),
    ('ventilator_setting', "Enter patient's ventilator setting: ", int),
    ('dialysis_machine', "Is patient on dialysis machine? (0 for No, 1 for Yes): ", int),
    ('cardiac_monitor', "Is patient on cardiac monitor? (0 for No, 1 for Yes): ", int),
    ('level_of_consciousness', "Enter patient's level of consciousness (categorical): ", int),
    ('breathing_rate', "Enter patient's breathing rate: ", int),
    ('pulse_rate', "Enter patient's pulse rate: ", int),
)


def _prompt_number(prompt, cast):
    """Ask for one value, repeating the prompt until the reply parses with `cast`."""
    while True:
        reply = input(prompt)
        try:
            return cast(reply)
        except ValueError:
            # A single typo should not throw away every value entered so far.
            print(f"Invalid value {reply!r}; please enter a number ({cast.__name__}).")


def _collect_patient_record():
    """Prompt for every field in `_PATIENT_FIELDS` and return a one-row record dict."""
    return {name: [_prompt_number(prompt, cast)] for name, prompt, cast in _PATIENT_FIELDS}


def predict_new_data(new_data=None):
    """Classify one new patient record with the already-fitted model.

    Relies on the notebook-level objects `label_encoders`, `scaler` and `clf`
    having been created and fitted by the earlier cells.

    Parameters
    ----------
    new_data : dict or pandas.DataFrame, optional
        Record to classify: a DataFrame, or a mapping of feature name to a
        list of values. When omitted (the default), the values are requested
        interactively with `input()`.

    Returns
    -------
    The predicted class of the first row of the record, as returned by
    `clf.predict`.

    Raises
    ------
    ValueError
        If the record lacks a feature the scaler was fitted on.
    """
    if new_data is None:
        new_data = _collect_patient_record()

    # Copy so that encoding below never mutates a frame supplied by the caller.
    new_data_df = pd.DataFrame(new_data).copy()

    # Re-apply the label encoders fitted earlier to any column that has one.
    for column in new_data_df.columns:
        if column in label_encoders:
            new_data_df[column] = label_encoders[column].transform(new_data_df[column])

    # Put the columns in the order the scaler was fitted with. Scalers fitted on
    # a DataFrame by scikit-learn >= 1.0 expose it as `feature_names_in_`; when
    # it is absent the record's own column order is used unchanged.
    feature_names = list(getattr(scaler, 'feature_names_in_', new_data_df.columns))
    missing = [name for name in feature_names if name not in new_data_df.columns]
    if missing:
        raise ValueError(f"New record is missing feature(s): {missing}")
    new_data_df = new_data_df[feature_names]

    # Scale with the same scaler used earlier, and keep the feature names on the
    # result: handing the classifier a bare array after it was fitted on a named
    # DataFrame makes scikit-learn emit a feature-name warning.
    new_data_scaled = pd.DataFrame(
        scaler.transform(new_data_df),
        columns=feature_names,
        index=new_data_df.index,
    )

    # Make the prediction using the trained classifier
    prediction = clf.predict(new_data_scaled)

    return prediction[0]  # Returns either 0 or 1 based on the model's classification

# With the model fitted, collect one new patient record and classify it
predicted_outcome = predict_new_data()

# Report the result: class 1 means complications are expected
outcome_message = (
    "Prediction: Patient is likely to experience complications."
    if predicted_outcome == 1
    else "Prediction: Patient is not likely to experience complications."
)
print(outcome_message)

Enter patient's age: 69
Enter patient's gender (0 for female, 1 for male): 1
Enter patient's diagnosis (categorical): 1
Enter patient's medication code: 0
Enter patient's treatment plan: 2
Enter patient's heart rate: 98
Enter patient's systolic blood pressure: 135
Enter patient's diastolic blood pressure: 78
Enter patient's oxygen saturation level: 88
Enter patient's respiratory rate: 18
Enter patient's X-ray findings (categorical): 0
Enter patient's CT findings (categorical): 1
Enter patient's MRI findings (categorical): 0
Enter patient's blood glucose level: 180
Enter patient's cholesterol level: 201
Enter patient's hemoglobin level: 12.48
Enter patient's white blood cell count: 8.7
Enter patient's ventilator setting: 0
Is patient on dialysis machine? (0 for No, 1 for Yes): 0
Is patient on cardiac monitor? (0 for No, 1 for Yes): 1
Enter patient's level of consciousness (categorical): 1
Enter patient's breathing rate: 22
Enter patient's pulse rate: 90
Prediction: Patient is likely to experience complications.

