<a href="https://colab.research.google.com/github/georgeroshankujur/George-Roshan-KUJUR-Portofolio/blob/main/chapter_appendix-tools-for-deep-learning/jupyter.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Health Care Modelling

In [None]:
#!pip install pandas
#!pip install faker
#!pip install numpy
#!pip install seaborn
#!pip install matplotlib
#!pip install pyarrow
#!pip install scikit-learn

In [63]:
#------------------------------------------------------------------
#                4th trial to generate sythetic datasets
#------------------------------------------------------------------
import pandas as pd
import numpy as np
from faker import Faker
import random
from datetime import timedelta

# Initialize Faker
Faker.seed(100) # Use a seed for reproducibility
fake = Faker('en_US')
N_RECORDS = 1000

# --- Custom/Helper Functions for realistic data generation ---

def generate_vital_signs():
    """Generates a single record for Vital Signs and Lab Results."""
    return {
        'Heart_Rate_BPM': random.randint(55, 110),
        'Systolic_BP_mmHg': random.randint(100, 160),
        'Diastolic_BP_mmHg': random.randint(60, 100),
        'Respiratory_Rate_per_min': random.randint(12, 25),
        'SpO2_percent': random.randint(90, 100), # Oxygen Saturation
        'Temperature_C': round(random.uniform(36.0, 38.5), 1),
        'Sodium_mEq_L': random.randint(135, 145),
        'Creatinine_mg_dL': round(random.uniform(0.6, 1.3), 2),
        'Platelets_k_uL': random.randint(150, 450),
        'Glucose_mg_dL': random.randint(70, 200),
        'WBC_count_x10_9_L': round(random.uniform(3.5, 12.0), 1),
        'Lab_Abnormality_Flag': np.random.choice(['None', 'Mild', 'Severe'], p=[0.7, 0.2, 0.1])
    }

def generate_medical_record(patient_id, admission_date):
    """Generates a single record for Notes and Summaries."""
    encounter_date = admission_date + timedelta(days=random.randint(0, 5)) # Encounter usually early in admission

    # Simulate a realistic Length of Stay (LOS) for the Regression Target
    los_days = np.random.poisson(lam=5) + 1 # Poisson with mean 5, min 1 day
    discharge_date = encounter_date + timedelta(days=los_days)

    common_diagnoses = ['Hypertension', 'Type 2 Diabetes', 'Pneumonia', 'Appendicitis', 'Asthma Exacerbation', 'Heart Failure', 'Stroke']

    # Choose the Primary Diagnosis first
    primary_diagnosis = random.choice(common_diagnoses)

    # Define the population for secondary diagnoses: all diagnoses EXCEPT the primary one
    secondary_population_diagnoses = [d for d in common_diagnoses if d != primary_diagnosis]

    # Sample the secondary diagnoses from the safe population (size 6)
    secondary_diagnoses = random.sample(secondary_population_diagnoses, k=random.randint(0,2))

    icd10_codes = [fake.bothify(text='C##.#') for _ in range(len(secondary_diagnoses) + 1)]
    # For Clinical Decision Support: Severity Score
    severity_score = random.randint(1, 10) # 1=Mild, 10=Critical

    return {
        'Patient_ID': patient_id,
        'Encounter_ID': fake.unique.random_int(min=100000, max=9999999999),
        'Encounter_Date': encounter_date,
        'Discharge_Date': discharge_date,
        'LOS_Days': los_days, # Explicitly calculated LOS - TARGET FOR REGRESSION
        'Attending_Physician': fake.name(),
        'Specialty': random.choice(['Internal Medicine', 'Cardiology', 'Emergency', 'General Surgery', 'Neurology', 'Pulmonology']),
        'Primary_Diagnosis': primary_diagnosis,
        'Secondary_Diagnoses': ", ".join(secondary_diagnoses) if secondary_diagnoses else None,
        'ICD10_Codes': ", ".join(icd10_codes),
        'Severity_Score_Encounter': severity_score, # Severity for Clinical Decision Support
        'Clinical_Note': fake.text(max_nb_chars=500),
        'Discharge_Summary': fake.paragraph(nb_sentences=7)
    }

def generate_medication_record(patient_id):
    """Generates a single record for Medication and Treatment Outcomes."""
    medications = ['Lisinopril', 'Metformin', 'Simvastatin', 'Amoxicillin', 'Insulin Glargine', 'Warfarin', 'Amlodipine']
    routes = ['Oral', 'IV', 'Subcutaneous', 'Topical']
    outcomes = ['Treatment Ongoing', 'Completed', 'Discontinued (Side Effect)', 'Discontinued (Ineffective)', 'Dosage Adjusted']

    return {
        'Patient_ID': patient_id,
        'Prescription_ID': fake.unique.random_int(min=100000, max=9999999999),
        'Medication_Name': random.choice(medications),
        'Dosage_mg': random.choice([5, 10, 25, 500, 1000, 20]),
        'Route': random.choice(routes),
        'Frequency': random.choice(['Once Daily', 'Twice Daily', 'Three Times Daily', 'PRN', 'Weekly']),
        'Treatment_Start': fake.date_between(start_date='-3y', end_date='-6m'),
        'Treatment_End': fake.date_between(start_date='-5m', end_date='today'),
        'Treatment_Outcome': np.random.choice(outcomes, p=[0.3, 0.25, 0.2, 0.15, 0.1])
    }

# Get a list of unique Patient IDs for consistency across datasets
fake.unique.clear()
patient_ids = [fake.unique.random_int(min=100000, max=9999999999) for _ in range(N_RECORDS)]


# --- 1. Patient demographics and admission records (NOW INCLUDING OPERATIONAL DATA) ---
demographics_data = []
for p_id in patient_ids:
    admission_date = fake.date_between(start_date='-5y', end_date='today')
    age = random.randint(18, 90)

    # --- Feature for Operational Analytics: Bed Availability ---
    # Low number = High Hospital Utilization/Busyness
    available_beds = random.randint(10, 200)
    bed_occupancy_rate = round(1 - (available_beds / 250) + random.uniform(-0.1, 0.1), 2) # Assume 250 beds total +/- noise
    hospital_busyness_flag = 'High' if available_beds < 50 else 'Low'

    # --- Features for Risk Stratification & Clinical Decision Support ---
    comorbidity_count = random.randint(0, 5) # Number of co-existing conditions
    # Higher risk if older, more comorbidities, male (simplified)
    risk_score = (age / 90) * 0.4 + (comorbidity_count / 5) * 0.4 + (1 if random.choice(['Male','Female']) == 'Male' else 0) * 0.2
    risk_score = round(risk_score * 100) # Scale to 0-100
    high_risk_flag = 'Yes' if risk_score > 70 else 'No'

    # --- Feature for Patient Experience ---
    patient_satisfaction_score = random.randint(1, 5)
    feedback_sentiment = np.random.choice(['Positive', 'Neutral', 'Negative'], p=[0.6, 0.25, 0.15])

    demographics_data.append({
        'Patient_ID': p_id,
        'DOB': fake.date_of_birth(minimum_age=18, maximum_age=90),
        'Age_at_Admission': age,
        'Gender': random.choice(['Male', 'Female', 'Other']),
        'Race': random.choice(['White', 'Black', 'Asian', 'Hispanic', 'Other']),
        'Postal_Code': fake.postcode(),
        'Admission_Date': admission_date,
        'Admission_Type': random.choice(['Emergency', 'Elective', 'Urgent']),
        'Insurance_Provider': fake.random_element(elements=('Aetna', 'Cigna', 'Medicare', 'Blue Cross', 'Self-Pay')),
        # New Operational Data
        'Available_Beds_Hospital': available_beds, # Operational Feature
        'Bed_Occupancy_Rate': bed_occupancy_rate, # Operational Feature
        'Hospital_Busyness_Flag': hospital_busyness_flag, # Operational Feature
        # Clinical Features
        'Comorbidity_Count': comorbidity_count,
        'Risk_Score': risk_score,
        'High_Risk_Flag': high_risk_flag,
        # Experience Features
        'Patient_Satisfaction_Score': patient_satisfaction_score,
        'Feedback_Sentiment': feedback_sentiment
    })
df_demographics = pd.DataFrame(demographics_data)

# --- 2. Vital signs and laboratory results ---
vital_signs_data = []
for p_id in patient_ids:
    record = generate_vital_signs()
    record['Patient_ID'] = p_id
    admission_date_for_patient = df_demographics[df_demographics['Patient_ID'] == p_id]['Admission_Date'].iloc[0]
    record['Observation_Time'] = fake.date_time_between(start_date=admission_date_for_patient, end_date=admission_date_for_patient + timedelta(days=random.randint(0, 10)))
    vital_signs_data.append(record)
df_vitals = pd.DataFrame(vital_signs_data)

# --- 3. Clinical notes and discharge summaries (INCLUDES LOS FOR REGRESSION) ---
notes_data = []
for index, row in df_demographics.head(N_RECORDS).iterrows():
    notes_data.append(generate_medical_record(row['Patient_ID'], row['Admission_Date']))
df_notes = pd.DataFrame(notes_data)

# --- Final LOS and Readmission Columns for Analysis ---
df_notes['Readmission_Risk_Score'] = np.random.randint(1, 100, size=len(df_notes))
df_notes['Expected_LOS_Category'] = pd.cut(df_notes['LOS_Days'], bins=[0, 3, 7, 14, 30, 999], labels=['Short (<3D)', 'Medium (3-7D)', 'Long (7-14D)', 'Very Long (14-30D)', 'Extended (>30D)'], right=False)


# --- 4. Medication histories and treatment outcomes ---
medication_data = [generate_medication_record(p_id) for p_id in patient_ids]
df_medications = pd.DataFrame(medication_data)


# --- Output the datasets ---

print("--- 1. Patient Demographics & Admission Records (First 3) ---")
print("**Note:** New columns 'Available_Beds_Hospital', 'Bed_Occupancy_Rate', and 'Hospital_Busyness_Flag' added for operational analysis.")
print(df_demographics.head(3).to_markdown(index=False, numalign="left"))
print("\n" + "="*120 + "\n")

print("--- 2. Vital Signs and Laboratory Results (First 3) ---")
print(df_vitals.head(3).to_markdown(index=False, numalign="left"))
print("\n" + "="*120 + "\n")

print("--- 3. Clinical Notes and Discharge Summaries (First 3) ---")
print("**Note:** New column **'LOS_Days'** is the target variable for **Regression Analysis**.")
# Limit text columns for display
display_df_notes = df_notes.copy()
display_df_notes['Clinical_Note'] = display_df_notes['Clinical_Note'].apply(lambda x: x[:40] + '...')
display_df_notes['Discharge_Summary'] = display_df_notes['Discharge_Summary'].apply(lambda x: x[:40] + '...')
print(display_df_notes.head(3).to_markdown(index=False, numalign="left"))
print("\n" + "="*120 + "\n")

print("--- 4. Medication Histories and Treatment Outcomes (First 3) ---")
print(df_medications.head(3).to_markdown(index=False, numalign="left"))

# Clear the unique generator
fake.unique.clear()

--- 1. Patient Demographics & Admission Records (First 3) ---
**Note:** New columns 'Available_Beds_Hospital', 'Bed_Occupancy_Rate', and 'Hospital_Busyness_Flag' added for operational analysis.
| Patient_ID   | DOB        | Age_at_Admission   | Gender   | Race   | Postal_Code   | Admission_Date   | Admission_Type   | Insurance_Provider   | Available_Beds_Hospital   | Bed_Occupancy_Rate   | Hospital_Busyness_Flag   | Comorbidity_Count   | Risk_Score   | High_Risk_Flag   | Patient_Satisfaction_Score   | Feedback_Sentiment   |
|:-------------|:-----------|:-------------------|:---------|:-------|:--------------|:-----------------|:-----------------|:---------------------|:--------------------------|:---------------------|:-------------------------|:--------------------|:-------------|:-----------------|:-----------------------------|:---------------------|
| 4920711987   | 1951-08-30 | 54                 | Other    | White  | 92164         | 2024-09-07       | Elective         | Medicare 

## Export the datasets for the futher use

In [70]:
df_demographics.to_csv('demographics.csv', index=False)
df_vitals.to_csv('vitals.csv', index=False)
df_notes.to_csv('notes.csv', index = False)
df_medications.to_csv('medications.csv')

# 1. Preprocessing the datasets

In [18]:
import pandas as pd
import numpy as np
from faker import Faker
import random
from datetime import timedelta

### dataset: Demographics

In [66]:
df_vitals = pd.read_csv('vitals.csv')
df_vitals.head()

Unnamed: 0,Heart_Rate_BPM,Systolic_BP_mmHg,Diastolic_BP_mmHg,Respiratory_Rate_per_min,SpO2_percent,Temperature_C,Sodium_mEq_L,Creatinine_mg_dL,Platelets_k_uL,Glucose_mg_dL,WBC_count_x10_9_L,Lab_Abnormality_Flag,Patient_ID,Observation_Time
0,79,118,87,22,100,37.0,144,0.85,438,190,6.1,,4920711987,2024-09-09 16:56:26.056953
1,55,122,74,13,98,36.5,139,0.85,412,129,8.2,Severe,3310591232,2025-07-19 00:20:46.937607
2,100,146,75,14,95,37.0,145,1.09,332,137,10.8,,7325223548,2022-11-12 01:27:03.340913
3,98,125,85,24,93,38.2,137,1.08,366,175,6.2,Mild,7438807149,2025-05-16 22:29:04.855812
4,92,124,84,20,90,36.2,142,0.74,300,75,11.2,,3436161708,2023-04-18 18:31:48.643556


In [71]:
df_notes = pd.read_csv('notes.csv')
df_notes.head()

Unnamed: 0,Patient_ID,Encounter_ID,Encounter_Date,Discharge_Date,LOS_Days,Attending_Physician,Specialty,Primary_Diagnosis,Secondary_Diagnoses,ICD10_Codes,Severity_Score_Encounter,Clinical_Note,Discharge_Summary,Readmission_Risk_Score,Expected_LOS_Category
0,4920711987,7207505178,2024-09-11,2024-09-15,4,Shannon Alvarez,General Surgery,Pneumonia,,C87.9,5,Be generation avoid cover nearly before. Cours...,House also degree without successful prove sou...,89,Medium (3-7D)
1,3310591232,1827402518,2025-07-21,2025-07-27,6,Lori Rhodes,Internal Medicine,Hypertension,Heart Failure,"C41.3, C32.5",6,College speak author true available. Book trut...,Event mean require step role maintain drug. Se...,29,Medium (3-7D)
2,7325223548,5322267113,2022-11-12,2022-11-14,2,Jacob Carlson MD,Internal Medicine,Hypertension,Appendicitis,"C19.1, C50.3",7,Line weight clear hear wrong us investment. De...,There world name poor guess great particularly...,80,Short (<3D)
3,7438807149,7240113524,2025-05-13,2025-05-18,5,Vincent Brown,Cardiology,Asthma Exacerbation,Type 2 Diabetes,"C63.5, C15.8",4,Near cell despite more space. Like can three c...,Pay ground movement structure war usually. Mod...,72,Medium (3-7D)
4,3436161708,1654159028,2023-04-21,2023-04-27,6,Sarah Marshall,Neurology,Type 2 Diabetes,"Asthma Exacerbation, Pneumonia","C54.9, C22.3, C33.4",2,Believe scientist education.\nBillion importan...,Pull mother painting. Development focus meetin...,90,Medium (3-7D)


In [7]:
# import the dataset : df_demographics
df_demographics = pd.read_csv('demographics.csv')
df_demographics.head()

Unnamed: 0,Patient_ID,DOB,Age_at_Admission,Gender,Race,Postal_Code,Admission_Date,Admission_Type,Insurance_Provider,Available_Beds_Hospital,Bed_Occupancy_Rate,Hospital_Busyness_Flag,Comorbidity_Count,Risk_Score,High_Risk_Flag,Patient_Satisfaction_Score,Feedback_Sentiment
0,4920711987,1951-08-30,83,Male,Hispanic,92164,2024-09-07,Emergency,Medicare,20,0.84,High,3,61,No,3,Negative
1,3310591232,1953-08-27,62,Female,White,18542,2025-07-18,Elective,Self-Pay,30,0.9,High,5,68,No,1,Positive
2,7325223548,1999-12-06,59,Female,White,77598,2022-11-08,Emergency,Self-Pay,139,0.5,Low,3,50,No,4,Neutral
3,7438807149,1977-07-13,54,Other,Hispanic,95882,2025-05-09,Emergency,Self-Pay,16,1.0,High,4,76,Yes,5,Positive
4,3436161708,2001-05-08,89,Other,Asian,63851,2023-04-18,Elective,Self-Pay,198,0.15,Low,4,92,Yes,2,Negative


In [21]:
# We merge with "inner merge" to only keep patients in both files.

df = pd.merge(
    left=df_demographics,
    right=df_vitals,
    on='Patient_ID',
    how='left'  # Use 'left' to keep all patient records from demographics
)

print(df.head(15))

    Patient_ID         DOB  Age_at_Admission  Gender      Race  Postal_Code  \
0   4920711987  1951-08-30                83    Male  Hispanic        92164   
1   3310591232  1953-08-27                62  Female     White        18542   
2   7325223548  1999-12-06                59  Female     White        77598   
3   7438807149  1977-07-13                54   Other  Hispanic        95882   
4   3436161708  2001-05-08                89   Other     Asian        63851   
5   2288894160  1947-07-01                48   Other  Hispanic        94506   
6   8934292135  1988-11-26                88  Female  Hispanic        23441   
7   6253339943  1947-08-01                63   Other     White        18403   
8   8795709919  1954-11-21                55    Male     Black        73356   
9   4067769068  1971-10-04                30  Female     White        58802   
10  1440420622  1988-10-15                75  Female     Other        93906   
11   766583143  2003-05-13                88   Other

### 1.2. Missing values

In [22]:
# Handling missing values
df.isnull().sum()

Unnamed: 0,0
Patient_ID,0
DOB,0
Age_at_Admission,0
Gender,0
Race,0
Postal_Code,0
Admission_Date,0
Admission_Type,0
Insurance_Provider,0
Available_Beds_Hospital,0


#### 1.3. converting to appropriate variables types

In [57]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 27 columns):
 #   Column                      Non-Null Count  Dtype         
---  ------                      --------------  -----         
 0   Age_at_Admission            1000 non-null   int64         
 1   Gender                      1000 non-null   category      
 2   Race                        1000 non-null   category      
 3   Admission_Date              1000 non-null   datetime64[ns]
 4   Admission_Type              1000 non-null   category      
 5   Insurance_Provider          1000 non-null   category      
 6   Available_Beds_Hospital     1000 non-null   int64         
 7   Bed_Occupancy_Rate          1000 non-null   float64       
 8   Hospital_Busyness_Flag      1000 non-null   category      
 9   Comorbidity_Count           1000 non-null   int64         
 10  Risk_Score                  1000 non-null   int64         
 11  High_Risk_Flag              1000 non-null   object       

In [58]:
# Converting to appropriate variable types
df['Gender'] = df['Gender'].astype('category')
df['Race'] = df['Race'].astype('category')
df['Admission_Type'] = df['Admission_Type'].astype('category')
df['Insurance_Provider'] = df['Insurance_Provider'].astype('category')
df['Hospital_Busyness_Flag'] = df['Hospital_Busyness_Flag'].astype('category')
df['Lab_Abnormality_Flag'] = df['Lab_Abnormality_Flag'].astype('category')
df['Feedback_Sentiment'] = df['Feedback_Sentiment'].astype('category')
df['Observation_Time'] = pd.to_datetime(df['Observation_Time'])
df['Admission_Date'] = pd.to_datetime(df['Admission_Date'])


In [59]:
catgories_Lab_Abnormality_flag =  df['Lab_Abnormality_Flag'].unique()
print(f'Number of  categories in Lab_Abnormality_Flag: {len(catgories_Lab_Abnormality_flag)}')
print('-'*50)

count_catgories_Lab_Abnormality_flag= df['Lab_Abnormality_Flag'].value_counts()
print(f'Number of  categories in Lab_Abnormality_Flag: {count_catgories_Lab_Abnormality_flag}')

""" comments on the missing values: we have missing values in Lab_Abnormality_Flag. However, they are not missing at random.
they are probably the noraml condition of a patient.
We will consider it as a valid missing value. and fill the category as normal.
"""

df['Lab_Abnormality_Flag'].fillna('Normal', inplace=True)
after_fill_catgories_Lab_Abnormality_flag= df['Lab_Abnormality_Flag'].value_counts()
print('-'*50)
print(f'Number of categories in Lab_Abnormality_Flag: {after_fill_catgories_Lab_Abnormality_flag}')

print('-'*50)
print(df.isnull().sum())

Number of  categories in Lab_Abnormality_Flag: 3
--------------------------------------------------
Number of  categories in Lab_Abnormality_Flag: Lab_Abnormality_Flag
Normal    715
Mild      189
Severe     96
Name: count, dtype: int64
--------------------------------------------------
Number of categories in Lab_Abnormality_Flag: Lab_Abnormality_Flag
Normal    715
Mild      189
Severe     96
Name: count, dtype: int64
--------------------------------------------------
Age_at_Admission              0
Gender                        0
Race                          0
Admission_Date                0
Admission_Type                0
Insurance_Provider            0
Available_Beds_Hospital       0
Bed_Occupancy_Rate            0
Hospital_Busyness_Flag        0
Comorbidity_Count             0
Risk_Score                    0
High_Risk_Flag                0
Patient_Satisfaction_Score    0
Feedback_Sentiment            0
Heart_Rate_BPM                0
Systolic_BP_mmHg              0
Diastolic_BP_mm

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Lab_Abnormality_Flag'].fillna('Normal', inplace=True)


In [60]:
# To keep the confidentiality, it's worth dropping Patient_ID
#df.drop('Patient_ID','DOB', 'Postal_Code', axis=1, inplace=True)

df = df.copy()
print(df.head(3))

   Age_at_Admission  Gender      Race Admission_Date Admission_Type  \
0                83    Male  Hispanic     2024-09-07      Emergency   
1                62  Female     White     2025-07-18       Elective   
2                59  Female     White     2022-11-08      Emergency   

  Insurance_Provider  Available_Beds_Hospital  Bed_Occupancy_Rate  \
0           Medicare                       20                0.84   
1           Self-Pay                       30                0.90   
2           Self-Pay                      139                0.50   

  Hospital_Busyness_Flag  Comorbidity_Count  ...  Respiratory_Rate_per_min  \
0                   High                  3  ...                        25   
1                   High                  5  ...                        16   
2                    Low                  3  ...                        16   

  SpO2_percent  Temperature_C Sodium_mEq_L  Creatinine_mg_dL  Platelets_k_uL  \
0           94           36.3          145   

In [61]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 27 columns):
 #   Column                      Non-Null Count  Dtype         
---  ------                      --------------  -----         
 0   Age_at_Admission            1000 non-null   int64         
 1   Gender                      1000 non-null   category      
 2   Race                        1000 non-null   category      
 3   Admission_Date              1000 non-null   datetime64[ns]
 4   Admission_Type              1000 non-null   category      
 5   Insurance_Provider          1000 non-null   category      
 6   Available_Beds_Hospital     1000 non-null   int64         
 7   Bed_Occupancy_Rate          1000 non-null   float64       
 8   Hospital_Busyness_Flag      1000 non-null   category      
 9   Comorbidity_Count           1000 non-null   int64         
 10  Risk_Score                  1000 non-null   int64         
 11  High_Risk_Flag              1000 non-null   object       

### 1.3. Normalization

In [None]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

 'Age_at_Admission', 'Gender', 'Race', 'Postal_Code',
       'Admission_Date', 'Admission_Type', 'Insurance_Provider',
       'Available_Beds_Hospital', 'Bed_Occupancy_Rate',
       'Hospital_Busyness_Flag', 'Comorbidity_Count', ,
       , , 'Feedback_Sentiment',
       'Heart_Rate_BPM', 'Systolic_BP_mmHg', 'Diastolic_BP_mmHg',
       'Respiratory_Rate_per_min', 'SpO2_percent', 'Temperature_C',
       'Sodium_mEq_L', 'Creatinine_mg_dL', 'Platelets_k_uL', 'Glucose_mg_dL',
       'WBC_count_x10_9_L', , 'Observation_Time'

# Target variables
targets = df['Risk_Score', 'High_Risk_Flag', 'Patient_Satisfaction_Score', 'Lab_Abnormality_Flag']

# Normalization for the Countinous variables
feature = ['Age_at_Admission', 'Available_Beds_Hospital', 'Bed_Occupancy_Rate',
       'Hospital_Busyness_Flag', 'Comorbidity_Count', ,
       , , 'Feedback_Sentiment',
       'Heart_Rate_BPM', 'Systolic_BP_mmHg', 'Diastolic_BP_mmHg',
       'Respiratory_Rate_per_min', 'SpO2_percent', 'Temperature_C',
       'Sodium_mEq_L', 'Creatinine_mg_dL', 'Platelets_k_uL', 'Glucose_mg_dL',
       'WBC_count_x10_9_L', , 'Observation_Time']
# Normalization of DataTime variables
df['Observation_Date'] = df['Observation_Time'].dt.normalize()





scaler = MinMaxScaler()


