# In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
import joblib  # For saving the model
import glob  # For finding all case files

# Glob pattern for the per-case folders that contain the CSV files
folder_path = "E:/jai/uqvitalsignsdata/case*/"  # Replace with your folder path

# Use glob to find all CSV files in the matching folders
case_files = glob.glob(folder_path + "*.csv")
print(f"Found {len(case_files)} case files: {case_files}")

# Fail fast with a message that names the pattern: without any files there is
# nothing to concatenate and the later column selection would fail obscurely.
if not case_files:
    raise FileNotFoundError(f"No CSV case files matched {folder_path}*.csv")

# (flag column, alarm string) pairs. The alarm strings are compared with
# exact equality, so their padding and leading asterisks are significant.
# The flag column names (including the trailing space in "is_SpO2 LOW ")
# are referenced verbatim further down the script.
ALARM_FLAGS = [
    ("is_SpO2 LOW PERF", 'SpO2   LOW PERF   '),
    ("is_SpO2 LOW ", '** SpO2   LOW     '),
    ("is_etCO2 LOW", '** etCO2  LOW     '),
    ("is_AGM NO BREATH", 'AGM NO BREATH     '),
]
# Columns searched for an alarm string
ALARM_SOURCE_COLUMNS = ["Alarms...", "desc1", "desc2", "desc3"]
# Vital-sign measurements kept as model inputs
VITAL_COLUMNS = ["HR", "SpO2", "etCO2", "awRR"]
FLAG_COLUMNS = [flag_column for flag_column, _ in ALARM_FLAGS]

# Collect one cleaned frame per case file and concatenate once at the end,
# instead of growing a DataFrame inside the loop (which re-copies every
# accumulated row on each iteration and starts from an empty placeholder).
case_frames = []
for file in case_files:
    print(f"Processing {file}...")
    df = pd.read_csv(file)

    # Snapshot of the alarm text columns, taken before any flag is added
    alarm_text = df[ALARM_SOURCE_COLUMNS]

    # A flag is 1 when any alarm text column equals the alarm string exactly
    for flag_column, alarm_label in ALARM_FLAGS:
        df[flag_column] = np.where(alarm_text.eq(alarm_label).any(axis=1), 1, 0)

    # Keep only the vitals and flags, dropping rows with any missing value
    case_frames.append(df[VITAL_COLUMNS + FLAG_COLUMNS].dropna())

# Combined data from every case file, re-indexed from 0
df_combined = pd.concat(case_frames, ignore_index=True)

# Targets (y) are the four alarm flags; features (x) are the four vital signs
target_columns = ['is_SpO2 LOW PERF', 'is_SpO2 LOW ', 'is_etCO2 LOW', 'is_AGM NO BREATH']
feature_columns = ["HR", "SpO2", "etCO2", "awRR"]
y = df_combined.loc[:, target_columns]
x = df_combined.loc[:, feature_columns]

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
split_parts = train_test_split(x, y, random_state=42, test_size=0.2)
x_train, x_test, y_train, y_test = split_parts

# Fit one independent Random Forest per target column, keyed by column name
models = {}
for target in y.columns:
    print(f"Training model for {target}...")
    train_labels = y_train[target]
    test_labels = y_test[target]

    # fit() returns the fitted estimator, so build and train in one expression
    model = RandomForestClassifier(random_state=42, n_estimators=100).fit(x_train, train_labels)
    models[target] = model

    # Score the fitted model on the held-out rows
    y_pred = model.predict(x_test)
    accuracy = accuracy_score(test_labels, y_pred)
    print(f"Accuracy for {target}: {accuracy:.2f}")
    print(classification_report(test_labels, y_pred))
    print("\n")

# Persist every trained model to its own file in the current working directory
for target in models:
    model = models[target]
    # The key is used verbatim in the filename, including any spaces it contains
    model_filename = f"{target}_model.pkl"
    joblib.dump(model, model_filename)
    print(f"Model for {target} saved as {model_filename}")