In [3]:
import pandas as pd
import glob
import numpy as np
import pickle
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Define the folder path containing the CSV files
folder_path = "G:/mini project/csv_files/csv_files/"

# Use glob to find all CSV files in the folder
csv_files = glob.glob(folder_path + "*.csv")
print(csv_files)

# Initialize an empty DataFrame to append data
df_combined = pd.DataFrame()
l = ['SpO2   LOW PERF   ','** SpO2   LOW     ', '** etCO2  LOW     ','AGM NO BREATH     ']
l2 = ['SpO2 LOW PERF','SpO2 LOW ', 'etCO2 LOW','AGM NO BREATH']

# Iterate through each CSV file and append to the combined DataFrame
for file in csv_files:
    df = pd.read_csv(file)
    
    df[["RelativeTimeMilliseconds","HR","SpO2","etCO2","awRR","inSEV","inO2","Alarms...","desc1","desc2","desc3","desc4","desc5","desc6","desc7"]]
    df.head()
    
    for i in range(4):
        df[str(f"is_{l2[i]}")] = np.where(
            (df["Alarms..."] == l[i]) | 
            (df["desc1"] == l[i]) | 
            (df["desc2"] == l[i]) | 
            (df["desc4"] == l[i]) | 
            (df["desc5"] == l[i]) | 
            (df["desc6"] == l[i]) | 
            (df["desc7"] == l[i]) | 
            (df["desc3"] == l[i]), 
            1, 
            0
        )
    
    x = df[["RelativeTimeMilliseconds","HR","SpO2","etCO2","awRR","is_SpO2 LOW PERF","is_SpO2 LOW ","is_etCO2 LOW","is_AGM NO BREATH","inSEV","inO2"]]
    x = x.dropna()
    df = x.sort_values(by="RelativeTimeMilliseconds")
    df.set_index("RelativeTimeMilliseconds", inplace=True)

    if x.empty:
        print(f"Skipping {file} due to empty data after dropping NaNs.")
        continue

    df_combined = pd.concat([df_combined, df], ignore_index=True)

# Now df_combined contains all the rows from the CSV files
df_combined.head()

# Define feature columns
feature_columns = ["HR", "SpO2", "etCO2", "awRR"]
label_columns = [f"is_{label}" for label in l2]

# Extract features (X) and labels (Y)
X = df_combined[feature_columns].values
Y = df_combined[label_columns].values

# Normalize features
scaler_X = MinMaxScaler()
X = scaler_X.fit_transform(X)

# Split into training and testing sets
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Train a Random Forest Classifier for multi-label classification
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, Y_train)

# Predict on test set
Y_pred = clf.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(Y_test, Y_pred.round())
print(f"Accuracy: {accuracy:.4f}")
print("Classification Report:")
print(classification_report(Y_test, Y_pred.round(), target_names=label_columns))

# Save the model and scaler
with open("multiclass_classifier.pkl", "wb") as model_file:
    pickle.dump(clf, model_file)

with open("scaler_X.pkl", "wb") as scaler_file:
    pickle.dump(scaler_X, scaler_file)

print("Model and scaler saved successfully!")


['G:/mini project/csv_files/csv_files\\uq_vsd_case01_trenddata.csv', 'G:/mini project/csv_files/csv_files\\uq_vsd_case02_trenddata.csv', 'G:/mini project/csv_files/csv_files\\uq_vsd_case03_trenddata.csv', 'G:/mini project/csv_files/csv_files\\uq_vsd_case04_trenddata.csv', 'G:/mini project/csv_files/csv_files\\uq_vsd_case05_trenddata.csv', 'G:/mini project/csv_files/csv_files\\uq_vsd_case06_trenddata.csv', 'G:/mini project/csv_files/csv_files\\uq_vsd_case07_trenddata.csv', 'G:/mini project/csv_files/csv_files\\uq_vsd_case08_trenddata.csv', 'G:/mini project/csv_files/csv_files\\uq_vsd_case09_trenddata.csv', 'G:/mini project/csv_files/csv_files\\uq_vsd_case10_trenddata.csv', 'G:/mini project/csv_files/csv_files\\uq_vsd_case11_trenddata.csv', 'G:/mini project/csv_files/csv_files\\uq_vsd_case12_trenddata.csv', 'G:/mini project/csv_files/csv_files\\uq_vsd_case13_trenddata.csv', 'G:/mini project/csv_files/csv_files\\uq_vsd_case14_trenddata.csv', 'G:/mini project/csv_files/csv_files\\uq_vsd_ca

  df = pd.read_csv(file)


Skipping G:/mini project/csv_files/csv_files\uq_vsd_case14_trenddata.csv due to empty data after dropping NaNs.
Skipping G:/mini project/csv_files/csv_files\uq_vsd_case15_trenddata.csv due to empty data after dropping NaNs.
Skipping G:/mini project/csv_files/csv_files\uq_vsd_case17_trenddata.csv due to empty data after dropping NaNs.
Skipping G:/mini project/csv_files/csv_files\uq_vsd_case18_trenddata.csv due to empty data after dropping NaNs.
Skipping G:/mini project/csv_files/csv_files\uq_vsd_case19_trenddata.csv due to empty data after dropping NaNs.
Skipping G:/mini project/csv_files/csv_files\uq_vsd_case22_trenddata.csv due to empty data after dropping NaNs.
Skipping G:/mini project/csv_files/csv_files\uq_vsd_case25_trenddata.csv due to empty data after dropping NaNs.
Skipping G:/mini project/csv_files/csv_files\uq_vsd_case27_trenddata.csv due to empty data after dropping NaNs.
Skipping G:/mini project/csv_files/csv_files\uq_vsd_case28_trenddata.csv due to empty data after droppin

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Model and scaler saved successfully!
