In [None]:
import pandas as pd
import os
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

In [None]:
def load_data_from_files(file_label_map, header=None):
    """Read several Excel files and stack them into one labelled DataFrame.

    Parameters
    ----------
    file_label_map : dict
        Maps each Excel file path to the label assigned to every row read
        from that file.
    header : int, list of int or None, default None
        Forwarded unchanged to ``pd.read_excel``. The default ``None`` keeps
        the original behaviour: the first row is treated as data and the
        columns are numbered 0..n-1. Pass ``0`` when the first row of the
        workbook holds the column names, so it is not read as a data row.

    Returns
    -------
    pandas.DataFrame
        The rows of all files, concatenated in mapping order, with a fresh
        0..n-1 index and an extra ``'label'`` column.

    Raises
    ------
    ValueError
        If ``file_label_map`` is empty (there is nothing to concatenate).
    """
    if not file_label_map:
        raise ValueError("file_label_map is empty: no files to load")

    data_frames = []
    for file, label in file_label_map.items():
        df = pd.read_excel(file, header=header)
        df['label'] = label  # Same label for every row of this file
        data_frames.append(df)
    return pd.concat(data_frames, ignore_index=True)

In [6]:
# Map each source workbook to the class label given to all of its rows.
# NOTE(review): a later cell rebinds file_label_map with lowercase file names
# ('high.xlsx', ...). On a case-sensitive filesystem only one spelling can
# match the files on disk — confirm which one is correct.
file_label_map = {
    'High.xlsx': 'high',
    'Medium.xlsx': 'medium',
    'Low.xlsx': 'low'
}

In [7]:
# Read every workbook in file_label_map (no header row assumed) into one labelled frame.
df = load_data_from_files(file_label_map)

In [8]:
# Inspect the column labels: the frame was read with header=None, so the data
# columns are numbered rather than named.
df.columns

Index([0, 1, 2, 'label'], dtype='object')

In [10]:
# Prepare the header-less frame for modelling: name the columns, force the
# sensor readings to numeric, drop unparseable rows, then encode and scale.
# NOTE(review): later cells call the third feature 'BVA', this cell calls it
# 'BVP' — confirm which name is intended.
df.columns = ['EDA', 'Temp', 'BVP', 'label']

# Convert columns to numeric (if they are strings). Anything that cannot be
# parsed becomes NaN and is removed by the dropna below.
df[['EDA', 'Temp', 'BVP']] = df[['EDA', 'Temp', 'BVP']].apply(pd.to_numeric, errors='coerce')

# Drop rows with NaN (if any). Rebinding instead of inplace=True keeps the
# cell safe to re-run and avoids mutating a frame other names may reference.
df = df.dropna(subset=['EDA', 'Temp', 'BVP'])

# Encode labels. The integer codes go into y and df['label'] keeps the text:
# overwriting the column would make a second run of this cell fit the encoder
# on integers, so le.classes_ would no longer hold the class names.
le = LabelEncoder()
y = pd.Series(le.fit_transform(df['label']), index=df.index, name='label')

# Scale features to zero mean / unit variance
scaler = StandardScaler()
X = scaler.fit_transform(df[['EDA', 'Temp', 'BVP']])

In [11]:
# Step 1: Load the data
def load_data_with_headers(file_label_map):
    """Load Excel files whose first row holds column names and stack them.

    ``file_label_map`` maps each file path to the label written into a
    ``'label'`` column for every row of that file. The per-file frames are
    concatenated in mapping order and the result gets a fresh 0..n-1 index.
    """
    labelled_frames = [
        pd.read_excel(path).assign(label=tag)  # default header=0: first row names the columns
        for path, tag in file_label_map.items()
    ]
    return pd.concat(labelled_frames, ignore_index=True)

# Map filenames to their labels
# NOTE(review): this rebinds file_label_map from the earlier cell, now with
# lowercase file names — confirm which spelling matches the files on disk.
file_label_map = {
    'high.xlsx': 'high',
    'medium.xlsx': 'medium',
    'low.xlsx': 'low'
}

# Reload using the first row of each workbook as column names; this replaces
# the df (and makes the le / scaler / X / y) built by the earlier cells stale.
df = load_data_with_headers(file_label_map)

# Check that the columns are named as expected (optional)
print("Columns:", df.columns)

Columns: Index(['EDA', 'Temp', 'BVA', 'label'], dtype='object')


In [12]:
# Drop every row that has a missing value in any column (dropna defaults).
# df is rebound here, so the unfiltered frame is no longer reachable afterwards.
df=df.dropna()

In [13]:
# Display the frame left after dropping incomplete rows.
df

Unnamed: 0,EDA,Temp,BVA,label
0,0.122355,31.281250,0.007719,high
1,0.121281,31.289062,0.008361,high
2,0.121209,31.281250,0.008915,high
3,0.123287,31.289062,0.009383,high
4,0.121868,31.281250,0.009770,high
...,...,...,...,...
204137,0.018364,29.257812,0.000968,low
204138,0.018737,29.273438,0.000926,low
204139,0.016932,29.273438,0.001038,low
204140,0.019181,29.257812,0.001276,low


In [14]:
# Step 2: Encode labels
# The integer codes go into y and df['label'] keeps the text labels. Writing
# the codes back into df would make a second run of this cell fit the encoder
# on integers, replacing le.classes_ with numbers and breaking both
# target_names below and le.inverse_transform at prediction time.
le = LabelEncoder()
y = pd.Series(le.fit_transform(df['label']), index=df.index, name='label')

# Step 3: Train/Test Split (done BEFORE scaling, on the raw features)
features = df[['EDA', 'Temp', 'BVA']]
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    features, y, test_size=0.2, random_state=42
)

# Step 4: Feature Scaling
# Fit the scaler on the training rows only, then apply it to the test rows.
# Fitting on the full dataset would leak the test set's mean/variance into
# training and make the reported score optimistic.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
X = scaler.transform(features)  # full scaled matrix, kept under its original name

# Step 5: Train Random Forest
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_train)

# Step 6: Evaluate on the held-out 20%
y_pred = clf.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred, target_names=le.classes_))

Accuracy: 0.998533724340176
Classification Report:
               precision    recall  f1-score   support

        high       1.00      1.00      1.00       260
         low       1.00      1.00      1.00        96
      medium       1.00      1.00      1.00       326

    accuracy                           1.00       682
   macro avg       1.00      1.00      1.00       682
weighted avg       1.00      1.00      1.00       682



In [22]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Step 1: Load the new, unlabeled CSV file (read with header=None, i.e. no header row)
new_data = pd.read_csv('cognitive_load_data.csv', header=None)  # Adjust the path to point at your own file

# Step 2: Assign column names (the same feature names the training cell uses)
new_data.columns = ['EDA', 'Temp', 'BVA']

# Step 3: Scale BVA between 0 and 1, using the min/max of this file only
# NOTE(review): the training cell passes BVA to StandardScaler without any
# min-max step, and this scaler is fitted on the new data itself. The BVA
# values the model sees at prediction time may therefore be on a different
# scale from the one it was trained on. Confirm whether the training workbooks
# were already min-max scaled upstream; if not, the same fitted transform must
# be used for both training and prediction.
bvp_scaler = MinMaxScaler(feature_range=(0, 1))
new_data['BVA'] = bvp_scaler.fit_transform(new_data[['BVA']])

In [23]:
# Display the new samples after the BVA column has been rescaled.
new_data

Unnamed: 0,EDA,Temp,BVA
0,0.115478,37.317266,0.222218
1,0.112914,37.280067,0.179241
2,0.113102,37.292933,0.038433
3,0.113223,37.324933,0.046644
4,0.113562,37.360867,0.133180
...,...,...,...
198,0.108960,36.026667,0.091205
199,0.110387,36.044500,0.056148
200,0.115521,36.068143,0.021967
201,0.125015,36.048875,0.221717


In [24]:
# Step 1: Standardize the new samples with the scaler fitted during training.
# transform() (not fit_transform()) reuses the training mean and variance.
new_features = new_data[['EDA', 'Temp', 'BVA']]
X_new = scaler.transform(new_features)

# Step 2: Predict integer class codes, then map them back to the text labels.
numeric_predictions = clf.predict(X_new)
class_predictions = le.inverse_transform(numeric_predictions)

# Step 3: Attach the predictions to the samples.
new_data['Predicted_Label'] = class_predictions

# Step 4: Persist the labelled samples and preview the first rows.
new_data.to_csv('labeled_predictions.csv', index=False)
print(new_data.head())

# Optional: one summary line per sample.
per_row = zip(
    new_data.index,
    new_data['EDA'],
    new_data['Temp'],
    new_data['BVA'],
    new_data['Predicted_Label'],
)
for idx, eda, temp, bva, predicted in per_row:
    print(f"Row {idx}: EDA={eda:.2f}, Temp={temp:.2f}, BVA={bva:.2f} → {predicted}")

        EDA       Temp       BVA Predicted_Label
0  0.115478  37.317266  0.222218             low
1  0.112914  37.280067  0.179241             low
2  0.113102  37.292933  0.038433             low
3  0.113223  37.324933  0.046644             low
4  0.113562  37.360867  0.133180             low
Row 0: EDA=0.12, Temp=37.32, BVA=0.22 → low
Row 1: EDA=0.11, Temp=37.28, BVA=0.18 → low
Row 2: EDA=0.11, Temp=37.29, BVA=0.04 → low
Row 3: EDA=0.11, Temp=37.32, BVA=0.05 → low
Row 4: EDA=0.11, Temp=37.36, BVA=0.13 → low
Row 5: EDA=0.11, Temp=37.28, BVA=0.13 → low
Row 6: EDA=0.11, Temp=37.35, BVA=0.05 → low
Row 7: EDA=0.11, Temp=37.28, BVA=0.05 → low
Row 8: EDA=0.11, Temp=37.30, BVA=0.07 → low
Row 9: EDA=0.11, Temp=37.29, BVA=0.28 → medium
Row 10: EDA=0.11, Temp=37.29, BVA=0.13 → low
Row 11: EDA=0.11, Temp=37.28, BVA=0.20 → low
Row 12: EDA=0.11, Temp=37.26, BVA=0.14 → low
Row 13: EDA=0.11, Temp=37.26, BVA=0.21 → low
Row 14: EDA=0.11, Temp=37.28, BVA=0.09 → low
Row 15: EDA=0.11, Temp=37.27, BVA=0.14

In [27]:
# Persist the trained model and its preprocessing objects so they can be
# reloaded outside this notebook. Run this cell after the training cell.
import joblib
from sklearn.preprocessing import MinMaxScaler
import os

# Make sure the output folders exist (no error if they already do).
for folder in ('models', 'scalers', 'encoders'):
    os.makedirs(folder, exist_ok=True)

# 1. Fit a 0-to-1 scaler on the BVA column of the training frame.
# NOTE(review): no visible cell applies this scaler — training standardizes
# BVA directly and the prediction cells fit their own MinMaxScaler on the new
# data. Confirm how the consumer of bva_scaler.pkl is meant to use it.
bva_scaler = MinMaxScaler(feature_range=(0, 1))
bva_scaler.fit(df[['BVA']])

# 2. Write every component with pickle protocol 4 (readable by Python 3.4+).
artifacts = {
    'models/random_forest_model.pkl': clf,
    'models/standard_scaler.pkl': scaler,
    'scalers/bva_scaler.pkl': bva_scaler,
    'encoders/label_encoder.pkl': le,
}
for target_path, component in artifacts.items():
    joblib.dump(component, target_path, protocol=4)

print("All components saved successfully!")

All components saved successfully!
