# Human Emotion Detector Using EEG Signals
A simple machine-learning pipeline that applies linear regression and logistic regression to EEG channel data.

This notebook explains **every line of code** with inline comments.


In [None]:
import numpy as np  # numerical arrays and math
import pandas as pd  # data tables (DataFrames)
from pathlib import Path  # clean file paths

from sklearn.linear_model import LinearRegression, LogisticRegression  # ML models
from sklearn.metrics import mean_squared_error, r2_score, accuracy_score, confusion_matrix, classification_report  # evaluation
from sklearn.model_selection import train_test_split  # split data
from sklearn.preprocessing import StandardScaler  # normalize features


## 1. Dataset Loading and Exploration
- Loads EEG data from multiple subject files  
- Displays the dataset shape and the first few sample rows  
- Summarizes each channel with its mean, standard deviation, minimum, and maximum  


In [None]:
DATASET_DIR = Path("dataset")  # folder containing EEG CSV files
MAX_SUBJECTS = 5  # number of subject files to load
N_CHANNELS = 19  # EEG channels per file


def load_eeg_data(dataset_dir: Path, max_subjects: int = 5) -> pd.DataFrame:
    """Load headerless subject CSV files into one DataFrame.

    Files directly inside ``dataset_dir`` whose name starts with ``"s"`` and
    whose suffix is ``.csv`` are sorted by path, and the first
    ``max_subjects`` of them are read without a header row. Every file must
    have exactly ``N_CHANNELS`` columns, which are renamed ``EEG_Ch_1`` ..
    ``EEG_Ch_<N_CHANNELS>``. The rows of all files are stacked under a fresh
    0-based index.

    Args:
        dataset_dir: Directory holding the subject CSV files.
        max_subjects: Maximum number of files to load.

    Returns:
        The concatenated rows of all loaded files.

    Raises:
        ValueError: If no matching file is selected, or if a file does not
            have ``N_CHANNELS`` columns.
    """
    dataset_dir = Path(dataset_dir)  # also accept a plain string path
    files = sorted(
        f for f in dataset_dir.iterdir()
        if f.name.startswith("s") and f.suffix == ".csv"
    )  # list subject CSVs in a deterministic order
    files = files[:max_subjects]  # keep only the first N files

    # Fail early with a readable message instead of letting pd.concat([])
    # raise "No objects to concatenate".
    if not files:
        raise ValueError(
            f"No subject CSV files selected from {dataset_dir} "
            "(expected names like 's*.csv')"
        )

    columns = [f"EEG_Ch_{i + 1}" for i in range(N_CHANNELS)]  # channel names
    all_data = []  # store each subject DataFrame
    for file in files:  # loop over subject files
        df = pd.read_csv(file, header=None)  # read raw EEG values
        # Name the offending file; pandas' own "Length mismatch" error does not.
        if df.shape[1] != N_CHANNELS:
            raise ValueError(
                f"{file}: expected {N_CHANNELS} columns, found {df.shape[1]}"
            )
        df.columns = columns  # name columns
        all_data.append(df)  # collect subject data

    return pd.concat(all_data, ignore_index=True)  # merge into one DataFrame

# Read up to MAX_SUBJECTS subject files from DATASET_DIR into a single table.
data = load_eeg_data(dataset_dir=DATASET_DIR, max_subjects=MAX_SUBJECTS)
data.head(n=3)  # preview the first 3 rows


In [None]:
print("Shape:", data.shape)  # (rows, columns) of the combined table
summary_rows = ["mean", "std", "min", "max"]  # statistics kept from describe()
data.describe().loc[summary_rows].round(3)  # per-column summary, 3 decimals


## 2. Data Preprocessing
- Handles missing values using channel means  
- Normalizes features using StandardScaler  
- Creates synthetic emotion labels (continuous and binary)  
- Splits data into training and testing sets  


In [None]:
# --- Clean and standardize the channel columns -----------------------------
eeg_cols = [f"EEG_Ch_{i}" for i in range(1, N_CHANNELS + 1)]  # EEG_Ch_1 .. EEG_Ch_N

channel_means = data[eeg_cols].mean()  # per-channel mean over all rows
data[eeg_cols] = data[eeg_cols].fillna(channel_means)  # replace NaNs with that mean

# NOTE(review): the scaler is fitted on every row before the train/test split
# below, so test rows contribute to the mean/std used for scaling.
scaler = StandardScaler()
X = scaler.fit_transform(data[eeg_cols].values)  # standardized feature matrix

# --- Build synthetic targets -----------------------------------------------
# The labels are not measured emotions: they are a random weighted sum of the
# standardized channels, so y_cont is a linear function of the columns of X.
rng = np.random.default_rng(42)  # fixed seed for reproducible weights
weights = rng.normal(0, 1, N_CHANNELS)  # one random weight per channel
emphasized = [0, 4, 9, 14]  # zero-based channel positions to emphasize
weights[emphasized] *= 2.0  # double the weight of those channels
scores = X @ weights  # one raw score per row

score_min = scores.min()
score_max = scores.max()
y_cont = (scores - score_min) / (score_max - score_min) * 10  # min-max rescale to 0-10
y_bin = (y_cont > np.median(y_cont)).astype(int)  # 1 = above the median, 0 = otherwise

# --- Hold out 20% of the rows for testing ----------------------------------
# The two calls share a seed, but only the binary split is stratified, so the
# two splits are produced independently of each other.
split_kwargs = {"test_size": 0.2, "random_state": 42}
X_train_c, X_test_c, y_train_c, y_test_c = train_test_split(X, y_cont, **split_kwargs)  # regression split
X_train_b, X_test_b, y_train_b, y_test_b = train_test_split(X, y_bin, stratify=y_bin, **split_kwargs)  # classification split


## 3. Linear Regression Analysis
- Predicts continuous emotion intensity scores (0–10 scale)  
- Evaluates using MSE and R²  
- Displays feature importance based on coefficients  


In [None]:
# Fit a linear regression on the training split of the continuous target.
lin_model = LinearRegression().fit(X_train_c, y_train_c)  # fit() returns the fitted model

# Score the held-out rows.
y_pred = lin_model.predict(X_test_c)  # predicted intensity scores
mse = mean_squared_error(y_test_c, y_pred)  # mean squared error
r2 = r2_score(y_test_c, y_pred)  # coefficient of determination

print("MSE:", round(mse, 4))
print("R²:", round(r2, 4))

# Rank channels by absolute coefficient size and report the five largest.
coef_importance = np.abs(lin_model.coef_)
top_idx = np.argsort(coef_importance)[::-1][:5]  # descending order, first five
for idx in top_idx:
    channel_number = idx + 1  # channel names are 1-based
    print(f"EEG_Ch_{channel_number}: {coef_importance[idx]:.4f}")


## 4. Logistic Regression Analysis
- Classifies emotions as High vs Low  
- Evaluates using accuracy and confusion matrix  
- Provides classification report with precision, recall, F1-score  


In [None]:
# Fit the High/Low classifier on the training split of the binary target.
log_model = LogisticRegression(max_iter=1000).fit(X_train_b, y_train_b)  # fit() returns the fitted model

# Score the held-out rows.
y_pred_b = log_model.predict(X_test_b)  # predicted class labels (0 or 1)
acc = accuracy_score(y_test_b, y_pred_b)  # fraction of correct predictions
cm = confusion_matrix(y_test_b, y_pred_b)  # rows: true class, columns: predicted class

print("Accuracy:", round(acc, 4))
print("Confusion Matrix:", cm, sep="\n")  # heading, then the matrix on the next line
# Class 0 is reported as "Low" and class 1 as "High".
report = classification_report(y_test_b, y_pred_b, target_names=["Low", "High"])
print("Classification Report:", report, sep="\n")  # precision / recall / F1 per class


## 5. Model Comparison and Discussion
- Compares performance of both models  
- Discusses limitations of classical regression approaches  
- References advanced methods such as multi-reservoir echo state networks (ESNs)  


In [None]:
# Headline metric of each model, computed in the earlier cells.
print("Linear Regression MSE:", round(mse, 4))
print("Logistic Regression Accuracy:", round(acc, 4))

# Closing remarks, one per line.
print(
    "Limitations: linear models may miss non-linear EEG patterns.",
    "Advanced methods (e.g., reservoir computing) could improve results.",
    sep="\n",
)
