# Human Emotion Detector Using EEG Signals
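A simple ML pipeline that applies Linear Regression and Logistic Regression to multi-channel EEG recordings. The emotion labels used here are synthetic (generated from the EEG features themselves), so the reported scores demonstrate the pipeline rather than real emotion-recognition performance.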


In [None]:
import numpy as np
import pandas as pd
from pathlib import Path

from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import mean_squared_error, r2_score, accuracy_score, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


## 1. Dataset Loading and Exploration
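- Loads EEG data from up to `MAX_SUBJECTS` per-subject CSV files (`s*.csv`) in the dataset directory  
- Displays the dataset shape and the first few rows  
- Summarizes each channel with its mean, standard deviation, minimum, and maximum  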


In [None]:
# Directory that is scanned for per-subject CSV recordings.
DATASET_DIR = Path("dataset")
# Upper bound on the number of subject files that are loaded.
MAX_SUBJECTS = 5
# Number of EEG channels; every CSV is labelled with this many column names.
N_CHANNELS = 19

def load_eeg_data(
    dataset_dir: Path,
    max_subjects: int = 5,
    n_channels: "int | None" = None,
) -> pd.DataFrame:
    """Load and stack EEG recordings from per-subject CSV files.

    Files directly inside ``dataset_dir`` whose name starts with ``"s"`` and
    ends in ``.csv`` are sorted by path, the first ``max_subjects`` of them
    are read without a header row, and their rows are concatenated.

    Args:
        dataset_dir: Directory containing the subject CSV files.
        max_subjects: Maximum number of subject files to read.
        n_channels: Number of columns expected in every file. Defaults to the
            module-level ``N_CHANNELS`` when omitted.

    Returns:
        A single DataFrame with columns ``EEG_Ch_1`` .. ``EEG_Ch_<n_channels>``
        and a fresh 0..n-1 index covering all loaded files.

    Raises:
        ValueError: If no matching file is selected, or if a file does not
            have exactly ``n_channels`` columns.
    """
    if n_channels is None:
        n_channels = N_CHANNELS

    # NOTE: plain path sorting is lexicographic, so "s10.csv" sorts before
    # "s2.csv" unless the file names are zero-padded.
    files = sorted(
        f
        for f in dataset_dir.iterdir()
        if f.is_file() and f.name.startswith("s") and f.suffix == ".csv"
    )[:max_subjects]
    if not files:
        # pd.concat([]) would fail with an opaque "No objects to concatenate".
        raise ValueError(f"No subject CSV files (s*.csv) found in {dataset_dir}")

    columns = [f"EEG_Ch_{i + 1}" for i in range(n_channels)]
    frames = []
    for file in files:
        df = pd.read_csv(file, header=None)
        if df.shape[1] != n_channels:
            # Name the offending file instead of surfacing a bare pandas
            # length-mismatch error from the column assignment.
            raise ValueError(
                f"{file.name}: expected {n_channels} columns, found {df.shape[1]}"
            )
        df.columns = columns
        frames.append(df)

    return pd.concat(frames, ignore_index=True)

# Read the configured number of subject recordings into one DataFrame,
# then preview its first three rows as the cell output.
data = load_eeg_data(dataset_dir=DATASET_DIR, max_subjects=MAX_SUBJECTS)
data.head(n=3)


In [None]:
# Report the overall (rows, columns) shape, then show a compact per-channel
# summary restricted to four statistics and rounded to three decimals.
summary_rows = ["mean", "std", "min", "max"]
print("Shape:", data.shape)
channel_summary = data.describe().loc[summary_rows]
channel_summary.round(3)


## 2. Data Preprocessing
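- Fills missing values in each channel with that channel's mean  
- Normalizes features to zero mean and unit variance using StandardScaler  
- Creates synthetic emotion labels from a random weighted sum of the channels: a continuous score rescaled to 0–10 and a binary label (above the median score or not)  
- Splits data into training (80%) and testing (20%) sets  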


In [None]:
# Names of the EEG feature columns: EEG_Ch_1 .. EEG_Ch_<N_CHANNELS>.
eeg_cols = [f"EEG_Ch_{ch}" for ch in range(1, N_CHANNELS + 1)]

# Impute: replace each missing value with the mean of its own channel.
channel_means = data[eeg_cols].mean()
data[eeg_cols] = data[eeg_cols].fillna(channel_means)

# Standardize every channel to zero mean / unit variance.
# NOTE: the scaler is fitted on all rows before the train/test split below,
# so the test rows contribute to the scaling statistics.
scaler = StandardScaler()
X = scaler.fit_transform(data[eeg_cols].to_numpy())

# Synthetic targets: a seeded random linear combination of the channels,
# with four channels given double weight.
rng = np.random.default_rng(42)
weights = rng.normal(loc=0, scale=1, size=N_CHANNELS)
boosted_channels = [0, 4, 9, 14]
weights[boosted_channels] *= 2.0
scores = X @ weights

# Min-max rescale the raw scores onto 0..10 for the continuous target, then
# threshold at the median for the binary target (1 = above the median).
score_min = scores.min()
score_max = scores.max()
y_cont = (scores - score_min) / (score_max - score_min) * 10
y_bin = (y_cont > np.median(y_cont)).astype(int)

# 80/20 hold-out splits with a fixed seed; only the classification split is
# stratified on the binary label.
split_options = {"test_size": 0.2, "random_state": 42}
X_train_c, X_test_c, y_train_c, y_test_c = train_test_split(
    X, y_cont, **split_options
)
X_train_b, X_test_b, y_train_b, y_test_b = train_test_split(
    X, y_bin, stratify=y_bin, **split_options
)


## 3. Linear Regression Analysis
- Predicts continuous emotion intensity scores (0–10 scale)  
- Evaluates using MSE and R²  
- Displays feature importance based on coefficients  


In [None]:
# Fit ordinary least squares on the continuous target and score it on the
# held-out split.
lin_model = LinearRegression().fit(X_train_c, y_train_c)
y_pred = lin_model.predict(X_test_c)

mse = mean_squared_error(y_test_c, y_pred)
r2 = r2_score(y_test_c, y_pred)
for metric_name, metric_value in (("MSE:", mse), ("R²:", r2)):
    print(metric_name, round(metric_value, 4))

# Rank channels by absolute coefficient and list the five largest,
# strongest first.
coef_importance = np.abs(lin_model.coef_)
top_idx = np.argsort(coef_importance)[::-1][:5]
for channel_pos, magnitude in zip(top_idx, coef_importance[top_idx]):
    print(f"EEG_Ch_{channel_pos+1}: {magnitude:.4f}")


## 4. Logistic Regression Analysis
- Classifies emotion intensity as High vs. Low (score above the median vs. not)  
- Evaluates using accuracy and a confusion matrix  
- Provides a classification report with precision, recall, and F1-score  


In [None]:
# Train the binary classifier (iteration cap raised to 1000) and predict
# labels for the held-out split.
log_model = LogisticRegression(max_iter=1000).fit(X_train_b, y_train_b)
y_pred_b = log_model.predict(X_test_b)

# Evaluation artefacts: overall accuracy, confusion matrix, per-class report.
acc = accuracy_score(y_test_b, y_pred_b)
cm = confusion_matrix(y_test_b, y_pred_b)
class_names = ["Low", "High"]
report = classification_report(y_test_b, y_pred_b, target_names=class_names)

print("Accuracy:", round(acc, 4))
for heading, body in (("Confusion Matrix:", cm), ("Classification Report:", report)):
    print(heading)
    print(body)


## 5. Model Comparison and Discussion
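- Reports the Linear Regression MSE alongside the Logistic Regression accuracy  
- Notes the main limitation of these classical linear models: they may miss non-linear EEG patterns  
- Mentions reservoir computing (e.g., multi-reservoir echo state networks, ESNs) as a more advanced alternative  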


In [None]:
# Headline metric of each model, rounded to four decimals.
headline_metrics = (
    ("Linear Regression MSE:", mse),
    ("Logistic Regression Accuracy:", acc),
)
for label, value in headline_metrics:
    print(label, round(value, 4))

# Closing remarks on the limits of linear models and possible next steps.
closing_remarks = (
    "Limitations: linear models may miss non-linear EEG patterns.",
    "Advanced methods (e.g., reservoir computing) could improve results.",
)
for remark in closing_remarks:
    print(remark)
