# In [None]:
# WORKING

# New process for EEG prediction

# EEG Prediction Using Trained Random Forest Model (With Feature Extraction)
import pandas as pd
import numpy as np
import os
import joblib
from scipy.stats import skew, kurtosis
from scipy.signal import butter, filtfilt, iirnotch
from tqdm import tqdm

# 📌 Define Paths
# Absolute, machine-specific locations. `input_folder` is scanned for
# `.parquet` recordings, `output_folder` receives one `predicted_*` file per
# recording, and the two `.pkl` files hold the fitted model and the list of
# feature names it is expected to receive.
input_folder = r"C:\Users\Kevin Tran\Documents\Project Data\Input files"
output_folder = r"C:\Users\Kevin Tran\Documents\Project Data\Predicted_EEGs"
model_path = r"C:\Users\Kevin Tran\Documents\Project Data\For Machine Learning\random_forest_model.pkl"
feature_columns_path = r"C:\Users\Kevin Tran\Documents\Project Data\For Machine Learning\feature_columns.pkl"

# ✅ Ensure Output Folder Exists
# Runs at import time; exist_ok=True makes repeated runs a no-op.
os.makedirs(output_folder, exist_ok=True)

# ✅ Load Trained Model and Expected Feature Names
# NOTE(review): joblib.load is pickle-based and can execute arbitrary code
# while loading -- only point these paths at files you created or trust.
# Both loads happen at import time and raise if a file is missing.
print("🧠 Loading trained Random Forest model...")
rf_model = joblib.load(model_path)
expected_features = joblib.load(feature_columns_path)  # Feature names used to check, fill and order the model input

# 📌 EEG Signal Processing Functions
def apply_notch_filter(signal, fs=400, freq=60.0, quality_factor=30):
    """Suppress a narrow band around ``freq`` (60 Hz mains hum by default).

    A notch is designed with ``scipy.signal.iirnotch`` and applied with
    ``filtfilt``, i.e. once forward and once backward over the samples.

    Args:
        signal: 1-D sequence of samples.
        fs: Sampling rate in Hz.
        freq: Centre frequency of the notch in Hz.
        quality_factor: Notch quality factor ``Q``; larger values give a
            narrower notch.

    Returns:
        The filtered samples, same length as ``signal``.
    """
    numerator, denominator = iirnotch(freq, quality_factor, fs=fs)
    cleaned = filtfilt(numerator, denominator, signal)
    return cleaned

def apply_bandpass_filter(signal, fs=400, lowcut=0.5, highcut=40.0, order=5):
    """Keep the ``lowcut``..``highcut`` band (0.5-40 Hz by default).

    A Butterworth band-pass is designed in transfer-function (b, a) form
    with cut-offs expressed as fractions of the Nyquist frequency, then
    applied with ``filtfilt`` (forward and backward pass).

    Args:
        signal: 1-D sequence of samples.
        fs: Sampling rate in Hz.
        lowcut: Lower band edge in Hz.
        highcut: Upper band edge in Hz.
        order: Order passed to ``scipy.signal.butter``.

    Returns:
        The filtered samples, same length as ``signal``.
    """
    # butter() without fs= expects edges normalised so that 1.0 == Nyquist.
    band_edges = np.array([lowcut, highcut]) / (fs / 2.0)
    numerator, denominator = butter(order, band_edges, btype='band')
    return filtfilt(numerator, denominator, signal)

def normalize_signal(signal):
    """Z-score a signal: subtract its mean, then divide by its standard deviation.

    Args:
        signal: Array of samples.

    Returns:
        A new array with (numerically) zero mean and unit variance. If the
        standard deviation is zero the division is not guarded, so the
        result contains NaN/inf instead of raising.
    """
    offset = np.mean(signal)
    scale = np.std(signal)
    return (signal - offset) / scale

# 📌 Feature Extraction Functions
def extract_time_features(signal):
    """Compute time-domain summary statistics of one signal.

    Args:
        signal: 1-D NumPy array of samples.

    Returns:
        Dict with keys, in this order: ``mean``, ``variance``, ``skewness``,
        ``kurtosis``, ``rms``, ``zero_crossing_rate``, ``mean_abs``,
        ``diff_rms1`` and ``diff_rms2``.
    """
    first_diff = np.diff(signal)
    second_diff = np.diff(signal, n=2)
    # A non-zero step in the sign sequence marks a sign change between
    # neighbouring samples (this includes steps to or from exactly zero).
    sign_steps = np.diff(np.sign(signal))

    features = {}
    features["mean"] = np.mean(signal)
    features["variance"] = np.var(signal)
    features["skewness"] = skew(signal)
    features["kurtosis"] = kurtosis(signal)
    features["rms"] = np.sqrt(np.mean(signal**2))
    # Normalised by the sample count, not by the number of sample pairs.
    features["zero_crossing_rate"] = np.sum(sign_steps != 0) / len(signal)
    features["mean_abs"] = np.mean(np.abs(signal))
    features["diff_rms1"] = np.sqrt(np.mean(first_diff ** 2))
    features["diff_rms2"] = np.sqrt(np.mean(second_diff ** 2))
    return features

def extract_frequency_features(signal, fs):
    """Summarise the FFT amplitude spectrum of one signal per frequency band.

    The single-sided amplitude spectrum is built from ``np.fft.fft`` (every
    bin except the first and the last is doubled). Each band feature is the
    plain sum of the amplitudes whose bin frequency lies inside the band,
    edges inclusive.

    Args:
        signal: 1-D sequence of samples.
        fs: Sampling rate in Hz, used to map FFT bins to frequencies.

    Returns:
        Dict with one entry per band (``delta``, ``theta``, ``alpha1``,
        ``alpha2``, ``beta1``, ``beta2``, ``gamma1``, ``gamma2``, ``higher``)
        followed by ``spectral_entropy``.
    """
    n_samples = len(signal)
    n_bins = n_samples // 2 + 1

    amplitude = np.abs(np.fft.fft(signal) / n_samples)[:n_bins]
    amplitude[1:-1] *= 2
    bin_freqs = fs * np.arange(n_bins) / n_samples

    # (name, low Hz, high Hz). Bins falling strictly between two bands
    # (e.g. 3.5 Hz) are not counted in any band.
    band_edges = (
        ("delta", 1, 3),
        ("theta", 4, 7),
        ("alpha1", 8, 9),
        ("alpha2", 10, 12),
        ("beta1", 13, 17),
        ("beta2", 18, 30),
        ("gamma1", 31, 40),
        ("gamma2", 41, 50),
        ("higher", 51, 250),
    )

    features = {}
    for band_name, low_hz, high_hz in band_edges:
        in_band = (bin_freqs >= low_hz) & (bin_freqs <= high_hz)
        features[band_name] = np.sum(amplitude[in_band])

    # Entropy-style sum over the raw (unnormalised) amplitudes; the 1e-10
    # offset keeps log() finite for empty bins.
    features["spectral_entropy"] = -np.sum(amplitude * np.log(amplitude + 1e-10))
    return features

# 📌 Function to Extract Features from EEG Data
def extract_features_from_eeg(file_path, fs=400):
    """Build one feature row for an EEG recording stored as a parquet file.

    Every column of the file is treated as one channel. Each channel is
    notch-filtered, band-pass filtered and normalised, then reduced to
    time-domain and frequency-domain features. The per-channel features are
    averaged across channels into a single row.

    Args:
        file_path: Path to the ``.parquet`` recording.
        fs: Sampling rate of the recording in Hz. It is passed to both
            filters and to the frequency-feature extraction so the three
            steps always agree. The default of 400 reproduces the previous
            hard-coded behaviour.

    Returns:
        A one-row ``pandas.DataFrame`` whose columns are the feature names.
    """
    data = pd.read_parquet(file_path)

    per_channel_features = []
    for channel in data.columns:
        signal = data[channel].values

        # Pre-processing chain; the same sampling rate is used throughout.
        signal = apply_notch_filter(signal, fs=fs)
        signal = apply_bandpass_filter(signal, fs=fs)
        signal = normalize_signal(signal)

        time_features = extract_time_features(signal)
        frequency_features = extract_frequency_features(signal, fs=fs)

        # Feature names of the two groups do not overlap, so a plain merge
        # keeps every value.
        per_channel_features.append({**time_features, **frequency_features})

    # Average each feature over the channels. DataFrame.mean skips NaN by
    # default, so a channel whose features came out NaN does not poison the
    # aggregate.
    feature_df = pd.DataFrame(per_channel_features).mean(axis=0).to_frame().T

    return feature_df

# 📌 Function to Process & Predict a Single EEG File
def process_and_predict(file_path):
    """Extract features from one recording, classify it and save the result.

    The feature row is aligned to ``expected_features`` (missing columns are
    filled with 0, unexpected ones are dropped, order is made to match),
    passed to ``rf_model.predict``, and written together with the predicted
    label to ``output_folder`` as ``predicted_<original file name>``.

    Args:
        file_path: Path to the ``.parquet`` recording.

    Returns:
        A human-readable status string. Any ``Exception`` raised while
        processing is caught and reported in the returned string instead of
        propagating.
    """
    file_name = os.path.basename(file_path)

    try:
        print(f"\n📂 Processing: {file_name}")

        feature_df = extract_features_from_eeg(file_path)

        # Compare what was extracted with what the model expects.
        wanted = set(expected_features)
        present = set(feature_df.columns)
        missing_features = wanted - present
        extra_features = present - wanted

        if missing_features:
            print(f"⚠️ Missing features: {missing_features}")
            for feat in missing_features:
                feature_df[feat] = 0  # placeholder value for absent features

        if extra_features:
            print(f"⚠️ Extra features found: {extra_features}")
            feature_df = feature_df[expected_features]  # drop unexpected columns

        # Put the columns in the order given by expected_features.
        feature_df = feature_df[expected_features]

        print(f"🔮 Predicting seizures for {file_name}...")
        prediction = rf_model.predict(feature_df)

        # A label of 1 anywhere in the prediction counts as a seizure.
        if 1 in prediction:
            print(f"🚨 SEIZURE DETECTED in {file_name}! 🚨")
            seizure_status = "Seizure Detected"
        else:
            print(f"✅ No seizure detected in {file_name}.")
            seizure_status = "No Seizure Detected"

        # Persist the aligned features together with the predicted label.
        output_file = os.path.join(output_folder, f"predicted_{file_name}")
        feature_df["predicted_label"] = prediction
        feature_df.to_parquet(output_file)

    except Exception as e:
        return f"❌ Failed: {file_name} | Error: {e}"

    return f"✅ Prediction saved: {output_file} | Status: {seizure_status}"

# 📌 Run EEG Processing and Prediction
if __name__ == "__main__":
    # Batch driver: run process_and_predict on every parquet file found
    # directly inside input_folder and print one status line per file.
    print(f"📂 Scanning for EEG `.parquet` files in {input_folder}...")

    eeg_files = []
    if os.path.isdir(input_folder):
        # os.listdir returns entries in arbitrary order; sorting makes runs
        # reproducible. The extension check is case-insensitive so files
        # named e.g. `*.PARQUET` are not silently skipped.
        eeg_files = [
            os.path.join(input_folder, name)
            for name in sorted(os.listdir(input_folder))
            if name.lower().endswith(".parquet")
        ]
    else:
        # Report a missing folder instead of crashing with a traceback.
        print(f"🚨 Input folder not found: {input_folder}")

    if not eeg_files:
        print("🚨 No `.parquet` files found in the input folder! Please check your folder.")
    else:
        print(f"✅ Found {len(eeg_files)} EEG files. Starting predictions...\n")
        for eeg_file in tqdm(eeg_files, desc="Processing EEG Files"):
            result = process_and_predict(eeg_file)
            print(result)  # Show final prediction status
        print(f"✅ All predictions completed! Files saved in: {output_folder}")

