## Import Library 

In [182]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from datetime import timedelta
from scipy.signal import butter, filtfilt, iirnotch, welch
from scipy.stats import entropy, skew, kurtosis, mode
import mne
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

from sklearn.model_selection import train_test_split
from lightgbm import LGBMClassifier
from sklearn.metrics import classification_report, confusion_matrix, f1_score, accuracy_score
from sklearn.model_selection import GroupKFold
from sklearn.metrics import ConfusionMatrixDisplay
import optuna
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import RobustScaler
from imblearn.over_sampling import SMOTE

# Import Dataset

In [183]:
df = pd.read_csv('Dataset/farel.csv')
label = pd.read_csv('Dataset/label.csv')
df.head()

Unnamed: 0,timestamps,TP9,AF7,AF8,TP10,Right AUX
0,1763459000.0,-462.891,-1000.0,-888.184,-241.211,0.0
1,1763459000.0,-535.156,-1000.0,-1000.0,-222.656,0.0
2,1763459000.0,-475.586,-246.582,-531.25,-63.965,0.0
3,1763459000.0,-274.414,999.512,864.258,145.02,0.0
4,1763459000.0,-364.258,237.793,142.578,-73.73,0.0


In [184]:
label.head()

Unnamed: 0,Timestamp,Nama Lengkap,Email,Jenis Kelamin,Umur,Video ID,Rating,Confidence Score,Submitted At
0,18/11/2025 12:57:30,Athallah Azhar Aulia Hadi,athallah23004@mail.unpad.ac.id,Laki-laki,20,video_6,Biasa Saja,5,2025-11-18T05:57:27.728Z
1,18/11/2025 12:59:10,Athallah Azhar Aulia Hadi,athallah23004@mail.unpad.ac.id,Laki-laki,20,video_7,Biasa Saja,4,2025-11-18T05:59:08.355Z
2,18/11/2025 12:59:11,Athallah Azhar Aulia Hadi,athallah23004@mail.unpad.ac.id,Laki-laki,20,video_7,Biasa Saja,4,2025-11-18T05:59:09.518Z
3,18/11/2025 13:00:53,Athallah Azhar Aulia Hadi,athallah23004@mail.unpad.ac.id,Laki-laki,20,video_8,Menarik,4,2025-11-18T06:00:51.347Z
4,18/11/2025 13:02:36,Athallah Azhar Aulia Hadi,athallah23004@mail.unpad.ac.id,Laki-laki,20,video_5,Biasa Saja,4,2025-11-18T06:02:34.550Z


In [185]:
label['Nama Lengkap'].value_counts()

Nama Lengkap
Muhammad Luthfi Aziz Sunarya    14
Farrel Liesdia Putra            14
Athallah Azhar Aulia Hadi       12
Luthfi Hamam Arsyada            12
Name: count, dtype: int64

In [186]:
label = label.drop_duplicates(subset=["Nama Lengkap", "Video ID"], keep="first").reset_index(drop=True)

## Ubah Timestamps sesuai dengan yang dibutuhkan

In [187]:
def timestamp_preprocess(df):
    df['timestamps'] = pd.to_datetime(df['timestamps'], unit='s', errors='coerce')
    df['timestamps'] = df['timestamps'].dt.tz_localize('UTC').dt.tz_convert('Asia/Jakarta')
    
    return df

df = timestamp_preprocess(df)

In [188]:
def make_timestamps_unique(df):
    duplicated_mask = df['timestamps'].duplicated(keep=False)
    groups = df[duplicated_mask].groupby('timestamps')

    for ts, idxs in groups.groups.items():
        n = len(idxs)
        df.loc[idxs, 'timestamps'] += pd.to_timedelta(np.arange(n), unit='ns')

    return df

df = make_timestamps_unique(df)


## Cek Rentang Timestamps

In [189]:
def rentang(df):
    min_time = df['timestamps'].min()
    max_time = df['timestamps'].max()

    rentang_waktu = max_time - min_time

    print("Rentang timestamps:")
    print("Dari :", min_time)
    print("Sampai:", max_time)
    print("Durasi rentang:", rentang_waktu)

    return rentang_waktu.total_seconds()

In [190]:
rentang(df)

Rentang timestamps:
Dari : 2025-11-18 16:37:20.369999886+07:00
Sampai: 2025-11-18 16:55:18.733999968+07:00
Durasi rentang: 0 days 00:17:58.364000082


1078.364

In [191]:
df = assign_eeg_labels(df, label, "Farrel Liesdia Putra")

Total durasi dataset: 1078.364
Durasi blok per video: 104.0364
Durasi istirahat akhir: 44.0364


  eeg_df['confidence'] = eeg_df['confidence'].replace('Istirahat', 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  eeg_df['confidence'] = eeg_df['confidence'].replace('Istirahat', 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  eeg_df['video_id'] = eeg_df['video_id'].str.replace('video_', '', regex=False).replace('Istirahat', 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.ht

In [192]:
df['video_id'].value_counts()

video_id
0     122562
10     15373
5      15373
9      15373
4      15373
2      15373
6      15372
7      15372
8      15372
1      15372
3      15371
Name: count, dtype: int64

In [193]:
rentang(df)

Rentang timestamps:
Dari : 2025-11-18 16:37:20.369999886+07:00
Sampai: 2025-11-18 16:55:18.730000019+07:00
Durasi rentang: 0 days 00:17:58.360000133


1078.36

In [194]:
df.describe()

Unnamed: 0,TP9,AF7,AF8,TP10,Right AUX,rating,confidence
count,276286.0,276286.0,276286.0,276286.0,276286.0,276286.0,276286.0
mean,-15.896343,75.204275,64.405891,96.44214,0.0,0.945871,2.392499
std,121.431233,790.57843,765.186327,779.292879,0.0,1.026019,2.239286
min,-1000.0,-1000.0,-1000.0,-1000.0,0.0,0.0,0.0
25%,-76.172,-794.434,-742.676,-722.656,0.0,0.0,0.0
50%,-4.395,194.824,97.168,215.332,0.0,1.0,3.0
75%,43.457,937.988,872.559,936.523,0.0,2.0,5.0
max,999.512,999.512,999.512,999.512,0.0,3.0,5.0


In [195]:
df

Unnamed: 0,timestamps,TP9,AF7,AF8,TP10,Right AUX,video_id,rating,confidence
0,2025-11-18 16:37:20.369999886+07:00,-462.891,-1000.000,-888.184,-241.211,0.0,0,0,0
1,2025-11-18 16:37:20.374000072+07:00,-535.156,-1000.000,-1000.000,-222.656,0.0,0,0,0
2,2025-11-18 16:37:20.378000021+07:00,-475.586,-246.582,-531.250,-63.965,0.0,0,0,0
3,2025-11-18 16:37:20.381999969+07:00,-274.414,999.512,864.258,145.020,0.0,0,0,0
4,2025-11-18 16:37:20.385999918+07:00,-364.258,237.793,142.578,-73.730,0.0,0,0,0
...,...,...,...,...,...,...,...,...,...
276281,2025-11-18 16:55:18.713999987+07:00,-212.891,-1000.000,-910.156,-1000.000,0.0,0,0,0
276282,2025-11-18 16:55:18.717999935+07:00,-64.453,-37.598,565.918,54.199,0.0,0,0,0
276283,2025-11-18 16:55:18.721999884+07:00,116.211,999.512,999.512,999.512,0.0,0,0,0
276284,2025-11-18 16:55:18.726000071+07:00,106.445,856.934,599.121,834.473,0.0,0,0,0


In [196]:
if "Right AUX" in df.columns:
    df = df.drop(columns=["Right AUX"])

In [197]:
FS_TARGET = 256  
BANDPASS_LOW = 0.5
BANDPASS_HIGH = 45.0
NOTCH_FREQ = 50.0
ICA_KURTOSIS_THRESH = 10.0

EEG_CHANNELS = ['AF7', 'AF8', 'TP9', 'TP10']

# Frequency bands for band power
BANDS = {
    'delta': (0.5, 4),
    'theta': (4, 8),
    'alpha': (8, 13),
    'beta': (13, 30),
    'gamma': (30, 45)
}

In [198]:
# resample DF -> uniform grid
def resample_dataframe_to_fs(df, fs=FS_TARGET, channels=EEG_CHANNELS):
    df = df.sort_values('timestamps').reset_index(drop=True)
    start = df['timestamps'].iloc[0]
    stop = df['timestamps'].iloc[-1]
    
    total_seconds = (stop - start).total_seconds()
    n_samples = int(np.floor(total_seconds * fs)) + 1   
    new_times = pd.date_range(start=start, periods=n_samples, freq=pd.Timedelta(seconds=1/fs))
    new_df = pd.DataFrame(index=new_times)

    tmp = df.set_index('timestamps')
    for ch in channels:
        if ch not in tmp.columns:
            raise ValueError(f"Channel {ch} not in dataframe columns")
        
        series = tmp[ch]
        series = series.reindex(series.index.union(new_times)).sort_index().interpolate(method='time').reindex(new_times)
        new_df[ch] = series.values

    new_df = new_df.reset_index().rename(columns={'index': 'timestamps'})

    return new_df

In [199]:
# MNE Helpers: make Raw, filtering, notch, ICA, re-referencing
def make_mne_raw_from_df(df, ch_names=EEG_CHANNELS, sfreq=FS_TARGET):
    """
    Convert dataframe (columns=channels, timestamps present) to mne.RawArray
    """
    data = df[ch_names].T.values 
    ch_types = ['eeg'] * len(ch_names)
    info = mne.create_info(ch_names, sfreq=sfreq, ch_types=ch_types)
    raw = mne.io.RawArray(data, info, verbose=False)
    return raw

In [200]:
def preprocess_raw(raw, l_freq=BANDPASS_LOW, h_freq=BANDPASS_HIGH, notch_freq=NOTCH_FREQ):
    if notch_freq is not None:
        raw.notch_filter(freqs=notch_freq, picks='eeg', verbose=False)
    raw.filter(l_freq=l_freq, h_freq=h_freq, picks='eeg', verbose=False)
    raw.set_eeg_reference('average', verbose=False)
    return raw

In [201]:
def run_ica_auto(raw, n_components=0.99):
    """
    Fit ICA on raw, automatically exclude components with extreme kurtosis (heuristic).
    Returns cleaned raw, and list of excluded components.
    """
    ica = mne.preprocessing.ICA(n_components=n_components, random_state=42, max_iter='auto', verbose=False)
    ica.fit(raw, verbose=False)
    sources = ica.get_sources(raw).get_data() 
    ks = kurtosis(sources, axis=1, fisher=False, nan_policy='omit')  
    exclude_idx = np.where(np.abs(ks) > ICA_KURTOSIS_THRESH)[0].tolist()
    if exclude_idx:
        ica.exclude = exclude_idx
        raw_clean = ica.apply(raw.copy(), exclude=exclude_idx, verbose=False)
    else:
        raw_clean = raw
    return raw_clean, exclude_idx

In [202]:
def assign_eeg_labels(eeg_df, label_df, nama_lengkap):
    person_labels = label_df[label_df["Nama Lengkap"] == nama_lengkap].reset_index(drop=True)
    jumlah_video = len(person_labels)

    # Hitung total durasi dataset (detik)
    min_time = eeg_df['timestamps'].min()
    max_time = eeg_df['timestamps'].max()
    total_duration = (max_time - min_time).total_seconds()

    # 1. Baseline awal fixed 38 detik
    baseline_awal = 38

    # 2. Sisa waktu dibagi rata ke masing-masing video-block
    remaining_time = total_duration - baseline_awal
    video_block = remaining_time / jumlah_video  # total 1 blok (video + rest)

    durasi_video = 60
    durasi_rest = video_block - durasi_video  # istirahat per video (akhir)

    print("Total durasi dataset:", total_duration)
    print("Durasi blok per video:", video_block)
    print("Durasi istirahat akhir:", durasi_rest)

    # Inisialisasi kolom
    eeg_df["video_id"] = None
    eeg_df["rating"] = None
    eeg_df["confidence"] = None

    # Start timestamp
    current_time = min_time

    # Beri label BASELINE AWAL 38 detik
    rest1_end = current_time + timedelta(seconds=baseline_awal)
    eeg_df.loc[
        (eeg_df["timestamps"] >= current_time) &
        (eeg_df["timestamps"] < rest1_end),
        ["video_id", "rating", "confidence"]
    ] = ["Istirahat", "Istirahat", "Istirahat"]

    current_time = rest1_end

    # Loop per video
    for i, row in person_labels.iterrows():

        # VIDEO 60 detik
        vid_start = current_time
        vid_end = current_time + timedelta(seconds=durasi_video)

        eeg_df.loc[
            (eeg_df["timestamps"] >= vid_start) &
            (eeg_df["timestamps"] < vid_end),
            ["video_id", "rating", "confidence"]
        ] = [
            row["Video ID"], 
            row["Rating"], 
            row["Confidence Score"]
        ]

        current_time = vid_end

        # ISTIRAHAT AKHIR (durasi_rest)
        rest2_end = current_time + timedelta(seconds=durasi_rest)

        eeg_df.loc[
            (eeg_df["timestamps"] >= current_time) &
            (eeg_df["timestamps"] < rest2_end),
            ["video_id", "rating", "confidence"]
        ] = ["Istirahat", "Istirahat", "Istirahat"]

        current_time = rest2_end

    eeg_df = eeg_df.dropna(subset=["video_id"])
    eeg_df['confidence'] = eeg_df['confidence'].replace('Istirahat', 0)
    eeg_df['video_id'] = eeg_df['video_id'].str.replace('video_', '', regex=False).replace('Istirahat', 0)
    eeg_df['rating'] = eeg_df['rating'].map({'Istirahat': 0, 'Tidak Menarik': 1, 'Biasa Saja': 2, 'Menarik': 3})
    return eeg_df

In [203]:
def epoch_eeg_from_assigned_labels(df, window_s=2.0, overlap=0.5):
    epochs = []
    hop_s = window_s * (1 - overlap)

    video_blocks = df.groupby("video_id")

    for vid, block in video_blocks:
        if vid == "Istirahat":
            continue  # skip istirahat jika tidak mau dilatih

        start_time = block['timestamps'].min()
        end_time = block['timestamps'].max()

        t = start_time
        while t + timedelta(seconds=window_s) <= end_time:
            win_start = t
            win_end = t + timedelta(seconds=window_s)

            mask = (df["timestamps"] >= win_start) & (df["timestamps"] < win_end)
            epoch_df = df.loc[mask]

            if len(epoch_df) > 0:
                epochs.append({
                    "video_id": vid,
                    "rating": epoch_df["rating"].iloc[0],
                    "confidence": epoch_df["confidence"].iloc[0],
                    "start_time": win_start,
                    "end_time": win_end,
                    "data": epoch_df
                })

            t = t + timedelta(seconds=hop_s)

    return epochs


In [204]:
# Feature extraction: band power via Welch, Hjorth, stats
def bandpower_welch(epoch_signal, sf=FS_TARGET, bands=BANDS, nperseg=None):
    """
    epoch_signal: array shape (n_channels, n_samples)
    returns dict of band power aggregated per channel per band (flattened later)
    """
    if nperseg is None:
        nperseg = int(epoch_signal.shape[1] // 4)  
    powers = {}
    for ch_idx in range(epoch_signal.shape[0]):
        f, Pxx = welch(epoch_signal[ch_idx, :], fs=sf, nperseg=nperseg)
        for band_name, (low, high) in bands.items():
            idx_band = np.logical_and(f >= low, f <= high)
            bp = np.trapz(Pxx[idx_band], f[idx_band])
            powers[f"ch{ch_idx+1}_{band_name}"] = bp
    return powers

In [205]:
def hjorth_parameters(x):
    first_deriv = np.diff(x)
    second_deriv = np.diff(x, n=2)
    activity = np.var(x)
    mobility = np.sqrt(np.var(first_deriv) / activity) if activity > 0 else 0
    complexity = np.sqrt(np.var(second_deriv) / np.var(first_deriv)) / mobility if (np.var(first_deriv) > 0 and mobility > 0) else 0
    return activity, mobility, complexity

In [206]:
def extract_features_from_epoch(epoch_df, ch_names=EEG_CHANNELS):
    X = epoch_df[ch_names].T.values
    features = {}
    bp = bandpower_welch(X, sf=FS_TARGET, bands=BANDS)
    features.update(bp)
    
    for i, ch in enumerate(ch_names):
        act, mob, com = hjorth_parameters(X[i, :])
        features[f"{ch}_hjorth_activity"] = act
        features[f"{ch}_hjorth_mobility"] = mob
        features[f"{ch}_hjorth_complexity"] = com
        features[f"{ch}_mean"] = np.mean(X[i, :])
        features[f"{ch}_std"] = np.std(X[i, :])
    return features

In [207]:
def epochs_to_feature_matrix(epochs, ch_names=EEG_CHANNELS):
    feature_rows = []
    y = []
    groups = [] 
    confidences = []
    meta = []
    for ep in epochs:
        feat = extract_features_from_epoch(ep['data'], ch_names=ch_names)
        feature_rows.append(feat)
        y.append(ep['rating'])
        groups.append(ep['video_id'])
        confidences.append(ep['confidence'])
        meta.append({'start_time': ep['start_time'], 'end_time': ep['end_time'], 'video_id': ep['video_id']})
    X_df = pd.DataFrame(feature_rows).fillna(0)
    return X_df, np.array(y), np.array(groups), np.array(confidences), pd.DataFrame(meta)

In [208]:
# Normalisasi & optional baseline subtraction
def normalize_features_df(X_df, method='zscore'):
    if method == 'zscore':
        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(X_df)
        return pd.DataFrame(X_scaled, columns=X_df.columns), scaler
    else:
        return X_df, None

In [209]:
# Modeling: LOVO per-subject (leave-one-video-out)
def lovo_evaluate(X_df, y, groups, confidences=None, classifier='svm', use_sample_weight=False, do_smote=False):
    """
    groups: array of video_ids per sample
    returns averaged metrics and per-fold reports
    """
    unique_videos = np.unique(groups)
    reports = []
    cm_total = None
    f1s = []
    accs = []
    for v in unique_videos:
        test_idx = np.where(groups == v)[0]
        train_idx = np.where(groups != v)[0]
        X_train = X_df.iloc[train_idx].values
        X_test = X_df.iloc[test_idx].values
        y_train = y[train_idx]
        y_test = y[test_idx]
        sample_weight = None
        if use_sample_weight and confidences is not None:
            sample_weight = confidences[train_idx].astype(float)
        
        if do_smote:
            sm = SMOTE(random_state=42)
            X_train, y_train = sm.fit_resample(X_train, y_train)
        
        if classifier == 'svm':
            clf = SVC(kernel='rbf', class_weight='balanced', probability=False)
        else:
            clf = RandomForestClassifier(n_estimators=100, class_weight='balanced', random_state=42)
        clf.fit(X_train, y_train, sample_weight=sample_weight)
        y_pred = clf.predict(X_test)
        rep = classification_report(y_test, y_pred, output_dict=True, zero_division=0)
        cm = confusion_matrix(y_test, y_pred, labels=np.unique(y))
        if cm_total is None:
            cm_total = cm
        else:
            cm_total += cm
        f1s.append(f1_score(y_test, y_pred, average='macro', zero_division=0))
        accs.append(accuracy_score(y_test, y_pred))
        reports.append(rep)
    results = {
        'mean_macro_f1': np.mean(f1s),
        'std_macro_f1': np.std(f1s),
        'mean_accuracy': np.mean(accs),
        'confusion_matrix_total': cm_total,
        'per_fold_reports': reports
    }
    return results

In [210]:
# LOSO evaluate cross-subject (requires concatenated data with subject_id column in meta)
def loso_evaluate(X_df, y, subject_ids, confidences=None, classifier='svm', use_sample_weight=False):
    unique_subjects = np.unique(subject_ids)
    f1s, accs = [], []
    for s in unique_subjects:
        test_idx = np.where(subject_ids == s)[0]
        train_idx = np.where(subject_ids != s)[0]
        X_train = X_df.iloc[train_idx].values
        X_test = X_df.iloc[test_idx].values
        y_train, y_test = y[train_idx], y[test_idx]
        sample_weight = None
        if use_sample_weight and confidences is not None:
            sample_weight = confidences[train_idx].astype(float)
        clf = SVC(kernel='rbf', class_weight='balanced')
        clf.fit(X_train, y_train, sample_weight=sample_weight)
        y_pred = clf.predict(X_test)
        f1s.append(f1_score(y_test, y_pred, average='macro', zero_division=0))
        accs.append(accuracy_score(y_test, y_pred))
    return {'mean_macro_f1': np.mean(f1s), 'mean_accuracy': np.mean(accs), 'f1s': f1s, 'accs': accs}

In [211]:
channel_df = resample_dataframe_to_fs(df, fs=FS_TARGET, channels=EEG_CHANNELS)
channel_df

Unnamed: 0,timestamps,AF7,AF8,TP9,TP10
0,2025-11-18 16:37:20.369999886+07:00,-1000.000000,-888.184000,-462.891000,-241.211000
1,2025-11-18 16:37:20.373906136+07:00,-1000.000000,-997.373834,-533.458748,-223.091792
2,2025-11-18 16:37:20.377812386+07:00,-281.926345,-553.240000,-478.380548,-71.409512
3,2025-11-18 16:37:20.381718636+07:00,911.861123,766.097267,-288.564539,130.319890
4,2025-11-18 16:37:20.385624886+07:00,309.211773,210.242717,-355.834227,-53.220000
...,...,...,...,...,...
276056,2025-11-18 16:55:18.713749886+07:00,-971.359843,-910.858297,-205.379781,-972.550640
276057,2025-11-18 16:55:18.717656136+07:00,-120.318377,439.046488,-77.211543,-36.411512
276058,2025-11-18 16:55:18.721562386+07:00,886.077057,952.087223,96.450694,896.117445
276059,2025-11-18 16:55:18.725468636+07:00,875.876271,652.315145,107.742467,856.399338


In [212]:
raw = make_mne_raw_from_df(channel_df, ch_names=EEG_CHANNELS, sfreq=FS_TARGET)

In [213]:
raw

Unnamed: 0,General,General.1
,MNE object type,RawArray
,Measurement date,Unknown
,Participant,Unknown
,Experimenter,Unknown
,Acquisition,Acquisition
,Duration,00:17:59 (HH:MM:SS)
,Sampling frequency,256.00 Hz
,Time points,276061
,Channels,Channels
,EEG,4


In [214]:
raw = preprocess_raw(raw, l_freq=BANDPASS_LOW, h_freq=BANDPASS_HIGH, notch_freq=NOTCH_FREQ)

In [215]:
raw

Unnamed: 0,General,General.1
,MNE object type,RawArray
,Measurement date,Unknown
,Participant,Unknown
,Experimenter,Unknown
,Acquisition,Acquisition
,Duration,00:17:59 (HH:MM:SS)
,Sampling frequency,256.00 Hz
,Time points,276061
,Channels,Channels
,EEG,4


In [216]:
raw_clean, excluded = run_ica_auto(raw)

In [217]:
raw_clean

Unnamed: 0,General,General.1
,MNE object type,RawArray
,Measurement date,Unknown
,Participant,Unknown
,Experimenter,Unknown
,Acquisition,Acquisition
,Duration,00:17:59 (HH:MM:SS)
,Sampling frequency,256.00 Hz
,Time points,276061
,Channels,Channels
,EEG,4


In [218]:
print(f"Excluded components: {excluded}")

Excluded components: [0, 1, 2]


In [219]:
data_clean = raw_clean.get_data()

In [220]:
data_clean

array([[ 0.21841138,  0.21841138,  0.21841138, ...,  0.21841138,
         0.21841138,  0.21841138],
       [ 0.2373418 ,  0.2373418 ,  0.2373418 , ...,  0.2373418 ,
         0.2373418 ,  0.2373418 ],
       [-0.29504527, -0.29504527, -0.29504527, ..., -0.29504527,
        -0.29504527, -0.29504527],
       [-0.16070791, -0.16070791, -0.16070791, ..., -0.16070791,
        -0.16070791, -0.16070791]], shape=(4, 276061))

In [221]:
times = raw_clean.times

In [222]:
timestamps = pd.to_datetime(channel_df['timestamps'].iloc[0]) + pd.to_timedelta(times, unit='s')
timestamps

DatetimeIndex(['2025-11-18 16:37:20.369999886+07:00',
               '2025-11-18 16:37:20.373906136+07:00',
               '2025-11-18 16:37:20.377812386+07:00',
               '2025-11-18 16:37:20.381718636+07:00',
               '2025-11-18 16:37:20.385624886+07:00',
               '2025-11-18 16:37:20.389531136+07:00',
               '2025-11-18 16:37:20.393437386+07:00',
               '2025-11-18 16:37:20.397343636+07:00',
               '2025-11-18 16:37:20.401249886+07:00',
               '2025-11-18 16:37:20.405156136+07:00',
               ...
               '2025-11-18 16:55:18.694218636+07:00',
               '2025-11-18 16:55:18.698124886+07:00',
               '2025-11-18 16:55:18.702031136+07:00',
               '2025-11-18 16:55:18.705937386+07:00',
               '2025-11-18 16:55:18.709843636+07:00',
               '2025-11-18 16:55:18.713749886+07:00',
               '2025-11-18 16:55:18.717656136+07:00',
               '2025-11-18 16:55:18.721562386+07:00',
         

In [223]:
df_clean = pd.DataFrame(data_clean.T, columns=EEG_CHANNELS)

In [224]:
df_clean

Unnamed: 0,AF7,AF8,TP9,TP10
0,0.218411,0.237342,-0.295045,-0.160708
1,0.218411,0.237342,-0.295045,-0.160708
2,0.218411,0.237342,-0.295045,-0.160708
3,0.218411,0.237342,-0.295045,-0.160708
4,0.218411,0.237342,-0.295045,-0.160708
...,...,...,...,...
276056,0.218411,0.237342,-0.295045,-0.160708
276057,0.218411,0.237342,-0.295045,-0.160708
276058,0.218411,0.237342,-0.295045,-0.160708
276059,0.218411,0.237342,-0.295045,-0.160708


In [225]:
df_clean['timestamps'] = timestamps

In [226]:
df_clean

Unnamed: 0,AF7,AF8,TP9,TP10,timestamps
0,0.218411,0.237342,-0.295045,-0.160708,2025-11-18 16:37:20.369999886+07:00
1,0.218411,0.237342,-0.295045,-0.160708,2025-11-18 16:37:20.373906136+07:00
2,0.218411,0.237342,-0.295045,-0.160708,2025-11-18 16:37:20.377812386+07:00
3,0.218411,0.237342,-0.295045,-0.160708,2025-11-18 16:37:20.381718636+07:00
4,0.218411,0.237342,-0.295045,-0.160708,2025-11-18 16:37:20.385624886+07:00
...,...,...,...,...,...
276056,0.218411,0.237342,-0.295045,-0.160708,2025-11-18 16:55:18.713749886+07:00
276057,0.218411,0.237342,-0.295045,-0.160708,2025-11-18 16:55:18.717656136+07:00
276058,0.218411,0.237342,-0.295045,-0.160708,2025-11-18 16:55:18.721562386+07:00
276059,0.218411,0.237342,-0.295045,-0.160708,2025-11-18 16:55:18.725468636+07:00


In [227]:
df_clean = assign_eeg_labels(df_clean, label, "Farrel Liesdia Putra")

Total durasi dataset: 1078.359375
Durasi blok per video: 104.0359375
Durasi istirahat akhir: 44.0359375


  eeg_df['confidence'] = eeg_df['confidence'].replace('Istirahat', 0)


In [233]:
epochs = epoch_eeg_from_assigned_labels(df_clean, window_s=2, overlap=0.5)

In [236]:
print(f"Total epochs created: {len(epochs)}")

Total epochs created: 1657


In [237]:
X_df, y, groups, confidences, meta = epochs_to_feature_matrix(epochs)

  bp = np.trapz(Pxx[idx_band], f[idx_band])
  bp = np.trapz(Pxx[idx_band], f[idx_band])
  bp = np.trapz(Pxx[idx_band], f[idx_band])
  bp = np.trapz(Pxx[idx_band], f[idx_band])
  bp = np.trapz(Pxx[idx_band], f[idx_band])
  bp = np.trapz(Pxx[idx_band], f[idx_band])
  bp = np.trapz(Pxx[idx_band], f[idx_band])
  bp = np.trapz(Pxx[idx_band], f[idx_band])
  bp = np.trapz(Pxx[idx_band], f[idx_band])
  bp = np.trapz(Pxx[idx_band], f[idx_band])
  bp = np.trapz(Pxx[idx_band], f[idx_band])
  bp = np.trapz(Pxx[idx_band], f[idx_band])
  bp = np.trapz(Pxx[idx_band], f[idx_band])
  bp = np.trapz(Pxx[idx_band], f[idx_band])
  bp = np.trapz(Pxx[idx_band], f[idx_band])
  bp = np.trapz(Pxx[idx_band], f[idx_band])
  bp = np.trapz(Pxx[idx_band], f[idx_band])
  bp = np.trapz(Pxx[idx_band], f[idx_band])
  bp = np.trapz(Pxx[idx_band], f[idx_band])
  bp = np.trapz(Pxx[idx_band], f[idx_band])
  bp = np.trapz(Pxx[idx_band], f[idx_band])
  bp = np.trapz(Pxx[idx_band], f[idx_band])
  bp = np.trapz(Pxx[idx_band], f

In [239]:
X_norm, scaler = normalize_features_df(X_df, method='zscore')

In [254]:
lovo_results = lovo_evaluate(X_norm, y, groups, confidences=confidences, classifier='svm', use_sample_weight=True, do_smote=False)
print("LOVO results:", lovo_results)

LOVO results: {'mean_macro_f1': np.float64(0.10879278848658722), 'std_macro_f1': np.float64(0.10113073627328245), 'mean_accuracy': np.float64(0.20077540363776072), 'confusion_matrix_total': array([[  0, 217,  96, 164],
       [145, 241, 136,  68],
       [ 67, 121, 138,  28],
       [ 19,  60,  90,  67]]), 'per_fold_reports': [{'0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 477.0}, '1': {'precision': 0.3774403470715835, 'recall': 0.58, 'f1-score': 0.45729303547963207, 'support': 300.0}, '2': {'precision': 0.328719723183391, 'recall': 0.5277777777777778, 'f1-score': 0.4051172707889126, 'support': 180.0}, '3': {'precision': 0.20489296636085627, 'recall': 0.5583333333333333, 'f1-score': 0.29977628635346754, 'support': 120.0}, 'accuracy': 0.31197771587743733, 'macro avg': {'precision': 0.2277632591539577, 'recall': 0.4165277777777778, 'f1-score': 0.290546648155503, 'support': 1077.0}, 'weighted avg': {'precision': 0.182905116302496, 'recall': 0.31197771587743733, 'f1-sc