In [27]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVR
from sklearn.metrics import make_scorer
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from scipy.signal import welch
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_percentage_error
from scipy.fft import fft



## load the dataset

In [13]:
data = pd.read_csv("dataset/normalized_data.csv",index_col=False)
data.head()

Unnamed: 0,eeg_1,eeg_2,eeg_3,eeg_4,eeg_5,eeg_6,eeg_7,eeg_8,eeg_9,eeg_10,...,eeg_243,eeg_244,eeg_245,eeg_246,eeg_247,eeg_248,eeg_249,eeg_250,score,normalized_form
0,0.7177,-0.2062,0.0475,1.0721,2.4135,3.5125,3.7577,2.7633,0.7151,-1.527,...,9.8659,6.0245,-0.3442,-6.7022,-10.592,-10.705,-7.2834,-1.8025,9,0.90544
1,-20.272,-15.527,-7.347,1.1873,7.7569,11.805,14.147,15.75,16.753,16.474,...,-14.811,-11.206,-4.8725,1.8731,6.2072,6.1948,1.8273,-4.8429,7,0.723446
2,-2.5855,-3.6533,-3.1125,-1.5704,-0.2837,-0.3124,-1.8101,-3.9264,-5.3829,-5.323,...,-0.1838,-3.6101,-8.3515,-12.843,-15.176,-14.121,-9.851,-3.9632,8,0.809585
3,3.1062,3.902,4.1897,4.0014,3.4469,2.5849,1.4279,0.0445,-1.3564,-2.4403,...,14.739,16.043,16.177,15.433,14.13,12.479,10.61,8.6729,8,0.813472
4,4.8654,4.2604,3.1231,1.746,0.686,0.5555,1.7299,4.1115,7.0969,9.7958,...,0.2532,0.0332,-1.3112,-3.568,-5.8331,-6.8665,-5.7612,-2.5252,9,0.898316


## Define features (EEG signals) and target (normalized_form)
## Split the data into training and testing sets


In [15]:
X = data.drop(columns=["score", "normalized_form"],axis = 1)
y = data["normalized_form"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Print the shapes of the training and testing sets
print("Shape of X_train:", X_train.shape)
print("Shape of X_test:", X_test.shape)
print("Shape of y_train:", y_train.shape)
print("Shape of y_test:", y_test.shape)

Shape of X_train: (458, 250)
Shape of X_test: (115, 250)
Shape of y_train: (458,)
Shape of y_test: (115,)


In [17]:
svr_model = SVR()
svr_model.fit(X_train, y_train)

y_pred = svr_model.predict(X_test)

mape = mean_absolute_percentage_error(y_test, y_pred)
print("Mean Absolute Percentage Error (MAPE):", mape)



Mean Absolute Percentage Error (MAPE): 0.24568406409806307


In [25]:
# Function to convert time-domain signals to frequency-domain signals
def time_to_frequency_domain(signal):
    fft_result = fft(signal)
    # Assuming signal is sampled at 250 Hz, calculate the frequency bins
    freqs = np.fft.fftfreq(len(signal), 1/250)
    return freqs, np.abs(fft_result)

In [23]:
# Define frequency bands
frequency_bands = [(4, 8), (8, 12), (12, 20), (20, 30)]

In [30]:
# Define parameter grid for grid search
param_grid = {
    'svr__C': [0.1, 1, 10, 100],
    'svr__gamma': [0.1, 0.01, 0.001],
}

In [33]:
for band in frequency_bands:
    # Extract EEG signals corresponding to the frequency band
    eeg_band = data.loc[:, 'eeg_1':'eeg_250']
    # Convert to frequency domain
    freqs, freq_domain_signals = time_to_frequency_domain(eeg_band)
    # Find indices corresponding to the frequency band
    band_indices = np.where((freqs >= band[0]) & (freqs <= band[1]))[0]
    # Take the mean amplitude across the frequency band for each sample
    freq_domain_signals_band = freq_domain_signals[:, band_indices].mean(axis=1)
    # Train-test split
    X_train, X_test, y_train, y_test = train_test_split(freq_domain_signals_band, data['normalized_form'], test_size=0.2, random_state=42)
    
    svr = SVR()
    
    X_train_reshaped = X_train.reshape(-1, 1)  # Reshape X_train if it has a single feature
    y_train_reshaped = y_train.reshape(-1, 1)  # Reshape y_train if it has a single feature

    grid_search.fit(X_train_reshaped, y_train_reshaped)
    
    model = make_pipeline(StandardScaler(), svr)
    grid_search = GridSearchCV(model, param_grid, cv=5, scoring=mean_absolute_percentage_error)
    grid_search.fit(X_train, y_train)
    y_pred = grid_search.predict(X_test)

    mape = mean_absolute_percentage_error(y_test,y_pred)
    
    print(f"Frequency band: {band}, MAPE: {mape}")
    

AttributeError: 'Series' object has no attribute 'reshape'

## Define a function to compute MAPE

In [5]:
def mean_absolute_percentage_error(y_true, y_pred):
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

# Define MAPE scorer
mape_scorer = make_scorer(mean_absolute_percentage_error, greater_is_better=False)

## Define SVR model


In [6]:
svr = SVR()

# Define parameter grid for grid search
param_grid = {
    'svr__C': [0.1, 1, 10, 100],
    'svr__gamma': [0.1, 0.01, 0.001],
}

## Define a function to compute power spectral density and select the best frequency band

In [7]:
def compute_psd(X):
    freqs, psd = welch(X, fs=250)  # Assuming sampling frequency is 250 Hz
    return freqs, psd

best_mape = float('inf')
best_band = None

## Iterate over predefined frequency bands or search for the best frequency band automatically


In [8]:
freq_bands = [(0.5, 4), (4, 8), (8, 12), (12, 30), (30, 50)]
type(freq_bands[0][0])
X_train

Unnamed: 0,eeg_1,eeg_2,eeg_3,eeg_4,eeg_5,eeg_6,eeg_7,eeg_8,eeg_9,eeg_10,...,eeg_241,eeg_242,eeg_243,eeg_244,eeg_245,eeg_246,eeg_247,eeg_248,eeg_249,eeg_250
182,-5.8752,-9.6503,-11.6040,-11.2040,-8.8674,-5.7020,-2.8941,-1.1302,-0.4092,-0.3045,...,0.2758,-0.9900,-1.1001,-0.2778,1.0800,2.6193,4.0556,5.0772,5.2972,4.3709
63,-1.7191,-3.1417,-4.5580,-5.5020,-5.7403,-5.4192,-4.9740,-4.8498,-5.2111,-5.8268,...,-0.4782,-0.8940,-1.1496,-1.1832,-0.9732,-0.5130,0.2064,1.1730,2.2966,3.3728
158,-4.8527,-6.2319,-6.9988,-7.0181,-6.7243,-6.8108,-7.7226,-9.2840,-10.7100,-10.9980,...,5.6820,7.0245,6.6952,4.8949,2.2619,-0.4283,-2.5313,-3.6887,-3.8813,-3.4133
60,-5.2450,-5.0819,-4.0047,-2.2753,-0.3411,1.2875,2.2057,2.2872,1.7802,1.2433,...,4.3586,4.1531,4.4424,5.0231,5.6730,6.2807,6.8434,7.3690,7.7941,8.0113
15,13.2870,14.6380,15.0990,13.3940,9.3631,4.3101,0.3171,-1.0192,0.4652,3.4963,...,3.0655,5.3494,7.9281,9.6094,9.2641,6.2249,0.6104,-6.5600,-13.5770,-18.5680
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
71,-5.4472,-4.5187,-3.7901,-3.5904,-4.0907,-5.2407,-6.7635,-8.2257,-9.1751,-9.3010,...,2.5677,2.7750,2.3913,1.5459,0.5775,-0.0912,-0.1071,0.6669,2.0633,3.6355
106,4.2980,3.9070,3.0255,2.1565,1.6101,1.3821,1.1790,0.6128,-0.5324,-2.1390,...,-6.1815,-8.2260,-9.1975,-8.8645,-7.4943,-5.7131,-4.0963,-2.8777,-2.0265,-1.0171
270,1.0539,2.7241,5.3093,7.8402,9.2869,9.0317,7.1705,4.4793,2.0500,0.7700,...,4.5416,5.0986,5.9305,7.0725,8.5710,10.4310,12.5480,14.6770,16.4720,17.6180
435,0.3446,-0.0295,-0.1942,-0.0213,0.7299,2.0852,3.6334,4.6077,4.2544,2.2692,...,-5.3980,-4.5855,-3.0832,-1.4581,0.0559,1.5882,3.4831,6.0869,9.5562,13.7250


In [10]:
for band in freq_bands:
    X_train_band = X_train.apply(lambda x: compute_psd(x.loc[(x.index >= band[0]) & (x.index <= band[1])])[1], axis=1)
    X_test_band = X_test.apply(lambda x: compute_psd(x.loc[(x.index >= band[0]) & (x.index <= band[1])])[1], axis=1)
    # Train SVR model with grid search
    model = make_pipeline(StandardScaler(), svr)
    grid_search = GridSearchCV(model, param_grid, cv=5, scoring=mape_scorer)
    grid_search.fit(X_train_band, y_train)

    # Predict on the test set
    y_pred = grid_search.predict(X_test_band)

    # Compute MAPE
    mape = mean_absolute_percentage_error(y_test, y_pred)

    # Update best MAPE and frequency band
    if mape < best_mape:
        best_mape = mape
        best_band = band

# Print the best frequency band
print("Best Frequency Band:", best_band)

first run


TypeError: '>=' not supported between instances of 'str' and 'float'