In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight

# 1. Read the Epileptic Seizure Recognition CSV and preview its first rows
raw_csv_path = "/kaggle/input/epileptic-seizure-recognition/Epileptic Seizure Recognition.csv"
df = pd.read_csv(raw_csv_path)
df.head()

Unnamed: 0,Unnamed,X1,X2,X3,X4,X5,X6,X7,X8,X9,...,X170,X171,X172,X173,X174,X175,X176,X177,X178,y
0,X21.V1.791,135,190,229,223,192,125,55,-9,-33,...,-17,-15,-31,-77,-103,-127,-116,-83,-51,4
1,X15.V1.924,386,382,356,331,320,315,307,272,244,...,164,150,146,152,157,156,154,143,129,1
2,X8.V1.1,-32,-39,-47,-37,-32,-36,-57,-73,-85,...,57,64,48,19,-12,-30,-35,-35,-36,5
3,X16.V1.60,-105,-101,-96,-92,-89,-95,-102,-100,-87,...,-82,-81,-80,-77,-85,-77,-72,-69,-65,5
4,X20.V1.54,-9,-65,-98,-102,-78,-48,-16,0,-21,...,4,2,-12,-32,-41,-65,-83,-89,-73,5


In [2]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight
from scipy.interpolate import CubicSpline

# 1. Read the raw CSV into a fresh DataFrame
df = pd.read_csv("/kaggle/input/epileptic-seizure-recognition/Epileptic Seizure Recognition.csv")

# 2. Discard the leading column when its header contains 'Unnamed'
#    (in the preview above it holds string identifiers, not signal values)
first_col = df.columns[0]
if 'Unnamed' in first_col:
    df = df.drop(columns=[first_col])

# 3. Everything except the last column is a feature; the last column is the label
X = df.iloc[:, :-1].to_numpy()
y = df.iloc[:, -1].to_numpy()

# 4. Define data augmentation methods
def jitter(X, sigma=0.05):
    """Add independent zero-mean Gaussian noise to every element of ``X``.

    Parameters
    ----------
    X : numpy.ndarray
        Input samples; the noise array is drawn with the same shape.
    sigma : float, default 0.05
        Standard deviation of the noise, in the same units as ``X``.

    Returns
    -------
    numpy.ndarray
        ``X`` plus the noise. ``X`` itself is not modified.

    Notes
    -----
    Draws from NumPy's global random state, so results depend on
    ``np.random.seed``.
    """
    noise = np.random.normal(0.0, sigma, X.shape)
    return X + noise

def scaling(X, sigma=0.1):
    """Multiply each row of ``X`` by its own random gain drawn around 1.0.

    Parameters
    ----------
    X : numpy.ndarray
        Input samples; one gain is drawn per entry along the first axis and
        broadcast across the remaining columns.
    sigma : float, default 0.1
        Standard deviation of the gain distribution (mean is 1.0).

    Returns
    -------
    numpy.ndarray
        The rescaled samples. ``X`` itself is not modified.

    Notes
    -----
    Draws from NumPy's global random state, so results depend on
    ``np.random.seed``.
    """
    n_rows = X.shape[0]
    per_row_gain = np.random.normal(1.0, sigma, (n_rows, 1))
    return X * per_row_gain

def time_warp(X, sigma=0.2):
    """Randomly stretch/compress each row of ``X`` along its time axis.

    For every row, random positive step lengths are drawn around 1.0 and
    accumulated into a warped time grid that is rescaled to span exactly
    ``[0, n_steps - 1]``. The row's values are placed on that warped grid,
    fitted with a cubic spline, and resampled at the original integer steps.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_steps)
        Input signals, one per row.
    sigma : float, default 0.2
        Standard deviation of the random step lengths (mean is 1.0).

    Returns
    -------
    numpy.ndarray
        Warped signals with the same shape as ``X`` and a floating-point
        dtype. ``X`` itself is not modified.

    Notes
    -----
    Draws from NumPy's global random state, so results depend on
    ``np.random.seed``. One draw of ``n_steps`` values is consumed per row.
    """
    X = np.asarray(X)
    n_steps = X.shape[1]

    # Allocate a float buffer: inheriting an integer dtype from X would
    # silently truncate the interpolated values on assignment.
    warped_X = np.empty(X.shape, dtype=np.result_type(X.dtype, np.float64))

    # A spline needs at least two knots; with fewer there is nothing to warp.
    if n_steps < 2:
        warped_X[...] = X
        return warped_X

    orig_steps = np.arange(n_steps)
    for i in range(X.shape[0]):
        random_warp = np.random.normal(loc=1.0, scale=sigma, size=n_steps)
        # Step lengths must stay positive, otherwise the warped grid is not
        # strictly increasing and CubicSpline rejects it.
        random_warp = np.clip(random_warp, 1e-3, None)
        warped_steps = np.cumsum(random_warp)
        # Anchor the grid at both ends (first knot at 0, last at n_steps - 1)
        # so every original step lies inside the spline's domain and no
        # extrapolation is needed.
        span = warped_steps[-1] - warped_steps[0]
        warped_steps = (warped_steps - warped_steps[0]) / span * (n_steps - 1)
        cs = CubicSpline(warped_steps, X[i])
        warped_X[i] = cs(orig_steps)
    return warped_X

# 5. Give every sample exactly one randomly selected augmentation
# NOTE(review): this runs on the full dataset; if a train/test split is done
# afterwards, an augmented copy of a test row can end up in the training set
# -- confirm the split strategy downstream.
np.random.seed(42)  # fixed seed so the augmented set is reproducible
augmenters = {'jitter': jitter, 'scaling': scaling, 'time_warp': time_warp}
augmented_X = []
augmented_y = []

for i, label in enumerate(y):
    # The augmenters expect a 2-D (1, n_steps) array
    x = X[i].reshape(1, -1)
    method = np.random.choice(list(augmenters))
    x_aug = augmenters[method](x)

    augmented_X.append(x_aug.ravel())
    augmented_y.append(label)

# 6. Stack the originals on top of their augmented copies
X_augmented = np.concatenate([X, np.asarray(augmented_X)], axis=0)
y_augmented = np.concatenate([y, np.asarray(augmented_y)])

# 7. Rebuild a DataFrame with feature columns X1..Xn followed by the label
columns = [f'X{idx + 1}' for idx in range(X.shape[1])]
augmented_df = pd.DataFrame(X_augmented, columns=columns).assign(y=y_augmented)

# 8. Persist to CSV in the current working directory
augmented_df.to_csv("EEG_seizure_data_augmented.csv", index=False)
print("Augmented dataset saved as 'EEG_seizure_data_augmented.csv'")


Augmented dataset saved as 'EEG_seizure_data_augmented.csv'


In [3]:
# Reload the augmented CSV from disk and check its shape.
# NOTE(review): the file was written with a relative path, so this absolute
# path only matches when the working directory is /kaggle/working.
augmented_csv_path = "/kaggle/working/EEG_seizure_data_augmented.csv"
df1 = pd.read_csv(augmented_csv_path)
df1.shape

(23000, 179)

In [4]:
# Shape of the frame loaded from the source CSV (before augmentation), for comparison with df1
df.shape

(11500, 179)