In [1]:
import pandas as pd
import scipy.io
import os

def read_csv(file_path):
    """Load a comma-separated file into a DataFrame and report its columns.

    Args:
        file_path: Location of the CSV file, passed unchanged to ``pd.read_csv``.

    Returns:
        pandas.DataFrame: The parsed table.
    """
    table = pd.read_csv(file_path, sep=',')
    # Echo the header so a missing or renamed column is noticed right away.
    print("Columns in CSV file:", table.columns)
    return table

def read_mat_files(df, folder_path, categories=("physiology", "pathology")):
    """Load the ``.mat`` file of every segment belonging to the given categories.

    For each category, the rows of ``df`` whose ``category_name`` equals it are
    selected and ``<folder_path>/<segment_id>.mat`` is loaded with
    ``scipy.io.loadmat`` when that file exists. Missing files are reported on
    stdout and skipped.

    Args:
        df: DataFrame with at least ``segment_id`` and ``category_name`` columns.
        folder_path: Directory that contains the ``.mat`` files.
        categories: Iterable of category names to load. The default is an
            immutable tuple so it can never be altered between calls.

    Returns:
        dict: Maps every requested category to a list of
        ``(segment_id, loadmat_result)`` tuples in row order. A category
        without any loadable file maps to an empty list.

    Raises:
        KeyError: If ``df`` lacks the ``segment_id`` or ``category_name`` column.
    """
    mat_data = {}
    for category in categories:
        # Only the segment ids are needed, so iterate that column directly
        # instead of materialising a Series per row with iterrows().
        segment_ids = df.loc[df['category_name'] == category, 'segment_id']
        loaded = []
        for segment_id in segment_ids:
            file_path = os.path.join(folder_path, f"{segment_id}.mat")
            if os.path.exists(file_path):
                loaded.append((segment_id, scipy.io.loadmat(file_path)))
            else:
                print(f"File {file_path} not found.")
        mat_data[category] = loaded
    return mat_data

def extract_timeseries_from_mat(data):
    """Return the value stored under the ``'data'`` key of a loaded ``.mat`` mapping.

    Args:
        data: Mapping produced by loading a ``.mat`` file.

    Returns:
        The object found at ``data['data']``; when the key is absent a message
        is printed and an empty list is returned instead.
    """
    if 'data' not in data:
        print("No 'data' key found in the .mat file.")
        return []
    return data['data']

def create_df_from_mat_data(mat_data, category):
    """Build a DataFrame of ``(segment_id, timeseries)`` rows for one category.

    Args:
        mat_data: Mapping of category name to a list of
            ``(segment_id, loaded_mat)`` pairs.
        category: Key of ``mat_data`` whose segments should be tabulated.

    Returns:
        pandas.DataFrame: One row per segment with the columns ``segment_id``
        and ``timeseries``. The columns are present even when the category
        holds no segments, so ``frame['timeseries']`` never raises on an
        empty result.

    Raises:
        KeyError: If ``category`` is not a key of ``mat_data``.
    """
    records = [
        {'segment_id': segment_id, 'timeseries': extract_timeseries_from_mat(data)}
        for segment_id, data in mat_data[category]
    ]
    # Name the columns explicitly; a bare DataFrame([]) would have none.
    return pd.DataFrame(records, columns=['segment_id', 'timeseries'])

# Locations of the segment index (CSV) and of the per-segment .mat files.
csv_file_path = r'/home/zeynepki/Desktop/Dataset_Mayo./DATASET_MAYO/segments.csv'
mat_folder_path = r'/home/zeynepki/Desktop/Dataset_Mayo./DATASET_MAYO'

df = read_csv(csv_file_path)

# Start from empty frames so the reporting below (and any later cell) never
# hits a NameError when the CSV has no 'category_name' column.
pathology_df = pd.DataFrame()
physiology_df = pd.DataFrame()

if 'category_name' in df.columns:
    mat_data = read_mat_files(df, mat_folder_path)  # .mat files for pathology and physiology

    # read_mat_files creates a key for every requested category, so test for
    # loaded segments rather than for key presence.
    if mat_data.get("pathology"):
        pathology_df = create_df_from_mat_data(mat_data, "pathology")  # DataFrame for pathology
    else:
        print("No pathology data found.")

    if mat_data.get("physiology"):
        physiology_df = create_df_from_mat_data(mat_data, "physiology")  # DataFrame for physiology
    else:
        print("No physiology data found.")
else:
    print("Error: 'category_name' column not found in the CSV file.")

print("Pathology DataFrame:")
print(pathology_df)

print("Physiology DataFrame:")
print(physiology_df)

# Show one raw time series per category as a sanity check of the loaded data.
print("\nExample of time series data from Pathology DataFrame:")
if not pathology_df.empty:
    print(pathology_df['timeseries'].iloc[0])

print("\nExample of time series data from Physiology DataFrame:")
if not physiology_df.empty:
    print(physiology_df['timeseries'].iloc[0])



Columns in CSV file: Index(['index', 'anatomy', 'category_id', 'channel', 'electrode_type',
       'institution', 'patient_id', 'reviewer_id', 'segment_id', 'soz',
       'category_name'],
      dtype='object')
Pathology DataFrame:
      segment_id                                         timeseries
0        x003028  [[-1.6189451, -1.6656016, -1.6811538, -1.65004...
1        x003136  [[0.9008443, 0.918407, 0.9008443, 0.86571884, ...
2        x003490  [[1.0471021, 1.0545983, 1.0545983, 1.0471021, ...
3        x003512  [[-1.5214216, -1.4957209, -1.4785872, -1.47002...
4        x003514  [[-0.87199485, -0.8794713, -0.8794713, -0.8645...
...          ...                                                ...
15222    x155177  [[-0.12208059, -0.10598343, -0.09793485, -0.11...
15223    x155178  [[2.1157165, 2.1157165, 2.1157165, 2.1157165, ...
15224    x155179  [[0.37042892, 0.38996935, 0.3997396, 0.4095098...
15225    x155180  [[1.3656585, 1.3752718, 1.3848852, 1.3848852, ...
15226    x155181  [[

In [2]:
# import numpy as np
# import matplotlib.pyplot as plt
# from scipy.signal import butter, filtfilt, welch

# # Assuming ieeg_data is already extracted from nwbfile
# data = ieeg_data.data[:]
# rate = ieeg_data.rate
# starting_time = ieeg_data.starting_time
# num_channels = data.shape[1]

# io.close()

# print(f"Data shape: {data.shape}")
# print(f"Sampling rate: {rate} Hz")
# print(f"Number of channels: {num_channels}")
# Plot a segment of the data from the first channel
# plt.figure(figsize=(15, 5))
# plt.plot(np.arange(0, 10, 1/rate), data[:int(10*rate), 0])
# plt.xlabel('Time (s)')
# plt.ylabel('Amplitude (V)')
# plt.title('iEEG Signal - Channel 1')
# plt.show()

In [3]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import accuracy_score, classification_report

# Baseline: logistic regression trained with SGD on the flattened time series.

# Tag every segment with its class before stacking the two frames.
pathology_df['category'] = 'pathology'
physiology_df['category'] = 'physiology'

merged_df = pd.concat([pathology_df, physiology_df], ignore_index=True)

print(merged_df.head())

# Features: each segment flattened to 1-D. Labels: 1 = pathology, 0 = physiology.
X = np.array([ts.flatten() for ts in merged_df['timeseries'].values])
y = np.array([1 if category == 'pathology' else 0 for category in merged_df['category']])

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# SGD shuffles the training data every epoch; seed the estimator as well so
# the reported metrics are reproducible, not only the train/test split.
model = SGDClassifier(loss='log_loss', max_iter=1000, random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")
print("Classification Report:")
print(classification_report(y_test, y_pred))

print("done")

  segment_id                                         timeseries   category
0    x003028  [[-1.6189451, -1.6656016, -1.6811538, -1.65004...  pathology
1    x003136  [[0.9008443, 0.918407, 0.9008443, 0.86571884, ...  pathology
2    x003490  [[1.0471021, 1.0545983, 1.0545983, 1.0471021, ...  pathology
3    x003512  [[-1.5214216, -1.4957209, -1.4785872, -1.47002...  pathology
4    x003514  [[-0.87199485, -0.8794713, -0.8794713, -0.8645...  pathology
Accuracy: 0.671650917176209
Classification Report:
              precision    recall  f1-score   support

           0       0.80      0.78      0.79     14208
           1       0.24      0.26      0.25      3782

    accuracy                           0.67     17990
   macro avg       0.52      0.52      0.52     17990
weighted avg       0.68      0.67      0.68     17990

done


In [15]:
from sklearn.metrics import classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical

# 1-D CNN classifier on the raw time series.
# Relies on np, pd and train_test_split imported by earlier cells.

# Tag every segment with its class before stacking the two frames.
pathology_df['category'] = 'pathology'
physiology_df['category'] = 'physiology'

merged_df = pd.concat([pathology_df, physiology_df], ignore_index=True)

# Features keep their original layout here; labels: 1 = pathology, 0 = physiology.
X = np.array([ts for ts in merged_df['timeseries'].values])
y = np.array([1 if category == 'pathology' else 0 for category in merged_df['category']])

print(f"Initial shape of X: {X.shape}")

# Conv1D expects (samples, timesteps, features). Swap the last two axes with a
# transpose: a plain reshape only gives the right result while the middle axis
# has length 1 and would silently interleave samples for multi-channel input.
n_samples, n_channels, n_timesteps = X.shape
X = X.transpose(0, 2, 1)

print(f"Shape of X after reshaping: {X.shape}")

# One-hot encode the labels for the 2-unit softmax output.
y = to_categorical(y, num_classes=2)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

model = Sequential([
    Conv1D(32, kernel_size=3, activation='relu', input_shape=(n_timesteps, n_channels)),
    MaxPooling1D(pool_size=2),
    Conv1D(64, kernel_size=3, activation='relu'),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(2, activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# NOTE(review): the test split doubles as validation data. That is fine for
# monitoring only, but carve out a separate validation split before using the
# validation metrics for early stopping or tuning.
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))

# Convert softmax probabilities and one-hot labels back to class indices.
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_test_classes = np.argmax(y_test, axis=1)

print("Classification Report:")
print(classification_report(y_test_classes, y_pred_classes))

Initial shape of X: (71957, 1, 15000)
Shape of X after reshaping: (71957, 15000, 1)
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Classification Report:
              precision    recall  f1-score   support

           0       0.94      0.97      0.96     14208
           1       0.86      0.78      0.82      3782

    accuracy                           0.93     17990
   macro avg       0.90      0.88      0.89     17990
weighted avg       0.93      0.93      0.93     17990

