In [1]:
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np
import pandas as pd
import json
import glob
import random
from btbench_config import *
from braintreebank_subject import Subject
from btbench_datasets import BrainTreebankSubjectTrialBenchmarkDataset

In [3]:
# Pick the recording to benchmark: one subject and one of that subject's trials.
subject_id = 3
trial_id = 0

# use cache=True to load the neural data into RAM, if you have enough memory!
subject = Subject(subject_id, cache=True)

# eval_name can be "pitch", "rms", "onset", or "speech"
dataset = BrainTreebankSubjectTrialBenchmarkDataset(
    subject,
    trial_id,
    dtype=torch.float32,
    eval_name="speech",
)

In [None]:
# Sanity check: how many samples the dataset holds, then the shape of the
# feature tensor of one sample (index 1; element 0 of the returned pair).
n_samples = len(dataset)
print(n_samples)
sample_features = dataset[1][0]
print(sample_features.shape)

In [None]:
# Sample indices bounding the analysis window along axis 1 of each sample,
# derived from the benchmark config so the window follows any config change.
# Presumably each sample starts START_NEURAL_DATA_BEFORE_WORD_ONSET seconds
# before the word onset - TODO confirm against the dataset implementation.
word_onset_idx = int(START_NEURAL_DATA_BEFORE_WORD_ONSET * SAMPLING_RATE) # 1024 per the original note
word_onset_idx_end = int((START_NEURAL_DATA_BEFORE_WORD_ONSET + END_NEURAL_DATA_AFTER_WORD_ONSET) * SAMPLING_RATE) # 3072 per the original note

# Convert PyTorch dataset to numpy arrays for scikit-learn
X = []
y = []
for i in range(len(dataset)):
    features, label = dataset[i]
    # Keep only the window [word onset, word onset + END_NEURAL_DATA_AFTER_WORD_ONSET)
    # and flatten it into a single feature vector. The computed indices are used
    # instead of literal 1024:3072 so the slice cannot drift from the config.
    X.append(features.numpy()[:, word_onset_idx:word_onset_idx_end].flatten())
    y.append(label.numpy())
X = np.array(X)  # one row per sample
y = np.array(y)  # one label per sample

# Fit a logistic-regression baseline on the flattened features and report
# how well it scores on the training split and on a held-out split.
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing; the seed fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# fit() returns the estimator itself, so construction and training can be chained.
clf = LogisticRegression(random_state=42, max_iter=1000).fit(X_train, y_train)

# Score each split with the fitted model and report it.
train_score = clf.score(X_train, y_train)
print(f"Training accuracy: {train_score:.3f}")

test_score = clf.score(X_test, y_test)
print(f"Test accuracy: {test_score:.3f}")
