In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
#%cd /content/drive/MyDrive/Georgetown/BrainDecoding

/content/drive/MyDrive/Georgetown/BrainDecoding


In [4]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [5]:
class TransformerClassifier(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers, num_heads, dropout):
        super(TransformerClassifier, self).__init__()
        self.transformer = nn.TransformerEncoderLayer(input_size, num_heads, hidden_size, dropout)
        self.encoder = nn.TransformerEncoder(self.transformer, num_layers)
        self.fc = nn.Linear(input_size, output_size)

    def forward(self, src):
        src = src.permute(1, 0, 2)  # Change shape from (seq_len, batch_size, input_size) to (batch_size, seq_len, input_size)
        src = self.encoder(src)
        src = src.mean(dim=0)  # average pooling to get the representation of an entire sequence
        out = self.fc(src)
        return out.squeeze(1)

In [6]:
input_size = 200  # Dimensionality of each vector
hidden_size = 128
output_size = 1
num_layers = 6  # Number of Transformer encoder layers
num_heads = 4  # Number of attention heads
dropout = 0.1
num_epochs = 135
lr = 0.001


In [7]:
Xtrain = torch.load('preprocessed_data/Xtrain.pt').to(device)
ytrain = torch.load('preprocessed_data/ytrain.pt').unsqueeze(1).to(device)
Xdev = torch.load('preprocessed_data/Xdev.pt').to(device)
ydev= torch.load('preprocessed_data/ydev.pt').unsqueeze(1).to(device)

In [7]:
model = TransformerClassifier(input_size, hidden_size, output_size, num_layers, num_heads, dropout).to(device)
criterion = nn.BCEWithLogitsLoss()  # Binary cross-entropy loss
optimizer = optim.Adam(model.parameters(), lr=lr)



In [8]:
pytorch_total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(pytorch_total_params)

1492097


In [29]:
for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()
    outputs = model(Xtrain).squeeze()
    #print(outputs)
    loss = criterion(outputs, ytrain.squeeze().float())
    loss.backward()
    optimizer.step()
    print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {loss.item():.4f}')
    model.eval()
    with torch.no_grad():
        outputs = model(Xdev).squeeze()
        op_cpu = outputs.cpu()
        predictions = [int(o.item()>0) for o in op_cpu]
        ydev_cpu = ydev.cpu().numpy()
        accuracy = accuracy_score(ydev_cpu, predictions)
        print(f'Dev Accuracy: {accuracy:.4f}')


Epoch [1/135], Train Loss: 0.7515
Dev Accuracy: 0.4955
Epoch [2/135], Train Loss: 0.8617
Dev Accuracy: 0.5016
Epoch [3/135], Train Loss: 0.7334
Dev Accuracy: 0.5119
Epoch [4/135], Train Loss: 0.7279
Dev Accuracy: 0.5257
Epoch [5/135], Train Loss: 0.6931
Dev Accuracy: 0.5180
Epoch [6/135], Train Loss: 0.7036
Dev Accuracy: 0.5226
Epoch [7/135], Train Loss: 0.7077
Dev Accuracy: 0.5332
Epoch [8/135], Train Loss: 0.6955
Dev Accuracy: 0.5490
Epoch [9/135], Train Loss: 0.6883
Dev Accuracy: 0.5255
Epoch [10/135], Train Loss: 0.6920
Dev Accuracy: 0.5042
Epoch [11/135], Train Loss: 0.6956
Dev Accuracy: 0.5211
Epoch [12/135], Train Loss: 0.6928
Dev Accuracy: 0.5441
Epoch [13/135], Train Loss: 0.6878
Dev Accuracy: 0.5416
Epoch [14/135], Train Loss: 0.6867
Dev Accuracy: 0.5398
Epoch [15/135], Train Loss: 0.6884
Dev Accuracy: 0.5341
Epoch [16/135], Train Loss: 0.6899
Dev Accuracy: 0.5347
Epoch [17/135], Train Loss: 0.6891
Dev Accuracy: 0.5324
Epoch [18/135], Train Loss: 0.6863
Dev Accuracy: 0.5424
E

In [30]:
Xtest = torch.load('preprocessed_data/Xtest.pt').to(device)
ytest= torch.load('preprocessed_data/ytest.pt').unsqueeze(1).to(device)

In [31]:
model.eval()
with torch.no_grad():
    outputs = model(Xtest).squeeze()
    op_cpu = outputs.cpu()
    predictions = [int(o.item()>0) for o in op_cpu]
    ytest_cpu = ytest.cpu().numpy()
    accuracy = accuracy_score(ytest_cpu, predictions)
    print(f'Test Accuracy: {accuracy:.4f}')
    print(classification_report(ytest_cpu, predictions))

Test Accuracy: 0.5816
              precision    recall  f1-score   support

         0.0       0.65      0.50      0.56      2816
         1.0       0.53      0.68      0.60      2382

    accuracy                           0.58      5198
   macro avg       0.59      0.59      0.58      5198
weighted avg       0.60      0.58      0.58      5198



In [32]:
model.eval()
with torch.no_grad():
    outputs = model(Xdev).squeeze()
    op_cpu = outputs.cpu()
    predictions = [int(o.item()>0) for o in op_cpu]
    ydev_cpu = ydev.cpu().numpy()
    accuracy = accuracy_score(ydev_cpu, predictions)
    print(f'Dev Accuracy: {accuracy:.4f}')
    print(classification_report(ydev_cpu, predictions))

Dev Accuracy: 0.5697
              precision    recall  f1-score   support

         0.0       0.56      0.55      0.55      1690
         1.0       0.58      0.59      0.59      1787

    accuracy                           0.57      3477
   macro avg       0.57      0.57      0.57      3477
weighted avg       0.57      0.57      0.57      3477

