In [4]:
import numpy as np
import torch
import torch.nn as nn
import numpy as np
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import accuracy_score

from model import DeepLocModel

# Load Dataset

In [5]:
class NPZProteinDataset(Dataset):
    """In-memory dataset pairing a feature array with integer class labels.

    Both inputs are converted to tensors once, at construction time, so
    ``__getitem__`` is a plain tensor lookup.

    Args:
        X: Array-like of features; stored as a ``float32`` tensor in ``self.X``.
            Its first axis is used as the sample axis.
        y: Array-like of labels; stored as a ``long`` (int64) tensor in
            ``self.y``.

    Raises:
        ValueError: If ``X`` and ``y`` differ in length along the first axis.
    """

    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.long)

        # Fail fast on mismatched inputs: without this check the error would
        # only surface as an IndexError part-way through an epoch (or, when
        # ``y`` is the longer one, never surface at all).
        # ``shape[:1]`` is used instead of ``shape[0]`` so 0-d inputs are not
        # rejected here with an unrelated IndexError.
        if self.X.shape[:1] != self.y.shape[:1]:
            raise ValueError(
                f"X and y must have the same number of samples, "
                f"got {tuple(self.X.shape[:1])} and {tuple(self.y.shape[:1])}"
            )

    def __len__(self):
        """Return the number of samples (length of ``X`` along its first axis)."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at position ``idx``."""
        return self.X[idx], self.y[idx]

# Load npz data
# np.load on an .npz archive returns a lazy NpzFile that keeps the underlying
# file open; the context managers close it as soon as the arrays have been
# read into memory.
with np.load("data/train.npz") as train_data:
    X_train, y_train, partition = train_data['X_train'], train_data['y_train'], train_data['partition']
with np.load("data/test.npz") as test_data:
    X_test, y_test = test_data['X_test'], test_data['y_test']

# Use fold 1 as validation
VAL_FOLD = 1
val_idx = np.where(partition == VAL_FOLD)
train_idx = np.where(partition != VAL_FOLD)

# An empty split would only show up later as a confusing failure (or a
# meaningless accuracy), so stop here with a clear message instead.
if val_idx[0].size == 0 or train_idx[0].size == 0:
    raise ValueError(
        f"partition fold {VAL_FOLD} yields an empty train or validation split"
    )

X_tr, y_tr = X_train[train_idx], y_train[train_idx]
X_val, y_val = X_train[val_idx], y_train[val_idx]

train_ds = NPZProteinDataset(X_tr, y_tr)
val_ds = NPZProteinDataset(X_val, y_val)
test_ds = NPZProteinDataset(X_test, y_test)

# Only the training loader shuffles; validation and test keep file order.
BATCH_SIZE = 64
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE)
test_loader = DataLoader(test_ds, batch_size=BATCH_SIZE)

# Training Loop

In [6]:
# Seed torch's global RNG before building the model and iterating the loaders,
# so a Restart & Run All repeats the same shuffling order (the DataLoader was
# created without its own generator) and -- presumably, DeepLocModel is defined
# elsewhere -- the same weight initialisation. GPU kernels may still introduce
# small run-to-run differences.
SEED = 42
torch.manual_seed(SEED)

NUM_EPOCHS = 10
LEARNING_RATE = 5e-4

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# input_dim is taken from the third axis of X_train.
model = DeepLocModel(input_dim=X_train.shape[2]).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
criterion = nn.CrossEntropyLoss()

for epoch in range(NUM_EPOCHS):
    # ---- one optimisation pass over the training split ----
    model.train()
    total_loss = 0  # sum of per-batch losses (not averaged over batches)
    for x, y in train_loader:
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad()
        pred = model(x)
        loss = criterion(pred, y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    # Evaluate
    model.eval()
    y_true, y_pred = [], []
    with torch.no_grad():  # no gradients needed while scoring
        for x, y in val_loader:
            x = x.to(device)
            outputs = model(x)
            # Predicted class = index of the largest output along dim 1.
            preds = outputs.argmax(dim=1).cpu()
            y_pred.extend(preds.numpy())
            y_true.extend(y.numpy())
    acc = accuracy_score(y_true, y_pred)
    print(f"Epoch {epoch+1}, Loss: {total_loss:.4f}, Validation Accuracy: {acc:.4f}")

Epoch 1, Loss: 115.7823, Validation Accuracy: 0.2362
Epoch 2, Loss: 111.0356, Validation Accuracy: 0.2940
Epoch 3, Loss: 107.5380, Validation Accuracy: 0.3132
Epoch 4, Loss: 101.1856, Validation Accuracy: 0.3760
Epoch 5, Loss: 92.2991, Validation Accuracy: 0.4523
Epoch 6, Loss: 88.0138, Validation Accuracy: 0.4791
Epoch 7, Loss: 81.6988, Validation Accuracy: 0.5101
Epoch 8, Loss: 74.4502, Validation Accuracy: 0.5318
Epoch 9, Loss: 75.7256, Validation Accuracy: 0.5193
Epoch 10, Loss: 71.0244, Validation Accuracy: 0.5553


# Final Test Evaluation

In [7]:
# Score the trained model on the test loader: collect the labels and the
# argmax predictions batch by batch, then compute overall accuracy.
model.eval()
y_true, y_pred = [], []
with torch.no_grad():
    for x, y in test_loader:
        x = x.to(device)
        # Class prediction is the index of the largest output along dim 1;
        # it is moved back to the CPU before conversion to NumPy.
        y_pred.extend(model(x).argmax(dim=1).cpu().numpy())
        y_true.extend(y.numpy())

acc = accuracy_score(y_true, y_pred)
print(f"Final Test Accuracy: {acc:.4f}")

Final Test Accuracy: 0.5582
