In [1]:
import numpy as np
import h5py
import torch
from torch.utils.data import DataLoader, TensorDataset
from torchvision import datasets, transforms
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix
import torchvision
import torchvision.transforms as transforms
import torch.nn.functional as F
import os
import shutil
from sklearn.model_selection import train_test_split
from torchvision.datasets import ImageFolder
from torchvision.models import resnet34
from torch.optim import Adam
from torchvision.utils import make_grid
from tqdm import tqdm

In [4]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
#[0,22): amino acid residues, with the order of 'A', 'C', 'E', 'D', 'G', 'F', 'I', 'H', 'K', 'M', 'L', 'N', 'Q', 'P', 'S', 'R', 'T', 'W', 'V', 'Y', 'X','NoSeq'
#[22,31): Secondary structure labels, with the sequence of 'L', 'B', 'E', 'G', 'I', 'H', 'S', 'T','NoSeq'
dataset_path = '/content/drive/MyDrive/Colab Notebooks/EE541_Project/data/cullpdb+profile_6133.npy'
sequence_len = 700
total_features = 57
amino_acid_residues = 21
num_classes = 8


In [6]:
def get_dataset(dataset_path):
  ds = np.load(dataset_path)
  ds = np.reshape(ds, (ds.shape[0], sequence_len, total_features))
  ds_filtered = np.zeros((ds.shape[0], ds.shape[1], amino_acid_residues + num_classes))
  ds_filtered[:, :, 0:amino_acid_residues] = ds[:, :, 35:56]
  ds_filtered[:, :, amino_acid_residues:] = ds[:, :, amino_acid_residues + 1:amino_acid_residues+ 1 + num_classes]
  return ds_filtered

In [7]:
def get_data_labels(Dataset):
    X = Dataset[:, :, 0:amino_acid_residues]
    Y = Dataset[:, :, amino_acid_residues:amino_acid_residues + num_classes]
    mask = (np.sum(Y, axis=-1) != 0)  # Mask to ignore padded positions
    return X, Y, mask

In [8]:
# def shuffle_and_split(Dataset, seed=None):
#     np.random.seed(seed)
#     np.random.shuffle(Dataset)
#     train_split = int(Dataset.shape[0]*0.8)
#     test_val_split = int(Dataset.shape[0]*0.1)
#     Train = Dataset[0:train_split, :, :]
#     Test = Dataset[train_split:train_split+test_val_split, :, :]
#     Validation = Dataset[train_split+test_val_split:, :, :]
#     return Train, Test, Validation

def shuffle_and_split(Dataset, seed=None):
    train_val, test = train_test_split(Dataset, test_size=0.2, random_state=seed)
    train, val = train_test_split(train_val, test_size=0.125, random_state=seed)  # 0.125 * 0.8 = 0.1
    return train, test, val

In [9]:
dataset = get_dataset(dataset_path)
X, y, mask = get_data_labels(dataset)

In [10]:
X_train, X_test, X_val = shuffle_and_split(X, 100)
y_train, y_test, y_val = shuffle_and_split(y, 100)
mask_train, mask_test, mask_val = shuffle_and_split(mask, 100)

# dataset = get_dataset(dataset_path)

# D_train, D_test, D_val = shuffle_and_split(dataset, 100)

# X_train, y_train = get_data_labels(D_train)
# X_test, y_test = get_data_labels(D_test)
# X_val, y_val = get_data_labels(D_val)

# print("Dataset Loaded")

In [2]:
X_test.shape

NameError: name 'X_test' is not defined

In [11]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Define hyperparameters
LR = 0.001  # Learning rate
drop_out = 0.2  # Dropout rate
batch_dim = 64  # Batch size
nn_epochs = 50  # Number of epochs
loss_fn = nn.CrossEntropyLoss()  # Cross-entropy loss function for classification

# Convert data to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
mask_train_tensor = torch.tensor(mask_train, dtype=torch.bool)

X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)
mask_test_tensor = torch.tensor(mask_test, dtype=torch.bool)

X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val, dtype=torch.float32)
mask_val_tensor = torch.tensor(mask_val, dtype=torch.bool)

# Create DataLoader for batching
train_dataset = TensorDataset(X_train_tensor, y_train_tensor, mask_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor, mask_test_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor, mask_val_tensor)

train_loader = DataLoader(train_dataset, batch_size=batch_dim, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_dim, shuffle=False)
val_loader = DataLoader(val_dataset, batch_size=batch_dim, shuffle=False)

class CNNModel(nn.Module):
    def __init__(self, sequence_len, amino_acid_residues, num_classes):
        super(CNNModel, self).__init__()
        # First 1D Convolutional Layer
        self.conv1 = nn.Conv1d(in_channels=amino_acid_residues,
                               out_channels=128,
                               kernel_size=11,
                               padding=5)  # `padding=5` ensures the input and output have the same length
        # Dropout after the first Conv layer
        self.dropout1 = nn.Dropout(drop_out)

        # Second 1D Convolutional Layer
        self.conv2 = nn.Conv1d(in_channels=128,
                               out_channels=64,
                               kernel_size=11,
                               padding=5)
        # Dropout after the second Conv layer
        self.dropout2 = nn.Dropout(drop_out)

        # Final 1D Convolutional Layer for classification
        self.conv3 = nn.Conv1d(in_channels=64,
                               out_channels=num_classes,
                               kernel_size=11,
                               padding=5)
        # Softmax is applied in the forward pass for class probabilities

    def forward(self, x):
        # Pass through the first Conv layer with ReLU activation
        x = F.relu(self.conv1(x))
        x = self.dropout1(x)  # Apply dropout

        # Pass through the second Conv layer with ReLU activation
        x = F.relu(self.conv2(x))
        x = self.dropout2(x)  # Apply dropout

        # Pass through the final Conv layer and apply softmax along the channel dimension
        x = self.conv3(x)
        # x = F.softmax(x, dim=1)  # Softmax along the classes (channel dimension)
        return x


# Instantiate the model
sequence_len = 700  # Example sequence length
amino_acid_residues = 21  # Number of amino acid residues (input channels)
num_classes = 8  # Number of output classes (e.g., Q8 accuracy classification)

model = CNNModel(sequence_len, amino_acid_residues, num_classes)

# Optimizer setup
optimizer = optim.Adam(model.parameters(), lr=LR)

# Model Summary
print("Model Architecture:")
print(model)

Model Architecture:
CNNModel(
  (conv1): Conv1d(21, 128, kernel_size=(11,), stride=(1,), padding=(5,))
  (dropout1): Dropout(p=0.2, inplace=False)
  (conv2): Conv1d(128, 64, kernel_size=(11,), stride=(1,), padding=(5,))
  (dropout2): Dropout(p=0.2, inplace=False)
  (conv3): Conv1d(64, 8, kernel_size=(11,), stride=(1,), padding=(5,))
)


In [None]:
# Training loop
for epoch in range(nn_epochs):
    model.train()
    total_loss = 0
    correct_predictions = 0
    total_samples = 0

    for batch_X, batch_y in train_loader:
        # Move to device (GPU/CPU)
        batch_X = batch_X.permute(0, 2, 1)  # Change to shape (batch_size, channels, seq_len)
        optimizer.zero_grad()

        # Forward pass
        outputs = model(batch_X)
        _, predicted = torch.max(outputs, dim=1)
        #print(predicted.shape)
        # Convert batch_y to class indices (argmax across last dimension)
        target_class_indices = batch_y.argmax(dim=2)  # Shape: (batch_size, sequence_len)
        #print(batch_y.shape)
        #print(batch_y.argmax(dim=2).shape)
        # Calculate loss and perform backpropagation
        # loss = loss_fn(outputs.view(-1, num_classes), batch_y.view(-1).long())
        loss = loss_fn(outputs.view(-1, num_classes), target_class_indices.view(-1))
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

        # Track Q8 accuracy
        correct_predictions += (predicted == batch_y.argmax(dim=2)).sum().item()
        total_samples += batch_X.size(0) * batch_X.size(2)

    # Calculate Q8 accuracy for the training set
    q8_train_accuracy = 100 * correct_predictions / total_samples

    print(f'Epoch [{epoch+1}/{nn_epochs}], Loss: {total_loss/len(train_loader):.4f}, Q8 Accuracy: {q8_train_accuracy:.2f}%')

# Evaluation function for test/validation data
def evaluate(model, data_loader):
    model.eval()
    correct_predictions = 0
    total_samples = 0
    with torch.no_grad():
        for batch_X, batch_y in data_loader:
            batch_X = batch_X.permute(0, 2, 1)  # Change to shape (batch_size, channels, seq_len)
            outputs = model(batch_X)
            _, predicted = torch.max(outputs, dim=1)

            # Track Q8 accuracy
            correct_predictions += (predicted == batch_y.argmax(dim=2)).sum().item()
            total_samples += batch_X.size(0) * batch_X.size(2)

    q8_accuracy = 100 * correct_predictions / total_samples
    return q8_accuracy

# Evaluate on test and validation data
test_q8_accuracy = evaluate(model, test_loader)
val_q8_accuracy = evaluate(model, val_loader)

print(f'Test Q8 Accuracy: {test_q8_accuracy:.2f}%')
print(f'Validation Q8 Accuracy: {val_q8_accuracy:.2f}%')

Epoch [1/20], Loss: 2.0636, Q8 Accuracy: 1.68%
Epoch [2/20], Loss: 2.0392, Q8 Accuracy: 21.59%
Epoch [3/20], Loss: 2.0277, Q8 Accuracy: 11.46%
Epoch [4/20], Loss: 2.0242, Q8 Accuracy: 6.12%
Epoch [5/20], Loss: 2.0225, Q8 Accuracy: 5.00%
Epoch [6/20], Loss: 2.0215, Q8 Accuracy: 4.93%
Epoch [7/20], Loss: 2.0200, Q8 Accuracy: 4.89%
Epoch [8/20], Loss: 2.0190, Q8 Accuracy: 4.80%
Epoch [9/20], Loss: 2.0184, Q8 Accuracy: 4.73%
Epoch [10/20], Loss: 2.0183, Q8 Accuracy: 4.70%
Epoch [11/20], Loss: 2.0180, Q8 Accuracy: 4.61%
Epoch [12/20], Loss: 2.0176, Q8 Accuracy: 4.56%
Epoch [13/20], Loss: 2.0175, Q8 Accuracy: 4.48%
Epoch [14/20], Loss: 2.0171, Q8 Accuracy: 4.43%
Epoch [15/20], Loss: 2.0171, Q8 Accuracy: 4.38%
Epoch [16/20], Loss: 2.0167, Q8 Accuracy: 4.37%
Epoch [17/20], Loss: 2.0163, Q8 Accuracy: 4.36%
Epoch [18/20], Loss: 2.0160, Q8 Accuracy: 4.34%
Epoch [19/20], Loss: 2.0157, Q8 Accuracy: 4.31%
Epoch [20/20], Loss: 2.0154, Q8 Accuracy: 4.31%
Test Q8 Accuracy: 4.32%
Validation Q8 Accuracy:

In [None]:
# Training loop
for epoch in range(nn_epochs):
    model.train()
    total_loss = 0
    correct_predictions = 0
    total_samples = 0

    for batch_X, batch_y in train_loader:
        # print(f"batch_X shape: {batch_X.shape}, batch_y shape: {batch_y.shape}")

        # Move to device (GPU/CPU)
        batch_X = batch_X.permute(0, 2, 1)  # Change to shape (batch_size, channels, seq_len)
        optimizer.zero_grad()

        # Forward pass
        outputs = model(batch_X)  # Shape: (batch_size, num_classes, seq_len)
        _, predicted = torch.max(outputs, dim=1)  # Shape: (batch_size, seq_len)

        # Convert batch_y to class indices (argmax across last dimension)
        target_class_indices = batch_y.argmax(dim=2)  # Shape: (batch_size, seq_len)

        # Calculate loss and perform backpropagation
        loss = loss_fn(outputs.view(-1, num_classes), target_class_indices.view(-1))
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

        # Track Q8 accuracy
        correct_predictions += (predicted == target_class_indices).sum().item()
        total_samples += batch_X.size(0) * batch_X.size(2)  # batch_size * seq_len

    # Calculate Q8 accuracy for the training set
    q8_train_accuracy = 100 * correct_predictions / total_samples

    print(f'Epoch [{epoch+1}/{nn_epochs}], Loss: {total_loss/len(train_loader):.4f}, Q8 Accuracy: {q8_train_accuracy:.2f}%')

# Evaluation function for test/validation data
def evaluate(model, data_loader):
    model.eval()
    correct_predictions = 0
    total_samples = 0
    with torch.no_grad():
        for batch_X, batch_y in data_loader:
            batch_X = batch_X.permute(0, 2, 1)  # Change to shape (batch_size, channels, seq_len)
            outputs = model(batch_X)  # Shape: (batch_size, num_classes, seq_len)
            _, predicted = torch.max(outputs, dim=1)  # Shape: (batch_size, seq_len)

            # Convert batch_y to class indices
            target_class_indices = batch_y.argmax(dim=2)  # Shape: (batch_size, seq_len)

            # Track Q8 accuracy
            correct_predictions += (predicted == target_class_indices).sum().item()
            total_samples += batch_X.size(0) * batch_X.size(2)

    q8_accuracy = 100 * correct_predictions / total_samples
    return q8_accuracy

# Evaluate on test and validation data
test_q8_accuracy = evaluate(model, test_loader)
val_q8_accuracy = evaluate(model, val_loader)

print(f'Test Q8 Accuracy: {test_q8_accuracy:.2f}%')
print(f'Validation Q8 Accuracy: {val_q8_accuracy:.2f}%')

Epoch [1/20], Loss: 2.0046, Q8 Accuracy: 2.92%
Epoch [2/20], Loss: 2.0045, Q8 Accuracy: 2.93%
Epoch [3/20], Loss: 2.0041, Q8 Accuracy: 2.91%
Epoch [4/20], Loss: 2.0040, Q8 Accuracy: 2.91%
Epoch [5/20], Loss: 2.0038, Q8 Accuracy: 2.85%
Epoch [6/20], Loss: 2.0040, Q8 Accuracy: 2.88%
Epoch [7/20], Loss: 2.0037, Q8 Accuracy: 2.85%
Epoch [8/20], Loss: 2.0036, Q8 Accuracy: 2.84%
Epoch [9/20], Loss: 2.0035, Q8 Accuracy: 2.82%
Epoch [10/20], Loss: 2.0037, Q8 Accuracy: 2.79%
Epoch [11/20], Loss: 2.0039, Q8 Accuracy: 2.82%
Epoch [12/20], Loss: 2.0036, Q8 Accuracy: 2.79%
Epoch [13/20], Loss: 2.0034, Q8 Accuracy: 2.78%
Epoch [14/20], Loss: 2.0039, Q8 Accuracy: 2.77%
Epoch [15/20], Loss: 2.0035, Q8 Accuracy: 2.71%
Epoch [16/20], Loss: 2.0036, Q8 Accuracy: 2.72%
Epoch [17/20], Loss: 2.0035, Q8 Accuracy: 2.74%
Epoch [18/20], Loss: 2.0035, Q8 Accuracy: 2.72%
Epoch [19/20], Loss: 2.0036, Q8 Accuracy: 2.72%
Epoch [20/20], Loss: 2.0035, Q8 Accuracy: 2.75%
Test Q8 Accuracy: 2.45%
Validation Q8 Accuracy: 2

In [None]:
#new code CNN architecture

class ProteinCNN(nn.Module):
    def __init__(self, amino_acid_residues, num_classes, drop_out=0.1):
        super(ProteinCNN, self).__init__()

        # First convolutional block
        self.conv1 = nn.Conv1d(
            in_channels=amino_acid_residues,  # 21 amino acid features
            out_channels=256,                # Number of filters
            kernel_size=11,                  # Window size
            padding=5                        # Same padding to maintain sequence length
        )
        self.bn1 = nn.BatchNorm1d(256)       # Batch normalization
        self.dropout1 = nn.Dropout(drop_out) # Dropout layer

        # Second convolutional block
        self.conv2 = nn.Conv1d(
            in_channels=256,
            out_channels=128,
            kernel_size=11,
            padding=5
        )
        self.bn2 = nn.BatchNorm1d(128)
        self.dropout2 = nn.Dropout(drop_out)

        # third convolutional layer
        self.conv3 = nn.Conv1d(
            in_channels=128,
            out_channels=64,
            kernel_size=11,
            padding=5
        )
        self.bn3 = nn.BatchNorm1d(64)
        self.dropout3 = nn.Dropout(drop_out)

        # output convolutional layer
        self.conv4 = nn.Conv1d(
            in_channels=64,
            out_channels=num_classes,
            kernel_size=11,
            padding=5
        )
    def forward(self, x):
        # Input shape: (batch_size, amino_acid_residues, sequence_len)

        # First conv block
        x = F.relu(self.bn1(self.conv1(x)))  # Conv -> BatchNorm -> ReLU
        x = self.dropout1(x)                # Dropout

        # Second conv block
        x = F.relu(self.bn2(self.conv2(x))) # Conv -> BatchNorm -> ReLU
        x = self.dropout2(x)                # Dropout

        # third conv block
        x = F.relu(self.bn3(self.conv3(x))) # Conv -> BatchNorm -> ReLU
        x = self.dropout3(x)

        # Final layer
        x = self.conv4(x)                   # No activation; output raw logits
        return x  # Shape: (batch_size, num_classes, sequence_len)

# Instantiate the model
model = ProteinCNN(amino_acid_residues=21, num_classes=8, drop_out=0.1)

# Optimizer and Learning Rate
optimizer = torch.optim.Adam(model.parameters(), lr=LR)

# Print the model summary
print(model)

ProteinCNN(
  (conv1): Conv1d(21, 256, kernel_size=(11,), stride=(1,), padding=(5,))
  (bn1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (dropout1): Dropout(p=0.1, inplace=False)
  (conv2): Conv1d(256, 128, kernel_size=(11,), stride=(1,), padding=(5,))
  (bn2): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (dropout2): Dropout(p=0.1, inplace=False)
  (conv3): Conv1d(128, 64, kernel_size=(11,), stride=(1,), padding=(5,))
  (bn3): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (dropout3): Dropout(p=0.1, inplace=False)
  (conv4): Conv1d(64, 8, kernel_size=(11,), stride=(1,), padding=(5,))
)


In [None]:
for epoch in range(nn_epochs):
    model.train()
    total_loss = 0
    correct_predictions = 0
    total_samples = 0

    for batch_X, batch_y in train_loader:
        # Reshape input to match Conv1D expectations
        batch_X = batch_X.permute(0, 2, 1)  # (batch_size, amino_acid_residues, sequence_len)

        # Forward pass
        outputs = model(batch_X)  # Shape: (batch_size, num_classes, sequence_len)
        outputs = outputs.permute(0, 2, 1)  # (batch_size, sequence_len, num_classes)

        # Reshape for loss computation
        # Convert batch_y from one-hot encoding to class indices
        target_class_indices = batch_y.argmax(dim=2)  # Shape: (batch_size, sequence_len)
        loss = loss_fn(outputs.reshape(-1, num_classes), target_class_indices.reshape(-1).long())

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

        # Accuracy computation
        _, predicted = torch.max(outputs, dim=2)  # Predictions across sequence
        correct_predictions += (predicted == batch_y.argmax(dim=2)).sum().item()
        total_samples += batch_y.numel()

    accuracy = 100 * correct_predictions / total_samples
    print(f"Epoch {epoch+1}/{nn_epochs}, Loss: {total_loss:.4f}, Accuracy: {accuracy:.2f}%")


Epoch 1/50, Loss: 30.9778, Accuracy: 10.87%
Epoch 2/50, Loss: 22.3120, Accuracy: 11.20%
Epoch 3/50, Loss: 21.3813, Accuracy: 11.25%
Epoch 4/50, Loss: 20.7060, Accuracy: 11.29%
Epoch 5/50, Loss: 20.2107, Accuracy: 11.31%


KeyboardInterrupt: 

In [17]:
#since learning was slow decided to experiment with residual connections
LR = 0.001  # Learning rate
drop_out = 0.2  # Dropout rate
batch_dim = 64  # Batch size
nn_epochs = 50  # Number of epochs
loss_fn = nn.CrossEntropyLoss()  # Cross-entropy loss function for classification

import torch
import torch.nn as nn
import torch.nn.functional as F

class ProteinCNN(nn.Module):
    def __init__(self, amino_acid_residues, num_classes, drop_out=0.1):
        super(ProteinCNN, self).__init__()

        # First convolutional block with residual connection
        self.conv1a = nn.Conv1d(amino_acid_residues, 256, kernel_size=11, padding=5)
        self.bn1a = nn.BatchNorm1d(256)
        self.conv1b = nn.Conv1d(256, 256, kernel_size=11, padding=5)
        self.bn1b = nn.BatchNorm1d(256)
        self.dropout1 = nn.Dropout(drop_out)
        self.shortcut1 = nn.Conv1d(amino_acid_residues, 256, kernel_size=1)  # Shortcut layer

        # Second convolutional block with residual connection
        self.conv2a = nn.Conv1d(256, 128, kernel_size=11, padding=5)
        self.bn2a = nn.BatchNorm1d(128)
        self.conv2b = nn.Conv1d(128, 128, kernel_size=11, padding=5)
        self.bn2b = nn.BatchNorm1d(128)
        self.dropout2 = nn.Dropout(drop_out)
        self.shortcut2 = nn.Conv1d(256, 128, kernel_size=1)  # Shortcut layer

        # Third convolutional block with residual connection
        self.conv3a = nn.Conv1d(128, 64, kernel_size=11, padding=5)
        self.bn3a = nn.BatchNorm1d(64)
        self.conv3b = nn.Conv1d(64, 64, kernel_size=11, padding=5)
        self.bn3b = nn.BatchNorm1d(64)
        self.dropout3 = nn.Dropout(drop_out)
        self.shortcut3 = nn.Conv1d(128, 64, kernel_size=1)  # Shortcut layer

        # Output layer
        self.conv_out = nn.Conv1d(64, num_classes, kernel_size=11, padding=5)

    def forward(self, x):
        # First residual block
        x = x.permute(0, 2, 1)
        residual = self.shortcut1(x)
        x = F.relu(self.bn1a(self.conv1a(x)))
        x = self.dropout1(x)
        x = self.bn1b(self.conv1b(x))
        x += residual
        x = F.relu(x)

        # Second residual block
        residual = self.shortcut2(x)
        x = F.relu(self.bn2a(self.conv2a(x)))
        x = self.dropout2(x)
        x = self.bn2b(self.conv2b(x))
        x += residual
        x = F.relu(x)

        # Third residual block
        residual = self.shortcut3(x)
        x = F.relu(self.bn3a(self.conv3a(x)))
        x = self.dropout3(x)
        x = self.bn3b(self.conv3b(x))
        x += residual
        x = F.relu(x)

        # Output layer
        x = self.conv_out(x)
        return x
# Instantiate the model
model = ProteinCNN(amino_acid_residues=21, num_classes=8, drop_out=0.2)

# Optimizer and Learning Rate
optimizer = torch.optim.Adam(model.parameters(), lr=LR)

# Print the model summary
print(model)

ProteinCNN(
  (conv1a): Conv1d(21, 256, kernel_size=(11,), stride=(1,), padding=(5,))
  (bn1a): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv1b): Conv1d(256, 256, kernel_size=(11,), stride=(1,), padding=(5,))
  (bn1b): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (dropout1): Dropout(p=0.2, inplace=False)
  (shortcut1): Conv1d(21, 256, kernel_size=(1,), stride=(1,))
  (conv2a): Conv1d(256, 128, kernel_size=(11,), stride=(1,), padding=(5,))
  (bn2a): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2b): Conv1d(128, 128, kernel_size=(11,), stride=(1,), padding=(5,))
  (bn2b): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (dropout2): Dropout(p=0.2, inplace=False)
  (shortcut2): Conv1d(256, 128, kernel_size=(1,), stride=(1,))
  (conv3a): Conv1d(128, 64, kernel_size=(11,), stride=(1,), padding=(5,))
  (bn3a): BatchNorm1d(64, eps=1e-05, m

In [19]:
# Training loop
train_accuracies = []
val_accuracies = []
train_losses = []
val_losses = []

for epoch in range(nn_epochs):
    model.train()  # Set model to training mode
    train_loss = 0.0
    train_correct = 0
    train_total = 0

    for batch_X, batch_y, mask in train_loader:
        optimizer.zero_grad()  # Clear gradients

        # Forward pass
        #batch_X = batch_X.permute(0, 2, 1)
        outputs = model(batch_X)  # Shape: (batch_size, sequence_length, num_classes)
        #print("Outputs shape before reshaping:", outputs.shape)

        # If batch_y is one-hot encoded, convert to class indices
        if batch_y.dim() == 3:  # Check if labels are one-hot
          batch_y = batch_y.argmax(dim=2)  # Shape: (batch_size, sequence_length)

        # Reshape outputs and labels for CrossEntropyLoss
        outputs = outputs.reshape(-1, num_classes)  # Flatten to (batch_size * sequence_length, num_classes)
        batch_y = batch_y.reshape(-1)  # Flatten to (batch_size * sequence_length)
        mask = mask.reshape(-1)  # (batch_size * sequence_length)

        # Apply the mask to exclude padding positions (mask should be 1 for valid, 0 for padding)
        outputs_masked = outputs[mask]  # Select valid positions
        batch_y_masked = batch_y[mask]  # Select valid labels

        #print("Labels shape after reshaping:", batch_y.shape)
        #print("Outputs shape after reshaping:", outputs.shape)

        assert outputs.shape[0] == batch_y.shape[0], "Mismatch in output and label batch sizes"
        # Compute loss
        loss = loss_fn(outputs_masked, batch_y_masked)
        train_loss += loss.item()

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        # Compute accuracy

        #mask = (batch_y != padding_label)  # Mask to exclude padding
        _, predicted = torch.max(outputs_masked, dim=1)  # Predicted class indices
        train_correct += (predicted == batch_y_masked).sum().item()
        train_total += mask.sum().item() # Count valid residues
    # Calculate epoch-level metrics
    train_accuracy = 100 * train_correct / train_total
    train_loss /= len(train_loader)

    # Validation loop
    model.eval()  # Set model to evaluation mode
    val_loss = 0.0
    val_correct = 0
    val_total = 0

    with torch.no_grad():
        for batch_X, batch_y, mask in val_loader:
            #batch_X = batch_X.permute(0, 2, 1)
            outputs = model(batch_X)
            if batch_y.dim() == 3:  # Check if labels are one-hot
              batch_y = batch_y.argmax(dim=2)
            # Reshape for loss computation
            outputs = outputs.reshape(-1, num_classes)
            batch_y = batch_y.reshape(-1)
            mask = mask.reshape(-1)

            # Apply the mask to exclude padding positions
            outputs_masked = outputs[mask]
            batch_y_masked = batch_y[mask]

            loss = loss_fn(outputs_masked, batch_y_masked)
            val_loss += loss.item()

            # Compute accuracy
            _, predicted = torch.max(outputs_masked, dim=1)
            val_correct += (predicted == batch_y_masked).sum().item()
            val_total += mask.sum().item()  # Count valid residues

    # Calculate validation metrics
    val_accuracy = 100 * val_correct / val_total
    val_loss /= len(val_loader)

    train_accuracies.append(train_accuracy)
    val_accuracies.append(val_accuracy)
    train_losses.append(train_loss)
    val_losses.append(val_loss)

    print(f"Epoch [{epoch+1}/{nn_epochs}]")
    print(f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.2f}%")
    print(f"Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.2f}%")


Epoch [1/50]
Train Loss: 2.0667, Train Accuracy: 13.69%
Val Loss: 2.0539, Val Accuracy: 16.39%
Epoch [2/50]
Train Loss: 2.0473, Train Accuracy: 14.54%
Val Loss: 2.0424, Val Accuracy: 17.37%
Epoch [3/50]
Train Loss: 2.0386, Train Accuracy: 14.86%
Val Loss: 2.0355, Val Accuracy: 17.45%
Epoch [4/50]
Train Loss: 2.0347, Train Accuracy: 15.06%
Val Loss: 2.0339, Val Accuracy: 17.17%
Epoch [5/50]
Train Loss: 2.0326, Train Accuracy: 15.15%
Val Loss: 2.0328, Val Accuracy: 17.43%
Epoch [6/50]
Train Loss: 2.0311, Train Accuracy: 15.31%
Val Loss: 2.0320, Val Accuracy: 17.12%
Epoch [7/50]
Train Loss: 2.0305, Train Accuracy: 15.38%
Val Loss: 2.0344, Val Accuracy: 17.25%
Epoch [8/50]
Train Loss: 2.0301, Train Accuracy: 15.51%
Val Loss: 2.0330, Val Accuracy: 17.10%
Epoch [9/50]
Train Loss: 2.0277, Train Accuracy: 15.71%
Val Loss: 2.0318, Val Accuracy: 17.51%
Epoch [10/50]
Train Loss: 2.0246, Train Accuracy: 15.97%
Val Loss: 2.0336, Val Accuracy: 17.14%
Epoch [11/50]
Train Loss: 2.0229, Train Accuracy:

KeyboardInterrupt: 