# Autoencoder for epitope

In [1]:
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset
import torch.optim as optim
import torch.nn as nn
import torch
import numpy as np
from autoencoder import ConvAutoEncoder
import pandas as pd
use_gpu=True
if use_gpu and torch.cuda.is_available():
    device = torch.device("cuda:0")
elif use_gpu and torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
num_epochs = 100

num_epochs = 100
batch_size=64

### Train autoencoder

In [3]:
epitope_tensor=torch.load("../../../datasets/datasets_kidera/autoencoder_epitope_train/epitope_tensor.pt")

  epitope_tensor=torch.load("../../../datasets/datasets_kidera/autoencoder_epitope_train/epitope_tensor.pt")


In [2]:

X_train_epitope,X_val_epitope=train_test_split(epitope_tensor,test_size=0.3,random_state=42)

# DataLoaders
train_loader = DataLoader(TensorDataset(X_train_epitope), batch_size=batch_size, shuffle=True)
val_loader = DataLoader(TensorDataset(X_val_epitope), batch_size=batch_size)
model = ConvAutoEncoder(20,latent_dim=64).to(device)
optimizer = optim.Adam(model.parameters(),  lr=1e-3, weight_decay=1e-5)
criterion = nn.MSELoss()


train_losses, test_losses = [], []

# Training
for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0

    for batch in train_loader:
        batch_x = batch[0].to(device)
        optimizer.zero_grad()
        outputs = model(batch_x)
        loss = criterion(outputs, batch_x)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item() * batch_x.size(0)

    train_losses.append(epoch_loss / len(train_loader.dataset))

    # Validation
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for batch in val_loader:
            val_x = batch[0].to(device)
            val_outputs = model(val_x)
            loss = criterion(val_outputs, val_x)
            val_loss += loss.item() * val_x.size(0)
    test_losses.append(val_loss / len(val_loader.dataset))

    if (epoch + 1) % 10 == 0:
        print(f"[{epoch+1}/{num_epochs}] Train Loss: {train_losses[-1]:.4f} | Val Loss: {test_losses[-1]:.4f}")

  epitope_tensor=torch.load("../../../datasets/datasets_kidera/autoencoder_epitope_train/epitope_tensor.pt")


[10/100] Train Loss: 0.3540 | Val Loss: 0.3524



KeyboardInterrupt



### Save autoencoder

In [3]:
torch.save(model.state_dict(), '/projects/tcr_nlp/conv_autoencoder/conv/epitope.pth')

### Test autoencoder

In [4]:
model=ConvAutoEncoder(linear=20,latent_dim=64)
model.load_state_dict(torch.load('/projects/tcr_nlp/conv_autoencoder/conv/epitope.pth'))
def get_encoded_epitope(epitope_enc, model, batch_size, device='cuda'):        
    """
    Pass epitope encodings through an autoencoder and return both encoded (latent) and decoded outputs.

    Args:
        epitope_enc (Tensor): Input tensor of encoded epitopes (e.g., physicochemical features).
        model (nn.Module): Trained autoencoder model.
        batch_size (int): Batch size for processing.
        device (str): Device for computation ('cuda' or 'cpu').

    Returns:
        Tuple[Tensor, Tensor]: 
            - Encoded latent representations (shape: [N, latent_dim])
            - Reconstructed epitopes (same shape as input)
    """
    model.eval()
    model.to(device)
    test_loader = DataLoader(TensorDataset(epitope_enc), batch_size=batch_size)
    encoded_epitope, decoded_epitope = [], []

    with torch.no_grad():
        for batch in test_loader:
            x = batch[0].to(device)
            latent = model.linear_encode(model.encoder(x))
            decoded = model(x)
            encoded_epitope.append(latent.cpu())
            decoded_epitope.append(decoded.cpu())

    return torch.cat(encoded_epitope), torch.cat(decoded_epitope)
epitope_tensor=torch.load('../../../datasets/datasets_kidera/autoencoder_epitope_train/epitopes_test_tensor.pt')
encoded_epitope,decoded_epitope = get_encoded_epitope(epitope_tensor,model,64)

  model.load_state_dict(torch.load('/projects/tcr_nlp/conv_autoencoder/conv/epitope.pth'))
  epitope_tensor=torch.load('../../../datasets/datasets_kidera/autoencoder_epitope_train/epitopes_test_tensor.pt')


In [5]:
np.save('../../../datasets/datasets_kidera/autoencoder_epitope_train/encoded_epitope.npy', encoded_epitope.numpy())
np.save('../../../datasets/datasets_kidera/check_quality/decoded_epitope.npy', decoded_epitope.numpy())