In [1]:
#pip install vit_pytorch

In [2]:
import torch
from PIL import Image
from torchvision import transforms

# Load the image
image_path = 'image_slice.png'
image = Image.open(image_path)

# Preprocess the image
preprocess = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
image_tensor = preprocess(image).unsqueeze(0)

In [3]:
from vit_pytorch.twins_svt import TwinsSVT

model = TwinsSVT(
    num_classes=1000,       # number of output classes
    s1_emb_dim=64,          # stage 1 - patch embedding projected dimension
    s2_emb_dim=128,         # stage 2 - patch embedding projected dimension
    s3_emb_dim=256,         # stage 3 - patch embedding projected dimension
    s4_emb_dim=512,         # stage 4 - patch embedding projected dimension
    s1_depth=1,
    s2_depth=1,
    s3_depth=5,
    s4_depth=4,
    peg_kernel_size=3,      # positional encoding generator kernel size
    dropout=0.              # dropout
)

# Move model to GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
image_tensor = image_tensor.to(device)

# Forward pass to get image embeddings
model.eval()
with torch.no_grad():
    outputs = model(image_tensor)
    image_embeddings = outputs  # Shape: (batch_size, num_classes)

In [20]:
class ClassifierHead(torch.nn.Module):
    def __init__(self, input_dim, output_dim):
        super(ClassifierHead, self).__init__()
        self.fc = torch.nn.Linear(input_dim, output_dim)

    def forward(self, x):
        return self.fc(x)

# Define the classifier head with correct input dimension
num_classes = 1000
classifier_head = ClassifierHead(1000, num_classes).to(device)

# Assuming the outputs from the TwinsSVT model need to be flattened
image_embeddings_flattened = image_embeddings.view(image_embeddings.size(0), -1)

# image_embeddings_flattened.shape
print(image_embeddings_flattened.shape)

# Forward pass through classifier head
logits = classifier_head(image_embeddings_flattened)

torch.Size([1, 1000])


In [22]:
import pandas as pd

# Load the gene embeddings
gene_embeddings_path = 'gene_embeddings.csv'
gene_embeddings = pd.read_csv(gene_embeddings_path)

# Convert gene embeddings to tensor and adjust shape if needed
gene_embeddings_tensor = torch.tensor(gene_embeddings.values, dtype=torch.float32).unsqueeze(0).to(device)

# Ensure gene embeddings and image embeddings have the same batch size
batch_size = image_embeddings.size(0)
gene_embeddings_tensor = gene_embeddings_tensor.repeat(batch_size, 1, 1)

gene_embeddings_tensor = torch.reshape(gene_embeddings_tensor, (1175, 512))

# gene_embeddings_tensor.shape
print(gene_embeddings_tensor.shape)

# Combine the embeddings
combined_embeddings = torch.cat((image_embeddings_flattened, gene_embeddings_tensor), dim=-1)  # Concatenate along the feature dimension

torch.Size([1175, 512])


RuntimeError: Sizes of tensors must match except in dimension 1. Expected size 1 but got size 1175 for tensor number 1 in the list.

In [20]:
from torch.optim import Adam
from torch.nn import CrossEntropyLoss

optimizer = Adam(list(model.parameters()) + list(classifier_head.parameters()), lr=1e-5)
criterion = CrossEntropyLoss()

# Define the number of epochs
num_epochs = 10  # Adjust as needed

# Dummy labels for demonstration purposes
labels = torch.randint(0, num_classes, (batch_size,)).to(device)

# Training loop
for epoch in range(num_epochs):
    model.train()
    classifier_head.train()
    optimizer.zero_grad()

    # Forward pass
    logits = classifier_head(combined_embeddings)
    loss = criterion(logits, labels)

    # Backward pass and optimization
    loss.backward()
    optimizer.step()

    print(f"Epoch {epoch + 1}, Loss: {loss.item()}")

In [None]:
model.eval()
classifier_head.eval()
with torch.no_grad():
    combined_output = combined_embeddings.view(batch_size, -1)
    logits = classifier_head(combined_output)
    probs = logits.softmax(dim=-1)