# Model initialization

In [1]:
import os
import torch
from torchvision import transforms
import timm
from huggingface_hub import login, hf_hub_download
from PIL import Image

# Directory that holds the locally stored model checkpoint
local_dir = "assets/ckpts/uni2-h/"

# Preprocessing applied to every PIL image before it is fed to the model
transform = transforms.Compose([
    transforms.Resize(224),        # shorter side -> 224
    transforms.CenterCrop(224),    # central 224x224 window
    transforms.ToTensor(),         # PIL image -> float tensor
    transforms.Normalize(          # ImageNet channel statistics
        mean=(0.485, 0.456, 0.406),
        std=(0.229, 0.224, 0.225),
    ),
])

# Keyword arguments forwarded verbatim to timm.create_model
timm_kwargs = dict(
    model_name='vit_giant_patch14_224',  # timm architecture name used as the starting point
    img_size=224,                        # nominal input resolution
    patch_size=14,                       # 14x14 patches (the patch-embed conv uses kernel = stride = 14)
    depth=24,                            # number of transformer blocks
    num_heads=24,                        # attention heads per block
    init_values=1e-5,                    # initial LayerScale value
    embed_dim=1536,                      # token width
    mlp_ratio=2.66667*2,                 # doubled ratio: with the packed SwiGLU layer, fc1 ends up 8192 wide
    num_classes=0,                       # no classification head; the model returns features
    no_embed_class=True,                 # NOTE(review): per timm docs, no position embedding for prefix tokens - confirm
    mlp_layer=timm.layers.SwiGLUPacked,  # gated (SwiGLU) MLP block
    act_layer=torch.nn.SiLU,             # activation used inside the MLP
    reg_tokens=8,                        # number of register tokens (not "regression" tokens)
    dynamic_img_size=True,               # allow input sizes other than img_size
)

# Build the architecture, then fill it with the checkpoint weights
model = timm.create_model(**timm_kwargs)

# NOTE(review): torch.load unpickles the file - only load checkpoints from a trusted
# source (consider weights_only=True where the installed torch supports it).
checkpoint_path = os.path.join(local_dir, "pytorch_model.bin")
model.load_state_dict(torch.load(checkpoint_path, map_location="cpu"), strict=True)

# Switch to evaluation mode for inference; as the cell's last expression this also
# displays the module tree
model.eval()

VisionTransformer(
  (patch_embed): PatchEmbed(
    (proj): Conv2d(3, 1536, kernel_size=(14, 14), stride=(14, 14))
    (norm): Identity()
  )
  (pos_drop): Dropout(p=0.0, inplace=False)
  (patch_drop): Identity()
  (norm_pre): Identity()
  (blocks): Sequential(
    (0): Block(
      (norm1): LayerNorm((1536,), eps=1e-06, elementwise_affine=True)
      (attn): Attention(
        (qkv): Linear(in_features=1536, out_features=4608, bias=True)
        (q_norm): Identity()
        (k_norm): Identity()
        (attn_drop): Dropout(p=0.0, inplace=False)
        (proj): Linear(in_features=1536, out_features=1536, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (ls1): LayerScale()
      (drop_path1): Identity()
      (norm2): LayerNorm((1536,), eps=1e-06, elementwise_affine=True)
      (mlp): GluMlp(
        (fc1): Linear(in_features=1536, out_features=8192, bias=True)
        (act): SiLU()
        (drop1): Dropout(p=0.0, inplace=False)
        (norm): Identity()
    

In [2]:
from PIL import Image

@torch.no_grad()  # Inference only: skip autograd bookkeeping to save memory and time
def extract_feature(img_path):
    """
    Extract a feature vector from an input image using the notebook-level `model`.

    The image is preprocessed with the notebook-level `transform` and placed on
    whatever device the model's parameters currently live on, so the function
    keeps working after a later cell moves the model to a GPU.

    Args:
        img_path (str): Path to the input image.

    Returns:
        np.ndarray: Extracted feature vector with the batch dimension squeezed
        out (shape (1536,) for the model configured in the first cell).
    """
    # Follow the model: a CPU input fed to a CUDA model raises a device-mismatch error.
    model_device = next(model.parameters()).device

    # Use a context manager so the underlying file handle is closed promptly;
    # convert() loads the pixel data and returns an independent RGB image.
    with Image.open(img_path) as raw_image:
        rgb_image = raw_image.convert("RGB")

    batch = transform(rgb_image).unsqueeze(0).to(model_device)  # [1, 3, 224, 224]
    feature = model(batch)                                      # [1, 1536]
    return feature.squeeze().cpu().numpy()                      # drop batch dim, return as NumPy on CPU



In [3]:
import pandas as pd

# Read the complete cell type annotation table from disk
annotations_csv = '/home/lbh/projects_dir/BigSlice/Celltype_Annotations/all_annotations.csv'
all_df = pd.read_csv(annotations_csv)

# Last expression of the cell: render the table
all_df

Unnamed: 0.1,Unnamed: 0,spot,organ,subregion,level1_annotation,level2_annotation,level0_annotation,celltype_prediction_1,score_prediction_1,celltype_prediction_2,...,x_scaled_image,y_scaled_image,x_scaled_image_organ,y_scaled_image_organ,batch,organ_encoded,subregion_encoded,level0_annotation_encoded,level1_annotation_encoded,level2_annotation_encoded
0,0,AACAACGACAACAGGTCG_0_0_0,LU,Alveoli,epithelial cell,specialized epithelial cell,epithelial cell,epithelial cell,0.5136,B cell,...,1667.161481,3329.065277,441.161481,866.065277,CTRL_1,8,0,104,6,68
1,1,AACAACGACAGCCTACAA_0_0_0,LU,Alveoli,blood & immune cell,B cell,B cell,B cell,0.4480,epithelial cell,...,2030.518557,3122.431157,804.518557,659.431157,CTRL_1,8,0,0,1,0
2,2,AACAACGACCTAAGGCAC_0_0_0,LU,Alveoli,blood & immune cell,B cell,B cell,B cell,0.5535,type II pneumocyte,...,2017.541518,3351.525507,791.541518,888.525507,CTRL_1,8,0,0,1,0
3,3,AACAACGACCTGTCTCTA_0_0_0,LU,Alveoli,endothelial cell,endothelial cell,endothelial cell,endothelial cell,0.5310,lung endothelial cell,...,1747.619119,2929.273176,521.619119,466.273176,CTRL_1,8,0,89,5,15
4,4,AACAACGACTAGTGATCG_0_0_0,LU,Alveoli,blood & immune cell,B cell,B cell,B cell,0.5385,type II pneumocyte,...,2201.815464,3526.715304,975.815464,1063.715304,CTRL_1,8,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2330873,2330873,TTGTTGCGATCCGTCTCA_1,HE,Heart Tissue,muscle cell,cardiac muscle cell,cardiac muscle cell,cardiac muscle cell,0.7839,regular atrial cardiac myocyte,...,1333.176195,3415.031487,972.176195,678.031487,LPS_2,4,17,61,13,7
2330874,2330874,TTGTTGGAGAACGTTCCA_1,HE,Heart Tissue,muscle cell,cardiac muscle cell,cardiac muscle cell,cardiac muscle cell,0.6857,"dividing cell,proliferating cell",...,830.630740,3872.347340,469.630740,1135.347340,LPS_2,4,17,61,13,7
2330875,2330875,TTGTTGGAGCGCTTCTTC_1,HE,Heart Tissue,muscle cell,cardiac muscle cell,cardiac muscle cell,cardiac muscle cell,0.5192,regular atrial cardiac myocyte,...,1289.138707,3249.142403,928.138707,512.142403,LPS_2,4,17,61,13,7
2330876,2330876,TTGTTGGAGCTGTGCATA_1,HE,Heart Tissue,muscle cell,cardiac muscle cell,cardiac muscle cell,cardiac muscle cell,0.6362,myocardial endocrine cell,...,1672.523899,3661.623368,1311.523899,924.623368,LPS_2,4,17,61,13,7


# Train set (~10h)

In [4]:
import os

# Cap CPU-side parallelism at a fixed thread count
cpu_num = 16

# NOTE(review): torch was already imported in the first cell, so these environment
# variables may be read too late to influence its thread pools; the
# torch.set_num_threads call below does not depend on them - confirm.
for _thread_env_var in ("OMP_NUM_THREADS", "MKL_NUM_THREADS"):
    os.environ[_thread_env_var] = str(cpu_num)

import torch
torch.set_num_threads(cpu_num)


from torchvision import transforms
from PIL import Image
import pandas as pd
import numpy as np
from tqdm import tqdm


# Select the inference device: GPU index 3 when CUDA is available, otherwise the CPU
device = torch.device("cuda:3" if torch.cuda.is_available() else "cpu")
model = model.to(device)  # Move the model to the selected device
model.eval()              # Keep the model in evaluation mode

# Image preprocessing pipeline (same steps as the one defined with the model)
transform = transforms.Compose([
    transforms.Resize(224),                             # Resize the shorter edge to 224
    transforms.CenterCrop(224),                         # Center crop to 224x224
    transforms.ToTensor(),                              # Convert PIL image to tensor
    transforms.Normalize(mean=(0.485, 0.456, 0.406),     # Normalize with ImageNet stats
                         std=(0.229, 0.224, 0.225)),
])

# Data locations; patch_size is part of the image directory name
patch_size = 128
train_csv = "/home/lbh/projects_dir/BigSlice/dataset/train_labels.csv"
train_img_dir = f"/home/lbh/projects_dir/BigSlice/dataset/images_norm_x{patch_size}/"
train_df = pd.read_csv(train_csv)  # Load training metadata

# Label columns and the names of their integer-encoded counterparts
label_columns = ["organ", "subregion", "level1_annotation", "level2_annotation", "level0_annotation"]
encoded_columns = [f"{col}_encoded" for col in label_columns]

# Draw a reproducible random subset of up to 80,000 rows. The cap at len(train_df)
# keeps DataFrame.sample from raising ValueError on a smaller table; for tables with
# at least 80,000 rows the selection is unchanged.
n_train_samples = min(80000, len(train_df))
train_df_sampled = train_df.sample(n=n_train_samples, random_state=42).reset_index(drop=True)

# Persist the subset so the extracted features can be matched to their rows later.
# NOTE(review): the file name says "8k" although 80,000 rows are requested; the path
# is left untouched because other code may read it - rename deliberately if desired.
train_df_sampled.to_csv("/home/lbh/projects_dir/BigSlice/dataset/train_8k_samples.csv", index=False)


In [None]:
# Feature and label extraction for the sampled training set
X_train = []                                    # per-batch feature arrays
y_train_levels = [[] for _ in encoded_columns]  # one label list per encoded annotation column

batch_size = 64  # Batch size for inference; adjust based on available GPU memory

# Walk through the sampled table in consecutive slices of batch_size rows
for i in tqdm(range(0, len(train_df_sampled), batch_size), desc="Extracting train features"):
    batch_df = train_df_sampled.iloc[i:i+batch_size]
    batch_imgs = []

    # Load and preprocess every image of the batch. The context manager closes each
    # file handle right away instead of leaving tens of thousands to the garbage collector.
    for filename in batch_df['filename']:
        img_path = os.path.join(train_img_dir, filename + ".jpg")
        with Image.open(img_path) as img:
            batch_imgs.append(transform(img.convert("RGB")))

    # Collect the encoded labels column-wise (same row order as the images)
    for level_labels, col in zip(y_train_levels, encoded_columns):
        level_labels.extend(batch_df[col].tolist())

    # Stack images and move to device
    batch_tensor = torch.stack(batch_imgs).to(device)  # Shape: [B, 3, 224, 224]

    # Forward pass without gradient tracking, results brought back to host memory
    with torch.no_grad():
        batch_features = model(batch_tensor).cpu().numpy()  # Shape: [B, 1536]

    X_train.append(batch_features)

# Concatenate all features into a single array
X_train = np.concatenate(X_train, axis=0)  # Final shape: [N, 1536]

# Save features to .npy file. Rows follow the order of train_df_sampled; the labels in
# y_train_levels stay in memory only (they are not written by this cell).
np.save(f"/home/lbh/projects_dir/BigSlice/dataset/uni_feature_{patch_size}.npy", X_train)


# Validation set (~3h)

In [5]:
# Patch size tag that appears in the image directory name
patch_size = 128

# Where the evaluation images and their label table live
eval_img_dir = f"/home/lbh/projects_dir/BigSlice/dataset/images_norm_x{patch_size}/"
eval_csv = "/home/lbh/projects_dir/BigSlice/dataset/test_labels.csv"

# Read the evaluation metadata; no subsampling is applied, so the "sampled" name is
# simply a second reference to the full table
eval_df_sampled = eval_df = pd.read_csv(eval_csv)

# Last expression of the cell: render the table
eval_df_sampled


Unnamed: 0,filename,organ_encoded,subregion_encoded,level0_annotation_encoded,level1_annotation_encoded,level2_annotation_encoded
0,ACAACTGGATATGTGTGC_0_0_0,13,28,31,1,2
1,AGCCATCTGCGGAGATAT_0_0_0,6,32,148,6,32
2,CTGGATCTTCGATAATCC_1,2,24,219,1,40
3,TACACTTGCGGTCTATCA_0_0_0,9,27,63,13,66
4,CTACATGGCCAACCAGCT_1_0_0,2,9,133,15,20
...,...,...,...,...,...,...
466171,ATCGGAGTATACGAAGCC_0_0_0,9,27,226,13,66
466172,CAGGAGTGAGCTAGACTT_1,9,27,63,13,66
466173,GCTAACACGCCATAAGAG_0_0_0,13,28,45,6,5
466174,GCTCTATGTCAAGTCTCC_1_0,9,27,226,13,66


In [6]:
# Keep only the annotation rows whose 'spot' ID occurs among the evaluation filenames
is_eval_spot = all_df['spot'].isin(eval_df_sampled['filename'])
validation_df = all_df.loc[is_eval_spot]

# The boolean selection keeps all_df's row order and index, not the order of
# eval_df_sampled. Last expression of the cell: render the result.
validation_df

Unnamed: 0.1,Unnamed: 0,spot,organ,subregion,level1_annotation,level2_annotation,level0_annotation,celltype_prediction_1,score_prediction_1,celltype_prediction_2,...,x_scaled_image,y_scaled_image,x_scaled_image_organ,y_scaled_image_organ,batch,organ_encoded,subregion_encoded,level0_annotation_encoded,level1_annotation_encoded,level2_annotation_encoded
1,1,AACAACGACAGCCTACAA_0_0_0,LU,Alveoli,blood & immune cell,B cell,B cell,B cell,0.4480,epithelial cell,...,2030.518557,3122.431157,804.518557,659.431157,CTRL_1,8,0,0,1,0
11,11,AACACACGGTACAGAGCT_0_0_0,LU,Alveoli,blood & immune cell,B cell,B cell,B cell,0.5057,epithelial cell,...,2077.235895,3373.985738,851.235895,910.985738,CTRL_1,8,0,0,1,0
12,12,AACACACGGTACGGCATA_0_0_0,LU,Alveoli,epithelial cell,specialized epithelial cell,epithelial cell,epithelial cell,0.4006,T cell,...,1659.375258,3342.541415,433.375258,879.541415,CTRL_1,8,0,104,6,68
20,20,AACACTACGCGAACGGTA_0_0_0,LU,Alveoli,epithelial cell,pneumocyte,type I pneumocyte,type I pneumocyte,0.8655,endothelial cell,...,2282.273102,3522.223258,1056.273102,1059.223258,CTRL_1,8,0,339,6,58
24,24,AACACTACGGTCCACCAA_0_0_0,LU,Alveoli,epithelial cell,pneumocyte,type I pneumocyte,type I pneumocyte,1.0000,,...,2061.663449,3266.176632,835.663449,803.176632,CTRL_1,8,0,339,6,58
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2330867,2330867,TTGTTGACCGTGTACACT_1,HE,Heart Tissue,muscle cell,cardiac muscle cell,cardiac muscle cell,cardiac muscle cell,0.5973,fibroblast,...,747.736645,3558.503127,386.736645,821.503127,LPS_2,4,17,61,13,7
2330869,2330869,TTGTTGACCTCTTGACAG_1,HE,Heart Tissue,muscle cell,cardiac muscle cell,cardiac muscle cell,cardiac muscle cell,0.8586,regular atrial cardiac myocyte,...,1426.432052,3603.338014,1065.432052,866.338014,LPS_2,4,17,61,13,7
2330870,2330870,TTGTTGCGAAGATGCGTC_1,HE,Heart Tissue,muscle cell,cardiac muscle cell,cardiac muscle cell,cardiac muscle cell,0.6812,fibroblast,...,1550.773196,3728.875699,1189.773196,991.875699,LPS_2,4,17,61,13,7
2330872,2330872,TTGTTGCGAGTCTTCACA_1,HE,Heart Tissue,muscle cell,cardiac muscle cell,cardiac muscle cell,cardiac muscle cell,0.4873,fibroblast of cardiac tissue,...,1188.111528,3917.182227,827.111528,1180.182227,LPS_2,4,17,61,13,7


In [9]:
import os
# Cap CPU-side parallelism at a fixed thread count
cpu_num = 16

# NOTE(review): torch was already imported in the first cell, so these environment
# variables may be read too late to influence its thread pools; the
# torch.set_num_threads call below does not depend on them - confirm.
for _thread_env_var in ("OMP_NUM_THREADS", "MKL_NUM_THREADS"):
    os.environ[_thread_env_var] = str(cpu_num)

import torch
torch.set_num_threads(cpu_num)

from torchvision import transforms
from PIL import Image
import pandas as pd
import numpy as np
from tqdm import tqdm


# Select the inference device: GPU index 3 when CUDA is available, otherwise the CPU
device = torch.device("cuda:3" if torch.cuda.is_available() else "cpu")
model = model.to(device)  # `model` must already exist from the initialization cell
model.eval()              # Keep the model in evaluation mode

# Image preprocessing pipeline (same steps as the one defined with the model)
transform = transforms.Compose([
    transforms.Resize(224),                             # Resize the shorter edge to 224
    transforms.CenterCrop(224),                         # Center crop to 224x224
    transforms.ToTensor(),                              # Convert to tensor
    transforms.Normalize(mean=(0.485, 0.456, 0.406),     # Normalize with ImageNet stats
                         std=(0.229, 0.224, 0.225)),
])

# patch_size is used in the output file name; batch_size controls images per forward pass
patch_size = 128
batch_size = 64
X_test = []  # per-batch feature arrays (NumPy)

# Walk through validation_df in consecutive slices of batch_size rows.
# The progress label names the validation split (it previously said "train").
for i in tqdm(range(0, len(validation_df), batch_size), desc="Extracting validation features"):
    batch_df = validation_df.iloc[i:i+batch_size]
    batch_imgs = []

    # Image files are named after the 'spot' ID. The context manager closes each
    # file handle right away instead of leaving it to the garbage collector.
    for spot in batch_df['spot']:
        img_path = os.path.join(eval_img_dir, spot + ".jpg")
        with Image.open(img_path) as img:
            batch_imgs.append(transform(img.convert("RGB")))

    # Stack batch and move to device
    batch_tensor = torch.stack(batch_imgs).to(device)  # Shape: [B, 3, 224, 224]

    # Forward pass without gradient tracking, results brought back to host memory
    with torch.no_grad():
        batch_features = model(batch_tensor).cpu().numpy()  # Shape: [B, 1536]

    X_test.append(batch_features)

# Concatenate all features into a single array (rows follow validation_df's order)
X_test = np.concatenate(X_test, axis=0)  # Final shape: [N, 1536]

# Save extracted validation features to file
np.save(f"/home/lbh/projects_dir/BigSlice/dataset/uni_validation_feature_{patch_size}_all.npy", X_test)

Extracting train features: 100%|██████████| 7284/7284 [3:23:04<00:00,  1.67s/it]  


# Additional dataset (~50min)

In [None]:
# Paths for the downsampled evaluation set.
# NOTE: this cell relies on `patch_size`, `transform`, `device` and `model` from the
# earlier cells, and it overwrites `eval_csv`, `eval_img_dir`, `eval_df`,
# `eval_df_sampled` and `X_test` from the validation section.
eval_csv = "/home/lbh/projects_dir/BigSlice/evalset/evallist_down.csv"
eval_img_dir = f"/home/lbh/projects_dir/BigSlice/evalset/images_x{patch_size}_down/"

# Load the evaluation metadata
eval_df = pd.read_csv(eval_csv)
eval_df_sampled = eval_df  # Use the full set; you may sample if needed

batch_size = 64  # Batch size for feature extraction; adjust based on GPU memory

X_test = []  # per-batch feature arrays

# Walk through the table in consecutive slices of batch_size rows.
# The progress label names the test split (it previously said "train").
for i in tqdm(range(0, len(eval_df_sampled), batch_size), desc="Extracting test features"):
    batch_df = eval_df_sampled.iloc[i:i+batch_size]
    batch_imgs = []

    # Load and preprocess each image. The context manager closes each file handle
    # right away instead of leaving it to the garbage collector.
    for filename in batch_df['filename']:
        img_path = os.path.join(eval_img_dir, filename + ".jpg")
        with Image.open(img_path) as img:
            batch_imgs.append(transform(img.convert("RGB")))

    # Stack into a batch tensor and move to device
    batch_tensor = torch.stack(batch_imgs).to(device)  # Shape: [B, 3, 224, 224]

    # Forward pass without gradient tracking, results brought back to host memory
    with torch.no_grad():
        batch_features = model(batch_tensor).cpu().numpy()  # Shape: [B, 1536]

    X_test.append(batch_features)

# Concatenate all batch features into one array (rows follow eval_df_sampled's order)
X_test = np.concatenate(X_test, axis=0)  # Final shape: [N, 1536]

# Save features to .npy file
np.save(f"/home/lbh/projects_dir/BigSlice/dataset/uni_test_feature_{patch_size}.npy", X_test)
