In [1]:
import numpy as np
import scanpy as sc
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt

def preprocess_data_for_vit(adata, image_size=(224, 224)):
    data = adata.X.A if hasattr(adata.X, 'A') else adata.X
    scaler = MinMaxScaler()
    data_scaled = scaler.fit_transform(data)
    
    # Reshape data into a square image-like format
    data_reshaped = data_scaled.reshape(-1, image_size[0], image_size[1])
    
    # Convert to 3 channels if needed (e.g., RGB)
    data_reshaped = np.stack([data_reshaped]*3, axis=-1)
    return data_reshaped

# Example usage
adata = sc.read_h5ad('/home/yash-kumar-gola/Documents/SPACEL-main/dataset/visium_human_breast_cancer/human_bc_spatial_1142243F.h5ad')
images = preprocess_data_for_vit(adata)


ValueError: cannot reshape array of size 72372352 into shape (224,224)

In [2]:
import numpy as np
import scanpy as sc
from sklearn.preprocessing import MinMaxScaler

def preprocess_data_for_vit(adata, target_size=(224, 224)):
    data = adata.X.A if hasattr(adata.X, 'A') else adata.X
    num_cells, num_genes = data.shape
    
    # Normalize the data
    scaler = MinMaxScaler()
    data_scaled = scaler.fit_transform(data)
    
    # Calculate the required size for reshaping
    required_size = target_size[0] * target_size[1]
    if num_genes > required_size:
        # Truncate the data if more genes than pixels
        data_scaled = data_scaled[:, :required_size]
    elif num_genes < required_size:
        # Pad the data if fewer genes than pixels
        padding = np.zeros((num_cells, required_size - num_genes))
        data_scaled = np.hstack((data_scaled, padding))
    
    # Reshape to image format (assuming single channel here)
    data_reshaped = data_scaled.reshape(num_cells, target_size[0], target_size[1])
    
    # Convert to 3 channels (e.g., grayscale to RGB)
    data_reshaped = np.stack([data_reshaped]*3, axis=-1)
    return data_reshaped

# Example usage
adata = sc.read_h5ad('/home/yash-kumar-gola/Documents/SPACEL-main/dataset/visium_human_breast_cancer/human_bc_spatial_1142243F.h5ad')
images = preprocess_data_for_vit(adata)
