[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/crunchdao/quickstarters/blob/master/competitions/broad-2/quickstarters/random-submission/random-submission.ipynb)

![Banner](https://raw.githubusercontent.com/crunchdao/quickstarters/refs/heads/master/competitions/broad-2/assets/banner.webp)

In [None]:
%pip install --upgrade crunch-cli

Get a new token: https://hub.crunchdao.com/competitions/broad-2/submit/via/notebook

In [None]:
!crunch setup --notebook broad-2 hello --token aaaabbbbccccddddeeeeffff

# To retrieve a larger dataset, include the --size large argument as shown below:
#!crunch setup --notebook --size large broad-2 hello --token aaaabbbbccccddddeeeeffff

In [None]:
!pip install spatialdata

In [2]:
import spatialdata
import scanpy
import numpy
import pandas
import os

In [None]:
import crunch
# Rebinds the name `crunch` from the imported module to the object returned by
# load_notebook(); the `crunch.test(...)` call further down is made on that object.
crunch = crunch.load_notebook()

In [30]:
def log1p_normalization(arr):
    """Scale each row to a total of 10000, then apply ``log1p``.

    Args:
        arr: 2-D array-like of values; rows are normalized independently
            (the sum is taken along ``axis=1``).

    Returns:
        Array of the same shape holding ``log1p(arr / row_sum * 10000)``.
        Rows whose sum is zero are returned as all zeros instead of NaN.
    """
    row_sums = numpy.sum(arr, axis=1, keepdims=True)
    # A row summing to zero would yield 0/0 = NaN (and a RuntimeWarning).
    # Dividing by 1 instead leaves such rows at zero, and log1p(0) == 0.
    safe_sums = numpy.where(row_sums == 0, 1, row_sums)
    return numpy.log1p((arr / safe_sums) * 10000)

In [34]:
# In the training function, users build and train the model to make inferences on the test data.
# Your model must be stored in the `model_directory_path`.
def train(
    data_directory_path: str,
    model_directory_path: str
):
    """Load the training inputs; the actual model fitting is left as a TODO.

    Reads the scRNA-seq ``.h5ad`` file and one spatial ``.zarr`` sample from
    ``data_directory_path``. Nothing is written to ``model_directory_path``
    by this placeholder.
    """
    # scRNA-seq data
    scrnaseq_path = os.path.join(data_directory_path, 'Crunch2_scRNAseq.h5ad')
    reference_adata = scanpy.read_h5ad(scrnaseq_path)

    # Spatial data: UC1_NI.zarr is just one example among the available samples.
    spatial_path = os.path.join(data_directory_path, 'UC1_NI.zarr')
    spatial_sample = spatialdata.read_zarr(spatial_path)

    # TODO Put your train code here!
    

In [33]:
# In the inference function, the trained model is loaded and used to make inferences on a
# sample of data that matches the characteristics of the training set.
def infer(
    data_file_path: str,
):
    """Return placeholder (random) predictions for the cells of one spatial sample.

    Args:
        data_file_path: Path of the spatial ``.zarr`` sample to predict. The
            gene list CSV is read from the same directory.

    Returns:
        A DataFrame indexed by the ``cell_id`` of every cell in the 'test' or
        'validation' group, with one column per gene symbol, holding
        log1p-normalized values rounded to 2 decimals.
    """
    data_path = os.path.dirname(data_file_path)

    # The gene list is cached as an attribute of this function so the CSV is
    # only read on the first call.
    try:
        gene_table = infer.gene_list
    except AttributeError:
        print('Loading Genes to predict')
        gene_table = pandas.read_csv(os.path.join(data_path, 'Crunch2_gene_list.csv'))
        infer.gene_list = gene_table

    gene_names = gene_table['gene_symbols']

    # Spatial sample to predict on
    print(f"Loading spatial data from {data_file_path}...")
    spatial_sample = spatialdata.read_zarr(data_file_path)

    # Only cells in the 'test' or 'validation' groups need a prediction
    group_table = spatial_sample["cell_id-group"].obs
    target_cells = group_table.query("group == 'test' or group == 'validation'")
    cell_ids = target_cells["cell_id"]

    # Random placeholder values -- replace this with the actual model inference
    raw_values = numpy.random.rand(len(cell_ids), len(gene_names))

    # log1p normalization, rounded to 2 decimal points
    normalized = numpy.round(log1p_normalization(raw_values), 2)

    return pandas.DataFrame(normalized, index=cell_ids, columns=gene_names)

In [None]:
# Run a local test of your submission to make sure
# that it can be accepted by the system.
# NOTE(review): no_determinism_check=True presumably skips the check that repeated
# runs give identical output (the placeholder predictions are random) -- confirm
# against the crunch-cli documentation.
crunch.test(
    no_determinism_check=True,
)

Now remember to download this notebook and then submit it at https://hub.crunchdao.com/competitions/broad-2/submit/