#### Copyright 2025 Google LLC

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

   http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

# SciVid: Cross-Domain Evaluation of Video Models in Scientific Applications

*Yana Hasson, Pauline Luc, Liliane Momeni, Maks Ovsjanikov, Guillaume Le Moing, Alina Kuznetsova, Ira Ktena, Jennifer J. Sun, Skanda Koppula, Dilara Gokay, Joseph Heyward, Etienne Pot, Andrew Zisserman*

[Paper](https://arxiv.org/abs/2507.03578) | [GitHub](https://github.com/google-deepmind/scivid)


# Colab demo

This Colab provides a hands-on demonstration to:

- Visualize data samples from the five scientific video datasets included in SciVid.

![image](https://storage.googleapis.com/scivid/assets/scivid_overview.gif)

- Initialize and visualize a model composed of a video backbone and a task-specific readout.

- Run inference and visualize predictions.

<img src="https://storage.googleapis.com/scivid/assets/evaluation_overview.png" alt="Evaluation overview" width="55%">

**Evaluation overview**. For each task, we train a lightweight readout on top of the backbone.


# Set up SciVid code and data

In [None]:
# @title Download SciVid code from github

# Local directory that will receive the SciVid checkout. The value is exposed
# as an editable Colab form field through the `@param` annotation.
SCIVID_CODE_DIR = '/content/scivid'  # @param {type:"string", isTemplate: true}

# Clone the SciVid repository from GitHub into SCIVID_CODE_DIR. The `!` prefix
# runs the line as a shell command and `{SCIVID_CODE_DIR}` is substituted with
# the Python value above.
# NOTE(review): `git clone` refuses to clone into an existing non-empty
# directory, so re-running this cell after a successful clone reports an error.
!git clone https://github.com/google-deepmind/scivid {SCIVID_CODE_DIR}


In [None]:
# @title Install scivid with dependencies
import os
import sys

# Install scivid repository in editable mode,
!pip install -e {SCIVID_CODE_DIR}

# Add the parent directory of the scivid package to sys.path.
scivid_parent_dir = os.path.dirname(SCIVID_CODE_DIR)
if scivid_parent_dir not in sys.path:
    sys.path.append(scivid_parent_dir)


In [None]:
# @title Mount the SciVid data bucket
# Local mount point for the SciVid data; editable through the Colab form.
SCIVID_DATA_DIR = '/content/data/scivid'  # @param {type:"string", isTemplate: true}

# Create local folder where SciVid data will be mounted
# (`-p` also creates missing parent directories and is a no-op if it exists).
!mkdir -p {SCIVID_DATA_DIR}

# Install gcsfuse using fix from stackoverflow.com/q/68568808
# Steps: register the Google Cloud apt signing key, add the gcsfuse apt
# repository to the sources list, refresh the package index, then install.
!curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key add -
!echo "deb https://packages.cloud.google.com/apt gcsfuse-bionic main" | sudo tee /etc/apt/sources.list.d/gcsfuse.list
!apt-get update
!apt-get install gcsfuse

# Authenticate with Google Cloud (if not already done)
# NOTE: `google.colab` is only importable inside a Colab runtime.
from google.colab import auth
auth.authenticate_user()

# Mount the `scivid` bucket at SCIVID_DATA_DIR.
# NOTE(review): `--implicit-dirs` is presumably needed so that directories
# implied only by object names are visible under the mount point — confirm
# against the gcsfuse documentation.
!gcsfuse --implicit-dirs scivid {SCIVID_DATA_DIR}


In [None]:
# @title Set SCIVID_DATA_DIR environment variable to data location

# This variable is used by the data readers to locate the data.
# The IPython `%env` magic exports the value of the Python variable
# SCIVID_DATA_DIR (substituted via `{...}`) into the process environment under
# the same name.
%env SCIVID_DATA_DIR={SCIVID_DATA_DIR}

# Initialize [kauldron](https://github.com/google-research/kauldron) trainer and display model and example batch


In [None]:
# @title Initialize kauldron trainer.

from etils import ecolab
from kauldron import kd
from scivid.configs import launch_config


MODEL_NAME = "mock_model"  # @param ["mock_model", "hf_videomae", "scaling4d"] {allow-input: true}
EVAL_NAME = "calms21_classification"  # @param ["flyvsfly_classification", "calms21_classification", "typhoon_future_pred", "weatherbench_future_pred", "stir_2d_tracking"] {allow-input: true}

# If using scaling4d model, download pretrained checkpoint.
# We load the weights from the 4DS-B-dist-e released checkpoint which is a
# B model distilled from 4DS-e. We refer readers to Table 2 of the
# [Scaling 4D Representation](https://arxiv.org/pdf/2412.15212)
# paper for a comparison between different 4DS model variants.
SCALING4D_CHECKPOINT_LOCAL_DIR = "/content/models"  # @param {type:"string"}
SCALING4D_CHECKPOINT_NAME = "scaling4d_dist_b.npz"  # @param {type:"string"}

# Set appropriate environment variable
%env SCALING4D_CHECKPOINT_PATH={SCALING4D_CHECKPOINT_LOCAL_DIR}/{SCALING4D_CHECKPOINT_NAME}

if MODEL_NAME == "scaling4d":
  !mkdir -p {SCALING4D_CHECKPOINT_LOCAL_DIR}
  # Construct the checkpoint URL
  checkpoint_url = os.path.join(
      "https://storage.googleapis.com/representations4d/checkpoints",
      SCALING4D_CHECKPOINT_NAME,
  )
  # Download checkpoint to local directory
  print(
      f"Downloading {checkpoint_url} to"
      f" {SCALING4D_CHECKPOINT_LOCAL_DIR}"
  )
  !wget -O {SCALING4D_CHECKPOINT_LOCAL_DIR}/{SCALING4D_CHECKPOINT_NAME} {checkpoint_url}

# Where the experiment artefacts are stored
WORKDIR = "/content/tmp/workdir"  # @param {type:"string"}

# Initialize the kd.train.Trainer configwhich defines which model, dataset and
# metrics to use
cfg = launch_config.get_config(f"{MODEL_NAME}:{EVAL_NAME}")
cfg.workdir = WORKDIR

with ecolab.collapse('Config (modified)'):
  cfg;

trainer = kd.konfig.resolve(cfg)

In [None]:
# @title Get and visualize one batch from the training dataset

# Pull a single batch off the front of the training dataset. `batch` is reused
# by later cells.
batch = next(iter(trainer.train_ds))

# Collapsible section 1: summary statistics of the batch.
with ecolab.collapse("Batch statistics"):
  batch_stats = kd.inspect.get_batch_stats(batch)
  ecolab.disp(batch_stats)

# Collapsible section 2: plot of the batch contents.
with ecolab.collapse("Batch images"):
  kd.inspect.plot_batch(batch)

In [None]:
# @title Visualize model architecture
model = trainer.model

# Build an overview table of the model from the trainer's model, training
# dataset, dataset sharding and initialization RNGs.
model_overview = kd.inspect.get_colab_model_overview(
    model=model,
    train_ds=trainer.train_ds,
    ds_sharding=trainer.sharding.ds,
    rngs=trainer.rng_streams.init_rngs(),
)

# Compute total number of parameters
total_params = model_overview['Own Params'].sum()

# Display model structure in collapsible window. The overview is displayed
# explicitly: a bare `model_overview;` expression inside the `with` block only
# renders when etils' auto-display mode has been enabled.
with ecolab.collapse(f'Model Overview (#Params: {total_params:,})'):
  ecolab.disp(model_overview)


In [None]:
# @title Run forward pass

# Bundle the trainer's losses, metrics and summaries into one auxiliaries
# object that is handed to the train step.
aux_wrapper = kd.train.Auxiliaries(
    losses=trainer.train_losses,
    metrics=trainer.train_metrics,
    summaries=trainer.train_summaries,
)

# Assemble the train step from the trainer's components.
trainstep = kd.train.TrainStep(
    model=trainer.model,
    optimizer=trainer.optimizer,
    rng_streams=trainer.rng_streams,
    sharding=trainer.sharding,
    init_transform=trainer.init_transform,
    aux=aux_wrapper,
)

# Initialize the state from the element spec of the training dataset.
element_spec = trainer.train_ds.element_spec
state = trainstep.init(element_spec)


# Forward pass: wrap state and batch into a context, then run the model on it
# using the train RNGs for the current step.
initial_context = kd.train.Context.from_state_and_batch(
    state=state, batch=batch
)
forward_rngs = trainstep.rng_streams.train_rngs(state.step)
context = kd.train.forward(
    initial_context,
    model=trainer.model,
    rngs=forward_rngs,
    is_training=True,
)

# Show summary statistics of the predictions in a collapsible section.
predictions = context.preds
with ecolab.collapse('Predictions'):
  prediction_stats = kd.inspect.get_batch_stats(predictions)
  ecolab.disp(prediction_stats)