## Loading pretrained model weights

If the parameters are loaded successfully, a message of the form `Loaded parameters from /PATH/TO/WEIGHTS` is printed.

Note that if you are loading the models from somewhere outside of your home directory, you will have to change the directories (`ROOT_DR`) in `mpmodels/core/default_dirs.py`.

In [1]:
%load_ext autoreload
%autoreload 2

import os
from ptutils.models.utils import load_model
import mpmodels
from mpmodels.models.paths import PATHS
from mpmodels.models.layers import LAYERS
from mpmodels.core.feature_extractor import get_model_kwargs
from brainmodel_utils.models.utils import get_base_model_name, get_model_func_from_name, get_model_path_from_name, get_model_layers_from_name

In [2]:
def load_pretrained_model(model_name):
    """Build the model registered under ``model_name`` and load its weights.

    ``get_base_model_name`` splits the given name into a base model name and a
    ``trained`` flag. The base name is then used to look up the checkpoint
    path in ``PATHS`` and the layer specification in ``LAYERS``, and is passed
    together with ``mpmodels.models.__dict__`` and ``get_model_kwargs()`` to
    ``get_model_func_from_name``. The result of that call is handed to
    ``load_model`` along with the ``trained`` flag and the checkpoint path.

    Args:
        model_name: Model identifier understood by ``get_base_model_name``
            (e.g. ``"pfVC1_CTRNN_physion"``).

    Returns:
        A ``(model, model_layers)`` tuple: the object returned by
        ``load_model`` and the value looked up in ``LAYERS`` for this model.

    Raises:
        AssertionError: If a trained model has a checkpoint path that is not an
            existing file, or if an untrained model has a checkpoint path at all.
    """
    model_name, trained = get_base_model_name(model_name)
    model_path = get_model_path_from_name(
        model_paths_dict=PATHS, model_name=model_name
    )

    if trained:
        # sometimes model functions automatically load the model ckpt, so no model_path needs to be passed in
        # in which case, we alert the user
        if model_path is None:
            print(
                "No model path was explicitly passed in to this trained model. If you meant to do this and didn't load the model checkpoint some other way (e.g. in the model func), then please cancel and retry!"
            )
        else:
            # Name the model and the path so a misconfigured weights directory
            # is immediately visible instead of surfacing as a bare AssertionError.
            assert os.path.isfile(model_path), (
                f"Checkpoint for trained model '{model_name}' was not found at "
                f"{model_path}. Check the directories configured in "
                "mpmodels/core/default_dirs.py."
            )
    else:
        # An untrained model must not have a checkpoint registered for it.
        assert model_path is None, (
            f"Model '{model_name}' is not marked as trained, but a checkpoint "
            f"path is registered for it: {model_path}."
        )

    model_layers = get_model_layers_from_name(
        model_layers_dict=LAYERS, model_name=model_name
    )
    model = get_model_func_from_name(
        model_func_dict=mpmodels.models.__dict__,
        model_name=model_name,
        model_kwargs=get_model_kwargs(),
    )
    model = load_model(
        model=model,
        trained=trained,
        model_path=model_path,
    )
    return model, model_layers

# 1. VC-1 + Dynamics

## 1a. VC-1 + CTRNN, dynamics module pretrained on Physion

In [3]:
# VC-1 + CTRNN (dynamics pretrained on Physion): load the model and its layer list
# through load_pretrained_model, then print both.
name = "pfVC1_CTRNN_physion"
model, model_layers = load_pretrained_model(name)
# print() joins its arguments with a space, so the first line of the model repr
# is preceded by a single space in the output.
print("======= Model architecture =======\n", model)
print(f"======= Model layers =======\n{model_layers}")

Loaded parameters from /om/weka/yanglab/anayebi/mental-sim/trained_models/VC-1+CTRNN_physion.pt
 FrozenPretrainedEncoder(
  (encoder): VC1_pretrained(
    (vc1): VisionTransformer(
      (patch_embed): PatchEmbed(
        (proj): Conv2d(3, 1024, kernel_size=(16, 16), stride=(16, 16))
        (norm): Identity()
      )
      (pos_drop): Dropout(p=0.0, inplace=False)
      (norm_pre): Identity()
      (blocks): Sequential(
        (0): Block(
          (norm1): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
          (attn): Attention(
            (qkv): Linear(in_features=1024, out_features=3072, bias=True)
            (attn_drop): Dropout(p=0.0, inplace=False)
            (proj): Linear(in_features=1024, out_features=1024, bias=True)
            (proj_drop): Dropout(p=0.0, inplace=False)
          )
          (ls1): Identity()
          (drop_path1): Identity()
          (norm2): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
          (mlp): Mlp(
            (fc1): Lin

## 1b. VC-1 + CTRNN, dynamics module pretrained on Kinetics-700

In [4]:
# VC-1 + CTRNN (dynamics pretrained on Kinetics-700): load the model and its layer
# list through load_pretrained_model, then print both.
name = "pfVC1_CTRNN_k700"
model, model_layers = load_pretrained_model(name)
# print() joins its arguments with a space, so the first line of the model repr
# is preceded by a single space in the output.
print("======= Model architecture =======\n", model)
print(f"======= Model layers =======\n{model_layers}")

Loaded parameters from /om/weka/yanglab/anayebi/mental-sim/trained_models/VC-1+CTRNN_k700.pt
 FrozenPretrainedEncoder(
  (encoder): VC1_pretrained(
    (vc1): VisionTransformer(
      (patch_embed): PatchEmbed(
        (proj): Conv2d(3, 1024, kernel_size=(16, 16), stride=(16, 16))
        (norm): Identity()
      )
      (pos_drop): Dropout(p=0.0, inplace=False)
      (norm_pre): Identity()
      (blocks): Sequential(
        (0): Block(
          (norm1): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
          (attn): Attention(
            (qkv): Linear(in_features=1024, out_features=3072, bias=True)
            (attn_drop): Dropout(p=0.0, inplace=False)
            (proj): Linear(in_features=1024, out_features=1024, bias=True)
            (proj_drop): Dropout(p=0.0, inplace=False)
          )
          (ls1): Identity()
          (drop_path1): Identity()
          (norm2): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
          (mlp): Mlp(
            (fc1): Linear

## 1c. VC-1 + LSTM, dynamics module pretrained on Physion

In [5]:
# VC-1 + LSTM (dynamics pretrained on Physion): load the model and its layer list
# through load_pretrained_model, then print both.
name = "pfVC1_LSTM_physion"
model, model_layers = load_pretrained_model(name)
# print() joins its arguments with a space, so the first line of the model repr
# is preceded by a single space in the output.
print("======= Model architecture =======\n", model)
print(f"======= Model layers =======\n{model_layers}")

Loaded parameters from /om/weka/yanglab/anayebi/mental-sim/trained_models/VC-1+LSTM_physion.pt
 FrozenPretrainedEncoder(
  (encoder): VC1_pretrained(
    (vc1): VisionTransformer(
      (patch_embed): PatchEmbed(
        (proj): Conv2d(3, 1024, kernel_size=(16, 16), stride=(16, 16))
        (norm): Identity()
      )
      (pos_drop): Dropout(p=0.0, inplace=False)
      (norm_pre): Identity()
      (blocks): Sequential(
        (0): Block(
          (norm1): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
          (attn): Attention(
            (qkv): Linear(in_features=1024, out_features=3072, bias=True)
            (attn_drop): Dropout(p=0.0, inplace=False)
            (proj): Linear(in_features=1024, out_features=1024, bias=True)
            (proj_drop): Dropout(p=0.0, inplace=False)
          )
          (ls1): Identity()
          (drop_path1): Identity()
          (norm2): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
          (mlp): Mlp(
            (fc1): Line

## 1d. VC-1 + LSTM, dynamics module pretrained on Kinetics-700

In [6]:
# VC-1 + LSTM (dynamics pretrained on Kinetics-700): load the model and its layer
# list through load_pretrained_model, then print both.
name = "pfVC1_LSTM_k700"
model, model_layers = load_pretrained_model(name)
# print() joins its arguments with a space, so the first line of the model repr
# is preceded by a single space in the output.
print("======= Model architecture =======\n", model)
print(f"======= Model layers =======\n{model_layers}")

Loaded parameters from /om/weka/yanglab/anayebi/mental-sim/trained_models/VC-1+LSTM_k700.pt
 FrozenPretrainedEncoder(
  (encoder): VC1_pretrained(
    (vc1): VisionTransformer(
      (patch_embed): PatchEmbed(
        (proj): Conv2d(3, 1024, kernel_size=(16, 16), stride=(16, 16))
        (norm): Identity()
      )
      (pos_drop): Dropout(p=0.0, inplace=False)
      (norm_pre): Identity()
      (blocks): Sequential(
        (0): Block(
          (norm1): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
          (attn): Attention(
            (qkv): Linear(in_features=1024, out_features=3072, bias=True)
            (attn_drop): Dropout(p=0.0, inplace=False)
            (proj): Linear(in_features=1024, out_features=1024, bias=True)
            (proj_drop): Dropout(p=0.0, inplace=False)
          )
          (ls1): Identity()
          (drop_path1): Identity()
          (norm2): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
          (mlp): Mlp(
            (fc1): Linear(

# 2. R3M + Dynamics

## 2a. R3M + CTRNN, dynamics module pretrained on Physion

In [7]:
# R3M + CTRNN (dynamics pretrained on Physion): load the model and its layer list
# through load_pretrained_model, then print both.
name = "pfR3M_CTRNN_physion"
model, model_layers = load_pretrained_model(name)
# print() joins its arguments with a space, so the first line of the model repr
# is preceded by a single space in the output.
print("======= Model architecture =======\n", model)
print(f"======= Model layers =======\n{model_layers}")



Loaded parameters from /om/weka/yanglab/anayebi/mental-sim/trained_models/R3M+CTRNN_physion.pt
 FrozenPretrainedEncoder(
  (encoder): R3M_pretrained(
    (r3m): DataParallel(
      (module): R3M(
        (cs): CosineSimilarity()
        (bce): BCELoss()
        (sigm): Sigmoid()
        (convnet): ResNet(
          (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
          (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
          (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
          (layer1): Sequential(
            (0): Bottleneck(
              (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
              (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
              (bn2)

## 2b. R3M + CTRNN, dynamics module pretrained on Kinetics-700

In [8]:
# R3M + CTRNN (dynamics pretrained on Kinetics-700): load the model and its layer
# list through load_pretrained_model, then print both.
name = "pfR3M_CTRNN_k700"
model, model_layers = load_pretrained_model(name)
# print() joins its arguments with a space, so the first line of the model repr
# is preceded by a single space in the output.
print("======= Model architecture =======\n", model)
print(f"======= Model layers =======\n{model_layers}")

Loaded parameters from /om/weka/yanglab/anayebi/mental-sim/trained_models/R3M+CTRNN_k700.pt
 FrozenPretrainedEncoder(
  (encoder): R3M_pretrained(
    (r3m): DataParallel(
      (module): R3M(
        (cs): CosineSimilarity()
        (bce): BCELoss()
        (sigm): Sigmoid()
        (convnet): ResNet(
          (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
          (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
          (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
          (layer1): Sequential(
            (0): Bottleneck(
              (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
              (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
              (bn2): B

## 2c. R3M + LSTM, dynamics module pretrained on Physion

In [9]:
# R3M + LSTM (dynamics pretrained on Physion): load the model and its layer list
# through load_pretrained_model, then print both.
name = "pfR3M_LSTM_physion"
model, model_layers = load_pretrained_model(name)
# print() joins its arguments with a space, so the first line of the model repr
# is preceded by a single space in the output.
print("======= Model architecture =======\n", model)
print(f"======= Model layers =======\n{model_layers}")

Loaded parameters from /om/weka/yanglab/anayebi/mental-sim/trained_models/R3M+LSTM_physion.pt
 FrozenPretrainedEncoder(
  (encoder): R3M_pretrained(
    (r3m): DataParallel(
      (module): R3M(
        (cs): CosineSimilarity()
        (bce): BCELoss()
        (sigm): Sigmoid()
        (convnet): ResNet(
          (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
          (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
          (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
          (layer1): Sequential(
            (0): Bottleneck(
              (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
              (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
              (bn2):

## 2d. R3M + LSTM, dynamics module pretrained on Kinetics-700

In [10]:
# R3M + LSTM (dynamics pretrained on Kinetics-700): load the model and its layer
# list through load_pretrained_model, then print both.
name = "pfR3M_LSTM_k700"
model, model_layers = load_pretrained_model(name)
# print() joins its arguments with a space, so the first line of the model repr
# is preceded by a single space in the output.
print("======= Model architecture =======\n", model)
print(f"======= Model layers =======\n{model_layers}")

Loaded parameters from /om/weka/yanglab/anayebi/mental-sim/trained_models/R3M+LSTM_k700.pt
 FrozenPretrainedEncoder(
  (encoder): R3M_pretrained(
    (r3m): DataParallel(
      (module): R3M(
        (cs): CosineSimilarity()
        (bce): BCELoss()
        (sigm): Sigmoid()
        (convnet): ResNet(
          (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
          (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
          (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
          (layer1): Sequential(
            (0): Bottleneck(
              (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
              (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
              (bn2): Ba

# 3. FitVid end-to-end pretrained on Physion with RandAugment (64x64 px images)

In [30]:
# FitVid (pretrained end-to-end on Physion, 64x64 px frames): load the model and its
# layer list through load_pretrained_model, then print both.
name = "fitvid_ctxt7_physion_aug_frames_64x64"
model, model_layers = load_pretrained_model(name)
# print() joins its arguments with a space, so the first line of the model repr
# is preceded by a single space in the output.
print("======= Model architecture =======\n", model)
print(f"======= Model layers =======\n{model_layers}")

Loaded parameters from /om/weka/yanglab/anayebi/mental-sim/trained_models/FitVid_physion_64x64.pt
 FitVid(
  (encoder): NvaeEncoder(
    (blocks): ModuleList(
      (0): ModuleList(
        (0): EncoderBlock(
          (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=same, bias=False)
          (shortcut): Conv2d(3, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (normalize_input): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (normalize_hidden): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (normalize_residual): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (se): SEBlock(
            (squeeze): Conv2d(64, 4, kernel_size=(1, 1), stride=(1, 1))
            (expand): Conv2d(4, 64, kernel_size=(1, 1), stride=(1, 1))
          )
       

# 4. SVG end-to-end pretrained on Physion with 128x128 px images

In [31]:
# SVG (pretrained end-to-end on Physion, 128x128 px images): load the model and its
# layer list through load_pretrained_model, then print both.
name = "svg_physion_128x128"
model, model_layers = load_pretrained_model(name)
# print() joins its arguments with a space, so the first line of the model repr
# is preceded by a single space in the output.
print("======= Model architecture =======\n", model)
print(f"======= Model layers =======\n{model_layers}")

Loaded parameters from /om/weka/yanglab/anayebi/mental-sim/trained_models/SVG_physion_128x128.pt
 SVG(
  (encoder): encoder(
    (c1): Sequential(
      (0): vgg_layer(
        (main): Sequential(
          (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
          (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): LeakyReLU(negative_slope=0.2, inplace=True)
        )
      )
      (1): vgg_layer(
        (main): Sequential(
          (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
          (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): LeakyReLU(negative_slope=0.2, inplace=True)
        )
      )
    )
    (c2): Sequential(
      (0): vgg_layer(
        (main): Sequential(
          (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
          (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stat

# 5. C-SWM with Large Encoder end-to-end pretrained on Physion

In [32]:
# C-SWM with large encoder (pretrained end-to-end on Physion): load the model and
# its layer list through load_pretrained_model, then print both.
name = "large_CSWM_physion"
model, model_layers = load_pretrained_model(name)
# print() joins its arguments with a space, so the first line of the model repr
# is preceded by a single space in the output.
print("======= Model architecture =======\n", model)
print(f"======= Model layers =======\n{model_layers}")

Loaded parameters from /om/weka/yanglab/anayebi/mental-sim/trained_models/CSWM_large_physion.pt
 PassiveVideoCSWM(
  (model): ContrastiveSWM(
    (obj_extractor): EncoderCNNLarge(
      (cnn1): Conv2d(21, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (act1): ReLU()
      (ln1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (cnn2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (act2): ReLU()
      (ln2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (cnn3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (act3): ReLU()
      (ln3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (cnn4): Conv2d(32, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (act4): Sigmoid()
    )
    (obj_encoder): EncoderMLP(
      (fc1): Linear(in_features=50176, out_features=512, bias=True)
      (fc2): Linear(in_featur