# Load wandb metrics and create nicer plots

In [11]:
%load_ext autoreload
%autoreload 2

In [35]:
import os
from copy import deepcopy
from pathlib import Path

import dotenv
import molfeat
import pandas as pd
import torch
import torch.nn as nn
from hydra import compose, initialize
from hydra.core.global_hydra import GlobalHydra
from hydra.utils import instantiate
from lightning.pytorch import Trainer
from lightning.pytorch.loggers import WandbLogger
from omegaconf import DictConfig, OmegaConf, open_dict

import wandb
from src import utils
from src.models.jump_cl import BasicJUMPModule
from src.utils import instantiate_evaluator_list

In [13]:
for i in range(1, 4):
    if not Path(f"../cpjump{i}/jump/").exists():
        print(f"Mounting cpjump{i}...")
        os.system(f"sshfs bioclust:/projects/cpjump{i}/ ../cpjump{i}")
    else:
        print(f"cpjump{i} already mounted.")

Mounting cpjump1...
Mounting cpjump2...
Mounting cpjump3...


In [14]:
run = "2023-08-17_13-32-50/0"
epoch = 41

In [15]:
ckpt_str = "../cpjump1/jump/logs/train/multiruns/{run}/checkpoints/epoch_{epoch:0>3}.ckpt"

run_dict = {
    "small1": (run := "2023-08-16_11-59-26/0", "small_jump_cl", epoch := 43, ckpt_str.format(run=run, epoch=epoch)),
    "small": (run := "2023-08-17_13-32-50/0", "small_jump_cl", epoch := 41, ckpt_str.format(run=run, epoch=epoch)),
    "med": (run := "2023-08-07_11-55-54", "med_jump_cl", epoch := 5, ckpt_str.format(run=run, epoch=epoch)),
    "big": (run := "2023-08-01_11-37-40", "big_jump_cl", epoch := 1, ckpt_str.format(run=run, epoch=epoch)),
}

In [16]:
run, experiment, epoch, ckpt = run_dict["small"]

In [17]:
os.system(f"cat ../cpjump1/jump/logs/train/multiruns/{run}/.hydra/overrides.yaml");

- experiment=small_jump_cl
- trainer.devices=[0,2]
- data.num_workers=16
- data.prefetch_factor=2
- model/criterion=frozen_contrastive.yaml


In [18]:
os.listdir(f"../cpjump1/jump/logs/train/multiruns/{run}/checkpoints")

['last.ckpt', 'epoch_041.ckpt']

In [19]:
initialize(version_base=None, config_path="../configs/")

hydra.initialize()

In [20]:
run, experiment, epoch, ckpt = run_dict["small"]

In [21]:
cfg = compose(
    config_name="train.yaml",
    overrides=[
        "evaluate=true",
        "eval=retrieval",
        "paths.projects_dir=..",
        f"paths.output_dir=../cpjump1/jump/logs/train/multiruns/{run}",
        "experiment=fp_small",
        "data.batch_size=4",
        # "model/molecule_encoder=gin_masking.yaml",
        "trainer.devices=1",
        # "eval.moa_image_task.datamodule.data_root_dir=../",
    ],
)
print(OmegaConf.to_yaml(cfg))

task_name: train
tags:
- small_jump_cl
- fingerprints
- clip_like
- ${model.image_encoder.instance_model_name}
train: true
test: true
evaluate: true
compile: false
ckpt_path: null
seed: 12345
data:
  compound_transform:
    _target_: src.modules.compound_transforms.fp_transform.FPTransform
    fps:
    - maccs
    - ecfp
    compound_str_type: inchi
    params:
      ecfp:
        radius: 2
  _target_: src.models.jump_cl.datamodule.BasicJUMPDataModule
  batch_size: 4
  num_workers: 24
  pin_memory: null
  prefetch_factor: 3
  drop_last: true
  transform:
    _target_: src.modules.transforms.DefaultJUMPTransform
    _convert_: object
    size: 128
    dim:
    - -2
    - -1
  force_split: false
  splitter:
    _target_: src.splitters.ScaffoldSplitter
    train: 800
    test: 200
    val: 100
    retrieval: 0
  use_compond_cache: false
  data_root_dir: ${paths.projects_dir}/
  split_path: ${paths.split_path}/fp_small/
  dataloader_config:
    train:
      batch_size: ${data.batch_size}
 

In [61]:
dm = instantiate(cfg.data)



In [63]:
dm.prepare_data()

In [64]:
dm.setup("train")

In [65]:
dm.val_dataset

MoleculeImageDataset(n_compounds=100, n_images=2860)

In [67]:
dl = dm.val_dataloader()



In [68]:
b = next(iter(dl))



In [74]:
cfg.model["_target_"] += ".load_from_checkpoint"
with open_dict(cfg.model):
    cfg.model["checkpoint_path"] = ckpt

In [75]:
model = instantiate(cfg.model)

InstantiationException: Error in call to target 'lightning.pytorch.core.module.LightningModule.load_from_checkpoint':
RuntimeError('Error(s) in loading state_dict for BasicJUMPModule:\n\tMissing key(s) in state_dict: "molecule_encoder.backbone.0.0.weight", "molecule_encoder.backbone.0.0.bias", "molecule_encoder.backbone.0.1.weight", "molecule_encoder.backbone.0.1.bias", "molecule_encoder.backbone.0.1.running_mean", "molecule_encoder.backbone.0.1.running_var", "molecule_encoder.backbone.1.0.weight", "molecule_encoder.backbone.1.0.bias", "molecule_encoder.backbone.1.1.weight", "molecule_encoder.backbone.1.1.bias", "molecule_encoder.backbone.1.1.running_mean", "molecule_encoder.backbone.1.1.running_var", "molecule_encoder.backbone.2.0.weight", "molecule_encoder.backbone.2.0.bias", "molecule_encoder.backbone.2.1.weight", "molecule_encoder.backbone.2.1.bias", "molecule_encoder.backbone.2.1.running_mean", "molecule_encoder.backbone.2.1.running_var", "molecule_encoder.backbone.3.0.weight", "molecule_encoder.backbone.3.0.bias", "molecule_encoder.backbone.3.1.weight", "molecule_encoder.backbone.3.1.bias", "molecule_encoder.backbone.3.1.running_mean", "molecule_encoder.backbone.3.1.running_var", "molecule_encoder.backbone.4.0.weight", "molecule_encoder.backbone.4.0.bias". \n\tUnexpected key(s) in state_dict: "molecule_backbone.node_embeddings.0.weight", "molecule_backbone.node_embeddings.1.weight", "molecule_backbone.gnn_layers.0.mlp.0.weight", "molecule_backbone.gnn_layers.0.mlp.0.bias", "molecule_backbone.gnn_layers.0.mlp.2.weight", "molecule_backbone.gnn_layers.0.mlp.2.bias", "molecule_backbone.gnn_layers.0.edge_embeddings.0.weight", "molecule_backbone.gnn_layers.0.edge_embeddings.1.weight", "molecule_backbone.gnn_layers.0.bn.weight", "molecule_backbone.gnn_layers.0.bn.bias", "molecule_backbone.gnn_layers.0.bn.running_mean", "molecule_backbone.gnn_layers.0.bn.running_var", "molecule_backbone.gnn_layers.0.bn.num_batches_tracked", "molecule_backbone.gnn_layers.1.mlp.0.weight", "molecule_backbone.gnn_layers.1.mlp.0.bias", "molecule_backbone.gnn_layers.1.mlp.2.weight", "molecule_backbone.gnn_layers.1.mlp.2.bias", "molecule_backbone.gnn_layers.1.edge_embeddings.0.weight", "molecule_backbone.gnn_layers.1.edge_embeddings.1.weight", "molecule_backbone.gnn_layers.1.bn.weight", "molecule_backbone.gnn_layers.1.bn.bias", "molecule_backbone.gnn_layers.1.bn.running_mean", "molecule_backbone.gnn_layers.1.bn.running_var", "molecule_backbone.gnn_layers.1.bn.num_batches_tracked", "molecule_backbone.gnn_layers.2.mlp.0.weight", "molecule_backbone.gnn_layers.2.mlp.0.bias", "molecule_backbone.gnn_layers.2.mlp.2.weight", "molecule_backbone.gnn_layers.2.mlp.2.bias", "molecule_backbone.gnn_layers.2.edge_embeddings.0.weight", "molecule_backbone.gnn_layers.2.edge_embeddings.1.weight", "molecule_backbone.gnn_layers.2.bn.weight", "molecule_backbone.gnn_layers.2.bn.bias", "molecule_backbone.gnn_layers.2.bn.running_mean", "molecule_backbone.gnn_layers.2.bn.running_var", "molecule_backbone.gnn_layers.2.bn.num_batches_tracked", "molecule_backbone.gnn_layers.3.mlp.0.weight", "molecule_backbone.gnn_layers.3.mlp.0.bias", "molecule_backbone.gnn_layers.3.mlp.2.weight", "molecule_backbone.gnn_layers.3.mlp.2.bias", "molecule_backbone.gnn_layers.3.edge_embeddings.0.weight", "molecule_backbone.gnn_layers.3.edge_embeddings.1.weight", "molecule_backbone.gnn_layers.3.bn.weight", "molecule_backbone.gnn_layers.3.bn.bias", "molecule_backbone.gnn_layers.3.bn.running_mean", "molecule_backbone.gnn_layers.3.bn.running_var", "molecule_backbone.gnn_layers.3.bn.num_batches_tracked", "molecule_backbone.gnn_layers.4.mlp.0.weight", "molecule_backbone.gnn_layers.4.mlp.0.bias", "molecule_backbone.gnn_layers.4.mlp.2.weight", "molecule_backbone.gnn_layers.4.mlp.2.bias", "molecule_backbone.gnn_layers.4.edge_embeddings.0.weight", "molecule_backbone.gnn_layers.4.edge_embeddings.1.weight", "molecule_backbone.gnn_layers.4.bn.weight", "molecule_backbone.gnn_layers.4.bn.bias", "molecule_backbone.gnn_layers.4.bn.running_mean", "molecule_backbone.gnn_layers.4.bn.running_var", "molecule_backbone.gnn_layers.4.bn.num_batches_tracked", "molecule_head.weight", "molecule_head.bias", "molecule_encoder.projection_head.weight", "molecule_encoder.projection_head.bias", "molecule_encoder.backbone.node_embeddings.0.weight", "molecule_encoder.backbone.node_embeddings.1.weight", "molecule_encoder.backbone.gnn_layers.0.mlp.0.weight", "molecule_encoder.backbone.gnn_layers.0.mlp.0.bias", "molecule_encoder.backbone.gnn_layers.0.mlp.2.weight", "molecule_encoder.backbone.gnn_layers.0.mlp.2.bias", "molecule_encoder.backbone.gnn_layers.0.edge_embeddings.0.weight", "molecule_encoder.backbone.gnn_layers.0.edge_embeddings.1.weight", "molecule_encoder.backbone.gnn_layers.0.bn.weight", "molecule_encoder.backbone.gnn_layers.0.bn.bias", "molecule_encoder.backbone.gnn_layers.0.bn.running_mean", "molecule_encoder.backbone.gnn_layers.0.bn.running_var", "molecule_encoder.backbone.gnn_layers.0.bn.num_batches_tracked", "molecule_encoder.backbone.gnn_layers.1.mlp.0.weight", "molecule_encoder.backbone.gnn_layers.1.mlp.0.bias", "molecule_encoder.backbone.gnn_layers.1.mlp.2.weight", "molecule_encoder.backbone.gnn_layers.1.mlp.2.bias", "molecule_encoder.backbone.gnn_layers.1.edge_embeddings.0.weight", "molecule_encoder.backbone.gnn_layers.1.edge_embeddings.1.weight", "molecule_encoder.backbone.gnn_layers.1.bn.weight", "molecule_encoder.backbone.gnn_layers.1.bn.bias", "molecule_encoder.backbone.gnn_layers.1.bn.running_mean", "molecule_encoder.backbone.gnn_layers.1.bn.running_var", "molecule_encoder.backbone.gnn_layers.1.bn.num_batches_tracked", "molecule_encoder.backbone.gnn_layers.2.mlp.0.weight", "molecule_encoder.backbone.gnn_layers.2.mlp.0.bias", "molecule_encoder.backbone.gnn_layers.2.mlp.2.weight", "molecule_encoder.backbone.gnn_layers.2.mlp.2.bias", "molecule_encoder.backbone.gnn_layers.2.edge_embeddings.0.weight", "molecule_encoder.backbone.gnn_layers.2.edge_embeddings.1.weight", "molecule_encoder.backbone.gnn_layers.2.bn.weight", "molecule_encoder.backbone.gnn_layers.2.bn.bias", "molecule_encoder.backbone.gnn_layers.2.bn.running_mean", "molecule_encoder.backbone.gnn_layers.2.bn.running_var", "molecule_encoder.backbone.gnn_layers.2.bn.num_batches_tracked", "molecule_encoder.backbone.gnn_layers.3.mlp.0.weight", "molecule_encoder.backbone.gnn_layers.3.mlp.0.bias", "molecule_encoder.backbone.gnn_layers.3.mlp.2.weight", "molecule_encoder.backbone.gnn_layers.3.mlp.2.bias", "molecule_encoder.backbone.gnn_layers.3.edge_embeddings.0.weight", "molecule_encoder.backbone.gnn_layers.3.edge_embeddings.1.weight", "molecule_encoder.backbone.gnn_layers.3.bn.weight", "molecule_encoder.backbone.gnn_layers.3.bn.bias", "molecule_encoder.backbone.gnn_layers.3.bn.running_mean", "molecule_encoder.backbone.gnn_layers.3.bn.running_var", "molecule_encoder.backbone.gnn_layers.3.bn.num_batches_tracked", "molecule_encoder.backbone.gnn_layers.4.mlp.0.weight", "molecule_encoder.backbone.gnn_layers.4.mlp.0.bias", "molecule_encoder.backbone.gnn_layers.4.mlp.2.weight", "molecule_encoder.backbone.gnn_layers.4.mlp.2.bias", "molecule_encoder.backbone.gnn_layers.4.edge_embeddings.0.weight", "molecule_encoder.backbone.gnn_layers.4.edge_embeddings.1.weight", "molecule_encoder.backbone.gnn_layers.4.bn.weight", "molecule_encoder.backbone.gnn_layers.4.bn.bias", "molecule_encoder.backbone.gnn_layers.4.bn.running_mean", "molecule_encoder.backbone.gnn_layers.4.bn.running_var", "molecule_encoder.backbone.gnn_layers.4.bn.num_batches_tracked". \n\tsize mismatch for image_encoder.projection_head.weight: copying a param with shape torch.Size([128, 512]) from checkpoint, the shape in current model is torch.Size([256, 512]).\n\tsize mismatch for image_encoder.projection_head.bias: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([256]).\n\tsize mismatch for image_head.weight: copying a param with shape torch.Size([128, 512]) from checkpoint, the shape in current model is torch.Size([256, 512]).\n\tsize mismatch for image_head.bias: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([256]).')
full_key: model

In [None]:
evaluators = instantiate_evaluator_list(cfg.eval, cross_modal_module=None, logger=None)

In [73]:
instantiate_evaluator_list?

[0;31mSignature:[0m
[0minstantiate_evaluator_list[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mevaluator_list_cfg[0m[0;34m:[0m [0momegaconf[0m[0;34m.[0m[0mdictconfig[0m[0;34m.[0m[0mDictConfig[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mcross_modal_module[0m[0;34m:[0m [0mtorch[0m[0;34m.[0m[0mnn[0m[0;34m.[0m[0mmodules[0m[0;34m.[0m[0mmodule[0m[0;34m.[0m[0mModule[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mlogger[0m[0;34m:[0m [0mOptional[0m[0;34m[[0m[0mList[0m[0;34m[[0m[0mlightning[0m[0;34m.[0m[0mpytorch[0m[0;34m.[0m[0mloggers[0m[0;34m.[0m[0mlogger[0m[0;34m.[0m[0mLogger[0m[0;34m][0m[0;34m][0m [0;34m=[0m [0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mckpt_path[0m[0;34m:[0m [0mOptional[0m[0;34m[[0m[0mstr[0m[0;34m][0m [0;34m=[0m [0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mname[0m[0;34m:[0m [0mOptional[0m[0;34m[[0m[0mstr[0m[0;34m][0m [0;34m=[0m [0;32mNone[0m[0;34m,[

In [36]:
logger = utils.instantiate_loggers(cfg.get("logger"))

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mgabriel-watkinson-work[0m ([33mjump_models[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [83]:
Trainer?

[0;31mInit signature:[0m
[0mTrainer[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0;34m*[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0maccelerator[0m[0;34m:[0m [0mUnion[0m[0;34m[[0m[0mstr[0m[0;34m,[0m [0mlightning[0m[0;34m.[0m[0mpytorch[0m[0;34m.[0m[0maccelerators[0m[0;34m.[0m[0maccelerator[0m[0;34m.[0m[0mAccelerator[0m[0;34m][0m [0;34m=[0m [0;34m'auto'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mstrategy[0m[0;34m:[0m [0mUnion[0m[0;34m[[0m[0mstr[0m[0;34m,[0m [0mlightning[0m[0;34m.[0m[0mpytorch[0m[0;34m.[0m[0mstrategies[0m[0;34m.[0m[0mstrategy[0m[0;34m.[0m[0mStrategy[0m[0;34m][0m [0;34m=[0m [0;34m'auto'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mdevices[0m[0;34m:[0m [0mUnion[0m[0;34m[[0m[0mList[0m[0;34m[[0m[0mint[0m[0;34m][0m[0;34m,[0m [0mstr[0m[0;34m,[0m [0mint[0m[0;34m][0m [0;34m=[0m [0;34m'auto'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mnum_nodes[0m[0;34m:[0m [0mint[0m [0;3

In [85]:
trainer._logger_connector.log_metrics?

[0;31mSignature:[0m
[0mtrainer[0m[0;34m.[0m[0m_logger_connector[0m[0;34m.[0m[0mlog_metrics[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mmetrics[0m[0;34m:[0m [0mDict[0m[0;34m[[0m[0mstr[0m[0;34m,[0m [0mtorch[0m[0;34m.[0m[0mTensor[0m[0;34m][0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mstep[0m[0;34m:[0m [0mOptional[0m[0;34m[[0m[0mint[0m[0;34m][0m [0;34m=[0m [0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m [0;34m->[0m [0;32mNone[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m
Logs the metric dict passed in. If `step` parameter is None and `step` key is presented is metrics, uses
metrics["step"] as a step.

Args:
    metrics: Metric values
    step: Step for which metrics should be logged. Default value is `self.global_step` during training or
        the total validation / test log step count during validation and testing.
[0;31mFile:[0m      ~/miniconda3/envs/jump_models/lib/python3.10/site-packages/lightning/pytorch/traine

In [88]:
dl

<torch.utils.data.dataloader.DataLoader at 0x7fe0feeed120>

In [81]:
trainer = instantiate(cfg.trainer, logger=logger, name="test")

InstantiationException: Error in call to target 'lightning.pytorch.trainer.trainer.Trainer':
TypeError("Trainer.__init__() got an unexpected keyword argument 'name'")
full_key: trainer

In [56]:
[x for x in trainer.loggers[2].__dir__() if "log" in x]

['_log_model',
 '_logged_model_time',
 'log_hyperparams',
 'log_metrics',
 'log_table',
 'log_text',
 'log_image',
 '_scan_and_log_checkpoints',
 'log_dir',
 'log_graph']

In [59]:
trainer.loggers[2].log_image?

[0;31mSignature:[0m
[0mtrainer[0m[0;34m.[0m[0mloggers[0m[0;34m[[0m[0;36m2[0m[0;34m][0m[0;34m.[0m[0mlog_image[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mkey[0m[0;34m:[0m [0mstr[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mimages[0m[0;34m:[0m [0mList[0m[0;34m[[0m[0mAny[0m[0;34m][0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mstep[0m[0;34m:[0m [0mOptional[0m[0;34m[[0m[0mint[0m[0;34m][0m [0;34m=[0m [0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0;34m**[0m[0mkwargs[0m[0;34m:[0m [0mAny[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m [0;34m->[0m [0;32mNone[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m
Log images (tensors, numpy arrays, PIL Images or file paths).

Optional kwargs are lists passed to each image (ex: caption, masks, boxes).
[0;31mFile:[0m      ~/miniconda3/envs/jump_models/lib/python3.10/site-packages/lightning/pytorch/loggers/wandb.py
[0;31mType:[0m      method

In [4]:
dotenv.load_dotenv()

True

In [5]:
api = wandb.Api()

run = api.run("gabriel-watkinson-work/fp_small/pgwoh2f5")

KeyError: 'accuracy'

In [None]:
if run.state == "finished":
    for i, row in run.history().iterrows():
        print(row["_timestamp"], row["accuracy"])

In [10]:
list(run.summary.keys())

['gradients/image_encoder.backbone.layer1.0.conv1.weight',
 'gradients/image_encoder.backbone.layer2.1.conv2.weight',
 'gradients/molecule_encoder.backbone.2.1.weight',
 '_wandb',
 'gradients/image_encoder.backbone.layer1.1.bn2.bias',
 'gradients/image_encoder.backbone.layer2.0.downsample.1.weight',
 'jump_moa/image/val/Accuracy_top_3',
 'gradients/image_encoder.backbone.layer2.1.bn1.weight',
 'jump_moa/image/val/F1Score_top_5',
 'gradients/image_encoder.backbone.layer1.0.bn2.weight',
 'gradients/image_encoder.backbone.layer2.1.bn2.bias',
 'gradients/image_encoder.backbone.layer2.1.conv1.weight',
 'gradients/molecule_encoder.backbone.4.0.weight',
 'gradients/image_encoder.backbone.layer1.1.bn1.bias',
 'gradients/image_encoder.backbone.layer2.1.bn2.weight',
 'gradients/image_encoder.backbone.layer2.1.bn1.bias',
 'gradients/image_encoder.backbone.layer3.0.downsample.1.bias',
 'gradients/image_encoder.backbone.bn1.weight',
 'gradients/image_encoder.backbone.layer4.1.conv2.weight',
 'lr-Ad