In [1]:
# Re-load the autoreload extension and select mode 2 so that edits to imported
# modules are picked up on the next cell execution without a kernel restart.
%reload_ext autoreload
%autoreload 2

In [1]:
import os
import random

import accelerate
import torch
import transformers

from src._shared import (
    apply_lora_to_model,
    freeze_base_models,
    load_clip_model,
    load_config,
    load_tokenizers,
    prepare_dataset,
    save_model_and_logs,
    setup_environment,
    setup_trainer,
    train_model,
)

# Pin GPU enumeration to PCI bus order and expose only device 0 to this process.
os.environ.update(
    {
        "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
        "CUDA_VISIBLE_DEVICES": "0",
    }
)

# Load the training configuration and derive the run-level settings from it.
train_config = load_config()
(
    model_name_identifier,
    device,
    report_to,
    run,
    USE_WANDB,
    SEED,
) = setup_environment(train_config)

# Seed each RNG entry point with its own offset from SEED (SEED + 1 .. SEED + 4),
# called in this fixed order: accelerate, transformers, torch, random.
for _offset, _seed_fn in enumerate(
    (accelerate.utils.set_seed, transformers.set_seed, torch.manual_seed, random.seed),
    start=1,
):
    _seed_fn(SEED + _offset)

Matplotlib created a temporary cache directory at /tmp/matplotlib-uiutyvql because the default path (/home/lfi/.cache/matplotlib) is not a writable directory; it is highly recommended to set the MPLCONFIGDIR environment variable to a writable directory, in particular to speed up the import of Matplotlib and to better support multiprocessing.


Using device: cuda
Model identifier: protT5-CLIP-2025-01-18-14-32-00


In [2]:
# load_tokenizers returns a pair, unpacked here as tokenizer_plm / tokenizer_llm
# (presumably the protein-LM and text-LLM tokenizers; confirm in src._shared).
tokenizer_plm, tokenizer_llm = load_tokenizers(train_config)
# Build the dataset from the config and both tokenizers. In the recorded run this
# cell printed "Loading dataset from disk...".
dataset = prepare_dataset(train_config, tokenizer_plm, tokenizer_llm)

Loading dataset from disk...


In [3]:
# Build the model from the training config, passing the device chosen by
# setup_environment. The recorded run loaded 2 checkpoint shards and reported
# "All model parameters are on CUDA".
model = load_clip_model(train_config, device)

`flash-attention` package not found, consider installing for better performance: No module named 'flash_attn'.
Current `flash-attention` does not support `window_size`. Either upgrade or use `attn_implementation='eager'`.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loaded model...
All model parameters are on CUDA


In [4]:
# Print the logit scale and both projection weight matrices for inspection.
for _param in (
    model.logit_scale.scale,
    model.protein_projection.weight,
    model.text_projection.weight,
):
    print(_param)

Parameter containing:
tensor(2.6592, device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[-0.0109, -0.0086, -0.0193,  ..., -0.0178,  0.0290, -0.0130],
        [-0.0093, -0.0273,  0.0015,  ...,  0.0072, -0.0094, -0.0289],
        [-0.0274, -0.0223, -0.0266,  ...,  0.0258, -0.0009, -0.0072],
        ...,
        [ 0.0085,  0.0016, -0.0294,  ...,  0.0262, -0.0267,  0.0040],
        [-0.0139, -0.0153,  0.0166,  ...,  0.0137, -0.0211, -0.0101],
        [ 0.0160,  0.0302,  0.0211,  ...,  0.0212,  0.0074, -0.0106]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.0009,  0.0171,  0.0047,  ...,  0.0172,  0.0151,  0.0067],
        [-0.0015, -0.0032, -0.0173,  ..., -0.0150, -0.0143, -0.0141],
        [-0.0013, -0.0166, -0.0012,  ...,  0.0174,  0.0041, -0.0122],
        ...,
        [ 0.0145,  0.0071, -0.0127,  ...,  0.0121, -0.0071, -0.0050],
        [ 0.0110,  0.0089, -0.0134,  ...,  0.0075, -0.0094, -0.0028],
        [ 0.0069,  0.0146,  0.0052,  ...,

In [5]:
# Directory of a previously saved run. The path is relative, so it resolves
# against the kernel's current working directory.
PROJECTION_CHECKPOINT_DIR = '../tmp/models/protT5-CLIP-2025-01-17-21-47-10-0/'

# Updates `model` in place: the values printed before and after this call differ.
model.load_projections_from_safetensors(PROJECTION_CHECKPOINT_DIR)

In [6]:
# Print the logit scale and both projection weight matrices again, so they can
# be compared with the values shown before the projection load.
for _param in (
    model.logit_scale.scale,
    model.protein_projection.weight,
    model.text_projection.weight,
):
    print(_param)

Parameter containing:
tensor(2.6643, device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.0074, -0.0227, -0.0058,  ..., -0.0573,  0.0572,  0.0064],
        [ 0.0138, -0.0352, -0.0175,  ..., -0.0012, -0.0031, -0.0709],
        [ 0.0109,  0.0272, -0.0328,  ...,  0.0176, -0.0004,  0.0158],
        ...,
        [ 0.0056, -0.0115,  0.0165,  ..., -0.0119, -0.0191,  0.0207],
        [ 0.0204,  0.0111,  0.0314,  ..., -0.0556, -0.0031,  0.0595],
        [ 0.0227,  0.0077,  0.0275,  ...,  0.0031, -0.0162, -0.0133]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[-3.4211e-02,  2.6319e-03,  3.5704e-02,  ..., -5.5075e-02,
          2.7038e-02,  2.0645e-02],
        [-4.1908e-02, -2.2629e-02, -5.6813e-03,  ..., -4.8774e-02,
         -4.0731e-02, -2.6518e-03],
        [ 3.9049e-02, -2.5323e-02,  4.3649e-02,  ..., -2.4687e-02,
         -2.8779e-02, -3.0384e-03],
        ...,
        [ 1.2926e-01,  2.1493e-02,  2.8456e-03,  ..., -5.8982e-03,
          1.910