In [2]:
from mb.gpt.models.connecter_type1 import BasicLLM
from mb.gpt.models.vlm_encoder_clip import VlmEncoderTest
from mb.gpt.models.text_encoder_smalvlm import TextEncoderTest

In [3]:
# Instantiate the two test encoders and the connector imported from mb.gpt.
# NOTE(review): the positional arguments presumably mean (in_dim, out_dim) — confirm against the mb.gpt sources.
vlm_encoder = VlmEncoderTest(128,128)
text_encoder = TextEncoderTest(256,64)
# vlm_emb_dim + text_emb_dim = 192, which is the in_features of the Linear layer printed further down.
model = BasicLLM(vlm_emb_dim=128, text_emb_dim=64, embedding_dim=256)
model  # bare last expression so the notebook renders the module repr

BasicLLM(embedding_dim=256)

In [4]:
# Walk the module tree (the model itself comes first, then its submodules) and print each repr.
for module in model.modules():
    print(module)

BasicLLM(embedding_dim=256)
Linear(in_features=192, out_features=256, bias=True)


In [4]:
import torch

# Random stand-ins for a batch of 10 samples:
#   data1 -> simulated VLM embedding  (128 features)
#   data2 -> simulated text embedding (256 features)
data1, data2 = (torch.randn(10, width) for width in (128, 256))


In [5]:
# Peek at the first 10 features of sample 0 instead of dumping the whole tensor.
data1[0,:10]

tensor([ 0.2105,  1.0456,  2.1830,  0.0887, -0.7237,  0.3205, -1.1705, -1.0274,
         0.2224,  0.1359])

In [6]:
# Encode the simulated inputs with the freshly constructed (not yet trained) encoders.
vlm_data = vlm_encoder(data1)
text_data = text_encoder(data2)

In [7]:
# VLM encoder weight matrix before any optimizer step has been taken (baseline for a later comparison).
vlm_encoder.state_dict()['linear.weight']

tensor([[ 0.0598, -0.0019,  0.0545,  ...,  0.0385,  0.0752,  0.0125],
        [-0.0160, -0.0144, -0.0794,  ...,  0.0388, -0.0173,  0.0681],
        [ 0.0487, -0.0142, -0.0423,  ..., -0.0376,  0.0085,  0.0322],
        ...,
        [ 0.0200,  0.0174, -0.0591,  ...,  0.0568, -0.0788, -0.0445],
        [ 0.0400,  0.0727, -0.0876,  ...,  0.0308, -0.0400,  0.0083],
        [ 0.0544, -0.0225, -0.0853,  ...,  0.0513,  0.0734, -0.0573]])

In [8]:
# One dummy optimisation step over both encoders and the connector, used to
# check that gradients reach the encoder weights.
optimizer = torch.optim.Adam(
    [*vlm_encoder.parameters(), *text_encoder.parameters(), *model.parameters()],
    lr=1e-3,
)
optimizer.zero_grad()
# Re-encode inside this cell: backward() frees the autograd graph, so reusing
# encoder outputs computed in an earlier cell makes a second run of this cell
# fail with "Trying to backward through the graph a second time".
vlm_data = vlm_encoder(data1)
text_data = text_encoder(data2)
output = model(vlm_data, text_data, None)
loss = output.sum()  # Dummy loss
loss.backward()
optimizer.step()

In [9]:
# Same weight matrix after the single Adam step (lr=1e-3); compare with the earlier print to confirm the encoder was updated.
vlm_encoder.state_dict()['linear.weight']

tensor([[ 0.0608, -0.0029,  0.0555,  ...,  0.0395,  0.0762,  0.0135],
        [-0.0170, -0.0134, -0.0804,  ...,  0.0378, -0.0183,  0.0671],
        [ 0.0497, -0.0152, -0.0413,  ..., -0.0366,  0.0095,  0.0332],
        ...,
        [ 0.0210,  0.0164, -0.0581,  ...,  0.0578, -0.0778, -0.0435],
        [ 0.0410,  0.0717, -0.0866,  ...,  0.0318, -0.0390,  0.0093],
        [ 0.0534, -0.0215, -0.0863,  ...,  0.0503,  0.0724, -0.0583]])

In [29]:
import torch
from torch import nn
from typing import List, Optional

class BasicLLM(nn.Module):
    """Fuse a VLM input and a text input through one linear head.

    Each input is passed through its encoder, the two results are concatenated
    along the last dimension, and ``linear1`` projects the concatenation to
    ``output_classes`` features.

    Args:
        vlm_encoder: Module applied to the VLM input. ``None`` (the default)
            substitutes ``nn.Identity``, i.e. the input is used as-is.
        text_encoder: Module applied to the text input. ``None`` (the default)
            substitutes ``nn.Identity``.
        vlm_emb_dim: Size of the last dimension produced by ``vlm_encoder``.
        text_emb_dim: Size of the last dimension produced by ``text_encoder``.
            When BOTH dims are given the head is a regular ``nn.Linear`` with
            ``vlm_emb_dim + text_emb_dim`` input features; if either one is
            missing the head is an ``nn.LazyLinear`` whose input size is
            inferred on the first forward pass.
        output_classes: Number of output features of the head.
    """

    def __init__(self,
                 vlm_encoder: Optional[nn.Module] = None,
                 text_encoder: Optional[nn.Module] = None,
                 vlm_emb_dim: Optional[int] = None,
                 text_emb_dim: Optional[int] = None,
                 output_classes: int = 10):
        super().__init__()

        # A missing encoder degrades to a pass-through so forward() needs no branching.
        self.vlm_encoder = vlm_encoder if vlm_encoder is not None else nn.Identity()
        self.text_encoder = text_encoder if text_encoder is not None else nn.Identity()

        if vlm_emb_dim is not None and text_emb_dim is not None:
            self.linear1 = nn.Linear(vlm_emb_dim + text_emb_dim, output_classes)
        else:
            # Input width unknown up front: defer weight creation to the first forward call.
            self.linear1 = nn.LazyLinear(output_classes)

    def forward(self,
                vlm_emb: torch.Tensor,
                text_emb: torch.Tensor) -> torch.Tensor:
        """Encode both inputs, concatenate them and project the result.

        Args:
            vlm_emb: Input handed to ``vlm_encoder``.
            text_emb: Input handed to ``text_encoder``. After encoding it must
                match ``vlm_emb`` in every dimension except the last so the
                two can be concatenated.

        Returns:
            The projected tensor. A 2-D result ``(N, C)`` gets a singleton
            axis inserted at position 1 and is returned as ``(N, 1, C)``;
            results of any other rank are returned unchanged.
        """
        vlm_emb = self.vlm_encoder(vlm_emb)
        text_emb = self.text_encoder(text_emb)

        combined_emb = torch.cat([vlm_emb, text_emb], dim=-1)

        projected_emb = self.linear1(combined_emb)

        if projected_emb.dim() == 2:
            projected_emb = projected_emb.unsqueeze(1)

        return projected_emb

In [30]:
import torch
import torch.nn as nn

class VlmEncoderTest(nn.Module):
    """Minimal stand-in for a VLM encoder: a single affine projection."""

    def __init__(self, in_dim=512, out_dim=128):
        super().__init__()
        # The attribute name ``linear`` determines the state_dict keys
        # ("linear.weight" / "linear.bias").
        self.linear = nn.Linear(in_features=in_dim, out_features=out_dim)

    def forward(self, x):
        """Map the last dimension of ``x`` from ``in_dim`` to ``out_dim``."""
        projected = self.linear(x)
        return projected
        
class TextEncoderTest(nn.Module):
    """Minimal stand-in for a text encoder: a single affine projection."""

    def __init__(self, in_dim=512, out_dim=128):
        super().__init__()
        # The attribute name ``linear`` determines the state_dict keys
        # ("linear.weight" / "linear.bias").
        self.linear = nn.Linear(in_features=in_dim, out_features=out_dim)

    def forward(self, x):
        """Map the last dimension of ``x`` from ``in_dim`` to ``out_dim``."""
        projected = self.linear(x)
        return projected
    
    

In [31]:
# Wire the two toy encoders into the connector defined in this notebook.
vlm_encoder = VlmEncoderTest(128, 128)
text_encoder = TextEncoderTest(256, 64)
# vlm_emb_dim (128) and text_emb_dim (64) are left out here, so the head is
# built as a LazyLinear whose input width is inferred on the first forward pass.
model = BasicLLM(vlm_encoder, text_encoder, output_classes=10)
model

BasicLLM(
  (vlm_encoder): VlmEncoderTest(
    (linear): Linear(in_features=128, out_features=128, bias=True)
  )
  (text_encoder): TextEncoderTest(
    (linear): Linear(in_features=256, out_features=64, bias=True)
  )
  (linear1): LazyLinear(in_features=0, out_features=10, bias=True)
)

In [32]:
import torch

In [33]:
def can_fit_in_single_gpu(
    model,
    batch_size=10,
    vlm_input_dim=128,
    text_input_dim=256,
    dtype=torch.float32,
    device='cuda:0',
):
    """Probe whether one inference forward pass of ``model`` fits on ``device``.

    The model is moved to ``device``, fed two random inputs of shape
    ``(batch_size, vlm_input_dim)`` and ``(batch_size, text_input_dim)`` under
    ``torch.no_grad()``, and the peak allocated CUDA memory is printed.

    Args:
        model: Module called as ``model(vlm_x, text_x)``.
        batch_size: Number of rows in each random input.
        vlm_input_dim: Feature width of the first input.
        text_input_dim: Feature width of the second input.
        dtype: dtype of the random inputs (the model's own dtype is not changed).
        device: CUDA device to probe.

    Returns:
        ``True`` if the forward pass completed, ``False`` if CUDA is
        unavailable or an out-of-memory error occurred at any stage
        (moving the weights, allocating the inputs, or the forward pass).

    Raises:
        RuntimeError: Any runtime error that is not an out-of-memory error.

    Side effects:
        The model stays on ``device`` afterwards; its train/eval mode is
        restored to what it was on entry.
    """
    if not torch.cuda.is_available():
        print('CUDA is not available on this machine.')
        return False

    was_training = model.training
    try:
        # Moving the weights and allocating the inputs can themselves exhaust
        # GPU memory, so they have to sit inside the guarded region.
        model = model.to(device).eval()
        vlm_x = torch.randn(batch_size, vlm_input_dim, dtype=dtype, device=device)
        text_x = torch.randn(batch_size, text_input_dim, dtype=dtype, device=device)

        # Start the peak counter from the current allocation (weights + inputs).
        torch.cuda.empty_cache()
        torch.cuda.reset_peak_memory_stats(device=device)
        with torch.no_grad():
            _ = model(vlm_x, text_x)
        peak_mb = torch.cuda.max_memory_allocated(device=device) / (1024 ** 2)
        print(f'Fits on {device}. Peak allocated: {peak_mb:.2f} MB')
        return True
    except RuntimeError as e:
        if 'out of memory' in str(e).lower():
            print(f'OOM on {device}: {e}')
            return False
        raise
    finally:
        # A memory probe should not silently flip the caller's model into eval mode.
        model.train(was_training)

In [34]:
# Inference-only memory probe on the first GPU; the boolean result is the cell output.
can_fit_in_single_gpu(
    model, batch_size=10, vlm_input_dim=128, text_input_dim=256, device='cuda:0'
)

Fits on cuda:0. Peak allocated: 9.30 MB


True

In [36]:
def can_fit_training_step(
    model,
    batch_size=10,
    vlm_input_dim=128,
    text_input_dim=256,
    dtype=torch.float32,
    device='cuda:0',
    lr=1e-3,
):
    """Probe whether one full training step of ``model`` fits on ``device``.

    Runs forward, ``backward()`` on the mean of the output, and one Adam
    update on random inputs of shape ``(batch_size, vlm_input_dim)`` and
    ``(batch_size, text_input_dim)``, then prints the peak allocated CUDA
    memory.

    Args:
        model: Module called as ``model(vlm_x, text_x)``.
        batch_size: Number of rows in each random input.
        vlm_input_dim: Feature width of the first input.
        text_input_dim: Feature width of the second input.
        dtype: dtype of the random inputs (the model's own dtype is not changed).
        device: CUDA device to probe.
        lr: Learning rate of the throw-away Adam optimizer.

    Returns:
        ``True`` if the step completed, ``False`` if CUDA is unavailable or an
        out-of-memory error occurred at any stage (moving the weights,
        building the optimizer/inputs, forward, backward, or the update).

    Raises:
        RuntimeError: Any runtime error that is not an out-of-memory error.

    Side effects:
        The model stays on ``device``. ``optimizer.step()`` really updates the
        model's weights in place, so the probe is NOT weight-preserving.
        Gradients created by the probe are released and the train/eval mode is
        restored to what it was on entry.
    """
    if not torch.cuda.is_available():
        print('CUDA is not available on this machine.')
        return False

    was_training = model.training
    optimizer = None
    try:
        # Moving the weights and allocating the inputs can themselves exhaust
        # GPU memory, so they have to sit inside the guarded region.
        model = model.to(device).train()
        # NOTE(review): if the model still holds uninitialised lazy modules,
        # PyTorch recommends a dry-run forward before attaching an optimizer — confirm.
        optimizer = torch.optim.Adam(model.parameters(), lr=lr)
        vlm_x = torch.randn(batch_size, vlm_input_dim, dtype=dtype, device=device)
        text_x = torch.randn(batch_size, text_input_dim, dtype=dtype, device=device)

        # Start the peak counter from the current allocation (weights + inputs).
        torch.cuda.empty_cache()
        torch.cuda.reset_peak_memory_stats(device=device)
        optimizer.zero_grad(set_to_none=True)
        out = model(vlm_x, text_x)
        loss = out.mean()
        loss.backward()
        optimizer.step()
        peak_mb = torch.cuda.max_memory_allocated(device=device) / (1024 ** 2)
        print(f'Training step fits on {device}. Peak allocated: {peak_mb:.2f} MB')
        return True
    except RuntimeError as e:
        if 'out of memory' in str(e).lower():
            print(f'OOM on {device}: {e}')
            return False
        raise
    finally:
        if optimizer is not None:
            # Drop the gradients this probe created so they do not keep GPU memory alive.
            optimizer.zero_grad(set_to_none=True)
        # Leave the caller's train/eval mode as we found it.
        model.train(was_training)

In [37]:
# Full training-step memory probe (forward + backward + Adam update) on the first GPU.
can_fit_training_step(
    model, batch_size=10, vlm_input_dim=128, text_input_dim=256, device='cuda:0'
)

Training step fits on cuda:0. Peak allocated: 17.93 MB


True

In [None]:
from mb.gpt.utils.gpu_tools import get_gpus_by_least_usage

ModuleNotFoundError: No module named 'mb.gpt.utils.gpu_tools'

In [46]:
# Query per-GPU memory statistics; judging by the output below, return_stats=True yields one dict per GPU.
# NOTE(review): the import cell above failed with ModuleNotFoundError, so this call relies on a name left over in the kernel — confirm the module path.
get_gpus_by_least_usage(return_stats=True)

[{'gpu_id': 0,
  'name': 'NVIDIA TITAN RTX',
  'free_gb': 22.35418701171875,
  'used_gb': 1.26904296875,
  'total_gb': 23.62322998046875,
  'usage_ratio': 0.05372012928796025}]