In [2]:
#!/usr/bin/env python
# coding: utf-8

import os
import torch
import pickle
from torch.utils.data import DataLoader
from transformers import AutoTokenizer
from model_network import CLIPPhi2Model, train_model
from dataset import collate_fn, llavadataset
# --- Environment -------------------------------------------------------------
# Send HTTP(S) traffic through the proxy. Each variable is assigned exactly
# once; a plain string assignment to os.environ needs no try/except wrapper.
PROXY_URL = 'http://185.46.212.90:80'
os.environ['HTTP_PROXY'] = PROXY_URL
os.environ['HTTPS_PROXY'] = PROXY_URL

# Limit the GPUs this process may use. The value is a comma-separated list of
# device indices (written without spaces). It is only honoured when it is set
# before CUDA is initialised in this kernel.
VISIBLE_GPUS = '0,1,2'
os.environ['CUDA_VISIBLE_DEVICES'] = VISIBLE_GPUS
print("Proxy exported")

# --- Device ------------------------------------------------------------------
# Use CUDA when it is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    gpu_count = torch.cuda.device_count()
    print(f"Using CUDA: {gpu_count} GPUs available")
    if gpu_count > len(VISIBLE_GPUS.split(',')):
        # More devices than requested means the restriction was ignored.
        print("Warning: more GPUs are visible than CUDA_VISIBLE_DEVICES allows; "
              "CUDA was probably initialised before this cell ran. "
              "Restart the kernel and run this cell first.")
    device = torch.device('cuda')
else:
    print("CUDA is not available. Using CPU instead.")
    device = torch.device('cpu')

# --- Data --------------------------------------------------------------------
# SECURITY: pickle.load can execute arbitrary code embedded in the file, so
# only load a dataset pickle that comes from a trusted source.
with open("coco_dataset_pickle", "rb") as fp:
    coco_unpickle = pickle.load(fp)

# --- Tokenizer ---------------------------------------------------------------
clip_model_name = "openai/clip-vit-base-patch32"
phi_model_name = "microsoft/phi-2"
train_batch_size = 4
val_batch_size = 4
tokenizer = AutoTokenizer.from_pretrained(phi_model_name, trust_remote_code=True, use_cache=True)
# Keep a local copy of the tokenizer files in the "saved_tokenizer" directory.
tokenizer.save_pretrained("saved_tokenizer")

# --- Model -------------------------------------------------------------------
# On CUDA the model is wrapped in DataParallel and moved to the GPU(s); on CPU
# the freshly built model is used as-is.
MModalGPT = CLIPPhi2Model()
if torch.cuda.is_available():
    MModalGPT = torch.nn.DataParallel(MModalGPT).to(device)

# --- Data loaders ------------------------------------------------------------
# Both datasets are built from the same unpickled data; the 'train' / 'val'
# argument selects the split inside llavadataset.
# NOTE(review): the validation loader is shuffled as well -- confirm that this
# is intended by train_model's validation logic.
train_dataloader = DataLoader(
    llavadataset(coco_unpickle, phi_model_name, clip_model_name, 'train', tokenizer),
    collate_fn=collate_fn, batch_size=train_batch_size, num_workers=20, shuffle=True, pin_memory=True)

val_dataloader = DataLoader(
    llavadataset(coco_unpickle, phi_model_name, clip_model_name, 'val', tokenizer),
    collate_fn=collate_fn, batch_size=val_batch_size, num_workers=20, shuffle=True, pin_memory=True)

# --- Optimizer ---------------------------------------------------------------
# Only parameters that still require gradients are handed to Adam.
optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, MModalGPT.parameters()), lr=1e-6)

# Set float32_matmul_precision to 'medium' (see the PyTorch documentation for
# the speed / precision trade-off of this setting).
torch.set_float32_matmul_precision('medium')

# --- Training ----------------------------------------------------------------
train_model(
    MModalGPT,
    train_dataloader,
    val_dataloader,
    optimizer,
    device,
    max_steps=100000,
    model_save_step=1000,
    model_val_step=1000,
    log_step=100,
    max_token_filter=35,
    tokenizer=tokenizer,
)


Proxy exported
Using CUDA: 5 GPUs available


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

    There is an imbalance between your GPUs. You may want to exclude GPU 4 which
    has less than 75% of the memory or cores of GPU 0. You can do so by setting
    the device_ids argument to DataParallel, or by setting the CUDA_VISIBLE_DEVICES
    environment variable.


Train size 532577 and validation size 59176
Train size 532577 and validation size 59176
Training started.




Step 0/100000: Avg Running Loss = 10.518048286437988
Step 100/100000: Avg Running Loss = 8.054579963684082
Step 200/100000: Avg Running Loss = 7.862500190734863
Batch skipped as captions too long.
Step 300/100000: Avg Running Loss = 7.869241786003113
Step 400/100000: Avg Running Loss = 7.66180025100708
Step 500/100000: Avg Running Loss = 7.390713806152344
Step 600/100000: Avg Running Loss = 7.151144347190857
Step 700/100000: Avg Running Loss = 7.149825778007507
Step 800/100000: Avg Running Loss = 7.110813059806824


In [None]:
### Local runs only: export the HTTP(S) proxy for this process ###
import os

try:
    os.environ['HTTP_PROXY'] = 'http://185.46.212.90:80'
    os.environ['HTTPS_PROXY'] = 'http://185.46.212.90:80'
    print ("proxy_exported")
except OSError as exc:
    # Stay best-effort, but report the failure instead of hiding it behind a
    # bare `except`, which would also swallow KeyboardInterrupt.
    print("Could not set proxy:", exc)

In [None]:
import torch
from step1_network import CLIPPhi2Model, train_model
from step1_dataset import collate_fn, llavadataset
from torch.utils.data import random_split, DataLoader
import torch.nn as nn
from transformers import AutoTokenizer
import pickle
import os

In [None]:
# Select the compute device: CUDA when PyTorch can see a GPU, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

In [None]:
# Read the pickled dataset file from the working directory.
# NOTE(review): pickle can run arbitrary code while loading -- only use a
# file from a trusted source.
with open("coco_dataset_pickle", mode="rb") as fp:
    coco_unpickle = pickle.load(fp)

In [None]:
# Preview only the first few entries; displaying the whole object would flood
# the cell output and bloat the saved notebook.
coco_unpickle[:3]

In [None]:
# Checkpoint names for the CLIP and Phi-2 models.
clip_model_name = "openai/clip-vit-base-patch32"
phi_model_name = "microsoft/phi-2"

# Batch sizes used by the training and validation data loaders.
train_batch_size = 2
val_batch_size = 4

# Tokenizer loaded from the Phi-2 checkpoint.
tokenizer = AutoTokenizer.from_pretrained(
    phi_model_name,
    trust_remote_code=True,
    use_cache=True,
)

In [None]:
# Save the tokenizer to the local "saved_tokenizer" directory.
tokenizer.save_pretrained("saved_tokenizer")

In [None]:
# Show which device was selected.
print(device)

In [None]:
# Step settings passed to train_model. The trailing numbers are carried over
# from the original cell (presumably the values for a full run -- confirm).
max_steps = 100         # 100000
model_save_step = 10    # 1000
model_val_step = 2      # 1000
log_step = 2            # 1000
max_token_filter = 35   # 35; memory management restriction

# Build the model and place it on the selected device.
MModalGPT = CLIPPhi2Model().to(device)

In [None]:
# Data loaders over the first 100 entries of the unpickled data.
# Options common to both loaders.
loader_options = dict(collate_fn=collate_fn, num_workers=2, shuffle=True, pin_memory=True)

# Training split.
train_dataset = llavadataset(coco_unpickle[0:100], phi_model_name, clip_model_name, 'train', tokenizer)
train_dataloader = DataLoader(train_dataset, batch_size=train_batch_size, **loader_options)

# Validation split.
val_dataset = llavadataset(coco_unpickle[0:100], phi_model_name, clip_model_name, 'val', tokenizer)
val_dataloader = DataLoader(val_dataset, batch_size=val_batch_size, **loader_options)



In [None]:
# Adam over only those model parameters that still require gradients.
trainable_params = (param for param in MModalGPT.parameters() if param.requires_grad)
optimizer = torch.optim.Adam(trainable_params, lr=1e-5)

In [None]:
# Set float32 matmul precision to 'medium' before training starts.
torch.set_float32_matmul_precision('medium')

# Start training; arguments are passed positionally, in the original order.
train_model(
    MModalGPT,
    train_dataloader,
    val_dataloader,
    optimizer,
    device,
    max_steps,
    model_save_step,
    model_val_step,
    log_step,
    max_token_filter,
    tokenizer,
)

In [None]:
#!/usr/bin/env python
# coding: utf-8

import os
import torch
import pickle
from torch.utils.data import DataLoader
from transformers import AutoTokenizer
from model_network import CLIPPhi2Model, train_model
from dataset import collate_fn, llavadataset
# --- Environment -------------------------------------------------------------
# Send HTTP(S) traffic through the proxy. Each variable is assigned exactly
# once; a plain string assignment to os.environ needs no try/except wrapper.
PROXY_URL = 'http://185.46.212.90:80'
os.environ['HTTP_PROXY'] = PROXY_URL
os.environ['HTTPS_PROXY'] = PROXY_URL

# Limit the GPUs this process may use. The value is a comma-separated list of
# device indices (written without spaces). It is only honoured when it is set
# before CUDA is initialised in this kernel.
# NOTE(review): the DataParallel warning recorded in this notebook's earlier
# output suggested excluding GPU 4 because of an imbalance -- confirm that
# including it here is intended.
VISIBLE_GPUS = '0,1,2,4'
os.environ['CUDA_VISIBLE_DEVICES'] = VISIBLE_GPUS
print("Proxy exported")

# --- Device ------------------------------------------------------------------
# Use CUDA when it is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    gpu_count = torch.cuda.device_count()
    print(f"Using CUDA: {gpu_count} GPUs available")
    if gpu_count > len(VISIBLE_GPUS.split(',')):
        # More devices than requested means the restriction was ignored.
        print("Warning: more GPUs are visible than CUDA_VISIBLE_DEVICES allows; "
              "CUDA was probably initialised before this cell ran. "
              "Restart the kernel and run this cell first.")
    device = torch.device('cuda')
else:
    print("CUDA is not available. Using CPU instead.")
    device = torch.device('cpu')

# --- Data --------------------------------------------------------------------
# SECURITY: pickle.load can execute arbitrary code embedded in the file, so
# only load a dataset pickle that comes from a trusted source.
with open("coco_dataset_pickle", "rb") as fp:
    coco_unpickle = pickle.load(fp)

# --- Tokenizer ---------------------------------------------------------------
clip_model_name = "openai/clip-vit-base-patch32"
phi_model_name = "microsoft/phi-2"
train_batch_size = 4
val_batch_size = 4
tokenizer = AutoTokenizer.from_pretrained(phi_model_name, trust_remote_code=True, use_cache=True)
# Keep a local copy of the tokenizer files in the "saved_tokenizer" directory.
tokenizer.save_pretrained("saved_tokenizer")

# --- Model -------------------------------------------------------------------
# On CUDA the model is wrapped in DataParallel and moved to the GPU(s); on CPU
# the freshly built model is used as-is.
MModalGPT = CLIPPhi2Model()
if torch.cuda.is_available():
    MModalGPT = torch.nn.DataParallel(MModalGPT).to(device)

# --- Data loaders ------------------------------------------------------------
# Both datasets are built from the same unpickled data; the 'train' / 'val'
# argument selects the split inside llavadataset.
# NOTE(review): the validation loader is shuffled as well -- confirm that this
# is intended by train_model's validation logic.
train_dataloader = DataLoader(
    llavadataset(coco_unpickle, phi_model_name, clip_model_name, 'train', tokenizer),
    collate_fn=collate_fn, batch_size=train_batch_size, num_workers=20, shuffle=True, pin_memory=True)

val_dataloader = DataLoader(
    llavadataset(coco_unpickle, phi_model_name, clip_model_name, 'val', tokenizer),
    collate_fn=collate_fn, batch_size=val_batch_size, num_workers=20, shuffle=True, pin_memory=True)

# --- Optimizer ---------------------------------------------------------------
# Only parameters that still require gradients are handed to Adam.
optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, MModalGPT.parameters()), lr=1e-5)

# Set float32_matmul_precision to 'medium' (see the PyTorch documentation for
# the speed / precision trade-off of this setting).
torch.set_float32_matmul_precision('medium')

# --- Training ----------------------------------------------------------------
train_model(
    MModalGPT,
    train_dataloader,
    val_dataloader,
    optimizer,
    device,
    max_steps=100000,
    model_save_step=500,
    model_val_step=500,
    log_step=100,
    max_token_filter=35,
    tokenizer=tokenizer,
)
