# Some preparation

In [1]:
%cd /workspace/ToxVidLM_ACL_2024

/workspace/ToxVidLM_ACL_2024


In [2]:
!pwd

/workspace/ToxVidLM_ACL_2024


In [None]:
!pip install -r requirements.txt

In [None]:
!pip install peft==0.6.2

In [None]:
!pip install ipywidgets

In [1]:
import torch
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"CUDA version: {torch.version.cuda}")
print(f"PyTorch version: {torch.__version__}")

CUDA available: True
CUDA version: 10.2
PyTorch version: 1.12.1+cu102


In [6]:
# Add this cell before the train cell
!pip uninstall torch torchvision torchaudio -y
!pip install torch==1.12.1+cu102 torchvision==0.13.1+cu102 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu102

# Verify installation
import torch
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"CUDA version: {torch.version.cuda}")
print(f"PyTorch version: {torch.__version__}")

Found existing installation: torch 2.0.0
Uninstalling torch-2.0.0:
  Successfully uninstalled torch-2.0.0
Found existing installation: torchvision 0.15.1
Uninstalling torchvision-0.15.1:
  Successfully uninstalled torchvision-0.15.1
Found existing installation: torchaudio 2.0.1
Uninstalling torchaudio-2.0.1:
  Successfully uninstalled torchaudio-2.0.1
[0mLooking in indexes: https://pypi.org/simple, https://download.pytorch.org/whl/cu102
Collecting torch==1.12.1+cu102
  Using cached https://download.pytorch.org/whl/cu102/torch-1.12.1%2Bcu102-cp38-cp38-linux_x86_64.whl (776.3 MB)
Collecting torchvision==0.13.1+cu102
  Using cached https://download.pytorch.org/whl/cu102/torchvision-0.13.1%2Bcu102-cp38-cp38-linux_x86_64.whl (19.1 MB)
Collecting torchaudio==0.12.1
  Using cached https://download.pytorch.org/whl/cu102/torchaudio-0.12.1%2Bcu102-cp38-cp38-linux_x86_64.whl (3.7 MB)
Installing collected packages: torch, torchvision, torchaudio
[31mERROR: pip's dependency resolver does not curr

# Download Dataset

In [3]:
%cd /workspace/

/workspace


In [4]:
!mkdir downloaded_data

In [None]:
# Install gdown first
!pip install gdown

# For a public folder, you can try:
import gdown
import os

# Create download directory
os.makedirs("downloaded_data", exist_ok=True)

# Download the entire folder (this works for some public folders)
folder_id = "1lAl6KpewLv9bO64Ad5fccBOImSZgRPPP"
gdown.download_folder(f"https://drive.google.com/drive/folders/{folder_id}", output="downloaded_data/", quiet=False, use_cookies=False)

In [11]:
import os
import zipfile
download_dir = "downloaded_data"
# List all files in the download directory
for filename in os.listdir(download_dir):
    if filename.endswith(".zip"):
        zip_path = os.path.join(download_dir, filename)
        print(f"Extracting {zip_path}...")
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(download_dir)      
print("Extraction complete.")

Extracting downloaded_data/classification_final_data-20250630T215505Z-1-003.zip...


BadZipFile: File is not a zip file

In [None]:
!mv downloaded_data/classification_final_data final_data

In [11]:
import shutil
import os

source_folder = "final_data/classification_final_data"
destination_folder = "final_data"

# Check if source folder exists
if os.path.exists(source_folder):
    # List all files in the source folder
    files = os.listdir(source_folder)
    
    # Move each file to the destination folder
    for file in files:
        source_path = os.path.join(source_folder, file)
        destination_path = os.path.join(destination_folder, file)
        shutil.move(source_path, destination_path)
    
    print("Files moved successfully.")
else:
    print("Source folder does not exist.")

Files moved successfully.


# Train

In [4]:
!nvcc --version

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2025 NVIDIA Corporation
Built on Fri_Feb_21_20:23:50_PST_2025
Cuda compilation tools, release 12.8, V12.8.93
Build cuda_12.8.r12.8/compiler.35583870_0


In [19]:
# Set CUDA memory allocation configuration to reduce fragmentation
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:256'

In [None]:
!python train.py


In [4]:
!python test.py

python: can't open file 'test.py': [Errno 2] No such file or directory


# For debug

In [3]:
import torch
import gc

# Clear GPU memory
if torch.cuda.is_available():
    torch.cuda.empty_cache()
    gc.collect()
    print(f"GPU memory cleared. Available: {torch.cuda.get_device_properties(0).total_memory - torch.cuda.memory_allocated(0)} bytes")

GPU memory cleared. Available: 34072559616 bytes


In [1]:
!git status

On branch main
Your branch is ahead of 'origin/main' by 4 commits.
  (use "git push" to publish your local commits)

Changes not staged for commit:
  (use "git add <file>..." to update what will be committed)
  (use "git restore <file>..." to discard changes in working directory)
	[31mmodified:   Untitled-1.ipynb[m

no changes added to commit (use "git add" and/or "git commit -a")


In [5]:
import torch
import torch.nn as nn
from tokenizers import AddedToken
from transformers import CLIPModel, VideoMAEModel, Wav2Vec2Model, VideoMAEConfig, CLIPConfig, Wav2Vec2Config, XLMRobertaConfig
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, AutoModelForSeq2SeqLM
from model.additional_modules import LSTM_fc, FC_head, Gate_Attention
from argparse import Namespace 
from model.model import Multimodal_LLM
from data.dataset import CustomDataset
from iteration import train_model, train_one_epoch, validate
from torch.utils.data import DataLoader
import pandas as pd
import numpy as np
from copy import deepcopy
from sklearn.model_selection import train_test_split
from transformers import BertTokenizer, AlbertTokenizer, XLMRobertaTokenizerFast, PreTrainedTokenizerFast #only for gpt2 and assign values
from transformers import GPT2Model, BertModel, AlbertModel, XLMRobertaModel
import pickle
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')
import os

tasks_bool = {"offensive" : True, "offensive_level": True, "sentiment" : True}
tasks = []
name = "gpt2_vidmae_whisper_"

for k, v in tasks_bool.items():
    if tasks_bool[k]:
        tasks.append(k)
        name += k + "_"
        
config = Namespace(
    file_name=name + "0",
    device=torch.device("cuda:1"),
    tokenizer_path="ckpts",
    tasks = tasks,
    offensive_bool = tasks_bool["offensive"],
    offensive_level_bool = tasks_bool["offensive_level"],
    sentiment_bool = tasks_bool["sentiment"],
    video_encoder="MCG-NJU/videomae-base",
    audio_encoder="openai/whisper-small",
    lstm_or_conv = False,
    image_conv_kernel=23,
    image_conv_stride=3,
    image_conv_padding=8,
    video_conv_kernel=36,
    video_conv_stride=24,
    video_conv_padding=0,
    audio_conv_kernel=50,
    audio_conv_stride=23,
    audio_conv_padding=1,
    llm_embed_dim=768,
    llm_output_dim=768,
    attn_dropout=0.1,
    is_add_bias_kv=True,
    is_add_zero_attn=True,
    attention_heads=8,
    image_dim=768,
    video_dim=768,
    audio_dim=768,
    image_seq_len=197,
    video_seq_len=1568,
    audio_seq_len=1500,
    min_mm_seq_len=64,
    lstm_num_layers=1,
    tokenizer_max_len=128,
    add_pooling = False,
    train=True,
    directory = "checkpoints/",
    results_directory = "results/"
)


Could not find image processor class in the image processor config or the model config. Loading based on pattern matching with the model's feature extractor configuration.


In [6]:
json_save_path = config.results_directory + config.file_name + ".json"

In [8]:
import json
history={"a":"b"}
with open(json_save_path, 'w') as json_file:
    json.dump(history, json_file)

In [None]:
train_model(model, train_dataloader, val_dataloader, config, num_epochs, "offensive", "f1", devices=None)

def train_model(model, train_dataloader, val_dataloader, config, num_epochs, track_task, track_metric, devices=None):
    
    model = model.to(config.device)
    history = {"train_validation": []}
    
    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)
    
    best_val_metric = 0.0

    for epoch in range(num_epochs):
        print(f"Epoch {epoch + 1} out of {num_epochs}")

        # Training
        train_loss = train_one_epoch(model, train_dataloader, optimizer, config)
        
        print(train_loss)

        # Validation
        val_metrics, _, _ = validate(model, val_dataloader, config)
        
        # print(val_metrics)

        # Save metrics to history
        epoch_data = {
            "epoch": epoch + 1,
            "train_loss": train_loss,
            "val_metrics": val_metrics
        }
        
        history["train_validation"].append(epoch_data)
        
        json_save_path = config.results_directory + config.file_name + ".json"

        # Save to JSON file
        with open(json_save_path, 'w') as json_file:
            json.dump(history, json_file)
        
        
        if val_metrics[track_task][track_metric] > best_val_metric:
            best_val_metric = val_metrics[track_task][track_metric]
            # torch.save(model.state_dict(), config.directory + config.file_name + ".pth")

    
    print("Training finished!")


# Old ones

In [9]:
import shutil

source_folder = "final_data/classification_final_data"
destination_folder = "final_data"

# List all files in the source folder
files = os.listdir(source_folder)

# Move each file to the destination folder
for file in files:
    source_path = os.path.join(source_folder, file)
    destination_path = os.path.join(destination_folder, file)
    shutil.move(source_path, destination_path)

print("Files moved successfully.")

Files moved successfully.


In [22]:

for k, v in tasks_bool.items():
    if tasks_bool[k]:
        tasks.append(k)
        name += k + "_"
        
config = Namespace(
    file_name=name + "0",
    device=torch.device("cuda:1"),
    tokenizer_path="ckpts",
    tasks = tasks,
    offensive_bool = tasks_bool["offensive"],
    offensive_level_bool = tasks_bool["offensive_level"],
    sentiment_bool = tasks_bool["sentiment"],
    video_encoder="MCG-NJU/videomae-base",
    audio_encoder="openai/whisper-small",
    lstm_or_conv = False,
    image_conv_kernel=23,
    image_conv_stride=3,
    image_conv_padding=8,
    video_conv_kernel=36,
    video_conv_stride=24,
    video_conv_padding=0,
    audio_conv_kernel=50,
    audio_conv_stride=23,
    audio_conv_padding=1,
    llm_embed_dim=768,
    llm_output_dim=768,
    attn_dropout=0.1,
    is_add_bias_kv=True,
    is_add_zero_attn=True,
    attention_heads=8,
    image_dim=768,
    video_dim=768,
    audio_dim=768,
    image_seq_len=197,
    video_seq_len=1568,
    audio_seq_len=1500,
    min_mm_seq_len=64,
    lstm_num_layers=1,
    tokenizer_max_len=128,
    add_pooling = False,
    train=True,
    directory = "checkpoints/",
    results_directory = "results/"
)

df = pd.read_csv("final_data/final_processed_data_one_hot.csv")
df_train_val, df_test = train_test_split(df, test_size=0.1, random_state=28703)
df_train, df_val = train_test_split(df_train_val, test_size=0.1, random_state=28703)

num_epochs = 30
patience = 10
batch_size = 2


In [23]:
#for roberta
tokenizer = XLMRobertaTokenizerFast.from_pretrained("l3cube-pune/hing-roberta")
model = XLMRobertaModel.from_pretrained("l3cube-pune/hing-roberta", torch_dtype=torch.float32)

#for gpt2
# tokenizer = PreTrainedTokenizerFast.from_pretrained('l3cube-pune/hing-gpt')
# model = GPT2Model.from_pretrained('l3cube-pune/hing-gpt', torch_dtype=torch.float32)
# tokenizer.bos_token_id = 1
# tokenizer.eos_token_id = 2

Some weights of XLMRobertaModel were not initialized from the model checkpoint at l3cube-pune/hing-roberta and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [19]:
model = Multimodal_LLM(batch_size=batch_size, config=config, tokenizer=tokenizer, adapter_llm=model)

train_ds = CustomDataset(dataframe=df_train, train=True, tokenizer=tokenizer)
val_ds = CustomDataset(df_val, train=True, tokenizer=tokenizer)
test_ds = CustomDataset(df_test, train=False, tokenizer=tokenizer)

train_dataloader = DataLoader(train_ds, batch_size=batch_size, num_workers=16, shuffle=True)
val_dataloader = DataLoader(val_ds, batch_size=batch_size, num_workers=16)
test_dataloader = DataLoader(test_ds, batch_size=batch_size, num_workers=16)



NameError: name 'Multimodal_LLM' is not defined

In [17]:
from PIL import Image

In [18]:
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)
train_one_epoch(model, train_dataloader, optimizer, config, devices=None)

  0%|          | 0/1628 [00:00<?, ?it/s]

  0%|          | 0/1628 [00:11<?, ?it/s]


KeyError: 'image'

In [14]:
train_model(model, train_dataloader, val_dataloader, config, num_epochs, "offensive", "f1", devices=None)

Epoch 1 out of 30


  0%|          | 0/1628 [00:11<?, ?it/s]


KeyError: 'image'