In [1]:
# Mount Google Drive
# from google.colab import drive
# drive.mount('/content/drive')

Mounted at /content/drive


In [1]:
PROJECT_DIR = "/vol/bitbucket/ag724/individual_project/cah/personalized-humour-generation/python-backend"
print(f"Using project directory: {PROJECT_DIR}")
import os
os.chdir(PROJECT_DIR)
print(os.getcwd())

Using project directory: /vol/bitbucket/ag724/individual_project/cah/personalized-humour-generation/cah-app
/vol/bitbucket/ag724/individual_project/cah/personalized-humour-generation/cah-app


In [19]:
# Install required packages
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
!pip install transformers==4.38.1 datasets accelerate evaluate peft bitsandbytes
!pip install sentencepiece scikit-learn pandas tqdm wandb
!pip install fastapi==0.110.1 uvicorn==0.30.1 pydantic==2.6.3 python-dotenv==1.0.1

Looking in indexes: https://download.pytorch.org/whl/cu118


In [12]:

# Verify GPU availability
import torch
print('GPU available:', torch.cuda.is_available())
if torch.cuda.is_available():
    print('GPU device:', torch.cuda.get_device_name(0))

# Set up project directory
import os
PROJECT_DIR = os.getcwd()
print(f"Using project directory: {PROJECT_DIR}")

# Create necessary directories if they don't exist
!mkdir -p data/{raw,processed,debug}

GPU available: True
GPU device: NVIDIA A30
Using project directory: /vol/bitbucket/ag724/individual_project/cah/personalized-humour-generation/cah-app


In [27]:
# Run the data processing script
!python src/download_data.py


Processing CAH data...
Reading JSON file...
Processing cards...
Processing white cards:   0% 0/22441 [00:00<?, ?it/s]Processing white cards: 100% 22441/22441 [00:00<00:00, 2418654.95it/s]
Processing black cards:   0% 0/6308 [00:00<?, ?it/s]Processing black cards: 100% 6308/6308 [00:00<00:00, 1800576.40it/s]
Creating DataFrame...
Splitting data...
Saving processed data...

Processing complete!
Processed 28748 unique cards
Black cards: 6308
White cards: 22440
Train: 24579, Validation: 1294, Test: 2875

Example black cards:
22441          _ ? there is medication for that, you know.
22442               _ is a slippery slope that leads to _.
22443                                       _ It's a trap!
22444    _ really helped my dad through his midlife cri...
22445                 _ will finally put an end to racism.

Example white cards:
0                                                69
1                                               420
2                                              1

In [28]:
import pandas as pd
from pathlib import Path

# Define the data directory
data_dir = Path('data/processed')

# Load the data
train_df = pd.read_parquet(data_dir / 'cah_train.parquet')
valid_df = pd.read_parquet(data_dir / 'cah_valid.parquet')
test_df = pd.read_parquet(data_dir / 'cah_test.parquet')

# Print the sizes of the datasets
print(f"Train set size: {len(train_df)}")
print(f"Validation set size: {len(valid_df)}")
print(f"Test set size: {len(test_df)}")

# Display some examples from the training set
print("\nSample training examples:")
print(train_df.head())


Train set size: 24579
Validation set size: 1294
Test set size: 2875

Sample training examples:
      card_type                                               text pack  pick
25268     black   In a fight to the death against _ I would use _.  all   2.0
20275     white                              The walls of Jericho.  all   NaN
10161     white                   Going to the movies by yourself.  all   NaN
2952      white           a sort of whisker, or rather a moustache  all   NaN
28011     black  What the hell?! They added a 6/6 with flying, ...  all   1.0


In [4]:
!pip install matplotlib seaborn wordcloud nltk

Collecting matplotlib
  Downloading matplotlib-3.10.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Collecting seaborn
  Downloading seaborn-0.13.2-py3-none-any.whl.metadata (5.4 kB)
Collecting wordcloud
  Downloading wordcloud-1.9.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.4 kB)
Collecting nltk
  Downloading nltk-3.9.1-py3-none-any.whl.metadata (2.9 kB)
Collecting contourpy>=1.0.1 (from matplotlib)
  Downloading contourpy-1.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.5 kB)
Collecting cycler>=0.10 (from matplotlib)
  Downloading cycler-0.12.1-py3-none-any.whl.metadata (3.8 kB)
Collecting fonttools>=4.22.0 (from matplotlib)
  Downloading fonttools-4.57.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (102 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m102.5/102.5 kB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
[?25hColl

In [30]:
!python src/analyze_data.py

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
Dataset Statistics:
Total cards: 28748
Black cards: 5393
White cards: 19186

Card Length Statistics:
             count       mean        std  min   25%   50%   75%    max
card_type                                                             
black       5393.0  54.082885  26.683258  2.0  36.0  50.0  68.0  908.0
white      19186.0  29.787293  18.226663  2.0  16.0  26.0  39.0  249.0

Processing black cards...
Processing white cards...

Most Common Words in Black Cards:
[('like', 231), ('new', 204), ('name', 177), ('would', 167), ('get', 165), ('insert', 147), ('time', 140), ('


*   Black cards are significantly longer (mean: 54.1 chars) with more variation. White cards are shorter (mean: 29.8 chars) and more concise.

*   Black cards: Common words are structural ("like", "new"). White cards: More action/descriptive words ("getting", "sex")

*   Both types show a wide range of sentiment.
*   Use separate models for black and white cards due to their different characteristics


In [17]:
!pip install -r requirements.txt

Collecting textblob>=0.17.0 (from -r python-backend/requirements.txt (line 23))
  Downloading textblob-0.19.0-py3-none-any.whl.metadata (4.4 kB)
Collecting detoxify>=0.5.0 (from -r python-backend/requirements.txt (line 25))
  Downloading detoxify-0.5.2-py3-none-any.whl.metadata (13 kB)
Downloading textblob-0.19.0-py3-none-any.whl (624 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m624.3/624.3 kB[0m [31m23.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading detoxify-0.5.2-py3-none-any.whl (12 kB)
Installing collected packages: textblob, detoxify
Successfully installed detoxify-0.5.2 textblob-0.19.0


# DETOXIFY

In [48]:
!python src/content_filter.py

/content/drive/MyDrive/cah-app
total 305
drwx------ 2 root root   4096 May  7 17:20 analysis
-rw------- 1 root root  66791 May  7 17:26 CAH.ipynb
drwx------ 5 root root   4096 May  7 15:58 data
-rw------- 1 root root    393 Apr 17 13:20 eslint.config.mjs
-rw------- 1 root root    480 Apr 17 13:20 .gitignore
drwx------ 2 root root   4096 May  7 10:35 .next
-rw------- 1 root root    133 Apr 17 13:20 next.config.ts
-rw------- 1 root root    211 Apr 17 13:20 next-env.d.ts
drwx------ 2 root root   4096 May  7 10:35 node_modules
-rw------- 1 root root    620 Apr 17 14:10 package.json
-rw------- 1 root root 209779 Apr 17 14:14 package-lock.json
-rw------- 1 root root     81 Apr 17 13:20 postcss.config.mjs
drwx------ 2 root root   4096 May  7 10:35 public
drwx------ 5 root root   4096 May  7 16:09 python-backend
-rw------- 1 root root   1450 Apr 17 13:20 README.md
drwx------ 2 root root   4096 May  7 10:35 src
-rw------- 1 root root    602 Apr 17 13:20 tsconfig.json
total 18
-rw------- 1 root 

In [5]:
!python src/compare_datasets.py

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
Loading datasets...

=== Basic Statistics Comparison ===

Dataset Sizes:
Original dataset: 24579 cards
Safe dataset: 20139 cards
Filtered out: 4440 cards

Card Type Distribution:
Original dataset:
card_type
white    0.780585
black    0.219415
Name: proportion, dtype: float64

Safe dataset:
card_type
white    0.746661
black    0.253339
Name: proportion, dtype: float64

Length Statistics:

Original dataset:
             count       mean        std  min   25%   50%   75%    max
card_type                                                             
black       5393.0  54.082885  26.683258  2.0  36.0  50.0  68.0  908.0
white      19186.0  29.787293  18.226663  2.0  16.0  26.0  39.0  249.0

Safe dataset:
             count       mean        std  min   25%   50%   75%    max
card

4,440 cards were filtered out


In [6]:
!pip install rouge_score nltk sacrebleu

Collecting rouge_score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Collecting sacrebleu
  Downloading sacrebleu-2.5.1-py3-none-any.whl.metadata (51 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m51.8/51.8 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting absl-py (from rouge_score)
  Downloading absl_py-2.2.2-py3-none-any.whl.metadata (2.6 kB)
Collecting portalocker (from sacrebleu)
  Downloading portalocker-3.1.1-py3-none-any.whl.metadata (8.6 kB)
Collecting tabulate>=0.8.9 (from sacrebleu)
  Downloading tabulate-0.9.0-py3-none-any.whl.metadata (34 kB)
Collecting colorama (from sacrebleu)
  Downloading colorama-0.4.6-py2.py3-none-any.whl.metadata (17 kB)
Collecting lxml (from sacrebleu)
  Downloading lxml-5.4.0-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (3.5 kB)
Downloading sacrebleu

In [14]:
!python src/model_comparison.py

2025-05-07 19:56:32.897958: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1746647792.921742   14002 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1746647792.928820   14002 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-05-07 19:56:32.951829: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
INFO:__main__:Loading data...
INFO:__main__:
Training BART...
  batch["labels"] = torch.tensor(batch["labels"], dtype=torch.i

In [7]:
!python src/model_comparison.py

2025-05-08 10:51:14.445336: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1746701474.466656    3116 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1746701474.473202    3116 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-05-08 10:51:14.494336: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
INFO:__main__:Loading data...
INFO:__main__:
Training BART...
Map: 100% 18125/18125 [00:03<00:00, 5610.81 examples/s]

CUDA available: True
Current device: 0
Device name: NVIDIA A30
Allocated memory (MB): 0.0
Reserved memory (MB): 0.0


In [21]:
!pip install --upgrade "transformers>=4.39.0"

Collecting transformers>=4.39.0
  Downloading transformers-4.51.3-py3-none-any.whl.metadata (38 kB)
Collecting tokenizers<0.22,>=0.21 (from transformers>=4.39.0)
  Downloading tokenizers-0.21.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.8 kB)
Downloading transformers-4.51.3-py3-none-any.whl (10.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.4/10.4 MB[0m [31m90.6 MB/s[0m eta [36m0:00:00[0m:00:01[0m:01[0m
[?25hDownloading tokenizers-0.21.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.0/3.0 MB[0m [31m97.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: tokenizers, transformers
  Attempting uninstall: tokenizers
    Found existing installation: tokenizers 0.15.2
    Uninstalling tokenizers-0.15.2:
      Successfully uninstalled tokenizers-0.15.2
  Attempting uninstall: transformers
    Found existing installation: transformers 4

In [26]:
import torch, gc
torch.cuda.empty_cache()
gc.collect()

1093

In [None]:

# Verify GPU availability
import torch
print('GPU available:', torch.cuda.is_available())
if torch.cuda.is_available():
    print('GPU device:', torch.cuda.get_device_name(0))

# Set up project directory
import os
PROJECT_DIR = os.getcwd()
print(f"Using project directory: {PROJECT_DIR}")

# Create necessary directories if they don't exist
!mkdir -p data/{raw,processed,debug}

GPU available: True
GPU device: NVIDIA A30
Using project directory: /homes/ag724


In [32]:
!nvidia-smi

Thu May  8 16:17:14 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.120                Driver Version: 550.120        CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA A30                     Off |   00000000:01:00.0 Off |                    0 |
| N/A   29C    P0             27W /  165W |       4MiB /  24576MiB |      0%      Default |
|                                         |                        |             Disabled |
+-----------------------------------------+------------------------+----------------------+
                                                

In [10]:
import torch
import gc

# Delete all variables that might hold references
for obj in dir():
    if obj not in ['torch', 'gc'] and not obj.startswith('__'):
        del globals()[obj]

# Force CUDA operations to complete
torch.cuda.synchronize()

# Clear cache and run garbage collection
torch.cuda.empty_cache()
gc.collect()

# Check memory status
print(f"GPU memory allocated: {torch.cuda.memory_allocated() / 1e9:.2f} GB")
print(f"GPU memory reserved: {torch.cuda.memory_reserved() / 1e9:.2f} GB")

GPU memory allocated: 0.00 GB
GPU memory reserved: 0.00 GB


In [9]:
!python src/model_comparison_reduced.py --model t5

INFO:__main__:Loading data...
INFO:__main__:Using 2013 examples for quick model comparison
INFO:__main__:
Quick comparison training for BART...
Map: 100%|█████████████████████████| 1811/1811 [00:00<00:00, 9476.59 examples/s]
Map: 100%|███████████████████████████| 202/202 [00:00<00:00, 8503.45 examples/s]
INFO:__main__:Found checkpoint: models/bart-large-cnn/checkpoint-300
There were missing keys in the checkpoint model loaded: ['model.encoder.embed_tokens.weight', 'model.decoder.embed_tokens.weight', 'lm_head.weight'].
	logging_steps: 50 (from args) != 100 (from trainer_state.json)
	per_device_train_batch_size: 16 (from args) != 8 (from trainer_state.json)
  0%|                                                   | 0/113 [00:00<?, ?it/s]There were missing keys in the checkpoint model loaded: ['model.encoder.embed_tokens.weight', 'model.decoder.embed_tokens.weight', 'lm_head.weight'].
{'train_runtime': 0.2914, 'train_samples_per_second': 6214.429, 'train_steps_per_second': 387.758, 'train

In [14]:

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Set memory options
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128,expandable_segments:True,garbage_collection_threshold:0.8"
os.environ["TOKENIZERS_PARALLELISM"] = "false"

def clear_gpu_memory():
    """Clear GPU memory cache thoroughly."""
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        gc.collect()
        torch.cuda.synchronize()
        logger.info(f"GPU memory cleared. Current usage: {torch.cuda.memory_allocated() / 1e9:.2f} GB")

def log_memory():
    """Log current GPU memory usage."""
    if torch.cuda.is_available():
        allocated = torch.cuda.memory_allocated() / 1e9
        reserved = torch.cuda.memory_reserved() / 1e9
        logger.info(f"GPU memory: allocated={allocated:.2f} GB, reserved={reserved:.2f} GB")

def run_inference(model_name):
    """Run simple inference with the model."""
    logger.info(f"Running inference with {model_name}")
    
    # Make sure memory is cleared before starting
    clear_gpu_memory()
    log_memory()
    
    try:
        # Fixed sample inputs
        sample_text = "Complete this sentence: Cards Against Humanity is"
        
        # Load tokenizer on CPU
        logger.info("Loading tokenizer...")
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        
        # Load model with minimal memory usage
        logger.info("Loading model...")
        
        # Use CPU if GPU is not available or has very little memory
        if not torch.cuda.is_available() or torch.cuda.memory_allocated() / torch.cuda.get_device_properties(0).total_memory > 0.8:
            logger.info("Using CPU for inference")
            device = "cpu"
        else:
            logger.info("Using GPU for inference")
            device = "cuda"
            
        try:
            if device == "cuda":
                # Try loading with FP16 precision
                model = AutoModelForSeq2SeqLM.from_pretrained(
                    model_name,
                    torch_dtype=torch.float16,
                    device_map="auto",
                    low_cpu_mem_usage=True
                )
            else:
                # Load on CPU
                model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
                
            # Log memory after loading
            log_memory()
            
            # Tokenize input
            inputs = tokenizer(sample_text, return_tensors="pt")
            if device == "cuda":
                inputs = {k: v.cuda() for k, v in inputs.items()}
            
            # Run inference
            logger.info("Running generation...")
            with torch.no_grad():
                # Use minimal generation parameters
                output_ids = model.generate(
                    **inputs,
                    max_length=30,
                    num_beams=1,
                    no_repeat_ngram_size=2,
                    early_stopping=True
                )
            
            # Convert to text
            output_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
            logger.info(f"Input: {sample_text}")
            logger.info(f"Output: {output_text}")
            
            return {
                "model": model_name,
                "input": sample_text,
                "output": output_text
            }
            
        finally:
            # Clean up
            if 'model' in locals():
                del model
            if 'inputs' in locals():
                del inputs
            if 'output_ids' in locals():
                del output_ids
            clear_gpu_memory()
            
    except Exception as e:
        logger.error(f"Error during inference: {str(e)}")
        return {"model": model_name, "error": str(e)}

def main():
    parser = argparse.ArgumentParser(description="Run minimal inference with a model")
    parser.add_argument("--model", type=str, choices=["t5", "bart"], default="t5",
                      help="Model to use for inference (t5 or bart)")
    args = parser.parse_args()
    
    # Define model mapping
    model_map = {
        "t5": "t5-small",
        "bart": "facebook/bart-base"
    }
    
    # Print GPU info
    if torch.cuda.is_available():
        logger.info(f"GPU: {torch.cuda.get_device_name(0)}")
        logger.info(f"Total GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
        logger.info(f"Initial GPU memory usage: {torch.cuda.memory_allocated() / 1e9:.2f} GB")
    else:
        logger.warning("No GPU available, using CPU")
    
    # Run inference
    model_name = model_map[args.model]
    result = run_inference(model_name)
    
    # Print final result
    if "error" in result:
        logger.error(f"Inference failed for {result['model']}: {result['error']}")
    else:
        logger.info(f"Successful inference with {result['model']}:")
        logger.info(f"Input: {result['input']}")
        logger.info(f"Output: {result['output']}")

if __name__ == "__main__":
    # Make sure memory is clean at start
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        gc.collect()
    
    main() 

usage: ipykernel_launcher.py [-h] [--model {t5,bart}]
ipykernel_launcher.py: error: unrecognized arguments: -f /homes/ag724/.local/share/jupyter/runtime/kernel-4e9b5370-10f9-41a7-988b-564a91116428.json


SystemExit: 2

In [None]:
import torch
import gc

# Delete all variables that might hold references
for obj in dir():
    if obj not in ['torch', 'gc'] and not obj.startswith('__'):
        del globals()[obj]

# Force CUDA operations to complete
torch.cuda.synchronize()

# Clear cache and run garbage collection
torch.cuda.empty_cache()
gc.collect()

# Check memory status
print(f"GPU memory allocated: {torch.cuda.memory_allocated() / 1e9:.2f} GB")
print(f"GPU memory reserved: {torch.cuda.memory_reserved() / 1e9:.2f} GB")

GPU memory allocated: 0.00 GB
GPU memory reserved: 0.00 GB


In [11]:
import torch
torch.cuda.empty_cache()
import gc
gc.collect()
torch.cuda.memory_summary(device=None, abbreviated=False)

KeyError: '_oh'

In [None]:

!python src/model_comparison_reduced.py --model bart

In [2]:
!python src/model_comparison_reduced.py

INFO:__main__:Loading data...
INFO:__main__:Using 2013 examples for quick model comparison
INFO:__main__:
Quick comparison training for BART...
Map: 100%|█████████████████████████| 1811/1811 [00:00<00:00, 8725.49 examples/s]
Map: 100%|███████████████████████████| 202/202 [00:00<00:00, 8353.54 examples/s]
INFO:__main__:Found checkpoint: models/bart-large-cnn/checkpoint-300
There were missing keys in the checkpoint model loaded: ['model.encoder.embed_tokens.weight', 'model.decoder.embed_tokens.weight', 'lm_head.weight'].
	logging_steps: 50 (from args) != 100 (from trainer_state.json)
	per_device_train_batch_size: 16 (from args) != 8 (from trainer_state.json)
  0%|                                                   | 0/113 [00:00<?, ?it/s]There were missing keys in the checkpoint model loaded: ['model.encoder.embed_tokens.weight', 'model.decoder.embed_tokens.weight', 'lm_head.weight'].
{'train_runtime': 0.2932, 'train_samples_per_second': 6176.395, 'train_steps_per_second': 385.385, 'train

In [None]:
!python src/model_comparison_reduced.py

In [None]:
import torch
import gc

# Delete all variables that might hold references
for obj in dir():
    if obj not in ['torch', 'gc'] and not obj.startswith('__'):
        del globals()[obj]

# Force CUDA operations to complete
torch.cuda.synchronize()

# Clear cache and run garbage collection
torch.cuda.empty_cache()
gc.collect()

# Check memory status
print(f"GPU memory allocated: {torch.cuda.memory_allocated() / 1e9:.2f} GB")
print(f"GPU memory reserved: {torch.cuda.memory_reserved() / 1e9:.2f} GB")

GPU memory allocated: 0.00 GB
GPU memory reserved: 0.00 GB


In [None]:
import torch
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("Current device:", torch.cuda.current_device())
    print("Device name:", torch.cuda.get_device_name(torch.cuda.current_device()))
    print("Allocated memory (MB):", torch.cuda.memory_allocated() / 1024**2)
    print("Reserved memory (MB):", torch.cuda.memory_reserved() / 1024**2)

CUDA available: True
Current device: 0
Device name: NVIDIA A30
Allocated memory (MB): 0.0
Reserved memory (MB): 0.0
