# Setup
Initial Setup and Versioning

In [1]:
# Mount Google Drive; later cells read and write files under /content/drive
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)


Mounted at /content/drive


In [2]:
import os
from datetime import datetime
import json

In [4]:
# Identity of this run: a fixed model version label plus a minute-resolution
# timestamp (e.g. 20241118_1316) that keeps successive runs in separate folders.
MODEL_VERSION = "llama2-7b-hf_v1"
TIMESTAMP = datetime.now().strftime("%Y%m%d_%H%M")

# Metadata describing the run; written next to the logs further down.
version_info = dict(
    model_version=MODEL_VERSION,
    timestamp=TIMESTAMP,
    base_model="Llama-2-7b-hf",
    training_data="original_dataset",
    description="Initial fine-tuning run with original dataset",
)

# Versioned output locations on the mounted Drive
base_output_dir = "/content/drive/MyDrive/outputs/llama_fine_tuning"
model_output_dir = f"{base_output_dir}/{MODEL_VERSION}/model_{TIMESTAMP}"
profiles_output_dir = f"{base_output_dir}/{MODEL_VERSION}/profiles_{TIMESTAMP}"
logs_dir = f"{base_output_dir}/{MODEL_VERSION}/logs_{TIMESTAMP}"

# exist_ok=True lets the cell be re-run within the same minute without failing
for run_dir in (model_output_dir, profiles_output_dir, logs_dir):
    os.makedirs(run_dir, exist_ok=True)

# Persist the run metadata as pretty-printed JSON
with open(f"{logs_dir}/version_info.json", 'w') as version_file:
    version_file.write(json.dumps(version_info, indent=2))

# Echo where everything for this run will land
print(
    f"Initialized version {MODEL_VERSION} at {TIMESTAMP}",
    f"Model outputs will be saved to: {model_output_dir}",
    f"Generated profiles will be saved to: {profiles_output_dir}",
    sep="\n",
)

Initialized version llama2-7b-hf_v1 at 20241118_1316
Model outputs will be saved to: /content/drive/MyDrive/outputs/llama_fine_tuning/llama2-7b-hf_v1/model_20241118_1316
Generated profiles will be saved to: /content/drive/MyDrive/outputs/llama_fine_tuning/llama2-7b-hf_v1/profiles_20241118_1316


# Data Loading and Preprocessing

Memory-saving techniques planned for the model setup later in this notebook:

- 8-bit quantization
- LoRA for efficient fine-tuning
- Gradient checkpointing

In [5]:
!pip install datasets

Collecting datasets
  Downloading datasets-3.1.0-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.1.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m9.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m11.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.9.0-py3-none-any.whl (

In [6]:
import logging
from datasets import Dataset
from transformers import AutoTokenizer

In [7]:
# Setup logging
# logging.basicConfig() does nothing when the root logger already has handlers,
# which appears to be the case in this environment: the recorded outputs show
# records in the default "LEVEL:name:message" layout and no INFO lines.
# force=True (Python 3.8+) removes any existing root handlers first, so the
# level, format and file handler below actually take effect.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        # Everything is mirrored to a log file inside this run's logs folder
        logging.FileHandler(f"{logs_dir}/training.log"),
        logging.StreamHandler()
    ],
    force=True,
)
logger = logging.getLogger(__name__)

logger.info("Starting data loading and preprocessing")

def load_dataset(path="/content/drive/MyDrive/data/processed/llama_training/processed_original_dataset.json"):
    """Load the processed training examples from a JSON file.

    Args:
        path: Location of the JSON file. Defaults to the processed original
            dataset on the mounted Drive, so existing ``load_dataset()`` calls
            keep working.

    Returns:
        The parsed JSON content. Callers in this notebook iterate over it and
        read ``item["text"]`` from each element.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # Explicit UTF-8: the profile texts contain non-ASCII punctuation, and the
    # platform default encoding is not guaranteed to be UTF-8 everywhere.
    with open(path, 'r', encoding="utf-8") as f:
        data = json.load(f)
    logger.info(f"Loaded {len(data)} examples from dataset")
    return data

def prepare_dataset(data):
    """Wrap the loaded examples in a single-column HuggingFace ``Dataset``.

    Args:
        data: Iterable of mappings, each providing a ``"text"`` entry.

    Returns:
        The object produced by ``Dataset.from_dict`` for a single ``"text"``
        column holding every example's text in input order.
    """
    texts = []
    for example in data:
        texts.append(example["text"])

    hf_dataset = Dataset.from_dict({"text": texts})
    logger.info("Created HuggingFace dataset")
    return hf_dataset

# Read the raw examples from disk and wrap them in a HuggingFace dataset
raw_data = load_dataset()
dataset = prepare_dataset(raw_data)

# Hold out 10% of the examples for validation; the fixed seed keeps the split
# identical from run to run.
train_val_dataset = dataset.train_test_split(seed=42, test_size=0.1)
train_size = len(train_val_dataset['train'])
val_size = len(train_val_dataset['test'])
logger.info(f"Training set size: {train_size}")
logger.info(f"Validation set size: {val_size}")

# Initialize tokenizer
logger.info("Initializing tokenizer...")
# The Hugging Face access token is read from the HF_TOKEN environment variable
# rather than being written into the notebook, where anyone with access to the
# file could read it. When the variable is unset, None is passed and the
# library falls back to its own stored credentials.
# NOTE: any token that was previously hard-coded here should be revoked.
# trust_remote_code is intentionally not passed: the Llama tokenizer is
# implemented inside transformers, so no repository code has to be executed.
tokenizer = AutoTokenizer.from_pretrained(
    "meta-llama/Llama-2-7b-hf",
    token=os.environ.get("HF_TOKEN"),
)
# Reuse EOS as the padding token (presumably because this tokenizer does not
# define one of its own - TODO confirm) and pad on the right.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# Print a sample to verify data format
print("\nSample from dataset:")
print(dataset[0]['text'][:500] + "...")  # Print first 500 chars of first example

tokenizer_config.json:   0%|          | 0.00/776 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]


Sample from dataset:
### Instruction:
Generate a detailed sperm donor profile based on these characteristics:
Height: 5'10 (178cm)
Weight: 162 lbs (73kg)
Eye Color: Black
Hair: Dark Brown
Education: Master in Architecture
Ethnic Background: East Indian

### Response:
Quadrilingual Architect. loves his career as an architect. He’s had a lifelong creative streak and drawing has always been a favorite pastime, from doodling to designing buildings! This smart cookie has an M.S. in architecture (3.7 GPA) and can even spe...


# Model Initialization

In [8]:
!pip install -U bitsandbytes

Collecting bitsandbytes
  Downloading bitsandbytes-0.44.1-py3-none-manylinux_2_24_x86_64.whl.metadata (3.5 kB)
Downloading bitsandbytes-0.44.1-py3-none-manylinux_2_24_x86_64.whl (122.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m122.4/122.4 MB[0m [31m19.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.44.1


In [9]:
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

In [10]:
# Mark the start of the model-setup stage in the log
logger.info("Starting model initialization...")

In [11]:
# Memory optimization settings
logger.info("Setting up memory optimizations...")
# Permit TF32 math for both the CUDA matmul backend and cuDNN
for tf32_backend in (torch.backends.cuda.matmul, torch.backends.cudnn):
    tf32_backend.allow_tf32 = True

# 8-bit loading options, handed to from_pretrained() via quantization_config
# NOTE(review): llm_int8_has_fp16_weight=True asks for 16-bit main weights to be
# kept - confirm this is intended given the memory-saving goal of this cell.
int8_options = {
    "load_in_8bit": True,
    "llm_int8_threshold": 6.0,
    "llm_int8_has_fp16_weight": True,
}
quantization_config = BitsAndBytesConfig(**int8_options)

In [12]:
# Print available GPU memory before model loading
if torch.cuda.is_available():
    # mem_get_info() returns (free_bytes, total_bytes) for the device.
    # get_device_properties(0).total_memory is the card's fixed capacity, so
    # logging it as "available" would show the same number before and after
    # the model is loaded.
    free_bytes, _total_bytes = torch.cuda.mem_get_info(0)
    logger.info(f"Available GPU memory before model loading: {free_bytes / 1e9:.2f} GB")


In [13]:
try:
    # Initialize model with memory optimizations
    logger.info("Loading model...")
    # The access token comes from the HF_TOKEN environment variable instead of
    # being embedded in the notebook; None falls back to the library's stored
    # credentials. NOTE: any token previously hard-coded here should be revoked.
    # trust_remote_code is intentionally not passed: Llama is implemented
    # inside transformers, so no repository code has to be executed.
    model = AutoModelForCausalLM.from_pretrained(
        "meta-llama/Llama-2-7b-hf",
        token=os.environ.get("HF_TOKEN"),
        quantization_config=quantization_config,
        device_map="auto",
        torch_dtype=torch.float16,
    )

    logger.info("Model loaded successfully")

    # Configure LoRA: rank-16 adapters on the four attention projections
    logger.info("Configuring LoRA...")
    lora_config = LoraConfig(
        r=16,
        lora_alpha=32,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM"
    )

    # Prepare model for training: k-bit preparation first, then wrap with the
    # LoRA adapters defined above.
    logger.info("Preparing model for training...")
    model = prepare_model_for_kbit_training(model)
    model = get_peft_model(model, lora_config)

    # Print trainable parameters
    logger.info("Model preparation complete. Printing trainable parameters:")
    model.print_trainable_parameters()

    if torch.cuda.is_available():
        # Report memory that is actually free (mem_get_info returns
        # (free_bytes, total_bytes)); total_memory is the card's fixed capacity
        # and would not reflect the loaded model.
        free_bytes, _total_bytes = torch.cuda.mem_get_info(0)
        logger.info(f"Available GPU memory after model loading: {free_bytes / 1e9:.2f} GB")

except Exception as e:
    # logger.exception records the traceback in the log file as well as the
    # message; the error is re-raised so the cell still fails visibly.
    logger.exception(f"Error during model initialization: {str(e)}")
    raise

print("\nModel initialization complete!")

config.json:   0%|          | 0.00/609 [00:00<?, ?B/s]

ERROR:__main__:Error during model initialization: Using `bitsandbytes` 8-bit quantization requires the latest version of bitsandbytes: `pip install -U bitsandbytes`


ImportError: Using `bitsandbytes` 8-bit quantization requires the latest version of bitsandbytes: `pip install -U bitsandbytes`

# Switching over to simple generation
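- Could not resolve the `bitsandbytes` version conflict reported above, so the fine-tuning setup is set aside and the next cell tries plain text generation with a smaller model instead.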

In [19]:
# NEW CELL - SIMPLE GENERATION
import os

from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

GENERATION_MODEL_ID = "meta-llama/Llama-3.2-1B"

# The access token is read from the HF_TOKEN environment variable instead of
# being embedded in the notebook; None falls back to the library's stored
# credentials. NOTE: any token previously hard-coded here should be revoked.
# The repository is gated: the account behind the token must have been granted
# access to it, otherwise loading fails with a 403 as in the recorded run.
hf_token = os.environ.get("HF_TOKEN")

print("Loading model and tokenizer...")
model = AutoModelForCausalLM.from_pretrained(
    GENERATION_MODEL_ID,
    token=hf_token,
    device_map="auto"
)
# Load the tokenizer that belongs to this checkpoint. Without this line the
# cell silently reuses whatever `tokenizer` an earlier cell left in the kernel
# (the Llama-2 one), which belongs to a different model.
tokenizer = AutoTokenizer.from_pretrained(GENERATION_MODEL_ID, token=hf_token)

# Your exact prompt
prompt = """Based on these donor profiles, generate 1 new donor profiles. For the profile, provide:
Profile #[number]:
* Height: [ft'in" (cm)]
* Weight: [lb (kg)]
* Eye Color: [color]
* Hair: [color/texture]
* Skin Tone: [tone]
* Education: [level and field]
* Ethnic Background: [ethnicity]
* Occupation: [job]
* Interests/Hobbies: [categories]
[Detailed description paragraph that captures personality, aspirations, physical features, and character traits in a marketing style similar to the samples provided]
"""

print("Generating profiles...")
# Send the inputs to wherever device_map="auto" placed the model rather than
# assuming a CUDA device is present.
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
# max_length bounds prompt tokens plus generated tokens together
outputs = model.generate(**inputs, max_length=2048)
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

print("\nGenerated profiles:")
print(generated_text)

Loading model and tokenizer...


OSError: You are trying to access a gated repo.
Make sure to have access to it at https://huggingface.co/meta-llama/Llama-3.2-1B.
403 Client Error. (Request ID: Root=1-673b5eb5-7a4b906c777b4ca45787a657;4314392a-46e2-43bd-bdfd-6945f87e95cc)

Cannot access gated repo for url https://huggingface.co/meta-llama/Llama-3.2-1B/resolve/main/config.json.
Access to model meta-llama/Llama-3.2-1B is restricted and you are not in the authorized list. Visit https://huggingface.co/meta-llama/Llama-3.2-1B to ask for access.

In [20]:
import torch

# Report whether CUDA is usable and, if so, the name of the device at index 0
cuda_ready = torch.cuda.is_available()
print(cuda_ready)
if cuda_ready:
    print(torch.cuda.get_device_name(0))
else:
    print("No GPU found")


True
Tesla T4


In [21]:
# Clear CUDA memory
import torch
import gc

def clear_gpu_memory():
    """Run Python garbage collection, then release PyTorch's cached CUDA memory.

    When a CUDA device is available, additionally waits for outstanding work
    on it via ``torch.cuda.synchronize()``. Returns nothing.
    """
    gc.collect()
    torch.cuda.empty_cache()

    # Nothing to wait for when no CUDA device is present
    if not torch.cuda.is_available():
        return
    torch.cuda.synchronize()

# Announce the cleanup on stdout, then run it once
print("Clearing GPU memory...")
clear_gpu_memory()

Clearing GPU memory...
