## Requirements and dependencies


In [None]:
%%capture
!pip install opacus
!pip install -U bitsandbytes transformers accelerate
!pip install peft

In [None]:
!pip install pynvml



In [None]:
# Check NumPy Version and Import Dependencies
import numpy as np
print("NumPy version:", np.__version__)

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from opacus import PrivacyEngine
from torch.utils.data import TensorDataset, DataLoader
from torch.optim import AdamW
from peft import get_peft_model, LoraConfig, TaskType

# Pick the compute device once: CUDA when a GPU is visible, CPU otherwise.
cuda_ready = torch.cuda.is_available()
device = torch.device("cuda") if cuda_ready else torch.device("cpu")
print(f"Using device: {device}")

# When a GPU is present, report the name and total memory of device 0.
if cuda_ready:
    gpu_props = torch.cuda.get_device_properties(0)
    total_gib = gpu_props.total_memory / 1024**3  # bytes -> GiB
    print(f"GPU Device: {torch.cuda.get_device_name(0)}")
    print(f"Available GPU memory: {total_gib:.2f} GB")

NumPy version: 1.26.4
Using device: cuda
GPU Device: NVIDIA L4
Available GPU memory: 22.17 GB


In [None]:
## Clear GPU cache and storage
# Guarded by is_available(): ipc_collect() initialises CUDA and therefore
# raises on a CPU-only runtime, which this notebook otherwise supports via
# the `device` fallback.
if torch.cuda.is_available():
    torch.cuda.empty_cache()  # Releases cached blocks the allocator is not using
    torch.cuda.ipc_collect()  # Collects GPU memory released by CUDA IPC (multiprocessing)

In [None]:
from huggingface_hub import login
from google.colab import userdata

In [None]:
# Read the Hugging Face token from the Colab secret store rather than
# hardcoding it in the notebook.
hf_token = userdata.get("HF_TOKEN")

# Guard clause first: report the missing secret, otherwise authenticate.
if not hf_token:
    print("Hugging Face token not found. Please set it using `userdata.set`.")
else:
    login(token=hf_token)
    print("Logged in successfully!")

Logged in successfully!


## CPU, RAM, and GPU utility functions

In [None]:
import psutil
import torch

# Probe for a usable NVML binding. NVML_AVAILABLE is always defined after
# this block, whichever way the probe goes.
try:
    from pynvml import nvmlInit, nvmlDeviceGetHandleByIndex, nvmlDeviceGetMemoryInfo, nvmlDeviceGetUtilizationRates, nvmlSystemGetDriverVersion, nvmlDeviceGetName, nvmlShutdown
    nvmlInit()
    NVML_AVAILABLE = True
except Exception:
    # ImportError: pynvml is not installed.
    # NVMLError (raised by nvmlInit): the package is installed but no NVIDIA
    # driver/library is usable, e.g. on a CPU-only runtime. Either way the GPU
    # stats helper must degrade gracefully instead of crashing this cell.
    NVML_AVAILABLE = False

def get_cpu_stats():
    """Print CPU utilisation, clock frequency and core counts.

    Uses psutil: `cpu_percent(interval=1)` samples utilisation over a
    one-second window, so this call blocks for about a second.
    Prints four lines to stdout and returns None.
    """
    cpu_usage = psutil.cpu_percent(interval=1)  # Get CPU usage %
    # Query the frequency once: a second call could return a different
    # (possibly falsy) result than the one that was just checked.
    freq_info = psutil.cpu_freq()
    num_cores = psutil.cpu_count(logical=False)  # Physical Cores
    num_threads = psutil.cpu_count(logical=True)  # Logical Cores
    print(f"CPU Usage: {cpu_usage}%")
    if freq_info:
        print(f"CPU Frequency: {freq_info.current} MHz")
    else:
        # No unit suffix when the platform does not report a frequency.
        print("CPU Frequency: Unknown")
    print(f"Physical Cores: {num_cores}")
    print(f"Logical Cores: {num_threads}")

def get_ram_stats():
    """Print total, available and used system RAM (in GB) plus the usage percentage."""
    mem = psutil.virtual_memory()
    # Byte counts are reported in decimal gigabytes (1e9 bytes), two decimals.
    byte_rows = (
        ("Total RAM:", mem.total),
        ("Available RAM:", mem.available),
        ("Used RAM:", mem.used),
    )
    for label, n_bytes in byte_rows:
        print(label, round(n_bytes / 1e9, 2), "GB")
    print("RAM Usage:", mem.percent, "%")

def get_gpu_stats():
    """Print name, driver version, VRAM usage and utilisation for each GPU.

    Returns:
        A dict with an "Error" key when NVML is unavailable; otherwise None
        after printing one block of stats per device to stdout.

    The function brackets its queries with its own nvmlInit()/nvmlShutdown()
    pair, so it can be called repeatedly, and NVML is released even if one of
    the queries raises.
    """
    if not NVML_AVAILABLE:
        return {"Error": "pynvml not installed. Run: pip install nvidia-ml-py3"}

    num_gpus = torch.cuda.device_count()

    # Previously the function only shut NVML down, so a second call failed
    # because the library was no longer initialised.
    nvmlInit()
    try:
        for i in range(num_gpus):
            # NOTE(review): this assumes torch's device index matches NVML's
            # index — confirm if CUDA_VISIBLE_DEVICES is ever used.
            handle = nvmlDeviceGetHandleByIndex(i)
            mem_info = nvmlDeviceGetMemoryInfo(handle)
            utilization = nvmlDeviceGetUtilizationRates(handle)

            print(f"GPU {i} - {nvmlDeviceGetName(handle)}")
            print(f"Driver Version: {nvmlSystemGetDriverVersion()}")
            print(f"Total VRAM: {round(mem_info.total / 1e9, 2)} GB")
            print(f"Used VRAM: {round(mem_info.used / 1e9, 2)} GB")
            print(f"Free VRAM: {round(mem_info.free / 1e9, 2)} GB")
            print(f"GPU Usage: {utilization.gpu}%")
            print()
    finally:
        nvmlShutdown()  # Clean up NVML, balancing the nvmlInit() above

# Print the three section headers only; the stats helpers themselves are
# invoked in their own cells further down.
for section in ("CPU", "RAM", "GPU"):
    print(f"\n🔹 {section} Stats:")



🔹 CPU Stats:

🔹 RAM Stats:

🔹 GPU Stats:


## CPU, RAM & GPU specs

In [None]:
# Print CPU usage, frequency and core counts for this runtime
# (the helper samples CPU usage with interval=1).
get_cpu_stats()

CPU Usage: 0.5%
CPU Frequency: 2200.2180000000003 MHz
Physical Cores: 4
Logical Cores: 8


In [None]:
# Print total / available / used system RAM in GB and the usage percentage.
get_ram_stats()

Total RAM: 33.67 GB
Available RAM: 31.78 GB
Used RAM: 1.4 GB
RAM Usage: 5.6 %


In [None]:
# Print per-GPU name, driver version, VRAM usage and utilisation.
# When NVML is unavailable the helper returns an error dict instead, which
# the notebook displays as this cell's result.
get_gpu_stats()

GPU 0 - NVIDIA L4
Driver Version: 535.104.05
Total VRAM: 24.15 GB
Used VRAM: 0.36 GB
Free VRAM: 23.8 GB
GPU Usage: 0%



## Model Loading and Tokenizer

In [None]:
# Checkpoint to fine-tune. Earlier candidates are kept for reference:
# model_name = "EleutherAI/gpt-neo-2.7B"
# model_name = "unsloth/llama-3-8b-Instruct-bnb-4bit"
model_name = "meta-llama/Llama-3.1-8B"

tokenizer = AutoTokenizer.from_pretrained(model_name)

# Memory-conscious loading: half-precision weights, and let the library
# decide device placement (and sharding, if needed) on its own.
load_options = {
    "torch_dtype": torch.float16,
    "device_map": "auto",
}
model = AutoModelForCausalLM.from_pretrained(model_name, **load_options)

# Batched tokenization with padding needs a pad token; reuse EOS when the
# tokenizer does not define one.
if tokenizer.pad_token_id is None:
    tokenizer.pad_token = tokenizer.eos_token

# Match the embedding matrix to the tokenizer's vocabulary size. Left as the
# cell's final expression so the resulting Embedding is displayed.
model.resize_token_embeddings(len(tokenizer))

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Embedding(128256, 4096)

In [None]:
# Dump every submodule with its qualified name, e.g. to look up layer names
# that LoRA could target. The root module is listed first with an empty name.
for qualified_name, submodule in model.named_modules():
    print(qualified_name, ":", submodule)

 : LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 4096)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear(in_features=4096, out_features=4096, bias=False)
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
      )
    )
    (norm): LlamaRMSNorm((4096,), eps=1e-05)
    (rota

## LoRA Configuration

In [None]:
# Apply LoRA for Parameter-Efficient Fine-Tuning
# Wraps the base model with PEFT so that only the small adapter matrices are trained.
# NOTE(review): this only sets a flag on the config object. Recent transformers
# releases enable checkpointing through `model.gradient_checkpointing_enable()`,
# so this line may have no effect — confirm, and also confirm compatibility
# with Opacus' per-sample gradient hooks before actually enabling it.
model.config.gradient_checkpointing = True

lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    inference_mode=False,  # adapters are created for training, not inference
    r=4,  # rank of the low-rank update matrices
    lora_alpha=32,  # LoRA scaling factor
    lora_dropout=0.1,  # dropout applied on the adapter path
    # target_modules left unset, so PEFT chooses the target layers; the recorded
    # output below shows q_proj wrapped as lora.Linear.
    # target_modules=["query_key_value"]  # Specify target modules for GPT-Neo
)
# NOTE: rebinds `model` to the PEFT wrapper; re-running this cell without
# reloading the base model would wrap an already-wrapped model.
model = get_peft_model(model, lora_config)
print("LoRA applied. Trainable parameters:")
model.print_trainable_parameters()

# Move the model to the selected `device` (cuda when available, otherwise cpu).
# NOTE(review): the model was loaded with device_map="auto" — confirm that an
# explicit .to(device) is still needed and safe on that placement.
model = model.to(device)
# Switch to training mode; as the last expression this also displays the model.
model.train()

LoRA applied. Trainable parameters:
trainable params: 1,703,936 || all params: 8,031,965,184 || trainable%: 0.0212


PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(128256, 4096)
        (layers): ModuleList(
          (0-31): 32 x LlamaDecoderLayer(
            (self_attn): LlamaAttention(
              (q_proj): lora.Linear(
                (base_layer): Linear(in_features=4096, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.1, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=4, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=4, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): Linear(in_feat

## Data loading and preprocessing

In [None]:
# Load and Format Training Data
import json

formatted_strings = []

# The training file is JSON Lines: one JSON object per line, each carrying the
# 'Rating', 'Title' and 'Review' keys that are flattened into a single
# prompt-plus-completion string for causal-LM fine-tuning.
# The encoding is given explicitly so reading does not depend on the
# platform's default locale.
with open("finetuning/train.jsonl", "r", encoding="utf-8") as f:
    for line in f:
        line = line.strip()
        if not line:
            # Skip blank lines (e.g. a trailing newline at the end of the
            # file); json.loads("") would raise.
            continue
        data = json.loads(line)
        rating = data['Rating']
        title = data['Title']
        review = data['Review']

        formatted_string = f'"System prompt : Given the Rating and Title, you are required to generate the review" | "Rating": {rating} | "Title": {title} | "Review": {review}'
        formatted_strings.append(formatted_string)

# Same list object, under the name the tokenization cell uses.
train_texts = formatted_strings

## Data tokenization and dataset creation

In [None]:
# Tokenize and Prepare Dataset
# Every text is padded/truncated to a common length (at most 128 tokens) so
# the whole corpus fits in fixed-shape tensors.
encodings = tokenizer(train_texts, return_tensors='pt', padding=True, truncation=True, max_length=128)
input_ids = encodings['input_ids']
attention_mask = encodings['attention_mask']

# Causal-LM labels are the inputs themselves; positions set to -100 are
# ignored by the loss. Padding is identified through the attention mask, not
# by comparing token ids: the pad token may be an alias of EOS (see the
# model-loading cell), and an id comparison would also hide genuine EOS tokens.
labels = input_ids.clone()
labels[attention_mask == 0] = -100

print("Training data shape:", input_ids.shape)

train_dataset = TensorDataset(input_ids, attention_mask, labels)
# drop_last=True keeps every batch at exactly batch_size examples.
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, drop_last=True)

Training data shape: torch.Size([100000, 128])


## Privacy engine setup

In [None]:
# Optimizer and differential-privacy wrapper
optimizer = AdamW(model.parameters(), lr=5e-4)
privacy_engine = PrivacyEngine()

# DP settings kept together so they are easy to find and tune.
dp_settings = dict(
    noise_multiplier=1.0,    # noise scale relative to the clipping norm
    max_grad_norm=1.0,       # per-sample gradient clipping threshold
    batch_first=True,        # tensors are laid out as (batch, ...)
    loss_reduction="mean",   # the model's loss is averaged over the batch
    poisson_sampling=False,  # keep the DataLoader's own batching
)

# make_private returns wrapped versions of all three objects; the original
# names are rebound, so re-running this cell would wrap them a second time.
model, optimizer, train_loader = privacy_engine.make_private(
    module=model,
    optimizer=optimizer,
    data_loader=train_loader,
    **dp_settings,
)

## Training loop

In [None]:
# DP-SGD training: the optimizer returned by the privacy engine is stepped
# once per batch, for a fixed number of passes over the data.
model.train()
epochs = 3

for epoch in range(epochs):
    total_loss = 0.0
    for step, batch in enumerate(train_loader):
        # Lightweight progress marker every 500 batches.
        if step % 500 == 0:
            print(step)

        # Each batch is (input_ids, attention_mask, labels); move all to the device.
        batch_ids, batch_mask, batch_labels = (t.to(device) for t in batch)

        # Explicit position ids 0..seq_len-1, one row per example in the batch.
        n_examples, seq_len = batch_ids.size(0), batch_ids.size(1)
        position_ids = torch.arange(seq_len, device=device).unsqueeze(0).repeat(n_examples, 1)

        outputs = model(
            input_ids=batch_ids,
            attention_mask=batch_mask,
            position_ids=position_ids,
            labels=batch_labels,
        )
        loss = outputs.loss
        total_loss += loss.item()

        # Clear old gradients, backpropagate, then take the optimizer step.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Mean of the per-batch losses for this epoch.
    avg_loss = total_loss / len(train_loader)
    print(f"Epoch {epoch+1}/{epochs} - Average loss: {avg_loss:.4f}")

0
500
1000
1500
2000
2500
3000
3500
4000
4500
5000
5500
6000
6500
7000
7500
8000
8500
9000
9500
10000
10500
11000
11500
12000
12500
13000
13500
14000
14500
15000
15500
16000
16500
17000
17500
18000
18500
19000
19500
20000
20500
21000
21500
22000
22500
23000
23500
24000
24500
Epoch 1/3 - Average loss: 1.6101
0
500
1000
1500
2000
2500
3000
3500
4000
4500
5000
5500
6000
6500
7000
7500
8000
8500
9000
9500
10000
10500
11000
11500
12000
12500
13000
13500
14000
14500
15000
15500
16000
16500
17000
17500
18000
18500
19000
19500
20000
20500
21000
21500
22000
22500
23000
23500
24000
24500
Epoch 2/3 - Average loss: 1.6199
0
500
1000
1500
2000
2500
3000
3500
4000
4500
5000
5500
6000
6500
7000
7500
8000
8500
9000
9500
10000
10500
11000
11500
12000
12500
13000
13500
14000
14500
15000
15500
16000
16500
17000
17500
18000
18500
19000
19500
20000
20500
21000
21500
22000
22500
23000
23500
24000
24500
Epoch 3/3 - Average loss: 1.8221


## Model saving

In [None]:
# Persist the fine-tuned model and its tokenizer side by side.
# Unwrapping the privacy-engine wrapper first was tried and left disabled:
# model.remove_hooks()
# model = model._module

save_directory = "./finetuned_model_dp"
# Model first, then tokenizer; both write into the same directory.
for artifact in (model, tokenizer):
    artifact.save_pretrained(save_directory)
print(f"Model and tokenizer saved to {save_directory}")

Model and tokenizer saved to ./finetuned_model_dp


## Interactive testing

In [None]:
# Interactive Testing Loop
# Reads prompts until the user types "bye" (case-insensitive, surrounding
# whitespace ignored) or interrupts / closes the input, then exits cleanly.
model.eval()

while True:
    try:
        sample_prompt = input("Input: ")
    except (EOFError, KeyboardInterrupt):
        # Stopping the cell while it waits for input should end the session,
        # not leave a traceback in the notebook output.
        break
    if sample_prompt.strip().lower() == "bye":
        break
    enc = tokenizer(sample_prompt, return_tensors='pt', padding=True, truncation=True)
    enc = {k: v.to(device) for k, v in enc.items()}
    # play with max_length=350, in the paper it is defined different
    # NOTE: max_length counts the prompt tokens as well as the generated ones.
    # pad_token_id is passed explicitly so generate() does not have to fall
    # back to the EOS id and warn about it on every call.
    generated_ids = model.generate(
        **enc,
        max_length=350,
        do_sample=True,
        top_k=50,
        pad_token_id=tokenizer.pad_token_id,
    )
    generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    print("Generated text:", generated_text)

Input: "System prompt": "Given the Rating and Title, you are required to generate the review", "Rating": 4, "Title": "No white background! It\u2019s clear!", "Review":


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Generated text: "System prompt": "Given the Rating and Title, you are required to generate the review", "Rating": 4, "Title": "No white background! It\u2019s clear!", "Review": I need a camera.  I wish a case, I have the buy the case.  I only the watch and the put of watch.. the the order phone,, I's the the stand phone case of the the case.  I buy the case. I only it.  I.  I was the phone.   I.  I had it.  I buy the the the  I one. I watch,. I the the the the phone I. I back it. I the the the the the phone. I I like my the  I't my  I. I was one that. I, as the the the the the the, I   I I the the the the the the phone on the the phone,. I had the the the the the the phone. if it  ita that it's a the the the the the the the the the, you it... on phone of the the the., of the the phone. I. it.< so. I.. I the the the the the phone.. I. to the the phone I I not,,. I the phone. I., the will  the protector one one... I..  I's.. I it't that. the the the the back. I.. in one. it and it. if on

KeyboardInterrupt: Interrupted by user