In [None]:
import torch
import torch.nn as nn
import time
import os

# Create folder if not exists
os.makedirs("qlora_experiments", exist_ok=True)

# Simulate a small Transformer Block
class TinyTransformer(nn.Module):
    def __init__(self, embed_dim):
        super().__init__()
        self.q_proj = nn.Linear(embed_dim, embed_dim)
        self.v_proj = nn.Linear(embed_dim, embed_dim)
        self.ffn = nn.Sequential(
            nn.Linear(embed_dim, embed_dim * 4),
            nn.ReLU(),
            nn.Linear(embed_dim * 4, embed_dim)
        )

    def forward(self, x):
        q = self.q_proj(x)
        v = self.v_proj(x)
        ffn_out = self.ffn(x)
        return q + v + ffn_out

# LoRA Adapter applied to Linear layer
class LoRAAdapter(nn.Module):
    def __init__(self, original_layer, rank=8):
        super().__init__()
        self.original_layer = original_layer
        self.lora_A = nn.Linear(original_layer.in_features, rank, bias=False)
        self.lora_B = nn.Linear(rank, original_layer.out_features, bias=False)
        # Freeze original layer
        for param in self.original_layer.parameters():
            param.requires_grad = False

    def forward(self, x):
        return self.original_layer(x) + self.lora_B(self.lora_A(x))

# Apply LoRA Adapters
def apply_lora(model, target_modules=["q_proj", "v_proj"], rank=8):
    for name, module in model.named_children():
        if name in target_modules:
            lora_module = LoRAAdapter(module, rank=rank)
            setattr(model, name, lora_module)
        else:
            apply_lora(module, target_modules, rank)

# Create dummy input
batch_size = 8
seq_len = 128
embed_dim = 1024

input_tensor = torch.randn(batch_size, seq_len, embed_dim)

# Instantiate models
full_model = TinyTransformer(embed_dim)
qlora_model = TinyTransformer(embed_dim)
apply_lora(qlora_model)

# Training Step simulation (single forward+backward)
def training_step(model, input_tensor):
    optimizer = torch.optim.AdamW(filter(lambda p: p.requires_grad, model.parameters()), lr=1e-3)
    model.train()
    output = model(input_tensor)
    loss = output.mean()
    loss.backward()
    optimizer.step()

# Benchmark Full Fine-Tuning
start = time.time()
training_step(full_model, input_tensor)
full_finetune_time = time.time() - start

# Benchmark QLoRA Fine-Tuning
start = time.time()
training_step(qlora_model, input_tensor)
qlora_finetune_time = time.time() - start

# Simulate VRAM usage (number of trainable params)
def count_trainable_params(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

full_params = count_trainable_params(full_model)
qlora_params = count_trainable_params(qlora_model)

# Simulated Memory Savings
vram_reduction = 100 * (1 - qlora_params / full_params)

# Print results
print("=== QLoRA vs Full Fine-Tuning Simulation Results ===")
print(f"Full Fine-Tuning Step Time: {full_finetune_time:.6f} sec")
print(f"QLoRA Fine-Tuning Step Time: {qlora_finetune_time:.6f} sec")
print(f"Trainable Parameters (Full Fine-Tune): {full_params}")
print(f"Trainable Parameters (QLoRA): {qlora_params}")
print(f"Simulated VRAM Reduction: {vram_reduction:.2f}%")

# Save results to file
result_text = f"""
=== QLoRA vs Full Fine-Tuning Simulation Results ===

Batch Size: {batch_size}
Sequence Length: {seq_len}
Embedding Dimension: {embed_dim}

Full Fine-Tuning Step Time: {full_finetune_time:.6f} sec
QLoRA Fine-Tuning Step Time: {qlora_finetune_time:.6f} sec

Trainable Parameters (Full Fine-Tune): {full_params}
Trainable Parameters (QLoRA Fine-Tune): {qlora_params}

Simulated VRAM Reduction: {vram_reduction:.2f}%
"""

with open("qlora_experiments/qlora_vs_fullfinetune_results.txt", "w") as f:
    f.write(result_text)

print("\nResults saved to: qlora_experiments/qlora_vs_fullfinetune_results.txt")


=== QLoRA vs Full Fine-Tuning Simulation Results ===
Full Fine-Tuning Step Time: 6.413243 sec
QLoRA Fine-Tuning Step Time: 0.660742 sec
Trainable Parameters (Full Fine-Tune): 10492928
Trainable Parameters (QLoRA): 8426496
Simulated VRAM Reduction: 19.69%

Results saved to: qlora_experiments/qlora_vs_fullfinetune_results.txt
