In [1]:
import json
import random
from pathlib import Path
from datetime import datetime
import requests

In [2]:
API_KEY = "sk-RpSfTpeq-El4GvTwz_QHEQ"
API_URL = "http://api-uat-be.waraml.org/v1"
WARA_API_KEY = "waraml-sLoYpybG81wZCx29sAGQWJhA"

INPUT_PROMPT = "Generate a Python code snippet that causes an error. Return only the code and the resulting error message."
OUTPUT_PROMPT = "The following Python code causes errors. Please explain why it happens and how to fix it."

DATASET_JSON_PATH = Path("data/dataset.json")
TRAIN_DATASET_PATH = Path("data/train_dataset.json")
TRAIN_DATASET_ZIP_PATH = Path("data/train_dataset.zip")
EVAL_DATASET_PATH = Path("data/eval_dataset.json")
EVAL_DATASET_ZIP_PATH = Path("data/eval_dataset.zip")
EVAL_RESULTS_PATH = Path("data/eval_results")
METADATA_JSON_PATH = Path("data/metadata.json")

NUM_SAMPLES = 300

In [3]:
def generate(prompt):
    headers = {"Authorization": f"Bearer {API_KEY}"}
    
    data = {
        "model": "azure-gpt-4o",
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 2048,
        "stream": False
    }
    
    response = requests.post(f"{API_URL}/chat/completions", headers=headers, json=data)
    response.raise_for_status()
    
    return response.json()["choices"][0]["message"]["content"].strip()

In [22]:
import zipfile

with zipfile.ZipFile(TRAIN_DATASET_ZIP_PATH, 'w', zipfile.ZIP_DEFLATED) as zf:
    zf.write(TRAIN_DATASET_PATH, arcname=TRAIN_DATASET_PATH.name)
    
with zipfile.ZipFile(EVAL_DATASET_ZIP_PATH, 'w', zipfile.ZIP_DEFLATED) as zf:
    zf.write(EVAL_DATASET_PATH, arcname=EVAL_DATASET_PATH.name)

In [43]:
import os
    
X_API_KEY = "waraml-sLoYpybG81wZCx29sAGQWJhA"
BASE_JOB_URL = "https://llmhubapi.waraml.org/jobs"

params = {
    "job_name": "PythonErrorModel",
    "base_model": "gpt2",
    "lora_r": "8",
    "num_epochs": "5",
    "batch_size": 8,
    "learning_rate": "0.0002",
}

headers = {
    "X-API-Key": X_API_KEY,
}

def save_checkpoint(job_id):

    resp = requests.get(
        f"{BASE_JOB_URL}/{job_id}/download", headers=headers)
    resp.raise_for_status()
    data = resp.content

    ckpt_dir = f"{job_id}_lora_checkpoint"
    os.makedirs(ckpt_dir, exist_ok=True)

    if data[:2] == b"PK":
        zip_path = os.path.join(ckpt_dir, "adapter.zip")
        with open(zip_path, "wb") as f:
            f.write(data)
        with zipfile.ZipFile(zip_path, "r") as z:
            z.extractall(ckpt_dir)
    else:
        # save the raw adapter
        adapter_file = os.path.join(ckpt_dir, "adapter_model.safetensors")
        with open(adapter_file, "wb") as f:
            f.write(data)
        # write a full adapter_config.json so PEFT can load it
        config = {
            "peft_type":        "LORA",
            "task_type":        "CAUSAL_LM",
            "inference_mode":   False,
            "r":                8,
            "lora_alpha":       32,
            "lora_dropout":     0.05,
            # GPT-2 LoRA usually targets the attention proj & attn matrices:
            "target_modules":   ["c_attn", "c_proj"],
            "bias":             "none"
        }
        with open(os.path.join(ckpt_dir, "adapter_config.json"), "w") as f:
            json.dump(config, f)

    print(f"Checkpoint prepared in {ckpt_dir}/")

import time
import requests
import os
import json

def start_finetune( dataset_zip_path):

    print(f"\n🔁 Starting job {params['job_name']}")

    # Prepare file upload
    files = {
        "dataset_file": open(dataset_zip_path, "rb")
    }

    # Submit training job
    response = requests.post(BASE_JOB_URL, headers=headers, params=params, files=files)

    try:
        response.raise_for_status()
        message = response.json()
        job_id = message.get("id")
        print(f"✅ Job {job_id} submitted.")
    except requests.HTTPError:
        print("❌ Job submission failed:", response.status_code)
        print(response.text)
        return

    current_epoch = 1
    
    # Poll job status
    while True:
        status_resp = requests.get(f"{BASE_JOB_URL}/{job_id}", headers=headers)
        status_data = status_resp.json()
        job_status = status_data.get("status")
        metrics = status_data.get("metrics")

        if metrics:
            print(f"Metrics:")
            print(json.dumps(metrics, indent=2))
            current_epoch = metrics.get("current_epoch")
            
        if job_status == "completed":
            print(f"✅ Job {job_id} completed")
            break
        elif job_status == "failed":
            print(f"❌ Training failed at epoch {current_epoch} (Job {job_id})")
            return
        else:
            print(f"⏳ Waiting for epoch {current_epoch} to complete...")
            time.sleep(60)

    save_checkpoint(job_id)

    return job_id
   
def evaluate_model(job_id):
    with open(EVAL_DATASET_ZIP_PATH, "rb") as f:
        files = {
            "test_dataset": f
        }

        response = requests.post(
            f"{BASE_JOB_URL}/{job_id}/evaluate",
            headers=headers,
            files=files
        )
        
        try:
            response.raise_for_status()
            message = response.json()
            print("Evaluation status:")
            print(json.dumps(message, indent=2))
        except requests.HTTPError:
            print("Failed:", response.status_code)
            print(json.dumps(response.text, indent=2))
            
def poll_eval_status(job_id):
    while True:
        response = requests.get(f"{BASE_JOB_URL}/{job_id}", headers=headers)
        
        try:
            response.raise_for_status()
            status_data = response.json()

            status = status_data.get("status")
            print(f"Evaluation status: {status}")

            metrics = status_data.get("metrics")
            if metrics:
                print(f"Metrics:")
                print(json.dumps(metrics, indent=2))

            if status == "completed":
                print("Evaluation completed!")
                return status_data
            elif status == "failed":
                print("Evaluation failed.")
                return status_data

        except requests.HTTPError as e:
            print("Failed to get evaluation status:", e)
            print(response.text)
            break
                
        time.sleep(60)

In [None]:
job_id = start_finetune(TRAIN_DATASET_ZIP_PATH)


🔁 Starting job PythonErrorModel
✅ Job 2270 submitted.
Metrics:
{
  "cuda_available": true,
  "gpu_name": "NVIDIA GeForce RTX 3090"
}
⏳ Waiting for epoch None to complete...
Metrics:
{
  "cuda_available": true,
  "gpu_name": "NVIDIA GeForce RTX 3090",
  "current_epoch": 4.44,
  "current_loss": 1.2079,
  "last_update": 1748185527.436311,
  "training_step": 0
}
⏳ Waiting for epoch 4.44 to complete...
Metrics:
{
  "cuda_available": true,
  "gpu_name": "NVIDIA GeForce RTX 3090",
  "current_epoch": 5.0,
  "current_loss": 0,
  "last_update": 1748185535.1581683,
  "training_step": 0,
  "train_runtime": 65.6369,
  "train_samples_per_second": 16.454,
  "train_loss": 1.6621660903648094,
  "eval_loss": 1.1080495119094849,
  "perplexity": 3.0284456827363786,
  "training_time": "0:01:07.983891",
  "base_model": "gpt2",
  "device": "cuda",
  "num_examples": 216,
  "num_epochs": 5,
  "final_loss": 1.6621660903648094,
  "lora_rank": 8,
  "lora_alpha": 32,
  "status": "completed"
}
✅ Job 2270 completed

In [38]:
evaluate_model(job_id)

Evaluation status:
{
  "message": "Evaluation started for job 2270"
}


In [60]:
status_data = poll_eval_status(job_id)

Evaluation status: completed
Metrics:
{
  "cuda_available": true,
  "gpu_name": "NVIDIA GeForce RTX 3090",
  "current_epoch": 5.0,
  "current_loss": 0,
  "last_update": 1748185535.1581683,
  "training_step": 0,
  "train_runtime": 65.6369,
  "train_samples_per_second": 16.454,
  "train_loss": 1.6621660903648094,
  "eval_loss": 1.1080495119094849,
  "perplexity": 3.0284456827363786,
  "training_time": "0:01:07.983891",
  "base_model": "gpt2",
  "device": "cuda",
  "num_examples": 216,
  "num_epochs": 5,
  "final_loss": 1.6621660903648094,
  "lora_rank": 8,
  "lora_alpha": 32,
  "status": "completed",
  "evaluation_cuda_available": true,
  "evaluation_gpu_name": "NVIDIA GeForce RTX 3090",
  "evaluation_metrics": {
    "num_samples": 50,
    "avg_completion_length": 243.2,
    "min_completion_length": 125,
    "max_completion_length": 321,
    "avg_tokens": 98.36,
    "min_tokens": 54,
    "max_tokens": 100,
    "avg_bleu": 0.005669194234508521,
    "min_bleu": 7.834790622988904e-05,
    "

In [40]:
output_path = Path("data/eval_results.zip")
eval_extract_to = Path("data/eval_results/")

response = requests.get(
    f"{BASE_JOB_URL}/{job_id}/evaluation/download",
    headers=headers,
    stream=True)
response.raise_for_status()

with open(output_path, "wb") as f:
    for chunk in response.iter_content(chunk_size=8192):
        if chunk:
            f.write(chunk)

print(f"Downloaded evaluation results to: {output_path}")

Downloaded evaluation results to: data\eval_results.zip


In [41]:
import zipfile

eval_extract_to.mkdir(parents=True, exist_ok=True)

with zipfile.ZipFile(output_path, 'r') as zf:
    zf.extractall(eval_extract_to)

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# Load base model and tokenizer
base_model = AutoModelForCausalLM.from_pretrained("gpt2")
tokenizer = AutoTokenizer.from_pretrained("gpt2")

# Fix missing pad token for GPT-2
tokenizer.pad_token = tokenizer.eos_token
base_model.config.pad_token_id = tokenizer.eos_token_id

# Load LoRA-adapted model
model = PeftModel.from_pretrained(base_model, f"{job_id}_lora_checkpoint/")
model.config.pad_token_id = tokenizer.eos_token_id

# Input prompt
input_text = """The following Python code causes errors. Please explain why it happens and how to fix it.
python\n# Code snippet\nprint(\"The result is \" + 5)\n\n# Error message\nTypeError: can only concatenate str (not \"int\") to str\n```",
"""

# Tokenize
inputs = tokenizer(input_text, return_tensors="pt", padding=True)
input_ids = inputs["input_ids"]
attention_mask = inputs["attention_mask"]

# Generate outputs
tuned_outputs = model.generate(input_ids, attention_mask=attention_mask, max_length=100)
outputs = base_model.generate(input_ids, attention_mask=attention_mask, max_length=100)

# Decode
tuned_result = tokenizer.decode(tuned_outputs[0], skip_special_tokens=True)
result = tokenizer.decode(outputs[0], skip_special_tokens=True)

# Compare
print("Base Model Output:\n", result)
print("\nLoRA Fine-Tuned Model Output:\n", tuned_result)


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Base Model Output:
 The following Python code causes errors. Please explain why it happens and how to fix it.```python
# Code snippet
result = 1 / 0

# Error message
ZeroDivisionError: division by zero
```

The code below is a Python function that returns a number divisible by zero. The code below returns the number divisible by zero:
```python
# Code snippet
result = 1 / 0

# Error message
ZeroDivision

LoRA Fine-Tuned Model Output:
 The following Python code causes errors. Please explain why it happens and how to fix it.```python
# Code snippet
result = 1 / 0

# Error message
ZeroDivisionError: division by zero
```

The code below is a Python function that returns a number divisible by zero. The code below returns the number divisible by zero:
```python
# Code snippet
result = 1 / 0

# Error message
ZeroDivision


In [34]:
print(f"Prompt: {input_text}\nBase model response: {result}\nTuned model response: {tuned_result}")

Prompt: Explain and fix this Python error: ```python
# Code snippet
result = 1 / 0

# Error message
ZeroDivisionError: division by zero
```
Base model response: Explain and fix this Python error: ```python
# Code snippet
result = 1 / 0

# Error message
ZeroDivisionError: division by zero
```

The Python error message is a Python exception that occurs when a division is attempted. The Python exception is thrown when a division is attempted, and the result of the division is undefined.

The Python error message is thrown when a division is attempted, and the result of the division is undefined.

Tuned model response: Explain and fix this Python error: ```python
# Code snippet
result = 1 / 0

# Error message
ZeroDivisionError: division by zero
```

The Python error message is a Python exception that occurs when a division is attempted. The Python exception is thrown when a division is attempted, and the result of the division is undefined.

The Python error message is thrown when a divisi

In [58]:
import torch
base_model = AutoModelForCausalLM.from_pretrained("gpt2")

for name, param in base_model.named_parameters():
    print(name, torch.sum(param).item())

transformer.wte.weight 14659.9580078125
transformer.wpe.weight -533.7647094726562
transformer.h.0.ln_1.weight 138.5156707763672
transformer.h.0.ln_1.bias -5.0637712478637695
transformer.h.0.attn.c_attn.weight 94.44642639160156
transformer.h.0.attn.c_attn.bias -1.6296703815460205
transformer.h.0.attn.c_proj.weight -95.18525695800781
transformer.h.0.attn.c_proj.bias -5.307050704956055
transformer.h.0.ln_2.weight 666.4932250976562
transformer.h.0.ln_2.bias 7.068312168121338
transformer.h.0.mlp.c_fc.weight -1766.00537109375
transformer.h.0.mlp.c_fc.bias -286.1942138671875
transformer.h.0.mlp.c_proj.weight 18.895719528198242
transformer.h.0.mlp.c_proj.bias -0.324904203414917
transformer.h.1.ln_1.weight 171.1417999267578
transformer.h.1.ln_1.bias -3.858037233352661
transformer.h.1.attn.c_attn.weight 49.700870513916016
transformer.h.1.attn.c_attn.bias 1.8437765836715698
transformer.h.1.attn.c_proj.weight -48.81307601928711
transformer.h.1.attn.c_proj.bias -0.8238444328308105
transformer.h.1.l