In [None]:
%%capture
!pip install unsloth
!pip install datasets
!pip install datasets transformers
# Also get the latest nightly Unsloth!
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git

In [None]:
import pandas as pd

# Read the CSV that holds the notebook's training data into a DataFrame
dataset_path = "dataset.csv"
dataset = pd.read_csv(dataset_path)

# Print a header line followed by the first rows as a quick sanity check
print("Dataset Loaded:", dataset.head(), sep="\n")

In [None]:
from datasets import Dataset

# Wrap the DataFrame in a Hugging Face Dataset so it can be mapped and split later
hf_dataset = Dataset.from_pandas(dataset)

# Announce the conversion and show the dataset summary underneath
print("Dataset successfully converted to Hugging Face Dataset format:", hf_dataset, sep="\n")

In [None]:
from transformers import AutoTokenizer

# Initialize the tokenizer for the base checkpoint
tokenizer = AutoTokenizer.from_pretrained("unsloth/Llama-3.2-1B-Instruct")

# The preprocessing step pads every example, which requires a pad token.
# Fall back to the EOS token if this tokenizer does not define one.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Define the preprocessing function
def formatting_prompts_func(examples, tok=None, max_length=512):
    """Tokenize a batch of prompt/completion pairs for causal-LM fine-tuning.

    A causal language model expects ``labels`` to be aligned token-for-token
    with ``input_ids``. Each example is therefore encoded as one sequence,
    ``prompt tokens + completion tokens + EOS``, and the labels repeat that
    sequence with the prompt and padding positions set to -100 so that only
    the completion contributes to the loss.

    Args:
        examples: Batch mapping with the list-valued columns "Input"
            (prompts) and "Output" (target completions).
        tok: Tokenizer to use. Defaults to the notebook-level ``tokenizer``.
        max_length: Fixed length every sequence is truncated/padded to.

    Returns:
        Dict with "input_ids", "attention_mask" and "labels", each a list of
        ``max_length``-long integer lists (one per example).
    """
    tok = tokenizer if tok is None else tok
    # Padding positions are masked out below, so reusing EOS as filler is safe.
    pad_id = tok.pad_token_id if tok.pad_token_id is not None else tok.eos_token_id

    batch = {"input_ids": [], "attention_mask": [], "labels": []}
    for prompt, completion in zip(examples["Input"], examples["Output"]):
        prompt_ids = list(tok(prompt)["input_ids"])
        # No special tokens here: the completion continues the prompt sequence.
        completion_ids = list(tok(completion, add_special_tokens=False)["input_ids"])
        completion_ids.append(tok.eos_token_id)  # teach the model where to stop

        ids = (prompt_ids + completion_ids)[:max_length]
        labels = ([-100] * len(prompt_ids) + completion_ids)[:max_length]
        n_pad = max_length - len(ids)

        batch["input_ids"].append(ids + [pad_id] * n_pad)
        batch["attention_mask"].append([1] * len(ids) + [0] * n_pad)
        batch["labels"].append(labels + [-100] * n_pad)
    return batch

# Run the preprocessing function over the dataset, one batch at a time
processed_dataset = hf_dataset.map(function=formatting_prompts_func, batched=True)

# Announce completion and show the resulting dataset summary underneath
print("Dataset successfully tokenized and preprocessed:", processed_dataset, sep="\n")

In [None]:
# Split into training and validation datasets.
# A fixed seed keeps the split identical across kernel restarts, and the two
# splits are looked up by key instead of relying on the order of the mapping.
dataset_splits = processed_dataset.train_test_split(test_size=0.1, seed=42)
train_dataset = dataset_splits["train"]
val_dataset = dataset_splits["test"]

print(f"Training Set: {len(train_dataset)} examples")
print(f"Validation Set: {len(val_dataset)} examples")

In [None]:
from unsloth import FastLanguageModel

import os

# Load the base model
model_name = "unsloth/Llama-3.2-1B-Instruct"
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=512,
    load_in_4bit=True,        # Enable 4-bit quantization for efficiency
    dtype=None,               # Auto-detect dtype
    # Never hardcode credentials in the notebook: read the token from the
    # HF_TOKEN environment variable. When it is unset this passes None instead
    # of a placeholder string.
    token=os.environ.get("HF_TOKEN"),
)

# Configure the model with LoRA adapters on the attention projections
model = FastLanguageModel.get_peft_model(
    model,
    r=16,  # LoRA rank
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    lora_alpha=16,
    lora_dropout=0.1,
    use_gradient_checkpointing=True,  # Enable gradient checkpointing
)

print("Model configured with LoRA for fine-tuning.")

In [None]:
from transformers import TrainingArguments

from dataclasses import fields

# Define training arguments.
# Newer transformers releases call the evaluation schedule `eval_strategy`,
# older ones only know `evaluation_strategy`. Pick whichever name the installed
# TrainingArguments actually defines so this cell works on both.
_known_arguments = {field.name for field in fields(TrainingArguments)}
_eval_strategy_name = (
    "eval_strategy" if "eval_strategy" in _known_arguments else "evaluation_strategy"
)

training_args = TrainingArguments(
    output_dir="./results",               # Directory for saving checkpoints
    learning_rate=2e-5,                   # Learning rate
    per_device_train_batch_size=8,        # Training batch size
    per_device_eval_batch_size=8,         # Evaluation batch size
    num_train_epochs=3,                   # Number of epochs
    weight_decay=0.01,                    # Weight decay for regularization
    save_steps=500,                       # Save checkpoint every 500 steps
    save_total_limit=2,                   # Keep only 2 checkpoints
    logging_dir="./logs",                 # Directory for logs
    logging_steps=10,                     # Log every 10 steps
    **{_eval_strategy_name: "epoch"},     # Evaluate after each epoch
)

print("Training arguments configured.")

In [None]:
from transformers import Trainer

# Gather everything the Trainer needs, then construct it in one call
trainer_kwargs = dict(
    model=model,                  # LoRA-wrapped model
    args=training_args,           # hyperparameters defined above
    train_dataset=train_dataset,  # training split
    eval_dataset=val_dataset,     # validation split
    tokenizer=tokenizer,          # tokenizer paired with the model
)
trainer = Trainer(**trainer_kwargs)

# Run the fine-tuning loop
trainer.train()

print("Fine-tuning completed.")

In [None]:
# Prepare the model for inference.
# Unsloth's documented usage calls for_inference for its in-place effect, so
# only rebind `model` when the call actually returns something; this avoids
# replacing the fine-tuned model with None.
_inference_model = FastLanguageModel.for_inference(model)
if _inference_model is not None:
    model = _inference_model

# Define a function to generate Matplotlib code
def generate_code(query):
    """Generate a completion for ``query`` with the notebook-level model.

    Uses the global ``model`` and ``tokenizer``.

    Args:
        query: Natural-language request to send to the model.

    Returns:
        The decoded text of the newly generated tokens only (special tokens
        removed). The prompt is not echoed back.
    """
    inputs = tokenizer(query, return_tensors="pt", truncation=True, padding=True).to(model.device)
    outputs = model.generate(**inputs, max_length=512)
    # For decoder-only models generate() returns prompt + continuation;
    # drop the prompt tokens so only the model's answer is decoded.
    prompt_length = inputs["input_ids"].shape[-1]
    return tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

# Sample natural-language plotting requests to try out
test_queries = [
    "Show the distribution of expenses using Matplotlib. Columns: categories, expenses",
    "Visualize sales trends over the last quarter with Matplotlib. Use columns months, sales",
    "Compare revenues by region using Matplotlib. Columns: regions, revenues",
]

# Send each request to the model and show it next to the generated code
for query in test_queries:
    generated_code = generate_code(query)
    print(
        f"Query: {query}",
        f"Generated Matplotlib Code:\n{generated_code}\n",
        sep="\n",
    )

# Code Evaluation

In [None]:
# Define the Pretrained model
from unsloth import FastLanguageModel

import os

# Load the original pretrained model (no LoRA adapters) as the comparison baseline
pretrained_model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Llama-3.2-1B-Instruct",
    max_seq_length=512,
    load_in_4bit=True,
    dtype=None,
    # Never hardcode credentials in the notebook: read the token from the
    # HF_TOKEN environment variable. When it is unset this passes None instead
    # of a placeholder string.
    token=os.environ.get("HF_TOKEN"),
)

In [None]:
# Alias the model fine-tuned earlier in this notebook so the evaluation cells can
# refer to it by a name that mirrors `pretrained_model`.
fine_tuned_model = model

In [None]:
from unsloth import FastLanguageModel

# Prepare both models for inference.
# for_inference is used for its in-place effect (as in Unsloth's documented
# usage); keep the existing object whenever the call returns nothing, so that
# neither model variable can be replaced with None.
_prepared = FastLanguageModel.for_inference(pretrained_model)
pretrained_model = pretrained_model if _prepared is None else _prepared
_prepared = FastLanguageModel.for_inference(fine_tuned_model)
fine_tuned_model = fine_tuned_model if _prepared is None else _prepared

# Example queries
test_queries = [
    "Show the distribution of expenses using Matplotlib. Columns: categories, expenses",
    "Visualize sales trends over the last quarter with Matplotlib. Use columns months, sales",
    "Compare revenues by region using Matplotlib. Columns: regions, revenues",
]

# Function to generate code
def generate_code(model, tokenizer, query, max_length=512):
    """Generate a completion for ``query`` with the given model.

    Args:
        model: Model exposing ``device`` and ``generate``.
        tokenizer: Tokenizer used to encode the query and decode the result.
        query: Natural-language request to send to the model.
        max_length: Cap on the total sequence length (prompt plus generated
            tokens) passed to ``model.generate``.

    Returns:
        The decoded text of the newly generated tokens only (special tokens
        removed). The prompt is not echoed back, so model outputs can be
        compared and scored without the query text mixed in.
    """
    inputs = tokenizer(query, return_tensors="pt", truncation=True, padding=True).to(model.device)
    outputs = model.generate(**inputs, max_length=max_length)
    # For decoder-only models generate() returns prompt + continuation;
    # drop the prompt tokens so only the model's answer is decoded.
    prompt_length = inputs["input_ids"].shape[-1]
    return tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

# Collect, per query, the text produced by the baseline and by the fine-tuned model
results = []
for query in test_queries:
    pretrained_code = generate_code(pretrained_model, tokenizer, query)  # baseline output
    fine_tuned_code = generate_code(fine_tuned_model, tokenizer, query)  # fine-tuned output
    entry = {"Query": query}
    entry["Pretrained Output"] = pretrained_code
    entry["Fine-Tuned Output"] = fine_tuned_code
    results.append(entry)

# Show each query followed by both outputs
for result in results:
    print(
        f"Query: {result['Query']}",
        f"Pretrained Model Output:\n{result['Pretrained Output']}\n",
        f"Fine-Tuned Model Output:\n{result['Fine-Tuned Output']}\n",
        sep="\n",
    )


In [None]:
from unsloth import FastLanguageModel
import evaluate

# Prepare both models for inference.
# for_inference is used for its in-place effect (as in Unsloth's documented
# usage); keep the existing object whenever the call returns nothing.
_prepared = FastLanguageModel.for_inference(pretrained_model)
pretrained_model = pretrained_model if _prepared is None else _prepared
_prepared = FastLanguageModel.for_inference(fine_tuned_model)
fine_tuned_model = fine_tuned_model if _prepared is None else _prepared

# Queries and reference outputs come from the held-out validation split only.
# Scoring on the full dataset would include the rows the model was trained on
# and inflate the metrics.
test_queries = list(val_dataset["Input"])
reference_outputs = list(val_dataset["Output"])

# Function to extract only the Python code from the output
def extract_code(output):
    """Return the Python code contained in a model output.

    If the text contains a fenced block opened with "```python", the content
    of the first such block is returned. When the closing fence is missing
    (for example because generation was cut off by the length limit),
    everything after the opening fence is returned instead of raising.
    Text without a Python fence is returned as-is.

    Args:
        output: Raw text produced by the model (or a reference answer).

    Returns:
        The extracted code with surrounding whitespace stripped.
    """
    start_marker = "```python"
    end_marker = "```"

    fence_pos = output.find(start_marker)
    if fence_pos == -1:
        return output.strip()

    start_idx = fence_pos + len(start_marker)
    end_idx = output.find(end_marker, start_idx)
    if end_idx == -1:
        # Unterminated fence: keep the remainder rather than failing.
        end_idx = len(output)
    return output[start_idx:end_idx].strip()

# Modified function to generate code and extract only the Python code
def generate_code_with_extraction(model, tokenizer, query):
    """Generate text for ``query`` and return only the Python code found in it."""
    return extract_code(generate_code(model, tokenizer, query))

# BLEU Score Evaluation
def calculate_bleu_score(fine_tuned_model, tokenizer, test_queries, reference_outputs):
    """Compute the corpus BLEU score of generated code against reference code.

    Args:
        fine_tuned_model: Model used to generate a prediction for each query.
        tokenizer: Tokenizer paired with the model.
        test_queries: Iterable of query strings.
        reference_outputs: Iterable of reference answers, one per query.

    Returns:
        The "bleu" value reported by the `evaluate` BLEU metric.
    """
    # One extracted code prediction per query
    predictions = []
    for query in test_queries:
        predictions.append(generate_code_with_extraction(fine_tuned_model, tokenizer, query))

    # The metric takes a list of candidate references per prediction;
    # each prediction here has exactly one reference.
    references = []
    for ref in reference_outputs:
        references.append([extract_code(ref)])

    bleu = evaluate.load("bleu")
    scores = bleu.compute(predictions=predictions, references=references)
    return scores["bleu"]

# Score the fine-tuned model's generations against the reference outputs
bleu_score = calculate_bleu_score(
    fine_tuned_model=fine_tuned_model,
    tokenizer=tokenizer,
    test_queries=test_queries,
    reference_outputs=reference_outputs,
)
print(f"BLEU Score: {bleu_score}")

# Executability Testing
def test_executability(code):
    try:
        exec(code, {"__builtins__": {}})
        return True
    except Exception as e:
        return False

# Per-model lists of pass/fail flags, also exposed through one results mapping
pretrained_flags, fine_tuned_flags = [], []
executability_results = {
    "Pretrained": pretrained_flags,
    "Fine-Tuned": fine_tuned_flags,
}

# Generate code with each model for every query, then try to execute it
for query in test_queries:
    pretrained_code = generate_code_with_extraction(pretrained_model, tokenizer, query)
    fine_tuned_code = generate_code_with_extraction(fine_tuned_model, tokenizer, query)
    pretrained_flags.append(test_executability(pretrained_code))
    fine_tuned_flags.append(test_executability(fine_tuned_code))

# Share of snippets that executed without error, as a percentage
exec_rate_pretrained = sum(pretrained_flags) / len(pretrained_flags) * 100
exec_rate_fine_tuned = sum(fine_tuned_flags) / len(fine_tuned_flags) * 100

print(f"Pretrained Model Executability Rate: {exec_rate_pretrained:.2f}%")
print(f"Fine-Tuned Model Executability Rate: {exec_rate_fine_tuned:.2f}%")