In [1]:
from datasets import Dataset
import json

with open("qa_pairs.json") as f:
    qa_data = json.load(f)

dataset = Dataset.from_list([{
    "text": f"### Instruction:\n{q['question']}\n\n### Response:\n{q['answer']}"
} for q in qa_data])

In [2]:
# Install necessary libraries
!pip install -q transformers peft bitsandbytes accelerate --quiet

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.0/67.0 MB[0m [31m12.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m63.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m58.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m38.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m13.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [3]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=bnb_config, device_map={"":0})
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token

config.json:   0%|          | 0.00/608 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.29k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/551 [00:00<?, ?B/s]

In [4]:
from peft import LoraConfig

lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    bias="none",
    lora_dropout=0.05,
    task_type="CAUSAL_LM",
)

In [7]:
# The dataset `dataset` was loaded in a previous cell (sJqKGtN0-wly)
# from datasets import Dataset
# import json

# with open("qa_pairs.json") as f:
#     qa_data = json.load(f)

# dataset = Dataset.from_list([{
#     "text": f"### Instruction:\n{q['question']}\n\n### Response:\n{q['answer']}"
# } for q in qa_data])


def formatting_func(example):
    return {"text": example['text']}

dataset = dataset.map(formatting_func)

print(dataset[0])

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

{'text': '### Instruction:\nUseful when you forget to use sudo for a command.  "!!" grabs the last run command.\n\n### Response:\nsudo !!'}


In [9]:
from transformers import TrainingArguments

training_arguments = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=4,
    gradient_accumulation_steps=1,
    optim="paged_adamw_32bit",
    save_steps=100,
    logging_steps=100,
    learning_rate=2e-4,
    fp16=False,
    bf16=True,
    max_grad_norm=0.3,
    max_steps=-1,
    num_train_epochs=1,
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type="cosine",
    report_to="tensorboard",
)

In [11]:
!pip install -q trl

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/366.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m [32m358.4/366.4 kB[0m [31m16.2 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m366.4/366.4 kB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/491.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.5/491.5 kB[0m [31m26.8 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/193.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m193.6/193.6 kB[0m [31m15.7 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the fol

In [15]:
from trl import SFTTrainer

trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=lora_config,
    args=training_arguments,
)

Converting train dataset to ChatML:   0%|          | 0/200 [00:00<?, ? examples/s]

Adding EOS to train dataset:   0%|          | 0/200 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/200 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/200 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [16]:
trainer.train()

Step,Training Loss


TrainOutput(global_step=50, training_loss=2.3372621154785156, metrics={'train_runtime': 59.784, 'train_samples_per_second': 3.345, 'train_steps_per_second': 0.836, 'total_flos': 150762245996544.0, 'train_loss': 2.3372621154785156})

In [17]:
# Save the fine-tuned model and the LoRA adapters
trainer.model.save_pretrained("tinyllama_finetuned")
tokenizer.save_pretrained("tinyllama_finetuned")

('tinyllama_finetuned/tokenizer_config.json',
 'tinyllama_finetuned/special_tokens_map.json',
 'tinyllama_finetuned/chat_template.jinja',
 'tinyllama_finetuned/tokenizer.model',
 'tinyllama_finetuned/added_tokens.json',
 'tinyllama_finetuned/tokenizer.json')

In [19]:
from peft import AutoPeftModelForCausalLM

# Load the fine-tuned model
finetuned_model = AutoPeftModelForCausalLM.from_pretrained(
    "tinyllama_finetuned",
    torch_dtype=torch.float16,
    device_map="auto",
)

# Merge LoRA adapters with the base model
merged_model = finetuned_model.merge_and_unload()

# Load the tokenizer
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained("tinyllama_finetuned")

In [20]:
# Test the fine-tuned model
input_text = "### Instruction:\nHow do I list all files in a directory?"
inputs = tokenizer(input_text, return_tensors="pt").to("cuda")

outputs = merged_model.generate(**inputs, max_new_tokens=50)
response = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(response)

### Instruction:
How do I list all files in a directory?

### Response:
ls -l


In [21]:
from google.colab import files
import os

# Define the directory where the fine-tuned model was saved
model_dir = "tinyllama_finetuned"

# Get a list of all files in the directory
file_list = os.listdir(model_dir)

# Download each file
for file_name in file_list:
    file_path = os.path.join(model_dir, file_name)
    print(f"Downloading {file_name}...")
    files.download(file_path)

print("Download complete.")

Downloading tokenizer_config.json...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloading adapter_model.safetensors...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloading special_tokens_map.json...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloading README.md...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloading tokenizer.model...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloading chat_template.jinja...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloading tokenizer.json...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloading adapter_config.json...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Download complete.


## Testing the model

In [23]:
import argparse
import json
import os
import subprocess
from datetime import datetime
import sys

# Set up argument parsing
parser = argparse.ArgumentParser(description='CLI Agent that generates and executes a plan based on a natural language instruction.')
parser.add_argument('instruction', type=str, help='The natural language instruction for the agent.')

# In a notebook environment, we pass arguments explicitly for testing
# In a real CLI setting, you would just use args = parser.parse_args()
if 'ipykernel' in sys.modules:
    # For testing in a notebook, provide a sample instruction
    args = parser.parse_args(['List all files in the current directory.'])
else:
    # For command line execution
    args = parser.parse_args()

instruction = args.instruction

# Create logs directory if it doesn't exist
if not os.path.exists('logs'):
    os.makedirs('logs')

# Define log file
log_file = 'logs/trace.jsonl'

# Log the initial instruction
log_entry = {
    "timestamp": datetime.now().isoformat(),
    "step": "Instruction received",
    "details": instruction
}
with open(log_file, 'a') as f:
    json.dump(log_entry, f)
    f.write('\n')

print(f"Instruction received: {instruction}")

# Placeholder for model loading and plan generation
# This will be implemented in the next steps
print("Loading model and generating plan...")

# Placeholder for dry-run execution
# This will be implemented in a later step
# if plan_first_step_is_command:
#     print(f"Executing in dry-run mode: echo {plan_first_step}")
#     subprocess.run(['echo', plan_first_step])

# Log the completion (placeholder for now)
# log_entry = {
#     "timestamp": datetime.now().isoformat(),
#     "step": "Process completed",
#     "details": "Plan generated and processed."
# }
# with open(log_file, 'a') as f:
#     json.dump(log_entry, f)
#     f.write('\n')

print("Script structure set up.")

Instruction received: List all files in the current directory.
Loading model and generating plan...
Script structure set up.


In [24]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import AutoPeftModelForCausalLM

# Define the path to the fine-tuned model
model_path = "./tinyllama_finetuned"

# Load the fine-tuned model
print(f"Loading model from {model_path}...")
model = AutoPeftModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.float16,
    device_map="auto",
)

# Merge LoRA adapters with the base model
merged_model = model.merge_and_unload()

# Load the tokenizer
print(f"Loading tokenizer from {model_path}...")
tokenizer = AutoTokenizer.from_pretrained(model_path)
tokenizer.pad_token = tokenizer.eos_token

print("Model and tokenizer loaded.")

Loading model from ./tinyllama_finetuned...
Loading tokenizer from ./tinyllama_finetuned...
Model and tokenizer loaded.


In [25]:
def generate_plan(instruction, model, tokenizer):
    """Generates a step-by-step plan using the fine-tuned model."""
    prompt = f"### Instruction:\nGenerate a step-by-step plan for the following task: {instruction}\n\n### Response:\n"
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    outputs = model.generate(**inputs, max_new_tokens=200, num_return_sequences=1)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Extract the plan from the response
    # Assuming the model's response format is consistent
    plan_start = response.find("### Response:\n")
    if plan_start != -1:
        plan = response[plan_start + len("### Response:\n"):].strip()
        # Split the plan into steps (assuming each step is on a new line or numbered)
        steps = [step.strip() for step in plan.split('\n') if step.strip()]
        return steps
    else:
        return ["Could not generate a plan."]

# Example usage (for testing within the notebook)
# instruction = "List all files in the current directory."
# plan = generate_plan(instruction, merged_model, tokenizer)
# print("\nGenerated Plan:")
# for i, step in enumerate(plan):
#     print(f"{i+1}. {step}")

In [26]:
import re

def is_shell_command(step):
    """Checks if a plan step looks like a shell command."""
    # This is a simple check and can be made more sophisticated
    # We look for common command-line patterns at the beginning of the step
    shell_command_patterns = [
        r"^(sudo\s+)?\w+",  # Starts with an optional sudo followed by a word (command)
        r"^!",             # Starts with ! (Colab magic command for shell)
        r"^pip\s+",        # Starts with pip
        r"^apt-get\s+",    # Starts with apt-get
        r"^git\s+",        # Starts with git
        r"^ls",            # Starts with ls
        r"^cd",            # Starts with cd
        r"^mkdir",         # Starts with mkdir
        r"^rm",            # Starts with rm
        r"^echo",          # Starts with echo
    ]
    for pattern in shell_command_patterns:
        if re.match(pattern, step):
            return True
    return False

def dry_run_command(command):
    """Executes a command in dry-run mode using echo."""
    print(f"Executing in dry-run mode: {command}")
    subprocess.run(['echo', command])

# Example usage (for testing within the notebook)
# sample_plan = ["ls -l", "Analyze the output", "Summarize findings"]
# if sample_plan and is_shell_command(sample_plan[0]):
#     dry_run_command(sample_plan[0])
# else:
#     print("First step is not a recognized shell command or plan is empty.")

# sample_plan_2 = ["Analyze the data", "Generate a report"]
# if sample_plan_2 and is_shell_command(sample_plan_2[0]):
#     dry_run_command(sample_plan_2[0])
# else:
#     print("First step is not a recognized shell command or plan is empty.")

In [27]:
def log_step(step_name, details):
    """Logs a step with timestamp and details to the trace file."""
    log_entry = {
        "timestamp": datetime.now().isoformat(),
        "step": step_name,
        "details": details
    }
    with open(log_file, 'a') as f:
        json.dump(log_entry, f)
        f.write('\n')

# Example usage (for testing within the notebook)
# log_step("Plan generated", ["Step 1", "Step 2"])
# log_step("Dry run executed", "echo ls -l")

In [28]:
# This cell integrates the previously defined components into the main script flow.
# It assumes the code from previous cells (argument parsing, model loading,
# generate_plan, is_shell_command, dry_run_command, log_step) is available.

# --- Main execution flow ---

# 1. Instruction is already received and logged in the first cell.

# 2. Load model and tokenizer (code from the second cell is needed here).
#    This part should be included directly in the script or called from here.
#    For the final agent.py file, you would place the model loading code here.
#    print("Loading model and tokenizer...")
#    # Include model loading code here
#    # model, tokenizer = load_model_and_tokenizer(...) # Placeholder

# 3. Generate plan using the instruction and the loaded model.
print("Generating plan...")
plan = generate_plan(instruction, merged_model, tokenizer) # Using merged_model from previous cell

# Log the generated plan
log_step("Plan generated", plan)

print("\nGenerated Plan:")
for i, step in enumerate(plan):
    print(f"{i+1}. {step}")

# 4. Check if the first step is a shell command and perform dry-run if it is.
if plan and is_shell_command(plan[0]):
    log_step("Checking first step for shell command", plan[0])
    dry_run_command(plan[0])
    log_step("Dry run executed", plan[0])
else:
    print("\nFirst step is not a recognized shell command or plan is empty. No dry-run executed.")
    if plan:
        log_step("First step not a shell command", plan[0])
    else:
        log_step("Plan is empty", "No plan generated")


# 5. Log the completion.
log_step("Process completed", "Plan generated and processed.")

print("\nAgent process completed. Check logs/trace.jsonl for details.")

Generating plan...

Generated Plan:
1. find . -type f -print0 | xargs -0
Executing in dry-run mode: find . -type f -print0 | xargs -0

Agent process completed. Check logs/trace.jsonl for details.
