In [1]:
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install --no-deps xformers trl peft accelerate bitsandbytes

Collecting unsloth@ git+https://github.com/unslothai/unsloth.git (from unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git)
  Cloning https://github.com/unslothai/unsloth.git to /tmp/pip-install-16b8n02g/unsloth_e64ea88514454f28837cca6474876dba
  Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth.git /tmp/pip-install-16b8n02g/unsloth_e64ea88514454f28837cca6474876dba
  Resolved https://github.com/unslothai/unsloth.git to commit 0fb14e6a76f3695d01314d7b3faf7252141d9f56
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting unsloth_zoo>=2025.11.5 (from unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git)
  Downloading unsloth_zoo-2025.11.5-py3-none-any.whl.metadata (32 kB)
Collecting tyro (from unsloth@ git+https://github.com/unslothai/unsloth.gi

In [2]:
from unsloth import FastLanguageModel
import torch

model, tokenizer = FastLanguageModel.from_pretrained(
    "unsloth/Llama-3.2-3B-Instruct",
    max_seq_length = 8192,
    dtype = None,
    load_in_4bit = True,
)

model = FastLanguageModel.get_peft_model(
    model,
    r = 64,
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj"],
    lora_alpha = 16,
    lora_dropout = 0,
    bias = "none",
    use_gradient_checkpointing = "unsloth",
)


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2025.11.4: Fast Llama patching. Transformers: 4.57.2.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.9.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.5.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.33.post1. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/2.35G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/234 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/454 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

chat_template.jinja: 0.00B [00:00, ?B/s]

Unsloth 2025.11.4 patched 28 layers with 28 QKV layers, 28 O layers and 28 MLP layers.


In [3]:
from datasets import load_dataset, Dataset


raw_dataset = load_dataset("ise-uiuc/Magicoder-OSS-Instruct-75K", split="train")

data_list = []
for item in raw_dataset:
    if item["lang"] != "python":
        continue
    problem = item["problem"]
    solution = item["solution"].strip()
    if len(solution) < 300 or len(solution) > 15000:
        continue
    formatted = f"<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n{problem}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n```python\n{solution}\n```<|eot_id|>"
    data_list.append({"text": formatted})
    if len(data_list) >= 10000:
        break

dataset = Dataset.from_list(data_list)
print(f"{len(dataset)} projects was downloaded  ")

README.md:   0%|          | 0.00/314 [00:00<?, ?B/s]

data-oss_instruct-decontaminated.jsonl:   0%|          | 0.00/203M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/75197 [00:00<?, ? examples/s]

10000 projects was downloaded  


In [4]:
from trl import SFTTrainer
from transformers import TrainingArguments

print("fine tuning started")

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = 8192,
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 8,
        warmup_steps = 20,
        max_steps = 400,
        learning_rate = 2e-4,
        fp16 = True,
        bf16 = False,
        logging_steps = 10,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "llama3.2-project-gen-final",
        report_to = "none",
        save_strategy = "steps",
        save_steps = 200,
    ),
)

trainer.train()
print("done")

fine tuning started


Unsloth: Tokenizing ["text"] (num_proc=6):   0%|          | 0/10000 [00:00<?, ? examples/s]

The model is already on multiple devices. Skipping the move to device specified in `args`.
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 10,000 | Num Epochs = 1 | Total steps = 400
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 8
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 8 x 1) = 16
 "-____-"     Trainable parameters = 97,255,424 of 3,310,005,248 (2.94% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
10,1.0566
20,0.8725
30,0.774
40,0.674
50,0.7062
60,0.6419
70,0.6512
80,0.6206
90,0.6106
100,0.6032


done


In [19]:
## Cell 5: Save the Merged Model in 4-bit (FORCED)

# Choose a clear folder name for GitHub/UI integration
MODEL_DIR = "python-coder-llama3-model-4bit-merged"

# Merge LoRA weights into the base model and save it as a complete 4-bit model.
# We use "merged_4bit_forced" to confirm the save despite Unsloth's warning about accuracy loss.
model.save_pretrained_merged(
    MODEL_DIR,
    tokenizer,
    save_method = "merged_4bit_forced" # This forces the 4-bit merge
)

# Save the tokenizer (essential for correct loading)
tokenizer.save_pretrained(MODEL_DIR)

print(f"INFO: The 4-bit merged model has been successfully saved to the '{MODEL_DIR}' folder.")
print("✅ The model file size will be approximately 3.2 GB.")


INFO: The 4-bit merged model has been successfully saved to the 'python-coder-llama3-model-4bit-merged' folder.
✅ The model file size will be approximately 3.2 GB.


In [21]:
## Cell 6: Test the Fine-Tuned Model for Project Generation

# The code generation request now asks for a multi-file project skeleton.
# We specify the files and force the output pattern (FILE: <filename>).
problem_prompt = """
Generate a simple, structured Python project skeleton for a command-line tool
that summarizes text using a transformer model (like Hugging Face's pipeline).
The project should include the following files:
1. main.py (to handle arguments and run the summarizer)
2. summarizer/processor.py (to contain the summarization logic and pipeline setup)
3. requirements.txt (with the 'transformers' and 'torch' dependencies)

Structure the output by starting each file with 'FILE: <filename>'.
"""

# Prepare the input in the fine-tuning chat format (Llama 3 Instruct)
messages = [
    {"role": "user", "content": problem_prompt}
]

# Convert messages to the required model format
input_prompt = tokenizer.apply_chat_template(
    messages,
    tokenize = False,
    add_generation_prompt = True
)

# Convert the text prompt to IDs (tensors)
inputs = tokenizer(
    [input_prompt],
    return_tensors = "pt"
).to("cuda")

print("\n--- Starting Project Skeleton Generation ---")

# Generate the output from the model (Increased max_new_tokens for large output)
outputs = model.generate(**inputs, max_new_tokens = 1200, use_cache = True)
decoded_output = tokenizer.batch_decode(outputs)
assistant_response = decoded_output[0]

# Clean and extract the full response
start_tag = "<|start_header_id|>assistant<|end_header_id|>"
end_tag = "<|eot_id|>"

if start_tag in assistant_response:
    start_index = assistant_response.find(start_tag) + len(start_tag)
    end_index = assistant_response.rfind(end_tag)
    full_response = assistant_response[start_index:end_index].strip()

    # Process the output to extract individual files (Robust Logic for UI Backend)
    print("🤖 Model Response (Full Project Structure):\n")
    print("=" * 60)

    file_sections = full_response.split("FILE: ")
    generated_files = {}

    for section in file_sections:
        section = section.strip()
        if section == "":
            continue

        parts = section.split('\n', 1)

        # Check if the section is too short or lacks content, skipping introductory filler text
        if len(parts) < 2:
             if ' ' in parts[0].strip() and len(parts[0].strip()) > 30:
                 print(f"⚠️ SKIPPED: Introductory text detected: '{parts[0].strip()[:50]}...'")
                 continue
             continue

        filename = parts[0].strip()
        file_content = parts[1].strip()

        # Final sanity check: Ensure filename doesn't contain code block markers or weird chars
        if filename.startswith('```') or filename.endswith('```') or filename.startswith('<|'):
             print(f"⚠️ SKIPPED: Invalid filename format: {filename}")
             continue


        generated_files[filename] = file_content

        # Print confirmation and a snippet of the content
        print(f"✅ GENERATED FILE: **{filename}**")
        print(f"   - Content Length: {len(file_content)} characters")
        print("-" * 60)

        # Display the content of the main file or configuration file
        if filename.lower().endswith(("main.py", "requirements.txt", "processor.py")):
             snippet = '\n'.join(file_content.split('\n')[:15])
             print("   [CONTENT SNIPPET]:")
             print("   " + snippet.replace('\n', '\n   ') + "\n   ...")
             print("-" * 60)


else:
    print("❌ Error: Could not find a structured response tag.")
    print(assistant_response)

print("--- Project Generation Simulation Finished ---")


--- Starting Project Skeleton Generation ---
🤖 Model Response (Full Project Structure):

✅ GENERATED FILE: ****Project Structure:****
   - Content Length: 110 characters
------------------------------------------------------------
✅ GENERATED FILE: **main.py**
   - Content Length: 924 characters
------------------------------------------------------------
   [CONTENT SNIPPET]:
   import argparse
   from summarizer.processor import SummarizerProcessor
   
   def main():
       # Set up command-line arguments
       parser = argparse.ArgumentParser(description='Text Summarizer')
       parser.add_argument('--input_file', type=str, required=True, help='Path to the input file')
       parser.add_argument('--output_file', type=str, required=True, help='Path to the output file')
       parser.add_argument('--model_name', type=str, default='distilbert-base-uncased', help='Model name')
       parser.add_argument('--max_length', type=int, default=50, help='Maximum length of the summary')
   
 