<a href="https://colab.research.google.com/github/danishnaseer00/codeminx/blob/main/Fine_Tuning_with_Unsloth_(2).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
%%capture
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install transformers datasets peft accelerate bitsandbytes

In [2]:
from unsloth import FastLanguageModel
from transformers import Trainer, TrainingArguments, DataCollatorForLanguageModeling , EarlyStoppingCallback
from datasets import load_dataset
import torch
import os
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [3]:
import huggingface_hub
print(huggingface_hub.utils.get_session().get("https://huggingface.co").status_code)

200


In [4]:
from huggingface_hub import login
    from google.colab import userdata

    HF_TOKEN = userdata.get('HF-TOKEN')

    if HF_TOKEN:
        login(token=HF_TOKEN)
        print("Successfully logged in to Hugging Face!")
    else:
        print("Hugging Face token not found in Colab Secrets. Please add it.")

Successfully logged in to Hugging Face!


In [5]:
max_seq_length = 1024
dtype = None
load_in_4bit = True

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name= "unsloth/phi-3-mini-4k-instruct-bnb-4bit",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

==((====))==  Unsloth 2025.9.4: Fast Mistral patching. Transformers: 4.56.1.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.4.0
\        /    Bfloat16 = FALSE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/2.26G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/194 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/458 [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

In [6]:
special_tokens = ["<|user|>", "<|assistant|>", "<|end|>"]
tokenizer.add_special_tokens({"additional_special_tokens": special_tokens})
model.resize_token_embeddings(len(tokenizer))

Embedding(32011, 3072, padding_idx=32009)

In [7]:
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407,
    use_rslora=False,
    loftq_config=None,
)

Unsloth 2025.9.4 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


In [8]:
dataset = load_dataset("iamtarun/python_code_instructions_18k_alpaca", split="train").shuffle(seed=3407).select(range(7000))
dataset = dataset.train_test_split(test_size=0.15, seed=3407)

README.md:   0%|          | 0.00/905 [00:00<?, ?B/s]

data/train-00000-of-00001-8b6e212f3e1ece(…):   0%|          | 0.00/11.4M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/18612 [00:00<?, ? examples/s]

In [9]:
dataset

DatasetDict({
    train: Dataset({
        features: ['instruction', 'input', 'output', 'prompt'],
        num_rows: 5950
    })
    test: Dataset({
        features: ['instruction', 'input', 'output', 'prompt'],
        num_rows: 1050
    })
})

In [10]:
def format_prompts(examples):
    """
    Format the dataset for instruction tuning with Phi-3 Mini template
    """
    instructions = examples["instruction"]
    inputs = examples["input"]
    outputs = examples["output"]

    texts = []
    for instruction, input_text, output in zip(instructions, inputs, outputs):
        try:
            if input_text and input_text.strip():
                prompt = f"""<|user|>\n{instruction}\nInput: {input_text}<|end|>\n<|assistant|>\n{output}<|end|>"""
            else:
                prompt = f"""<|user|>\n{instruction}<|end|>\n<|assistant|>\n{output}<|end|>"""
            texts.append(prompt)
        except Exception as e:
            print(f"Error formatting prompt: {e}")
            texts.append("")
    return {"text": texts}

train_dataset = dataset["train"].map(format_prompts, batched=True)
eval_dataset = dataset["test"].map(format_prompts, batched=True)

def tokenize_function(examples):
    return tokenizer(examples["text"], truncation=True, padding="max_length", max_length=max_seq_length)

tokenized_train_dataset = train_dataset.map(tokenize_function, batched=True, remove_columns=train_dataset.column_names)
tokenized_eval_dataset = eval_dataset.map(tokenize_function, batched=True, remove_columns=eval_dataset.column_names)

Map:   0%|          | 0/5950 [00:00<?, ? examples/s]

Map:   0%|          | 0/1050 [00:00<?, ? examples/s]

Map:   0%|          | 0/5950 [00:00<?, ? examples/s]

Map:   0%|          | 0/1050 [00:00<?, ? examples/s]

In [11]:
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

training_args = TrainingArguments(
    output_dir="./Fine_tuned_LLM",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,
    max_steps=100,
    learning_rate=1e-5,
    fp16=True,
    bf16=False,
    logging_steps=1,
    optim="adamw_8bit",
    weight_decay=0.01,
    lr_scheduler_type="linear",
    seed=3407,
    eval_strategy="steps",
    eval_steps=50,
    save_steps=50,
    load_best_model_at_end=True,
    metric_for_best_model="loss",
    report_to="none"
)

In [12]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train_dataset,
    eval_dataset=tokenized_eval_dataset,
    data_collator=data_collator,
    callbacks=[
        EarlyStoppingCallback(
            early_stopping_patience=5,
            early_stopping_threshold=0.001
        )
    ],
)

In [13]:
torch.cuda.empty_cache()
print("Starting training...")
print(f"GPU memory before training: {torch.cuda.memory_allocated() / 1024**3:.2f} GB")

trainer.train()

print("Training completed!")
print(f"GPU memory after training: {torch.cuda.memory_allocated() / 1024**3:.2f} GB")

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 5,950 | Num Epochs = 1 | Total steps = 100
O^O/ \_/ \    Batch size per device = 1 | Gradient accumulation steps = 8
\        /    Data Parallel GPUs = 1 | Total batch size (1 x 8 x 1) = 8
 "-____-"     Trainable parameters = 29,884,416 of 3,850,638,336 (0.78% trained)


Starting training...
GPU memory before training: 2.22 GB
Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss,Validation Loss
50,7.6919,1.027002
100,7.0083,1.024597


Unsloth: Not an error, but MistralForCausalLM does not accept `num_items_in_batch`.
Using gradient accumulation will be very slightly less accurate.
Read more on gradient accumulation issues here: https://unsloth.ai/blog/gradient


Training completed!
GPU memory after training: 2.35 GB


In [27]:
def validate_model(prompt, max_length=512):
    """Test the model with a sample prompt"""
    inputs = tokenizer(
        f"<|user|>\n{prompt}<|end|>\n<|assistant|>\n",
        return_tensors="pt",
        truncation=True,
        max_length=max_seq_length
    ).to("cuda")

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=100,
            temperature=0.7,
            do_sample=True,
            top_p=0.9,
            tokenizer = tokenizer,

            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.pad_token_id,

            stop_strings=["```\n", "### Instruction:", "### Input:", "### Response:"]
        )

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response.split("<|assistant|>")[-1].strip()

# Test the model
test_prompt = "Write function to check a string is palindrome or not "
print("\nTest Generation:")
print(validate_model(test_prompt))


Test Generation:
Write function to check a string is palindrome or not  
 
Here is a Python function that checks if a given string is a palindrome or not:

```python
def is_palindrome(s: str) -> bool:
    # Remove spaces and convert to lowercase
    s = s.replace(" ", "").lower()
    
    # Compare the string with its reverse
    return s == s[::-1]
```


In [33]:
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

training_args = TrainingArguments(
    output_dir="./Fine_tuned_LLM",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,
    max_steps=100,
    learning_rate=2e-4,
    fp16=True,
    bf16=False,
    logging_steps=1,
    optim="adamw_8bit",
    weight_decay=0.01,
    lr_scheduler_type="linear",
    seed=3407,
    eval_strategy="steps",
    eval_steps=50,
    save_steps=50,
    load_best_model_at_end=True,
    metric_for_best_model="loss",
    report_to="none"
)

In [34]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train_dataset,
    eval_dataset=tokenized_eval_dataset,
    data_collator=data_collator,
    callbacks=[
        EarlyStoppingCallback(
            early_stopping_patience=5,
            early_stopping_threshold=0.001
        )
    ],
)

In [36]:
torch.cuda.empty_cache()
print("Starting training...")
print(f"GPU memory before training: {torch.cuda.memory_allocated() / 1024**3:.2f} GB")

trainer.train(resume_from_checkpoint="/content/Fine_tuned_LLM/checkpoint-100")

print("Training completed!")
print(f"GPU memory after training: {torch.cuda.memory_allocated() / 1024**3:.2f} GB")

Starting training...
GPU memory before training: 2.55 GB


==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 5,950 | Num Epochs = 1 | Total steps = 100
O^O/ \_/ \    Batch size per device = 1 | Gradient accumulation steps = 8
\        /    Data Parallel GPUs = 1 | Total batch size (1 x 8 x 1) = 8
 "-____-"     Trainable parameters = 29,884,416 of 3,850,638,336 (0.78% trained)


Step,Training Loss,Validation Loss




Training completed!
GPU memory after training: 2.55 GB


In [41]:
def validate_model(prompt, max_length=512):
    """Test the model with a sample prompt"""
    inputs = tokenizer(
        f"<|user|>\n{prompt}<|end|>\n<|assistant|>\n",
        return_tensors="pt",
        truncation=True,
        max_length=max_seq_length
    ).to("cuda")

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_length,
            temperature=0.7,
            do_sample=True,
            top_p=0.9,
            tokenizer = tokenizer,

            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.pad_token_id,

            stop_strings=["```\n", "### Instruction:", "### Input:", "### Response:"]
        )

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response.split("<|assistant|>")[-1].strip()

# Test the model
test_prompt = "Write a mergesort algorithm in python."
print("\nTest Generation:")
print(validate_model(test_prompt))


Test Generation:
Write a mergesort algorithm in python. 
 
Certainly! Below is a Python implementation of the Merge Sort algorithm. This implementation includes comments to explain the steps involved in the process.

```python
def merge_sort(arr):
    """
    Sorts an array in ascending order using the Merge Sort algorithm.
    
    Parameters:
    arr (list): The list of elements to be sorted.
    
    Returns:
    list: The sorted list of elements.
    """
    if len(arr) > 1:
        mid = len(arr) // 2  # Finding the mid of the array
        L = arr[:mid]  # Dividing the array elements into 2 halves
        R = arr[mid:]

        merge_sort(L)  # Sorting the first half
        merge_sort(R)  # Sorting the second half

        # Merging the sorted halves
        merge(arr, L, R)

    return arr

def merge(arr, L, R):
    """
    Merges two sorted subarrays into a single sorted array.
    
    Parameters:
    arr (list): The array to be sorted.
    L (list): The left half of the arr

In [43]:
model.save_pretrained("phi3-python-fine_tuned")
tokenizer.save_pretrained("phi3-python-fine_tuned")



('phi3-python-fine_tuned/tokenizer_config.json',
 'phi3-python-fine_tuned/special_tokens_map.json',
 'phi3-python-fine_tuned/chat_template.jinja',
 'phi3-python-fine_tuned/tokenizer.model',
 'phi3-python-fine_tuned/added_tokens.json',
 'phi3-python-fine_tuned/tokenizer.json')

In [64]:
!apt-get install git

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
git is already the newest version (1:2.34.1-1ubuntu1.15).
0 upgraded, 0 newly installed, 0 to remove and 35 not upgraded.


In [67]:
!git config --global user.name "danishnaseer00"
!git config --global user.email "danishmughal.dev@gmail.com"

In [70]:
from google.colab import userdata

# Fetch the token from Colab secrets
try:
    GITHUB_TOKEN = userdata.get('GH-token')
    print("Token fetched successfully (length: {} characters)".format(len(GITHUB_TOKEN)))
except Exception as e:
    print(f"Error fetching token: {e}")
    print("Make sure you added the secret named 'GITHUB_TOKEN' in Colab secrets.")
    GITHUB_TOKEN = None

Token fetched successfully (length: 93 characters)


In [74]:
%%bash
# Set up GitHub repository details
REPO_URL="https://github.com/danishnaseer00/codeminx.git"
BRANCH="main"

# Create a new directory for the repo
mkdir Fine_tuned_phi
cd Fine_tuned_phi # Changed from my_model to Fine_tuned_phi

# Initialize git
git init
git checkout -b $BRANCH

# Copy all files from Colab environment (adjust paths as needed)
cp -r /content/* .

# Add all files including notebook, model, and tokenizer
git add .
git commit -m "Upload Colab notebook, fine-tuned model, and tokenizer"

# Add GitHub remote and push
git remote add origin $REPO_URL
git push -u origin $BRANCH

Initialized empty Git repository in /content/Fine_tuned_phi/.git/
On branch main

Initial commit

Untracked files:
  (use "git add <file>..." to include in what will be committed)
	Fine_tuned_LLM/
	Fine_tuned_phi/
	codemini/
	codeminx/
	huggingface_tokenizers_cache/
	phi3-python-fine_tuned/
	sample_data/
	unsloth_compiled_cache/
	upload_to_github.sh

nothing added to commit but untracked files present (use "git add" to track)


mkdir: cannot create directory ‘Fine_tuned_phi’: File exists
hint: Using 'master' as the name for the initial branch. This default branch name
hint: is subject to change. To configure the initial branch name to use in all
hint: 
hint: 	git config --global init.defaultBranch <name>
hint: 
hint: Names commonly chosen instead of 'master' are 'main', 'trunk' and
hint: 'development'. The just-created branch can be renamed via this command:
hint: 
hint: 	git branch -m <name>
Switched to a new branch 'main'
cp: cannot copy a directory, '/content/Fine_tuned_phi', into itself, './Fine_tuned_phi'
error: 'Fine_tuned_phi/' does not have a commit checked out
fatal: adding files failed
error: src refspec main does not match any
error: failed to push some refs to 'https://github.com/danishnaseer00/codeminx.git'


CalledProcessError: Command 'b'# Set up GitHub repository details\nREPO_URL="https://github.com/danishnaseer00/codeminx.git"\nBRANCH="main"\n\n# Create a new directory for the repo\nmkdir Fine_tuned_phi\ncd Fine_tuned_phi # Changed from my_model to Fine_tuned_phi\n\n# Initialize git\ngit init\ngit checkout -b $BRANCH\n\n# Copy all files from Colab environment (adjust paths as needed)\ncp -r /content/* .\n\n# Add all files including notebook, model, and tokenizer\ngit add .\ngit commit -m "Upload Colab notebook, fine-tuned model, and tokenizer"\n\n# Add GitHub remote and push\ngit remote add origin $REPO_URL\ngit push -u origin $BRANCH\n'' returned non-zero exit status 1.

In [75]:
pip install nbformat==5.10.4 nbconvert==7.16.6

