In [None]:
pip install datasets transformers torch bitsandbytes peft trl colorama -q

## Create Synthetic Data
### from pdf into llm to create syntethic data

In [1]:
from docling.document_converter import DocumentConverter
from docling.chunking import HybridChunker
from colorama import Fore 

import json
from typing import List 
from pydantic import BaseModel
from litellm import completion
from generated_prompt import prompt_template

class Record(BaseModel):
    question: str
    answer: str

class Response(BaseModel):
    generated: List[Record]

def llm_call(data: str, num_records: int = 5) -> dict:
    stream = completion(
        model="ollama/qwen2.5:3b",
        messages=[
            {
                "role": "user",
                "content": prompt_template(data, num_records),
            }
        ],
        stream=True,
        options={"num_predict": 2000},
        format=Response.model_json_schema(),
    )
    data = ""
    for x in stream: 
        delta = x['choices'][0]["delta"]["content"]
        if delta is not None: 
            print(Fore.LIGHTBLUE_EX+ delta + Fore.RESET, end="") 
            data += delta 
    return json.loads(data)


if __name__=="__main__":
    converter = DocumentConverter()
    doc = converter.convert("Python Programming.pdf").document
    chunker = HybridChunker()
    chunks = chunker.chunk(dl_doc=doc)

    dataset = {}
    for i, chunk in enumerate(chunks):
      print(Fore.YELLOW + f"Raw Text:\n{chunk.text[:300]}..." + Fore.RESET)
      enriched_text = chunker.contextualize(chunk=chunk)
      print(Fore.GREEN + f"Contextualized Tex:\n{enriched_text[:300]}..." + Fore.RESET)

      data = llm_call(
          enriched_text
      )
      dataset[i] = {"generated":data["generated"], "context":enriched_text}
    
    with open('pydata.json','w') as f: 
        json.dump(dataset, f) 


Token indices sequence length is longer than the specified maximum sequence length for this model (570 > 512). Running this sequence through the model will result in indexing errors


[33mRaw Text:
What is Python?
Python is a popular programming language. It was created by Guido van Rossum, and released in 1991.
It is used for:
web development (server-side), software development,
mathematics, system scripting.
What can Python do?
Python can be used on a server to create web applications.
Pytho...[39m
[32mContextualized Tex:
Python Introduction
What is Python?
Python is a popular programming language. It was created by Guido van Rossum, and released in 1991.
It is used for:
web development (server-side), software development,
mathematics, system scripting.
What can Python do?
Python can be used on a server to create web...[39m
[94m{
[39m[94m   [39m[94m "[39m[94mgenerated[39m[94m":[39m[94m [
[39m[94m       [39m[94m {
[39m[94m           [39m[94m "[39m[94mquestion[39m[94m":[39m[94m "[39m[94mHow[39m[94m was[39m[94m Python[39m[94m created[39m[94m?",
[39m[94m           [39m[94m "[39m[94manswer[39m[94m":[39m[94m "[39m[94mPy

## Preprocessing Data
### formating the data for better training 

In [2]:
import json
from colorama import Fore 

instructions = []
with open('pydata.json', 'r') as f: 
    data = json.load(f)
    for key,chunk in data.items(): 
        for pairs in chunk['generated']: 
            question, answer = pairs['question'], pairs['answer'] 
            context_pair = {
                'question': f" {pairs['question']}",
                'answer':pairs['answer']
                }
            instructions.append(context_pair) 
        print(Fore.YELLOW + str(chunk)) 
        print('\n~~~~~~~~~~~~~~~~~~~~~')

with open('data/instruction.json','w') as f: 
    json.dump(instructions,f) 

with open('data/instruction.json','r') as f: 
    data = json.load(f)
    print(Fore.LIGHTMAGENTA_EX + str(data[:10]))

[33m{'generated': [{'question': 'How was Python created?', 'answer': 'Python was created by Guido van Rossum and released in 1991.'}, {'question': 'What are the applications of Python mentioned in the data?', 'answer': 'The applications of Python include web development (server-side), software development, mathematics, and system scripting.'}, {'question': 'Can you give an example of how Python is used on a server to create web applications?', 'answer': 'Yes, Python can be used on a server to create web applications. For instance, frameworks like Django or Flask are built using Python.'}, {'question': "What does the simplicity of Python's syntax allow developers to do?", 'answer': 'The simple syntax similar to English allows developers to write programs with fewer lines than in some other programming languages.'}, {'question': 'How can Python be used for rapid prototyping?', 'answer': 'Python runs on an interpreter system, meaning code can be executed as soon as it is written. This qu

## Using LLM to quality check Data
### classified the data into best accuracy and style

In [3]:
import json
from pydantic import BaseModel 
from litellm import completion
from colorama import Fore


class Score(BaseModel):
  score: int
  explaination: str

class Rank(BaseModel):
  accuracy: Score
  style: Score

def llm_call(record: str) -> dict:
    stream = completion(
        model="ollama/qwen2.5:7b",
        messages=[
            {
                "role": "user",
                "content": f"""Classify this instruction tuning record between 1-10 in terms of accuracy and style, provide explanations as to your reasoning. The answer should be completely self contained. 
                                If the question is not in fact a question it should receive 0 for accuracy. If the answer does not adequately answer the question it should receive 1 for accuracy. If the question 
                                or answer are in any way harmful, not helpful or not honest it should be classified as 1 for style. If the question or answer are blank or ..., return 1 for both. \n\n Record: {record} Return as JSON""",
            }
        ],
        stream=True,
        options={"num_predict": 2000, "temperature": 0.2},
        format=Rank.model_json_schema(),
    )
    data = ""
    for x in stream: 
        delta = x['choices'][0]["delta"]["content"]
        if delta is not None: 
            print(Fore.LIGHTBLUE_EX+ delta + Fore.RESET, end="") 
            data += delta 
    return json.loads(data)


if __name__ == "__main__": 
    quality = []
    instructions = []
    with open('data/instruction.json', 'r') as f: 
        data = json.load(f)
        for pair in data: 
            print(Fore.YELLOW + str(pair) +Fore.RESET) 
            result = llm_call(pair) 
            
            if result['accuracy']['score'] >= 6 and result['style']['score'] >= 6:
                instructions.append(pair)
                quality.append({**pair, 'quality':result})     

    with open('data/instructionquality.json','w') as f: 
        json.dump(instructions,f)

    with open('data/qualityresults.json','w') as f: 
        json.dump(quality,f)


[33m{'question': ' How was Python created?', 'answer': 'Python was created by Guido van Rossum and released in 1991.'}[39m
[94m{
[39m[94m [39m[94m "[39m[94maccuracy[39m[94m":[39m[94m [39m[94m{[39m[94m "[39m[94mscore[39m[94m":[39m[94m [39m[94m2[39m[94m,[39m[94m "[39m[94mex[39m[94mplain[39m[94mation[39m[94m":[39m[94m "[39m[94mThe[39m[94m answer[39m[94m is[39m[94m partially[39m[94m correct[39m[94m but[39m[94m lacks[39m[94m detail[39m[94m.[39m[94m It[39m[94m mentions[39m[94m the[39m[94m creator[39m[94m ([39m[94mGuid[39m[94mo[39m[94m van[39m[94m Ros[39m[94msum[39m[94m)[39m[94m and[39m[94m year[39m[94m of[39m[94m release[39m[94m ([39m[94m1[39m[94m9[39m[94m9[39m[94m1[39m[94m),[39m[94m which[39m[94m are[39m[94m accurate[39m[94m.[39m[94m However[39m[94m,[39m[94m it[39m[94m does[39m[94m not[39m[94m explain[39m[94m how[39m[94m Python[39m[94m was[39m[94m created[39m[94m 

# Training in local device

## Training Model LLM using LoRA

In [None]:
from datasets import load_dataset
from colorama import Fore

from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch
from trl import SFTTrainer, SFTConfig
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

dataset = load_dataset("json", data_files="data/instruction1.json", split="train")
print(Fore.YELLOW  + str(dataset[2]) + Fore.RESET)

def format_chat_template(batch, tokenizer):
    system_prompt = """You are a helpful, honest, and harmless assistant designed to teach Python programming to beginners. 
    Explain concepts clearly and step by step. Provide working Python code examples wherever relevant. 
    Focus on Python fundamentals, coding best practices, and problem-solving skills. 
    If a question is outside your scope, politely advise the user that you cannot answer it. 
    Think through each question logically before providing an answer."""

    tokenizer.chat_template = "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}"

    samples = []
    questions = batch["question"]
    answers = batch["answer"]
    for i in range(len(questions)):
      row_json = [
          {"role": "system", "content": system_prompt},
          {"role": "user", "content": questions[i]},
          {"role": "assistant", "content": answers[i]}
      ]

      text = tokenizer.apply_chat_template(row_json, tokenize=False, add_generation_prompt=True)
      samples.append(text)

    return {
        "instruction": questions,
        "response": answers,
        "text": samples
    }


base_model = "meta-llama/Llama-3.2-1B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(
    base_model,
    trust_remote_code = True,
    token = "YOUR_HF_TOKEN",
    )

train_dataset = dataset.map(lambda x: format_chat_template(x, tokenizer), batched=True, batch_size=10)
print(Fore.LIGHTMAGENTA_EX  + str(train_dataset[0]) + Fore.RESET)


quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    )


model = AutoModelForCausalLM.from_pretrained(
    base_model,
    device_map="auto",
    quantization_config=quant_config,
    token="YOUR_HF_TOKEN",
    torch_dtype=torch.float16,      # ✅ supported on 1050Ti
    )

print(Fore.CYAN  + str(model) + Fore.RESET)
print(Fore.LIGHTYELLOW_EX  + str(next(model.parameters()).device) )


model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

peft_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules="all-linear",
    lora_dropout=0.05,
    task_type="CAUSAL_LM",
    )

trainer = SFTTrainer(
    model=model,
    train_dataset=train_dataset,
    args=SFTConfig (
        output_dir="meta-llama/Llama-3.2-1B-SFT",
        num_train_epochs=5,
        ),
    peft_config=peft_config,
)

trainer.train()

trainer.save_model('complete_checkpoint')
trainer.model.save_pretrained("final_model")


[33m{'question': "Context:Basics\nJust printing  hello world  is not enough, is it? You want to do more than that - you want to take some input, manipulate it and get something out of it. We can achieve this in Python using constants and variables, and we'll learn some other concepts as well in this chapter. What is the difference between mutable and immutable objects in Python?", 'answer': 'Mutable objects can be changed after their creation (e.g., lists, dictionaries), while immutable objects cannot (e.g., integers, strings).'}[39m
[95m{'question': "Context:Basics\nJust printing  hello world  is not enough, is it? You want to do more than that - you want to take some input, manipulate it and get something out of it. We can achieve this in Python using constants and variables, and we'll learn some other concepts as well in this chapter. What is the purpose of using constants and variables in Python?", 'answer': 'Constants and variables are used to store data that can be reused thro

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
  return fn(*args, **kwargs)


Step,Training Loss
10,2.8668
20,2.6424
30,2.4079
40,2.2308
50,1.9712
60,1.6905
70,1.5148


# Test Model and run Inference

In [None]:
import torch
from transformers import AutoTokenizer, pipeline, AutoModelForCausalLM

# 1. Base model
base_model = "meta-llama/Llama-3.2-1B-Instruct"

# Path to merged model
merged_model_dir = "C:/Users/nixon/test/checkpoints/merged_model"

# Load tokenizer + model
tokenizer = AutoTokenizer.from_pretrained(merged_model_dir)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    merged_model_dir,
    device_map="cuda",          # put model on GPU
    torch_dtype=torch.float32
)

# Create pipeline
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer
)

# Chat loop
print("🤖 Chatbot ready! Type 'quit' or 'exit' to stop.\n")
while True:
    prompt = input("You: ")
    if prompt.lower() in ["quit", "exit"]:
        print("👋 Goodbye!")
        break
    
    outputs = pipe(
        prompt,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.pad_token_id,
    )
    response = outputs[0]["generated_text"]
    
    print(f"Bot: {response}\n")


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cuda


🤖 Chatbot ready! Type 'quit' or 'exit' to stop.

You: what is python
Bot: what is python programming
Python programming is a high-level programming language that is easy to learn and use. It is based on the syntax of other programming languages, such as C and Java, but has a more concise and readable way of writing code. Python is used for a wide range of applications, including web development, scientific computing, data analysis, and more.
Some of the key features of Python programming include:
* **Easy to learn**: Python has a simple syntax and is relatively easy to learn, making it a great language for beginners.
* **High-level**: Python is a high-level language, which means it abstracts away many low-level details, allowing you to focus on the logic of your program without worrying about the underlying implementation.
* **Object-oriented**: Python supports object-oriented programming (OOP), which allows you to create complex data structures and methods that operate on them.
* **Ex