<a href="https://colab.research.google.com/github/anjelisa01/LLM-fine-tuned-chatbot/blob/main/notebook_fine_tuned_chatbot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Clone repo

In [1]:
#Up di github
token = ""  #fill the token, delete after successfully clone the repo
username = "anjelisa01"
repo = "LLM-fine-tuned-chatbot"

!git clone https://{username}:{token}@github.com/{username}/{repo}.git
%cd {repo}

Cloning into 'LLM-exploration'...
remote: Enumerating objects: 16, done.[K
remote: Counting objects: 100% (16/16), done.[K
remote: Compressing objects: 100% (14/14), done.[K
remote: Total 16 (delta 6), reused 0 (delta 0), pack-reused 0 (from 0)[K
Receiving objects: 100% (16/16), 22.24 KiB | 11.12 MiB/s, done.
Resolving deltas: 100% (6/6), done.
/content/LLM-exploration


# Installs and imports

In [None]:
!pip install -q \
  transformers==4.38.2 \
  peft==0.8.2 \
  datasets \
  "accelerate>=0.27.2,<0.28.0"

In [3]:
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer, DataCollatorForLanguageModeling
from peft import LoraConfig, get_peft_model
from datasets import Dataset
import torch
import os

In [4]:
# Set the WANDB_DISABLED environment flag before any training objects are
# created (presumably to keep Weights & Biases from prompting for a login).
os.environ.update({"WANDB_DISABLED": "true"})

# Load model and tokenizer

We chose this model because it is lightweight and runs on Google Colab.

In [None]:
# Base checkpoint that will be fine-tuned.
model_name = "EleutherAI/gpt-neo-1.3B"

tokenizer = AutoTokenizer.from_pretrained(model_name)
# When the tokenizer defines no pad token of its own, reuse the
# end-of-sequence token for padding.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# device_map="auto" leaves device placement to the library; the tokenizer's
# pad token id is passed along with the load call
# (presumably so the model config matches the tokenizer — TODO confirm).
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    pad_token_id=tokenizer.pad_token_id,
    device_map="auto",
)

# Fine-tuning

## Define the LoRA config and apply it to the base model

In [6]:
# LoRA adapter settings for a causal language model.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    # Names of the modules that receive adapters.
    target_modules=["q_proj", "k_proj", "v_proj", "out_proj"],
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
)
# From here on `model` refers to the PEFT-wrapped model, not the bare base model.
model = get_peft_model(model, lora_config)

## The training data

In [7]:
# Ten single-turn question/answer examples, each already formatted with the
# "### Human:" / "### Assistant:" markers.
qa_examples = (
    "### Human: What is the capital of France?\n### Assistant: Paris.",
    "### Human: Who wrote '1984'?\n### Assistant: George Orwell.",
    "### Human: What is the boiling point of water?\n### Assistant: 100 degrees Celsius.",
    "### Human: What's the square root of 64?\n### Assistant: 8.",
    "### Human: Who painted the Mona Lisa?\n### Assistant: Leonardo da Vinci.",
    "### Human: What’s the largest planet in our solar system?\n### Assistant: Jupiter.",
    "### Human: When did World War II end?\n### Assistant: 1945.",
    "### Human: What is the chemical symbol for gold?\n### Assistant: Au.",
    "### Human: What does DNA stand for?\n### Assistant: Deoxyribonucleic acid.",
    "### Human: Who discovered gravity?\n### Assistant: Isaac Newton.",
)
# Single-column mapping: column name -> list of example strings.
data = {"text": list(qa_examples)}
# Build a Dataset from the single-column "text" mapping defined above.
dataset = Dataset.from_dict(mapping=data)

## Tokenize the training dataset

In [None]:
def tokenize(example):
    """Tokenize the "text" field of one dataset example.

    Calls the notebook-level ``tokenizer`` with truncation enabled and padding
    to a fixed ``max_length`` of 128, and returns the tokenizer's result as-is.
    """
    # NOTE(review): nothing here appends an end-of-sequence token to the text;
    # confirm whether the model is expected to learn where an answer stops.
    encode_options = {"truncation": True, "padding": "max_length", "max_length": 128}
    return tokenizer(example["text"], **encode_options)

# `dataset` is rebound to the mapped (tokenized) dataset.
dataset = dataset.map(tokenize)

## Setting training arguments

In [None]:
# Hyperparameters for the fine-tuning run.
training_args = TrainingArguments(
    output_dir="./gptneo-lora",
    num_train_epochs=5,  # changed from 1 to 5
    learning_rate=2e-4,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=2,
    fp16=torch.cuda.is_available(),  # fp16 only when CUDA is available
    logging_steps=1,
    save_strategy="no",
)
# mlm=False: the collator is configured without masked-language-modelling.
data_collator = DataCollatorForLanguageModeling(mlm=False, tokenizer=tokenizer)


## Training the model

Train the LoRA-wrapped base model on the tokenized dataset, using the training arguments defined above.

In [None]:
# Assemble the Trainer from the model, arguments, dataset and collator defined
# in the cells above, then run training. The train() call stays the cell's
# last expression so its return value is displayed.
trainer = Trainer(
    args=training_args,
    model=model,
    data_collator=data_collator,
    train_dataset=dataset,
)
trainer.train()

# Save the fine-tuned model

We don't save the entire model, only the LoRA adapter (its weights and configuration), so that when the base model is loaded somewhere else the adapter can be applied on top of it.

In [None]:
# Persist the fine-tuned (PEFT-wrapped) model and its tokenizer into the same
# directory, so both can be loaded together later.
model.save_pretrained("./gptneo-lora")
tokenizer.save_pretrained("./gptneo-lora")

# Download the model

In [None]:
import shutil
from google.colab import files

# Pack the contents of the gptneo-lora directory into gptneo-lora.zip
shutil.make_archive(base_name="gptneo-lora", format="zip", root_dir="gptneo-lora")

# Hand the archive to the browser as a download (Colab only)
files.download("gptneo-lora.zip")


# Using the model

This function will be in app.py

## The `ask_model` function

In [14]:
def ask_model(prompt, model=model, tokenizer=tokenizer, max_new_tokens=20):
    """
    Generate an answer from the fine-tuned model for a single question.

    The prompt is wrapped in the same "### Human: / ### Assistant:" template
    as the training data, decoding is greedy (``do_sample=False``), and only
    the newly generated text is returned.

    Args:
        prompt (str): Your custom question or instruction.
        model: Causal language model exposing ``.device`` and ``.generate``.
            Defaults to the notebook-level model bound when this cell runs.
        tokenizer: The tokenizer used with the model.
        max_new_tokens (int): Max number of tokens to generate (default 20).
    Returns:
        str: The assistant's answer, cut at the first "###" block or
        heading-like line the model starts after it. May be an empty string.
    """
    # Format prompt like the training data
    full_prompt = f"### Human: {prompt}\n### Assistant:"
    # Tokenize and move the tensors to the model's device
    inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
    # Generate response
    output = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        do_sample=False,
        eos_token_id=tokenizer.eos_token_id
    )
    # For a causal LM the returned sequence is prompt + continuation. Decode
    # only the continuation: this always yields an answer (no dependence on
    # finding the marker in the decoded text) and is not confused by a user
    # prompt that itself contains "### Assistant:".
    prompt_length = inputs["input_ids"].shape[-1]
    answer = tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True).strip()
    # Cut off hallucinated continuation (like another ### block or file paths).
    # "###" also covers "### Human:", and "\n#" also covers "\n##".
    for stop_token in ("###", "\n#"):
        answer = answer.split(stop_token)[0].strip()
    return answer


## Use the model

In [16]:
# Try the fine-tuned model on a question that is not among the training examples.
response = ask_model(prompt="what is an influencer?", model=model, tokenizer=tokenizer)
print("🤖", response)

🤖 a person who is influential in a certain field.
