<a href="https://colab.research.google.com/github/ashev2021/Fine-Tuning-LLM/blob/main/FullyFineTune.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install "datasets>=2.18.0,<3"
# deploy in Gradio
!pip install gradio

In [None]:
!pip install transformers>=4.38.2 sentence-transformers>=2.5.1 setfit>=1.0.3 accelerate>=0.27.2 seqeval>=1.2.2

In [None]:
#Load dataset
from datasets import load_dataset

# Fetch the IMDB dataset, which provides "train" and "test" splits
imdb_data = load_dataset("mteb/imdb")

# Shuffle each split with a fixed seed and keep a small subset
# (2000 training rows, 1000 test rows) so the notebook runs quickly
train_data = imdb_data["train"].shuffle(seed=42).select(range(2000))
test_data = imdb_data["test"].shuffle(seed=42).select(range(1000))

In [None]:
# Supervised classification: fine-tune all model weights (Hugging Face Trainer)

# model and tokenize
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Checkpoint name shared by the tokenizer and the classifier
model_name = "distilbert-base-uncased"

# Load the matching tokenizer, then the checkpoint wrapped for
# sequence classification with two output labels
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=2,
)



In [None]:
# Data Collator
from transformers import DataCollatorWithPadding

# Collator that pads the examples of a batch to the longest sequence in it,
# using the padding settings of the tokenizer passed in here

data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# preprocess
def preprocess_function(examples):
    """Tokenize the "text" field of a batch, truncating over-long inputs.

    Args:
        examples: Mapping with a "text" entry holding the raw review text.

    Returns:
        Whatever the module-level `tokenizer` returns for those texts.
    """
    texts = examples["text"]
    return tokenizer(texts, truncation=True)

# Tokenize both splits in batches with the same preprocessing function
tokenized_train, tokenized_test = (
    split.map(preprocess_function, batched=True)
    for split in (train_data, test_data)
)


In [None]:
# evaluate the model with F1
import numpy as np
import evaluate


# Cache for the F1 metric object; filled on the first call to compute_metrics
# so that repeated evaluations do not reload the metric every time.
_f1_metric = None


def compute_metrics(eval_pred):
    """Calculate the F1 score for one evaluation pass.

    Args:
        eval_pred: A ``(logits, labels)`` pair. The predicted class of each
            example is the argmax of its logits over the last axis.

    Returns:
        dict: ``{"f1": <score>}`` as computed by the ``evaluate`` "f1" metric.
    """
    global _f1_metric
    if _f1_metric is None:
        # Load lazily (not at definition time) and only once per kernel session.
        _f1_metric = evaluate.load("f1")

    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)

    f1 = _f1_metric.compute(predictions=predictions, references=labels)["f1"]
    return {"f1": f1}


In [None]:
# training arguments for parameter tuning
from transformers import TrainingArguments, Trainer

# Hyperparameters for a single epoch of full fine-tuning; outputs go to ./model
training_args = TrainingArguments(
    output_dir="model",
    num_train_epochs=1,
    learning_rate=2e-5,
    weight_decay=0.01,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    save_strategy="epoch",
    report_to="none",
)

# Bundle the model, tokenized data, padding collator and F1 metric into the
# Trainer that drives training and evaluation
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    tokenizer=tokenizer,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_test,
    compute_metrics=compute_metrics,
)



In [None]:
# Run fine-tuning on tokenized_train using the training arguments defined above
trainer.train()

In [None]:
# Evaluate on the Trainer's eval dataset (tokenized_test); the reported metrics
# include the F1 score returned by compute_metrics
trainer.evaluate()

In [None]:
# Save the fine-tuned model to ./my_finetuned_model; the Gradio cell below
# loads both the model and its tokenizer from this directory
trainer.save_model("my_finetuned_model")


In [None]:
#Deploy with Gradio to compare

import gradio as gr
from transformers import pipeline

# Fine-tuned checkpoint: model and tokenizer are both read from the directory
# written by trainer.save_model()
fine_tuned_model_path = "my_finetuned_model"
fine_tuned_classifier = pipeline(
    "text-classification",
    model=fine_tuned_model_path,
    tokenizer=fine_tuned_model_path,
)

# Baseline classifier pulled from the Hugging Face Hub.
# NOTE(review): "distilbert-base-uncased" looks like the base checkpoint rather
# than a sentiment-tuned one, so its classification head is presumably untrained
# and its predictions arbitrary — confirm this is the intended baseline.
pretrained_classifier = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased",
)

# Translate raw pipeline labels (either naming scheme) to display names
label_map = {
    "NEGATIVE": "negative",
    "POSITIVE": "positive",
    "LABEL_0": "negative",
    "LABEL_1": "positive",
}

def _describe_prediction(classifier, text):
    """Classify `text` with `classifier` and format its top prediction for display."""
    # truncation=True clips reviews longer than the model's maximum input length;
    # without it the pipeline raises on over-long text.
    prediction = classifier(text, truncation=True)[0]
    # Fall back to the raw label when it is not in label_map.
    label = label_map.get(prediction["label"], prediction["label"])
    score = round(prediction["score"], 4)
    return f"Label: {label}, Confidence: {score}"


def compare_models(text):
    """Classify one review with both models and return display strings.

    Args:
        text: The review text entered by the user.

    Returns:
        tuple[str, str]: ``(fine_tuned_result, pretrained_result)``, each of
        the form ``"Label: <label>, Confidence: <score>"``. For empty or
        whitespace-only input, both entries are a prompt to enter a review.
    """
    # Nothing meaningful to classify; avoid calling the models on empty/None input.
    if not text or not text.strip():
        message = "Please enter a review to classify."
        return message, message

    ft_result = _describe_prediction(fine_tuned_classifier, text)
    pre_result = _describe_prediction(pretrained_classifier, text)
    return ft_result, pre_result

# Build the demo UI: one input box, one button, and two read-only output boxes
with gr.Blocks() as demo:
    gr.Markdown("## 🎬 Compare Fine-Tuned vs Pretrained Sentiment Models")
    gr.Markdown("Enter a movie review below to compare predictions.")

    # Free-text review input and the button that triggers classification
    input_text = gr.Textbox(label="Movie Review", lines=4, placeholder="Type your review here...")
    classify_btn = gr.Button("Classify")

    # Place the two models' results in a single row
    with gr.Row():
        ft_output = gr.Textbox(label="Fine-Tuned Model Prediction", interactive=False)
        pre_output = gr.Textbox(label="Pretrained Model Prediction", interactive=False)

    # compare_models returns (fine-tuned result, pretrained result), matching the output order
    classify_btn.click(compare_models, inputs=input_text, outputs=[ft_output, pre_output])

# NOTE(review): share=True presumably requests a publicly reachable link from
# Gradio — confirm that exposing the demo outside this machine is intended
demo.launch(share=True)
