In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
# NOTE(review): the cells visible in this notebook write to relative paths
# ("./checkpoints", "latest_step.txt") and never reference /content/drive —
# confirm whether this mount is still needed.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# pip installs

!pip install trl==0.9.4 transformers==4.45.1 peft==0.12.0 accelerate datasets requests torch bitsandbytes sentencepiece wandb matplotlib

In [3]:
# imports

import os
import re
import math
from tqdm import tqdm
import torch
from huggingface_hub import login
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM,TrainingArguments,set_seed,BitsAndBytesConfig
from datasets import load_dataset,Dataset,DatasetDict
import wandb
from peft import LoraConfig
from google.colab import userdata
from trl import SFTTrainer,SFTConfig
from datetime import datetime
import matplotlib.pyplot as plt
from trl import DataCollatorForCompletionOnlyLM
from huggingface_hub import HfApi

In [None]:
# Checkpoint tracker
# Generates a small helper module that persists the number of completed
# training steps in "latest_step.txt" (relative to the working directory).
def create_checkpoint_tracker():
  """Write ``checkpoint_tracker.py`` into the current working directory.

  The generated module exposes:
    * ``get_latest_step()``     - read the stored step count; returns 0 when the
                                  file is missing, unreadable or not an integer.
    * ``get_lastest_step``      - alias of ``get_latest_step`` kept so existing
                                  imports of the original (misspelled) name work.
    * ``set_latest_step(step)`` - overwrite the stored step count.
  """
  checkpoint_file = "checkpoint_tracker.py"
  with open(checkpoint_file,"w") as f:
    f.write("""
def get_latest_step():
  # Only "no usable step file" conditions fall back to 0; anything else
  # (e.g. KeyboardInterrupt) is allowed to propagate.
  try:
    with open("latest_step.txt","r") as f:
      return int(f.read().strip())
  except (OSError, ValueError):
    return 0

# Backward-compatible alias for the original, misspelled name.
get_lastest_step = get_latest_step

def set_latest_step(step):
  with open("latest_step.txt","w") as f:
    f.write(str(step))
""")
create_checkpoint_tracker()


In [None]:
from checkpoint_tracker import get_lastest_step,set_latest_step

In [None]:
# Model and repository names
BASE_MODEL = "unsloth/Meta-Llama-3.1-8B-Instruct"
HF_USER = "Hoangee2"
PROJECT_NAME = "rating_book"
DATA_NAME = "ratingbook-data"
HUB_MODEL_NAME = f"{HF_USER}/{PROJECT_NAME}"
LOG_TO_WANDB = True

# Login to Hugging Face (the token comes from Colab secrets, it is not hard-coded)
hf_token = userdata.get('HF_TOKEN')
login(hf_token,add_to_git_credential=True)

# Load train data from dataset: a seeded shuffle, then at most 5000 rows
dataset = load_dataset(f"{HF_USER}/{DATA_NAME}")
train_data = dataset['train'].shuffle(seed=123).select(range(min(5000,len(dataset["train"]))))

# Log in to Weights & Biases
wandb_api_key = userdata.get('WANDB_API_KEY')
os.environ["WANDB_API_KEY"] = wandb_api_key
wandb.login()

# Configure Weights & Biases to record against our project
os.environ["WANDB_PROJECT"] = PROJECT_NAME
# The transformers W&B integration only acts on "gradients", "parameters" or
# "all"; any other value (such as the previous "gradient") disables watching.
os.environ["WANDB_WATCH"] = "gradients"
# NOTE(review): both branches upload model artifacts ("checkpoint" on every
# save, "end" once at the end) — confirm that is intended when LOG_TO_WANDB is False.
os.environ["WANDB_LOG_MODEL"] = "checkpoint" if LOG_TO_WANDB else "end"

In [None]:
# Print the Hugging Face account (and access-token role) that the current login resolves to.
# NOTE(review): the printed dict contains account details such as the e-mail
# address — clear this cell's output before sharing the notebook.
from huggingface_hub import whoami
print(whoami())

{'type': 'user', 'id': '69060f76a923ae9405459c07', 'name': 'Hoangee2', 'fullname': 'Xuan Hoang', 'email': 'tranxuanhoang14062004@gmail.com', 'emailVerified': True, 'canPay': False, 'periodEnd': None, 'isPro': False, 'avatarUrl': '/avatars/e3563efe0aa65931765151d5e3cb5b89.svg', 'orgs': [], 'auth': {'type': 'access_token', 'accessToken': {'displayName': 'read', 'role': 'read', 'createdAt': '2025-11-08T14:28:26.162Z'}}}


In [None]:
# resume training function
def train_or_resume(
    base_model_name,
    hf_model_name,
    train_dataset,
    lora_config,
    steps_per_session = 200,
    max_total_steps=1000,
    batch_size =1,
    grad_accum_steps = 16,
    save_steps = 100,
    data_collator = None,
):
  """Run one bounded training session and record the cumulative step count.

  The number of steps already completed is read from the checkpoint tracker.
  If that count has reached ``max_total_steps`` nothing is trained; otherwise
  up to ``steps_per_session`` further steps are run with ``SFTTrainer``, the
  ``./checkpoints`` folder is uploaded to ``hf_model_name`` and the tracker is
  advanced.

  Args:
    base_model_name: Hub id of the base model; also used for the tokenizer.
    hf_model_name: Hub repo id that receives checkpoints and is loaded from
      when a previous session has already recorded progress.
    train_dataset: Training dataset; its ``"text"`` field is used.
    lora_config: PEFT/LoRA configuration handed to the trainer.
    steps_per_session: Maximum optimizer steps to run in this call.
    max_total_steps: Total step budget across all sessions.
    batch_size: Per-device train batch size.
    grad_accum_steps: Gradient accumulation steps.
    save_steps: Checkpoint interval in steps.
    data_collator: Optional collator forwarded to ``SFTTrainer`` (for example a
      ``DataCollatorForCompletionOnlyLM``). ``None`` leaves the trainer's own
      default collator in place.

  Returns:
    The cumulative number of steps recorded after this call.
  """
  # Get the latest step we've trained to
  latest_step = get_lastest_step()

  # Check if we've already reached the max steps
  if latest_step >= max_total_steps:
    print(f"Trainning already completed! Reached {latest_step}/{max_total_steps} steps")
    return latest_step

  # Calculate how many steps to train in this session
  step_this_session = min(steps_per_session,max_total_steps-latest_step)
  print(f"Training for {step_this_session} steps (total progress: {latest_step}/{max_total_steps})")

  # Read the upload token before training so a missing secret fails right
  # away instead of after the whole session has run.
  hf_write_token = userdata.get("HF_TOKEN_WRITE")

  # Set up tokenizer
  tokenizer = AutoTokenizer.from_pretrained(base_model_name,trust_remote_code = True)
  tokenizer.pad_token = tokenizer.eos_token
  tokenizer.padding_side = 'right'

  # Configure quantization
  quant_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16
  )

  # Check if we need to resume training
  # NOTE(review): if loading fails, training silently restarts from the base
  # model while the step counter keeps counting from latest_step — confirm
  # this best-effort fallback is wanted.
  try:
    if latest_step > 0:
      print(f"Ressuming from step {latest_step}")
      base_model = AutoModelForCausalLM.from_pretrained(hf_model_name,quantization_config = quant_config)
    else:
      print("Start training from base_model")
      base_model = AutoModelForCausalLM.from_pretrained(base_model_name,quantization_config = quant_config)
  except Exception as e:
    print("Error loading model")
    print(e)
    base_model = AutoModelForCausalLM.from_pretrained(base_model_name,quantization_config = quant_config,device_map = "auto")

  # Configure training parameters
  # NOTE(review): max_steps is the per-session count, so warmup and the cosine
  # schedule start over in every session — confirm this is acceptable.
  train_params = SFTConfig(
      output_dir = "./checkpoints",
      num_train_epochs = 1,
      max_steps = step_this_session,
      per_device_train_batch_size = batch_size,
      gradient_accumulation_steps = grad_accum_steps,
      optim = "paged_adamw_32bit",
      save_steps = save_steps,
      logging_steps = 20,
      learning_rate = 1e-4,
      weight_decay=0.001,
      fp16 = True,
      dataset_text_field="text",
      max_grad_norm = 0.3,
      warmup_ratio = 0.03,
      group_by_length = True,
      lr_scheduler_type = "cosine",
      push_to_hub = True,
      hub_model_id = hf_model_name,
      hub_private_repo = True,
  )

  # Create trainer. The tokenizer configured above is passed explicitly;
  # without it the trainer loads its own tokenizer and the pad-token /
  # padding-side settings made here would have no effect.
  trainer = SFTTrainer(
      model = base_model,
      train_dataset = train_dataset,
      peft_config = lora_config,
      tokenizer = tokenizer,
      data_collator = data_collator,
      args = train_params,
  )

  # Train the model
  trainer.train()

  # Push the checkpoint folder to the Hugging Face Hub with the write token
  api = HfApi(token=hf_write_token)
  api.upload_folder(
    folder_path="./checkpoints",
    repo_id = hf_model_name,
    repo_type="model",
  )

  # Update and save the latest step count (only after a successful upload)
  total_steps_done = latest_step+step_this_session
  set_latest_step(total_steps_done)

  print(f"Completed training session ({total_steps_done}/{max_total_steps} steps)")

  return total_steps_done

In [None]:
# Total step budget across all sessions; shared by the training call and the
# progress message so the two cannot drift apart.
MAX_TOTAL_STEPS = 1000

# LoRA configuration (rank-8 adapters on the attention projections)
lora_parameter = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=8,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules = ["q_proj","v_proj","k_proj","o_proj"]
)

# Create the response template and data collator
# NOTE(review): `collator` is not passed to train_or_resume in the call below,
# so completion-only loss masking is not applied — confirm whether it should be.
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL,trust_remote_code = True)
response_template = "\n\n\nOutput:\n"
collator = DataCollatorForCompletionOnlyLM(tokenizer=tokenizer,mlm=False, response_template=response_template)

# Train or resume
current_step = train_or_resume(
    base_model_name = BASE_MODEL,
    hf_model_name = HUB_MODEL_NAME,
    train_dataset = train_data,
    lora_config = lora_parameter,
    steps_per_session = 200,
    max_total_steps=MAX_TOTAL_STEPS,
    batch_size =1,
    grad_accum_steps = 16,
    save_steps = 200
)

print(f"Current training progress: {current_step}/{MAX_TOTAL_STEPS} steps")

In [None]:
from huggingface_hub import HfApi

# Repository that receives a copy of this run's checkpoint folder.
RUN_REPO_ID = "Hoangee2/rating_book-2025-11-23"

# NOTE(review): train_or_resume uploads with the "HF_TOKEN_WRITE" secret, while
# this cell uses hf_token (the 'HF_TOKEN' secret) — confirm that token has
# write access.
api = HfApi(token=hf_token)

# exist_ok=True keeps the cell re-runnable: without it a second run fails
# because the repository already exists.
# NOTE(review): this repo is public (private=False) whereas the training repo
# is created with hub_private_repo=True — confirm that is intended.
api.create_repo(
    repo_id=RUN_REPO_ID,
    repo_type="model",
    private=False,
    exist_ok=True,
)

api.upload_folder(
    folder_path="./checkpoints",
    repo_id=RUN_REPO_ID,
    repo_type="model",
)

**TEST MODEL**

In [4]:
# Model and repository names
# These repeat the training-section constants so the test section can be run
# without re-running the training configuration cell.
BASE_MODEL = "unsloth/Meta-Llama-3.1-8B-Instruct"
HF_USER = "Hoangee2"
PROJECT_NAME = "rating_book"
DATA_NAME = "ratingbook-data"
RUN_NAME = "2025-11-23"
PROJECT_RUN_NAME = f"{PROJECT_NAME}-{RUN_NAME}"
# Hub repo id of the fine-tuned adapter: "<user>/<project>-<run>"
FINETUNED_MODEL = f"{HF_USER}/{PROJECT_RUN_NAME}"
# Passed as `revision=` when the adapter is loaded; a falsy value loads the repo's default revision
REVISION = "71dc08b7f8c91e521ed510a5752f97187e415a2e"
HUB_MODEL_NAME = f"{HF_USER}/{PROJECT_NAME}"

# Hyperparameters for QLoRA
# True selects the 4-bit quantization config, False the 8-bit one
QUANT_4_BIT = True
%matplotlib inline


In [5]:
# Both names are also imported in the notebook's main import cell; they are
# repeated here so this cell does not depend on it.
from google.colab import userdata
from huggingface_hub import login

# Login to Hugging face
# The token is read from the Colab secret 'HF_TOKEN'.
hf_token = userdata.get('HF_TOKEN')
login(hf_token, add_to_git_credential=True)

In [None]:
from datasets import load_dataset

# Load dataset
# Pulls "<HF_USER>/<DATA_NAME>" from the Hub and keeps handles to its two splits.
dataset = load_dataset(f"{HF_USER}/{DATA_NAME}")
train = dataset['train']
test = dataset['test']

In [6]:
from transformers import AutoTokenizer, AutoModelForCausalLM,TrainingArguments,set_seed,BitsAndBytesConfig

# Pick the right quantization
if QUANT_4_BIT:
  # 4-bit NF4 with double quantization.
  # NOTE(review): training used torch.float16 as the 4-bit compute dtype while
  # inference uses torch.bfloat16 — confirm the difference is intended.
  quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4"
  )
else:
  # 8-bit path. BitsAndBytesConfig has no 8-bit compute-dtype option; the
  # former `bnb_8bit_compute_dtype` argument was only reported as an unused
  # kwarg, so it is dropped.
  quant_config = BitsAndBytesConfig(
    load_in_8bit=True
  )

In [None]:
# Load the Tokenizer and the Model
from peft import PeftModel

# Tokenizer of the base model: right padding, EOS doubles as the pad token.
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token

# Quantized base model; generation pads with the tokenizer's pad token.
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    device_map="auto",
    quantization_config=quant_config,
)
base_model.generation_config.pad_token_id = tokenizer.pad_token_id

# Load the fine-tuned model with PEFT; the revision is only passed when REVISION is set.
adapter_kwargs = {"revision": REVISION} if REVISION else {}
fine_tuned_model = PeftModel.from_pretrained(base_model, FINETUNED_MODEL, **adapter_kwargs)

print(f"Memory footprint: {fine_tuned_model.get_memory_footprint()/ 1e6:.1f} MB")

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/454 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/956 [00:00<?, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

In [9]:
def model_predict(prompt, max_new_tokens=50):
    """Generate a continuation of ``prompt`` with the fine-tuned model.

    Args:
        prompt: Full prompt text to feed to the model.
        max_new_tokens: Upper bound on the number of generated tokens
            (defaults to 50, the previously hard-coded value).

    Returns:
        The decoded first output sequence. It contains the prompt followed by
        the generated text, and special tokens are not stripped.
    """
    # Re-seed on every call so repeated calls use the same seed.
    set_seed(123)
    # Put the inputs on the device that holds the model's first parameters
    # instead of assuming the literal "cuda".
    device = next(fine_tuned_model.parameters()).device
    encoded = tokenizer(prompt, return_tensors="pt").to(device)
    outputs = fine_tuned_model.generate(
        input_ids=encoded["input_ids"],
        attention_mask=encoded["attention_mask"],
        max_new_tokens=max_new_tokens,
        num_return_sequences=1,
    )
    return tokenizer.decode(outputs[0])

In [None]:
# Smoke test: run the model on the "text" field of test row 100 and print the
# full decoded sequence (prompt plus generated continuation).
res = model_predict(test[100]["text"])
print(res)

<|begin_of_text|>Analyze the following product review and provide both the rating and a short summary.

Review:
I have been a fan of L'Engle's books since I first read WRINKLE IN TIME, shortly after it came out. Over the years I have often thought of her works and have often given her books as gifts. I have also read many of her books as they came out but missed this one until now. I wish that I had just skipped this one.<br /><br />The story focuses on Sandy and Dennys, the


Output:
Rating: 2.0
Summary: Not my favorite L'Engle book. Disappointing and boring. Not recommended. Disappointing and boring. Not recommended. Disappointing and boring. Not recommended. Disappointing and boring


In [10]:
def extract_output(text: str):
    """Return the part of ``text`` after the first "Output:" marker, stripped.

    When the marker is absent, ``text`` is returned unchanged (not stripped).
    """
    _, marker, answer = text.partition("Output:")
    if not marker:
        return text
    return answer.strip()

In [11]:
def build_review_prompt(user_review: str):
    """Wrap a raw review in the prompt layout the model was fine-tuned on.

    The review is followed by "\\n\\n\\nOutput:\\n" (three newlines before the
    marker). That is the separator seen in the dataset text and used as the
    training response template; the previous two-newline variant produced a
    prompt that differed from the training layout.

    Args:
        user_review: Review text entered by the user.

    Returns:
        A dict with a single "text" key holding the prompt.
    """
    prompt = (
        "Analyze the following product review and provide both the rating and a short summary.\n\n"
        "Review:\n"
        f"{user_review}"
        "\n\n\nOutput:\n"
    )
    return {"text": prompt}


In [24]:
def rating_summary(input):
    """Return the model's rating/summary text for one raw review.

    Builds the prompt, generates with the fine-tuned model and keeps only the
    text after the "Output:" marker. The parameter keeps its original name,
    which shadows the ``input`` builtin inside this function only.
    """
    prompt_text = build_review_prompt(input)["text"]
    generated = model_predict(prompt_text)
    return extract_output(generated)

In [19]:
# Read one review from the keyboard and run it through the model.
# NOTE(review): input() waits for user input, so this cell stops an unattended
# "Run All".
user = input()
output = rating_summary(user)


“This book was extremely disappointing, offering little value and failing on every level.”


In [20]:
# Show the extracted rating/summary text for the review entered above.
print(output)

Rating: 1.
