<a href="https://colab.research.google.com/github/januverma/llm-cross-encoders-for-recsys/blob/main/LLM_Regressor_for_Movie_Rec_with_genre_and_rating_no_prompt.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Fine-tune a LLM for Movie Rating Prediction

## Install Dependencies

In [1]:
! pip install -U "transformers[torch]" datasets accelerate peft bitsandbytes evaluate



## Load and Process Data

In [2]:
import pandas as pd
import re
from datasets import Dataset
import json

In [3]:
train_data = pd.read_csv('./train_data_it.csv')
test_data = pd.read_csv('./test_data_it.csv')

In [4]:
train_data['past_movies'] = train_data['past_movies'].apply(eval)
test_data['past_movies'] = test_data['past_movies'].apply(eval)

In [5]:
train_data.head()

Unnamed: 0,userId,movie_count,past_movies,past_movie_ids,candidate,movieId,rating
0,1,11,"[Girl, Interrupted (1999):::Drama:::4, Titanic...","[3186, 1721, 1270, 1022, 2340, 1836, 3408, 120...",Wallace & Gromit: The Best of Aardman Animatio...,720,3.0
1,1,11,[One Flew Over the Cuckoo's Nest (1975):::Dram...,"[1193, 919, 608, 2692, 1961, 2028, 3105, 938, ...",Mary Poppins (1964):::Children's|Comedy|Musical,1028,5.0
2,1,11,"[Bambi (1942):::Animation|Children's:::4, Apol...","[2018, 150, 1097, 914, 1287, 2797, 1246, 2762,...","Secret Garden, The (1993):::Children's|Drama",531,4.0
3,1,11,[Toy Story 2 (1999):::Animation|Children's|Com...,"[3114, 2791, 1029, 2321, 1197, 594, 2398, 1545...",Beauty and the Beast (1991):::Animation|Childr...,595,5.0
4,1,3,[Aladdin (1992):::Animation|Children's|Comedy|...,"[588, 1]",Tarzan (1999):::Animation|Children's,2687,3.0


In [6]:
test_data.head()

Unnamed: 0,userId,past_movies,past_movie_ids,candidate,movieId,rating
0,238,"[Philadelphia (1993):::Drama:::4, Crimes of th...","[508, 2738, 3546, 1747, 3135, 3071, 3194, 85, ...",Clara's Heart (1988):::Drama,3714,3.0
1,591,"[Postino, Il (The Postman) (1994):::Drama|Roma...","[58, 3370, 2611, 3765, 2000, 1246, 509, 3135, ...","Untouchables, The (1987):::Action|Crime|Drama",2194,3.0
2,165,"[Man Who Knew Too Much, The (1956):::Thriller:...","[2183, 2700, 3061, 2863, 2946, 2747, 3922, 203...",Repo Man (1984):::Comedy|Sci-Fi,1965,4.0
3,631,[Superman (1978):::Action|Adventure|Sci-Fi:::5...,"[2640, 2302, 3081, 1732, 1042, 440, 986, 2278,...",God Said 'Ha!' (1998):::Comedy,2499,5.0
4,588,[Singin' in the Rain (1952):::Musical|Romance:...,"[899, 2130, 1734, 2697, 3588, 1247, 1041, 3545...",What Ever Happened to Baby Jane? (1962):::Dram...,3546,4.0


In [7]:
train_data.iloc[0]

Unnamed: 0,0
userId,1
movie_count,11
past_movies,"[Girl, Interrupted (1999):::Drama:::4, Titanic..."
past_movie_ids,"[3186, 1721, 1270, 1022, 2340, 1836, 3408, 120..."
candidate,Wallace & Gromit: The Best of Aardman Animatio...
movieId,720
rating,3.0


In [8]:
train_data.past_movies.iloc[1]

["One Flew Over the Cuckoo's Nest (1975):::Drama:::5",
 "Wizard of Oz, The (1939):::Adventure|Children's|Drama|Musical:::4",
 'Fargo (1996):::Crime|Drama|Thriller:::4',
 'Run Lola Run (Lola rennt) (1998):::Action|Crime|Romance:::4',
 'Rain Man (1988):::Drama:::5',
 'Saving Private Ryan (1998):::Action|Drama|War:::5',
 'Awakenings (1990):::Drama:::5',
 'Gigi (1958):::Musical:::4',
 'Sound of Music, The (1965):::Musical:::5',
 'Driving Miss Daisy (1989):::Drama:::4']

In [9]:
train_data.past_movies.iloc[1]

["One Flew Over the Cuckoo's Nest (1975):::Drama:::5",
 "Wizard of Oz, The (1939):::Adventure|Children's|Drama|Musical:::4",
 'Fargo (1996):::Crime|Drama|Thriller:::4',
 'Run Lola Run (Lola rennt) (1998):::Action|Crime|Romance:::4',
 'Rain Man (1988):::Drama:::5',
 'Saving Private Ryan (1998):::Action|Drama|War:::5',
 'Awakenings (1990):::Drama:::5',
 'Gigi (1958):::Musical:::4',
 'Sound of Music, The (1965):::Musical:::5',
 'Driving Miss Daisy (1989):::Drama:::4']

In [10]:
test_data.past_movies.iloc[1]

['Postino, Il (The Postman) (1994):::Drama|Romance:::5',
 'Betrayed (1988):::Drama|Thriller:::4',
 'Winslow Boy, The (1998):::Drama:::5',
 'Hot Spot, The (1990):::Drama|Romance:::2',
 'Lethal Weapon (1987):::Action|Comedy|Crime|Drama:::3',
 'Dead Poets Society (1989):::Drama:::2',
 'Piano, The (1993):::Drama|Romance:::4',
 'Great Santini, The (1979):::Drama:::5',
 'Quiz Show (1994):::Drama:::4',
 'Breaking the Waves (1996):::Drama:::4']

## Create Hugging Face datasets

In [11]:
train_dataset = Dataset.from_pandas(train_data)
train_dataset

Dataset({
    features: ['userId', 'movie_count', 'past_movies', 'past_movie_ids', 'candidate', 'movieId', 'rating'],
    num_rows: 84297
})

In [12]:
test_dataset = Dataset.from_pandas(test_data)
test_dataset

Dataset({
    features: ['userId', 'past_movies', 'past_movie_ids', 'candidate', 'movieId', 'rating'],
    num_rows: 2000
})

In [13]:
# Evaluate the model on a small sample of the evaluation dataset
val_sample_size = 1000  # Adjust the sample size as needed
val_dataset = test_dataset.select(range(min(val_sample_size, len(test_dataset))))

## Tokenize

In [14]:
import os
import numpy as np
import torch
import evaluate
from transformers import (
    AutoTokenizer, AutoModelForSequenceClassification,
    DataCollatorWithPadding, TrainingArguments, Trainer,
    BitsAndBytesConfig
)

In [15]:
MODEL_NAME = "Qwen/Qwen2.5-7B"
NUM_LABELS = 1
MAX_LEN    = 512

In [16]:
tok = AutoTokenizer.from_pretrained(MODEL_NAME)
tok.add_special_tokens({"additional_special_tokens": ["[CAND]"]})

if tok.pad_token is None:
    tok.pad_token = tok.eos_token

In [17]:
def preprocess(ex):
    joined = " | ".join(ex["past_movies"]) + " [CAND] " + ex["candidate"]
    enc = tok(joined, truncation=True, max_length=MAX_LEN)
    enc["labels"] = float(ex["rating"])
    return enc

In [18]:
train_tok = train_dataset.map(
    preprocess, remove_columns=train_dataset.column_names)

val_tok   = val_dataset.map(
    preprocess, remove_columns=val_dataset.column_names)

test_tok  = test_dataset.map(
    preprocess, remove_columns=test_dataset.column_names)

Map:   0%|          | 0/84297 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

## Load Model

In [19]:
## BnB config
bnb_cfg = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_use_double_quant=True)

base = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME,
    num_labels=NUM_LABELS,
    device_map="auto",
    quantization_config=bnb_cfg,
    torch_dtype=torch.bfloat16,
)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Some weights of Qwen2ForSequenceClassification were not initialized from the model checkpoint at Qwen/Qwen2.5-7B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [20]:
base.config.problem_type = "regression"

In [21]:
base.config.pad_token_id = tok.pad_token_id   # keep config consistent

In [22]:
base.resize_token_embeddings(len(tok))   # add the "[CAND]" token

Embedding(151666, 3584)

## LoRA Setup

In [23]:
from peft import LoraConfig, get_peft_model, TaskType

lora_cfg = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],  # Qwen attention
)
model = get_peft_model(base, lora_cfg)
model.print_trainable_parameters()

trainable params: 10,096,128 || all params: 7,079,292,416 || trainable%: 0.1426


## Training Setup

In [29]:
args = TrainingArguments(
    output_dir      = "./qwen_cls_lora",
    per_device_train_batch_size = 4,
    per_device_eval_batch_size  = 4,
    gradient_accumulation_steps = 8,   # effective 32
    num_train_epochs = 5,
    learning_rate   = 2e-5,
    bf16            = torch.cuda.is_available(),
    logging_steps   = 50,
    eval_strategy = "steps",
    eval_steps = 500,
    save_strategy   = "steps",
    load_best_model_at_end = True,
    metric_for_best_model = "mae",
    label_names=["labels"],
)

In [30]:
import numpy as np
mae = evaluate.load("mae")

def compute_metrics(pred):
    preds = pred.predictions.squeeze()          # (batch,1) → (batch,)
    labels = pred.label_ids.astype(np.float32)
    return {"mae": mae.compute(predictions=preds, references=labels)["mae"]}

In [31]:
## Data collator for padding
collator = DataCollatorWithPadding(tok)

In [32]:
trainer = Trainer(
    model=model,
    args=args,
    data_collator=collator,
    train_dataset=train_tok,
    eval_dataset=val_tok,
    compute_metrics=compute_metrics,
)

## Training

In [33]:
trainer.train()

Step,Training Loss,Validation Loss,Mae
500,0.9159,1.203488,0.860609
1000,0.927,1.023252,0.820836
1500,0.9123,0.991659,0.804602
2000,0.926,1.117881,0.834
2500,0.869,0.99519,0.796531
3000,0.914,0.971161,0.792352
3500,0.9119,0.959976,0.785668
4000,0.8347,0.936996,0.777434




Step,Training Loss,Validation Loss,Mae
500,0.9159,1.203488,0.860609
1000,0.927,1.023252,0.820836
1500,0.9123,0.991659,0.804602
2000,0.926,1.117881,0.834
2500,0.869,0.99519,0.796531
3000,0.914,0.971161,0.792352
3500,0.9119,0.959976,0.785668
4000,0.8347,0.936996,0.777434
4500,0.8406,0.960696,0.797687
5000,0.8844,0.975237,0.793246




TrainOutput(global_step=13175, training_loss=0.8401337495112555, metrics={'train_runtime': 33217.6805, 'train_samples_per_second': 12.689, 'train_steps_per_second': 0.397, 'total_flos': 3.8242042880790067e+18, 'train_loss': 0.8401337495112555, 'epoch': 5.0})

In [34]:
trainer.evaluate(test_tok)

{'eval_loss': 1.1588256359100342,
 'eval_mae': 0.84656640625,
 'eval_runtime': 66.6332,
 'eval_samples_per_second': 30.015,
 'eval_steps_per_second': 7.504,
 'epoch': 5.0}

In [35]:
test_dataset[0]

{'userId': 238,
 'past_movies': ['Philadelphia (1993):::Drama:::4',
  'Crimes of the Heart (1986):::Comedy|Drama:::3',
  'What Ever Happened to Baby Jane? (1962):::Drama|Thriller:::3',
  'Wag the Dog (1997):::Comedy|Drama:::5',
  'Great Santini, The (1979):::Drama:::5',
  'Stand and Deliver (1987):::Drama:::4',
  'Way We Were, The (1973):::Drama:::2',
  'Angels and Insects (1995):::Drama|Romance:::4',
  'American Buffalo (1996):::Drama:::3',
  'Oscar and Lucinda (a.k.a. Oscar & Lucinda) (1997):::Drama|Romance:::3'],
 'past_movie_ids': '[508, 2738, 3546, 1747, 3135, 3071, 3194, 85, 806, 2801]',
 'candidate': "Clara's Heart (1988):::Drama",
 'movieId': 3714,
 'rating': 3.0}

In [36]:
def predict_rating(history, candidate):
    text = " | ".join(history) + " [CAND] " + candidate
    ids  = tok(text, return_tensors="pt").to(model.device)
    with torch.no_grad():
        pred = model(**ids).logits.squeeze().item()   # scalar
    return max(1.0, min(5.0, pred))   # clamp into range if desired

In [37]:
predict_rating(test_dataset[0]['past_movies'], test_dataset[0]['candidate'])

3.421875

In [38]:
preds = []
actuals = []
for i in range(len(test_dataset)):
    preds.append(predict_rating(test_dataset[i]['past_movies'], test_dataset[i]['candidate']))
    actuals.append(test_dataset[i]['rating'])

In [39]:
len(preds), len(actuals)

(2000, 2000)

In [40]:
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error

rmse = np.sqrt(mean_squared_error(actuals, preds))
mae = mean_absolute_error(actuals, preds)

print(f"RMSE: {rmse}")
print(f"MAE: {mae}")

RMSE: 1.0765764184060362
MAE: 0.846484375


In [41]:
from sklearn.metrics import (
    precision_score,
    recall_score,
    f1_score,
    roc_auc_score,
    accuracy_score,
    confusion_matrix
)
actuals_binary = [1 if x >= 4 else 0 for x in actuals]
preds_binary = [1 if x >= 4 else 0 for x in preds]

In [42]:
acc = accuracy_score(actuals_binary, preds_binary)
precision = precision_score(actuals_binary, preds_binary)
recall = recall_score(actuals_binary, preds_binary)
f1 = f1_score(actuals_binary, preds_binary)
auc = roc_auc_score(actuals_binary, preds_binary)

In [43]:
print(f"Precision: {precision:.4f}")
print(f"Recall:    {recall:.4f}")
print(f"F1:        {f1:.4f}")
print(f"AUC:       {auc:.4f}")
print(f"Accuracy:  {acc:.4f}")

Precision: 0.7195
Recall:    0.6080
F1:        0.6591
AUC:       0.6740
Accuracy:  0.6710
