In [None]:
# Step 0: Mount Google Drive (if needed)
# Mounts the user's Drive at /content/drive; this only works inside Google Colab,
# because the `google.colab` package is not available in other Jupyter environments.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Step 1: Install dependencies and import libraries
!pip install -q transformers
!pip install -q torchinfo
!pip install -q datasets
!pip install -q evaluate
!pip install -q optuna
!pip install -q wandb

import wandb
# Log in to wandb without embedding the API key in the notebook.
# With no `key` argument, wandb looks for the WANDB_API_KEY environment variable or a
# stored credential and otherwise prompts interactively, so the secret never ends up
# in the saved .ipynb or in version control.
wandb.login()

from transformers import (RobertaTokenizer, RobertaForSequenceClassification, Trainer,
                          TrainingArguments)
from datasets import load_dataset, load_from_disk
import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import os

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/491.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.2/491.2 kB[0m [31m45.7 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/116.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m15.1 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/183.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m183.9/183.9 kB[0m [31m20.6 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/143.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m143.5/143.5 kB[0m [31m18.9 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mpeng_zhao[0m ([33mpeng_zhao-university-of-california-berkeley[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [None]:
# Step 2:
# Re-create the tokenizer and the tokenize/label function that were used during training
tokenizer = RobertaTokenizer.from_pretrained("roberta-base")


def tokenize_and_format(examples):
    """Tokenize a batch of reviews and attach binary sentiment labels.

    Args:
        examples: Batch mapping that provides a "text" column and a "rating" column.

    Returns:
        The tokenizer output (truncated / padded to 256 tokens) with an extra
        "labels" entry: 1 where the rating is greater than 3, otherwise 0.
    """
    encoded = tokenizer(
        examples["text"],
        truncation=True,
        padding="max_length",
        max_length=256,
    )
    # Ratings above 3 count as positive (1); everything else is negative (0)
    encoded["labels"] = [int(rating > 3) for rating in examples["rating"]]
    return encoded

# Metric callback handed to the Trainer for evaluation
def compute_metrics(eval_pred):
    """Compute accuracy and weighted precision / recall / F1 from model outputs.

    Args:
        eval_pred: Pair ``(logits, labels)``; the predicted class is the argmax
            of the logits over the last axis.

    Returns:
        Dict with the keys "accuracy", "f1", "precision" and "recall".

    Note:
        With ``average='weighted'`` the recall is mathematically identical to the
        accuracy, so those two entries always carry the same number.
    """
    logits, labels = eval_pred
    predicted = np.argmax(logits, axis=-1)
    # Tuple layout returned by sklearn: (precision, recall, f1, support)
    scores = precision_recall_fscore_support(labels, predicted, average='weighted')
    return {
        "accuracy": accuracy_score(labels, predicted),
        "f1": scores[2],
        "precision": scores[0],
        "recall": scores[1],
    }


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

In [None]:
# Step 3a: Load the Grocery_and_Gourmet_Food reviews and build a filtered sample
food_dataset = load_dataset(
    "McAuley-Lab/Amazon-Reviews-2023",
    "raw_review_Grocery_and_Gourmet_Food",
    trust_remote_code=True,
)

# Fraction of the "full" split to keep, and the row count that corresponds to it
data_fraction = 0.2
total_samples = len(food_dataset["full"])
subset_size = int(total_samples * data_fraction)

# Seeded shuffle -> keep the first `subset_size` rows -> drop reviews rated exactly 3
subset_dataset = (
    food_dataset["full"]
    .shuffle(seed=42)
    .select(range(subset_size))
    .filter(lambda x: x["rating"] != 3)
)


In [None]:
# Step 3b: Tokenize the sampled dataset using batched processing
# Map over `subset_dataset` (the 20% sample with rating == 3 removed in Step 3a),
# NOT the raw `food_dataset`: mapping the raw DatasetDict silently discards the
# sampling and filtering, tokenizes the entire corpus, and labels 3-star reviews
# as negative (0) because tokenize_and_format uses `rating > 3`.
tokenized_food = subset_dataset.map(tokenize_and_format, batched=True)

# Save a checkpoint so the tokenization does not have to be repeated.
# NOTE(review): if an older checkpoint already exists at this path, remove it first
# so stale files from a previous save are not mixed with the new ones.
tokenized_food.save_to_disk("/content/drive/MyDrive/FP/Checkpoints/tokenized_food_checkpoint")

# Shuffle with a fixed seed and return torch tensors when rows are accessed
# (no train/test split happens here; the evaluation subset is chosen in Step 6)
tokenized_food = tokenized_food.shuffle(seed=42)
tokenized_food.set_format("torch")


In [None]:
# Step 3': Reload the tokenized dataset saved on Drive and shuffle it with a fixed seed
tokenized_food = load_from_disk(
    "/content/drive/MyDrive/FP/Checkpoints/tokenized_food_checkpoint"
).shuffle(seed=42)

# Return torch tensors when rows are accessed
tokenized_food.set_format("torch")

Loading dataset from disk:   0%|          | 0/46 [00:00<?, ?it/s]

In [None]:
# Step 4: Restore the classifier that was previously fine-tuned on Movies_and_TV
model = RobertaForSequenceClassification.from_pretrained(
    "/content/drive/MyDrive/FP/Checkpoints/final_checkpoint_movies",
    num_labels=2,
)


In [None]:
# Step 5: Build an evaluation-only Trainer for the Grocery_and_Gourmet_Food dataset (no training)
eval_training_args = TrainingArguments(
    output_dir="/content/drive/MyDrive/FP/results_food_eval",
    report_to=[],  # empty list: do not report to any logging integration
    fp16=True,
    do_predict=True,
    per_device_eval_batch_size=2048,
)

# No train/eval datasets are attached here; the dataset is passed in at call time
eval_trainer = Trainer(model=model, args=eval_training_args, compute_metrics=compute_metrics)


In [None]:
# Step 6: Evaluate the model on the Grocery_and_Gourmet_Food dataset

# Fraction of the tokenized data to score
test_fraction = 0.1

# If tokenized_food is a DatasetDict (a dict subclass), use its "full" split;
# otherwise it is already a single dataset and is used directly.
if isinstance(tokenized_food, dict):
    dataset_to_sample = tokenized_food["full"]
else:
    dataset_to_sample = tokenized_food

total_samples = len(dataset_to_sample)
test_subset = dataset_to_sample.shuffle(seed=42).select(range(int(test_fraction * total_samples)))

# Run inference ONCE. Calling both evaluate() and predict() on the same subset
# performs two full forward passes that yield identical metrics, which doubles
# the (very long) runtime for no additional information.
predictions = eval_trainer.predict(test_subset)

# Expose the same metrics under the "eval_" prefix that evaluate() would have used,
# so `eval_result` keeps the key names of the previous version of this cell.
eval_result = {
    ("eval_" + name[len("test_"):]) if name.startswith("test_") else name: value
    for name, value in predictions.metrics.items()
}
print("Evaluation results on Grocery_and_Gourmet_Food (subset):")
print(eval_result)

# Metrics exactly as reported by predict() ("test_" prefix)
print("Predictions summary:")
print(predictions.metrics)


Evaluation results on Grocery_and_Gourmet_Food (subset):
{'eval_loss': 0.21174874901771545, 'eval_model_preparation_time': 0.0031, 'eval_accuracy': 0.9249279953514749, 'eval_f1': 0.9220636140136625, 'eval_precision': 0.9249437246084682, 'eval_recall': 0.9249279953514749, 'eval_runtime': 2947.239, 'eval_samples_per_second': 485.828, 'eval_steps_per_second': 0.238}
Predictions summary:
{'test_loss': 0.21174874901771545, 'test_model_preparation_time': 0.0031, 'test_accuracy': 0.9249279953514749, 'test_f1': 0.9220636140136625, 'test_precision': 0.9249437246084682, 'test_recall': 0.9249279953514749, 'test_runtime': 2750.0874, 'test_samples_per_second': 520.657, 'test_steps_per_second': 0.255}
