### Install dependencies


In [1]:
%pip install ftfy huggingface_hub scikit-learn transformers datasets optuna accelerate==0.27.2 --quiet

Note: you may need to restart the kernel to use updated packages.


### Import Libraries

In [1]:
import pandas as pd
import requests
import time
from bs4 import BeautifulSoup
import random
import numpy as np
import torch
import ftfy

### Load data

In [3]:
url = "https://raw.githubusercontent.com/VridhiJ/CIS519/refs/heads/main/Dataset/news_urls.csv"

# Load the dataset
df = pd.read_csv(url)

# Display the first few rows to verify the data
df.head()

Unnamed: 0,url
0,https://www.foxnews.com/lifestyle/jack-carrs-e...
1,https://www.foxnews.com/entertainment/bruce-wi...
2,https://www.foxnews.com/politics/blinken-meets...
3,https://www.foxnews.com/entertainment/emily-bl...
4,https://www.foxnews.com/media/the-view-co-host...


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3805 entries, 0 to 3804
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   url     3805 non-null   object
dtypes: object(1)
memory usage: 29.9+ KB


### Headline Collection Method

Collect the news headlines by scraping multiple news websites using BeautifulSoup libraries. The scraping process involved:

1. Fetching Webpages:

  - Sending HTTP requests to news article URLs.

  - Using appropriate headers to mimic a real browser and avoid blocking.
    - User-Agent: Identifies the client making request. Helps avoid bot detection by mimicking real browser behavior.
    - Accept-Charset:  Specifies the character encodings that the client can process. Helps ensure proper text rendering.
    - Accept: Defines the type of content the client expects from the server.
    - Accept-Language: Specifies the preferred language for the response content. Helps receive content in a readable format when a website supports multiple languages.
    - referer: Indicates the URL of the page that made the request.
    

2. Extracting Headlines:

  - Parsing the webpage content with BeautifulSoup.

  - Identifying and extracting headlines using H1 tags and class attributes related to headlines.

  - Handling variations in website structures dynamically.

3. Error Handling & Optimization:

  - Implementing error handling to skip unavailable pages.

4. Storing Data:

  - Storing extracted headlines in a structured pandas DataFrame.

 - Saving the data in CSV format for further processing.

This method ensures efficient and scalable data collection while minimizing disruptions caused by website restrictions.

### Data Scraping (don't rerun)

In [6]:
# Helper function to get headline from a single URL
def get_article_headline(url):
  try:
    user_agents = [
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
    ]

    session = requests.Session()

    headers = {
    'user-agent': random.choice(user_agents),
    "Accept-Charset": "utf-8",
    "Accept": "*/*",
    "Accept-Language": "en-US,en;q=0.9",
    "referer": "https://www.google.com/",
    }
    time.sleep(2)

    response = requests.get(url, headers = headers)

    if response.status_code != 200:
      print(f"Warning: Failed to load page {url} (Status Code: {response.status_code})")
      return None  # Don't stop execution, just return None

    soup = BeautifulSoup(response.text, 'html.parser')

    # To find headline of various types of classes
    headline = soup.find("h1", class_=lambda c: c and "headline" in c)

    if headline:
      headline = ftfy.fix_text(headline.get_text())  # Fix any encoding issues
      return headline.strip()  # Return the cleaned headline
    else:
      return None  # Return None if no headline is found
  except Exception as e:
    print(f"Error processing {url}: {e}")
    return None  # Return None in case of an error

In [7]:
# Create an empty list to store the headlines
headlines = []

# Loop through the URLs in your dataframe
for url in df['url']:
    headline = get_article_headline(url)
    headlines.append(headline)

# Add the scraped headlines to your dataframe
df['headline'] = headlines

# Show the first few rows with the scraped headlines
df.head()



In [None]:
df.to_csv("scraped_headlines.csv", index=False)

In [None]:
from huggingface_hub import login
login()

In [None]:
from huggingface_hub import create_repo

# Create a repository on Hugging Face Hub
repo_name = 'scraped-headlines'
create_repo(repo_name, private=True)

In [None]:
from huggingface_hub import upload_file

upload_file(
    path_or_fileobj='scraped_headlines.csv',
    path_in_repo='scraped_headlines_v4.csv',
    repo_id= 'VridhiJain/scraped-headlines'
)

### Cleaning Data

In [2]:
from huggingface_hub import notebook_login

notebook_login()  # enter your Hugging Face token

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [3]:
import pandas as pd
from huggingface_hub import hf_hub_download

repo_id = "VridhiJain/scraped-headlines"  # repo name
filename = "scraped_headlines_v4.csv"  # file name

# Download the file
file_path = hf_hub_download(repo_id=repo_id, filename=filename)

# Load into a DataFrame
df = pd.read_csv(file_path)

df.head()

scraped_headlines_v4.csv:   0%|          | 0.00/689k [00:00<?, ?B/s]

Unnamed: 0,url,headline
0,https://www.foxnews.com/lifestyle/jack-carrs-e...,Jack Carr recalls Gen. Eisenhower's D-Day memo...
1,https://www.foxnews.com/entertainment/bruce-wi...,"Bruce Willis, Demi Moore avoided doing one thi..."
2,https://www.foxnews.com/politics/blinken-meets...,
3,https://www.foxnews.com/entertainment/emily-bl...,Emily Blunt says her 'toes curl' when people t...
4,https://www.foxnews.com/media/the-view-co-host...,"'The View' co-host, CNN commentator Ana Navarr..."


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3805 entries, 0 to 3804
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   url       3805 non-null   object
 1   headline  3352 non-null   object
dtypes: object(2)
memory usage: 59.6+ KB


In [5]:
# Check for missing values in the dataset
print(df.isnull().sum())

# Drop any rows where the headline is missing/duplicates
df = df.dropna(subset=['headline']).drop_duplicates(subset=['headline'])

# Reset index after dropping rows
df = df.reset_index(drop=True)

url           0
headline    453
dtype: int64


In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3336 entries, 0 to 3335
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   url       3336 non-null   object
 1   headline  3336 non-null   object
dtypes: object(2)
memory usage: 52.3+ KB


In [7]:
df['url'].str.contains('foxnews').value_counts()

url
False    1779
True     1557
Name: count, dtype: int64

Fox News Headlines: 1779

NBC News Headlines: 1557

### Baseline Model(TF-IDF + Log Regression)

In [2]:
# For reproducibility
def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # if using GPU
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seed()

In [3]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Load the preprocessed headline data from Hugging Face
from huggingface_hub import hf_hub_download
csv_path = hf_hub_download(repo_id="VridhiJain/scraped-headlines", filename="scraped_headlines_v4.csv")
df = pd.read_csv(csv_path)

# Drop rows with missing headlines
df = df.dropna(subset=['headline']).drop_duplicates(subset=['headline'])

# Label: 1 for FoxNews, 0 for NBC
df['label'] = df['url'].apply(lambda x: 1 if "foxnews" in x else 0)

# Train/Test Split
X_train, X_test, y_train, y_test = train_test_split(df['headline'], df['label'], test_size=0.2, random_state=42)

# TF-IDF Vectorization
vectorizer = TfidfVectorizer(stop_words='english', max_features=100)
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Train Logistic Regression
baseline_model = LogisticRegression(max_iter=100)
baseline_model.fit(X_train_tfidf, y_train)

# Evaluate
y_pred = baseline_model.predict(X_test_tfidf)
print(f"Accuracy: {accuracy_score(y_test, y_pred):.4f}")
print("Classification Report:\n", classification_report(y_test, y_pred))

scraped_headlines_v4.csv:   0%|          | 0.00/689k [00:00<?, ?B/s]

Accuracy: 0.7036
Classification Report:
               precision    recall  f1-score   support

           0       0.73      0.65      0.69       335
           1       0.68      0.76      0.72       333

    accuracy                           0.70       668
   macro avg       0.71      0.70      0.70       668
weighted avg       0.71      0.70      0.70       668



### Bert-based Classifier

In [4]:
import os
import torch
from sklearn.metrics import precision_recall_fscore_support
from datasets import Dataset
from transformers import (AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments)

In [5]:
df['label'] = df['url'].apply(lambda x: 1 if "foxnews" in x.lower() else 0)
train_df, test_df = train_test_split(df, test_size=0.2, stratify=df['label'], random_state=42)

train_dataset = Dataset.from_pandas(train_df[['headline', 'label']])
test_dataset = Dataset.from_pandas(test_df[['headline', 'label']])

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

def tokenize(batch):
    return tokenizer(batch["headline"], truncation=True, padding="max_length", max_length=128)

train_dataset = train_dataset.map(tokenize, batched=True)
test_dataset = test_dataset.map(tokenize, batched=True)

train_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])
test_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])

# Load model
bert_model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)

# Define training arguments
training_args = TrainingArguments(
    output_dir="./bert_results",
    eval_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
    report_to="none"
)

# Define evaluation metrics
def compute_metrics(pred):
    labels = pred.label_ids
    preds = torch.argmax(torch.tensor(pred.predictions), axis=1).numpy()
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='binary')
    acc = accuracy_score(labels, preds)
    return{"accuracy": acc, "f1": f1, "precision": precision, "recall": recall}

trainer = Trainer(
    model=bert_model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics
)

trainer.train()
results = trainer.evaluate()
print("BERT Evaluation Results:")
print(results)

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Map:   0%|          | 0/2668 [00:00<?, ? examples/s]

Map:   0%|          | 0/668 [00:00<?, ? examples/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.426632,0.815868,0.79397,0.831579,0.759615
2,No log,0.419494,0.823353,0.819018,0.785294,0.855769
3,0.359400,0.455817,0.841317,0.816609,0.887218,0.75641


BERT Evaluation Results:
{'eval_loss': 0.4558168649673462, 'eval_accuracy': 0.8413173652694611, 'eval_f1': 0.8166089965397924, 'eval_precision': 0.8872180451127819, 'eval_recall': 0.7564102564102564, 'eval_runtime': 5.7073, 'eval_samples_per_second': 117.043, 'eval_steps_per_second': 7.359, 'epoch': 3.0}


In [14]:
from huggingface_hub import HfApi

api = HfApi()

# Repo name
repo_name = "bert_vanilla_2"

# This creates a repo under your namespace (username)
api.create_repo(repo_id=repo_name, private=False, exist_ok=True)


RepoUrl('https://huggingface.co/VridhiJain/bert_vanilla_2', endpoint='https://huggingface.co', repo_type='model', repo_id='VridhiJain/bert_vanilla_2')

In [None]:
from huggingface_hub import notebook_login, login

notebook_login()  # enter your Hugging Face token

In [None]:
# Push model and tokenizer
trainer.model.push_to_hub("bert_vanilla_2")
tokenizer.push_to_hub("bert_vanilla_2")

### RoBERTa-based Classifier

In [6]:
from transformers import RobertaTokenizer, RobertaForSequenceClassification

tokenizer_roberta = RobertaTokenizer.from_pretrained("roberta-base")
train_dataset = Dataset.from_pandas(train_df[["headline", "label"]])
test_dataset = Dataset.from_pandas(test_df[["headline", "label"]])

def tokenize_roberta(batch):
    return tokenizer_roberta(batch["headline"], truncation=True, padding="max_length", max_length=128)

train_dataset = train_dataset.map(tokenize_roberta, batched=True)
test_dataset = test_dataset.map(tokenize_roberta, batched=True)
train_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])
test_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])

roberta_model = RobertaForSequenceClassification.from_pretrained("roberta-base", num_labels=2)

training_args_roberta = TrainingArguments(
    output_dir="./roberta_results",
    eval_strategy="epoch",
    logging_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
    report_to="none"
)

trainer_roberta = Trainer(
    model=roberta_model,
    args=training_args_roberta,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics
)

trainer_roberta.train()
roberta_results = trainer_roberta.evaluate()
print("RoBERTa Evaluation Results:", roberta_results)

trainer_roberta.save_model("./roberta_model")

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

Map:   0%|          | 0/2668 [00:00<?, ? examples/s]

Map:   0%|          | 0/668 [00:00<?, ? examples/s]

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.5647,0.428925,0.806886,0.774869,0.850575,0.711538
2,0.3517,0.384277,0.850299,0.84472,0.819277,0.871795
3,0.2221,0.380195,0.860778,0.84878,0.861386,0.836538


RoBERTa Evaluation Results: {'eval_loss': 0.3801954686641693, 'eval_accuracy': 0.8607784431137725, 'eval_f1': 0.848780487804878, 'eval_precision': 0.8613861386138614, 'eval_recall': 0.8365384615384616, 'eval_runtime': 5.7908, 'eval_samples_per_second': 115.356, 'eval_steps_per_second': 7.253, 'epoch': 3.0}


In [18]:
# Repo name
repo_name = "roberta_vanilla_2"

# This creates a repo under your namespace (username)
api.create_repo(repo_id=repo_name, private=False, exist_ok=True)

RepoUrl('https://huggingface.co/VridhiJain/roberta_vanilla_2', endpoint='https://huggingface.co', repo_type='model', repo_id='VridhiJain/roberta_vanilla_2')

In [None]:
# Push model and tokenizer
trainer.model.push_to_hub("roberta_vanilla_2")
tokenizer.push_to_hub("roberta_vanilla_2")

### Bert-based Classifier - Hyperparameter Tuning

In [7]:
from sklearn.model_selection import train_test_split

df['label'] = df['url'].apply(lambda x: 1 if "foxnews" in x.lower() else 0)
train_df, test_df = train_test_split(df, test_size=0.2, stratify=df['label'], random_state=42)

train_dataset = Dataset.from_pandas(train_df[['headline', 'label']])
test_dataset = Dataset.from_pandas(test_df[['headline', 'label']])

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

def tokenize(batch):
    return tokenizer(batch["headline"], truncation=True, padding="max_length", max_length=128)

train_dataset = train_dataset.map(tokenize, batched=True)
test_dataset = test_dataset.map(tokenize, batched=True)

train_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])
test_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Map:   0%|          | 0/2668 [00:00<?, ? examples/s]

Map:   0%|          | 0/668 [00:00<?, ? examples/s]

In [8]:
from sklearn.metrics import precision_recall_fscore_support, accuracy_score

def compute_metrics(pred):
  labels = pred.label_ids
  preds = torch.argmax(torch.tensor(pred.predictions), axis=1).numpy()
  precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='binary')
  acc = accuracy_score(labels, preds)
  return{"accuracy": acc, "f1": f1, "precision": precision, "recall": recall}

### Grid Search

In [13]:
from transformers import (AutoModelForSequenceClassification, Trainer, TrainingArguments)
import numpy as np

# define search space
learning_rates = [2e-5, 3e-5, 5e-5]
epochs = [3, 4, 5]
weight_decays = [0.01, 0.001]

best_f1 = 0
best_config = {}

for lr in learning_rates:
  for num_epochs in epochs:
    for wd in weight_decays:
      print(f"\nTraining with lr={lr}, epochs={num_epochs}, weight_decay={wd}")

      training_args = TrainingArguments(
        output_dir=f"./bert_tuned_lr{lr}_ep{num_epochs}_wd{wd}",
        eval_strategy="epoch",
        learning_rate=lr,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=16,
        num_train_epochs=num_epochs,
        weight_decay=wd,
        report_to="none"
      )

      model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)

      trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=test_dataset,
        compute_metrics=compute_metrics
      )

      trainer.train()
      eval_results = trainer.evaluate()
      print("F1 score:", eval_results['eval_f1'])

      if eval_results['eval_f1'] > best_f1:
        best_f1 = eval_results['eval_f1']
        best_config = {
            "learning_rate": lr,
            "num_epochs": num_epochs,
            "weight_decay": wd,
            "eval_results": eval_results
        }

print("\nBest configuration:")
print(best_config)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



Training with lr=2e-05, epochs=3, weight_decay=0.01


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.426632,0.815868,0.79397,0.831579,0.759615
2,No log,0.419494,0.823353,0.819018,0.785294,0.855769
3,0.359400,0.455817,0.841317,0.816609,0.887218,0.75641


F1 score: 0.8166089965397924

Training with lr=2e-05, epochs=3, weight_decay=0.001


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.442123,0.808383,0.77931,0.843284,0.724359
2,No log,0.434572,0.808383,0.802469,0.77381,0.833333
3,0.380300,0.449917,0.833832,0.807626,0.879245,0.746795


F1 score: 0.8076256499133448

Training with lr=2e-05, epochs=4, weight_decay=0.01


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.469424,0.773952,0.718808,0.857778,0.61859
2,No log,0.429148,0.829341,0.815534,0.823529,0.807692
3,0.382300,0.463697,0.832335,0.808219,0.867647,0.75641
4,0.382300,0.536257,0.833832,0.810903,0.865455,0.762821


F1 score: 0.8109028960817717

Training with lr=2e-05, epochs=4, weight_decay=0.001


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.427433,0.808383,0.775439,0.856589,0.708333
2,No log,0.414941,0.829341,0.82622,0.787791,0.86859
3,0.343500,0.452554,0.83982,0.82314,0.849829,0.798077
4,0.343500,0.571782,0.83982,0.819562,0.864769,0.778846


F1 score: 0.8195615514333895

Training with lr=2e-05, epochs=5, weight_decay=0.01


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.429466,0.811377,0.770909,0.890756,0.679487
2,No log,0.41337,0.832335,0.819936,0.822581,0.817308
3,0.336200,0.489701,0.838323,0.824675,0.835526,0.814103
4,0.336200,0.660159,0.823353,0.78777,0.897541,0.701923
5,0.336200,0.686295,0.842814,0.824708,0.860627,0.791667


F1 score: 0.8247078464106845

Training with lr=2e-05, epochs=5, weight_decay=0.001


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.453365,0.79491,0.751361,0.866109,0.663462
2,No log,0.401113,0.826347,0.80602,0.842657,0.772436
3,0.347700,0.464564,0.832335,0.809524,0.862319,0.762821
4,0.347700,0.677387,0.845808,0.815742,0.923077,0.730769
5,0.347700,0.713668,0.835329,0.809028,0.882576,0.746795


F1 score: 0.8090277777777778

Training with lr=3e-05, epochs=3, weight_decay=0.01


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.446268,0.800898,0.773424,0.825455,0.727564
2,No log,0.399312,0.835329,0.818482,0.843537,0.794872


F1 score: 0.7902097902097902

Training with lr=3e-05, epochs=4, weight_decay=0.01


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.446978,0.800898,0.765432,0.85098,0.695513
2,No log,0.466851,0.812874,0.776386,0.878543,0.695513
3,0.327600,0.579543,0.827844,0.800693,0.871698,0.740385
4,0.327600,0.702011,0.835329,0.810997,0.874074,0.75641


F1 score: 0.8109965635738832

Training with lr=3e-05, epochs=4, weight_decay=0.001


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.434977,0.802395,0.764286,0.862903,0.685897
2,No log,0.424984,0.830838,0.810084,0.85159,0.772436
3,0.310400,0.551798,0.853293,0.833898,0.884892,0.788462
4,0.310400,0.657122,0.857784,0.841402,0.878049,0.807692


F1 score: 0.8414023372287145

Training with lr=3e-05, epochs=5, weight_decay=0.01


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.426798,0.797904,0.764398,0.83908,0.701923
2,No log,0.416095,0.848802,0.836834,0.843648,0.830128
3,0.315100,0.563682,0.845808,0.82149,0.89434,0.759615
4,0.315100,0.773806,0.850299,0.822695,0.920635,0.74359
5,0.315100,0.745173,0.856287,0.837288,0.888489,0.791667


F1 score: 0.8372881355932204

Training with lr=3e-05, epochs=5, weight_decay=0.001


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.47103,0.782934,0.741533,0.835341,0.666667
2,No log,0.400084,0.848802,0.834154,0.855219,0.814103
3,0.318400,0.694243,0.821856,0.780847,0.917749,0.679487
4,0.318400,0.896279,0.821856,0.780037,0.921397,0.676282
5,0.318400,0.863166,0.827844,0.801382,0.868914,0.74359


F1 score: 0.8013816925734024

Training with lr=5e-05, epochs=3, weight_decay=0.01


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.398506,0.802395,0.766784,0.854331,0.695513
2,No log,0.383496,0.863772,0.853462,0.857605,0.849359
3,0.286300,0.526548,0.85479,0.834188,0.893773,0.782051


: 

: 

: 

### Bert-based Classifier retrained w/ best hyperparameters (from grid search)

Best configuration (best F1):
- Learning rate: 5e-05 
- Num epochs: 5 
- Weight decay: 0.01

In [9]:
# Load model
bert_gridsearch_model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)

# Define training arguments
training_args = TrainingArguments(
    output_dir="./bert_results_best_gridsearch",
    eval_strategy="epoch",
    learning_rate=5e-05,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=5,
    weight_decay=0.01,
    report_to="none"
)

trainer = Trainer(
    model=bert_gridsearch_model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics
)

trainer.train()
results = trainer.evaluate()
print("BERT Evaluation Results:")
print(results)

trainer.save_model("./bert_gridsearch_model")

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.396439,0.818862,0.781193,0.896266,0.692308
2,No log,0.41045,0.83982,0.821963,0.854671,0.791667
3,0.276900,0.66983,0.83982,0.810619,0.905138,0.733974
4,0.276900,0.930456,0.841317,0.806569,0.936441,0.708333
5,0.276900,0.840534,0.85479,0.830716,0.911877,0.762821


BERT Evaluation Results:
{'eval_loss': 0.8405343294143677, 'eval_accuracy': 0.8547904191616766, 'eval_f1': 0.8307155322862129, 'eval_precision': 0.9118773946360154, 'eval_recall': 0.7628205128205128, 'eval_runtime': 5.6238, 'eval_samples_per_second': 118.781, 'eval_steps_per_second': 7.468, 'epoch': 5.0}


In [13]:
from huggingface_hub import HfApi

api = HfApi()
# Repo name
repo_name = "bert_gridsearch_2"

# This creates a repo under your namespace (username)
api.create_repo(repo_id=repo_name, private=False, exist_ok=True)

RepoUrl('https://huggingface.co/VridhiJain/bert_gridsearch_2', endpoint='https://huggingface.co', repo_type='model', repo_id='VridhiJain/bert_gridsearch_2')

In [15]:
# Push model and tokenizer
trainer.model.push_to_hub("bert_gridsearch_2")
tokenizer.push_to_hub("bert_gridsearch_2")

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/VridhiJain/bert_gridsearch_2/commit/a031f5e97d5dc481bc56190f2def1b6284353af2', commit_message='Upload tokenizer', commit_description='', oid='a031f5e97d5dc481bc56190f2def1b6284353af2', pr_url=None, repo_url=RepoUrl('https://huggingface.co/VridhiJain/bert_gridsearch_2', endpoint='https://huggingface.co', repo_type='model', repo_id='VridhiJain/bert_gridsearch_2'), pr_revision=None, pr_num=None)

### Bayesian Optimization (using optuna)

In [16]:
import optuna
def model_init():
  return AutoModelForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)

def objective(trial):
  # hyperparameter search space
  learning_rate = trial.suggest_float("learning_rate", 1e-6, 5e-5, log=True)
  weight_decay = trial.suggest_float("weight_decay", 0.0, 0.3)
  batch_size = trial.suggest_categorical("batch_size", [8, 16, 32])
  num_train_epochs = trial.suggest_int("num_train_epochs", 2, 5)

  args = TrainingArguments(
    output_dir=f"./bert_bayesian_tuned_lr{learning_rate}_ep{num_train_epochs}_wd{weight_decay}",
    eval_strategy="epoch",
    learning_rate=learning_rate,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=num_train_epochs,
    weight_decay=weight_decay,
    report_to="none"
  )

  trainer = Trainer(
    model_init=model_init,
    args=args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,
  )

  trainer.train()
  eval_result = trainer.evaluate()
  return eval_result["eval_f1"]


In [17]:
# run optimization loop
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=15)

[I 2025-05-07 02:02:19,474] A new study created in memory with name: no-name-cd901520-a5a5-461a-b984-9bcf15373fc6
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.46676,0.784431,0.791304,0.722222,0.875
2,No log,0.405856,0.821856,0.812006,0.800623,0.823718
3,No log,0.413123,0.829341,0.823529,0.796407,0.852564
4,No log,0.426895,0.83982,0.821963,0.854671,0.791667


[I 2025-05-07 02:06:53,172] Trial 0 finished with value: 0.8219633943427621 and parameters: {'learning_rate': 1.8508075746385966e-05, 'weight_decay': 0.09499825421417672, 'batch_size': 32, 'num_train_epochs': 4}. Best is trial 0 with value: 0.8219633943427621.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.601,0.699102,0.67423,0.681967,0.666667
2,0.637400,0.555956,0.718563,0.676976,0.72963,0.63141


[I 2025-05-07 02:09:56,270] Trial 1 finished with value: 0.6769759450171822 and parameters: {'learning_rate': 4.144451980447425e-06, 'weight_decay': 0.20786572117414442, 'batch_size': 8, 'num_train_epochs': 2}. Best is trial 0 with value: 0.8219633943427621.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.555719,0.738024,0.702886,0.747292,0.663462
2,0.610900,0.476841,0.797904,0.77686,0.802048,0.753205
3,0.465700,0.471277,0.784431,0.754266,0.806569,0.708333


[I 2025-05-07 02:14:26,713] Trial 2 finished with value: 0.7542662116040956 and parameters: {'learning_rate': 4.951619131331183e-06, 'weight_decay': 0.11474886906471249, 'batch_size': 8, 'num_train_epochs': 3}. Best is trial 0 with value: 0.8219633943427621.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.448176,0.791916,0.736243,0.902326,0.621795
2,No log,0.406413,0.832335,0.820513,0.820513,0.820513
3,0.321300,0.527646,0.848802,0.826758,0.889299,0.772436
4,0.321300,0.662746,0.847305,0.822917,0.897727,0.759615


[I 2025-05-07 02:19:28,859] Trial 3 finished with value: 0.8229166666666666 and parameters: {'learning_rate': 2.322620076809675e-05, 'weight_decay': 0.039382382431990014, 'batch_size': 16, 'num_train_epochs': 4}. Best is trial 3 with value: 0.8229166666666666.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.681662,0.613772,0.556701,0.6,0.519231
2,No log,0.664767,0.672156,0.612389,0.683794,0.554487
3,No log,0.657458,0.684132,0.646566,0.677193,0.61859


[I 2025-05-07 02:22:56,157] Trial 4 finished with value: 0.6465661641541038 and parameters: {'learning_rate': 1.72483038806336e-06, 'weight_decay': 0.28532904003413123, 'batch_size': 32, 'num_train_epochs': 3}. Best is trial 3 with value: 0.8229166666666666.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.428017,0.80988,0.805513,0.771261,0.842949
2,No log,0.378352,0.847305,0.838608,0.828125,0.849359
3,No log,0.411184,0.838323,0.817568,0.864286,0.775641
4,No log,0.516122,0.833832,0.804233,0.894118,0.730769
5,No log,0.540089,0.835329,0.815436,0.855634,0.778846


[I 2025-05-07 02:28:36,360] Trial 5 finished with value: 0.8154362416107382 and parameters: {'learning_rate': 2.581111550748752e-05, 'weight_decay': 0.18679526297932084, 'batch_size': 32, 'num_train_epochs': 5}. Best is trial 3 with value: 0.8229166666666666.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.527109,0.77994,0.773498,0.744807,0.804487


[I 2025-05-07 02:36:55,880] Trial 7 finished with value: 0.8096885813148789 and parameters: {'learning_rate': 1.335342919991641e-05, 'weight_decay': 0.08200623379037976, 'batch_size': 8, 'num_train_epochs': 3}. Best is trial 3 with value: 0.8229166666666666.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.425388,0.811377,0.808511,0.768786,0.852564
2,No log,0.368836,0.838323,0.828025,0.822785,0.833333
3,No log,0.413692,0.841317,0.818493,0.878676,0.766026
4,No log,0.49144,0.841317,0.819728,0.873188,0.772436
5,No log,0.571567,0.830838,0.809444,0.854093,0.769231


[I 2025-05-07 02:42:36,084] Trial 8 finished with value: 0.8094435075885329 and parameters: {'learning_rate': 2.5165929812911236e-05, 'weight_decay': 0.04088012760133073, 'batch_size': 32, 'num_train_epochs': 5}. Best is trial 3 with value: 0.8229166666666666.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.611633,0.73503,0.710311,0.725753,0.695513
2,No log,0.533084,0.763473,0.729452,0.783088,0.682692
3,No log,0.498243,0.782934,0.768,0.766773,0.769231
4,No log,0.479208,0.782934,0.76112,0.783051,0.740385
5,No log,0.475053,0.784431,0.764706,0.78,0.75


[I 2025-05-07 02:48:16,746] Trial 9 finished with value: 0.7647058823529411 and parameters: {'learning_rate': 4.658196705105442e-06, 'weight_decay': 0.055988288110807194, 'batch_size': 32, 'num_train_epochs': 5}. Best is trial 3 with value: 0.8229166666666666.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.419866,0.799401,0.753676,0.883621,0.657051
2,No log,0.456029,0.823353,0.795139,0.867424,0.733974
3,0.278200,0.833826,0.815868,0.769231,0.927602,0.657051


IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



[I 2025-05-07 03:07:55,577] Trial 13 finished with value: 0.8178694158075601 and parameters: {'learning_rate': 4.540353487980111e-05, 'weight_decay': 0.10030459209425613, 'batch_size': 32, 'num_train_epochs': 4}. Best is trial 3 with value: 0.8229166666666666.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.408268,0.823353,0.805921,0.827703,0.785256
2,No log,0.406758,0.83982,0.819562,0.864769,0.778846


[I 2025-05-07 03:10:29,675] Trial 14 finished with value: 0.8195615514333895 and parameters: {'learning_rate': 2.1109800177428915e-05, 'weight_decay': 0.163513477011474, 'batch_size': 16, 'num_train_epochs': 2}. Best is trial 3 with value: 0.8229166666666666.


### Bert-based Classifier retrained w/ best hyperparameters (from Bayesian optimization)

Best configuration (best F1):
- Learning rate: 4.733978326237281e-05
- Weight decay: 0.13720158843680744
- Batch size: 8
- Num epochs: 4



In [19]:
# Load model
bert_bayesian_model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)

# Define training arguments
training_args = TrainingArguments(
    output_dir="./bert_results_best_bayesian",
    eval_strategy="epoch",
    learning_rate=4.733978326237281e-05,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=4,
    weight_decay=0.13720158843680744,
    report_to="none"
)

trainer = Trainer(
    model=bert_bayesian_model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics
)

trainer.train()
results = trainer.evaluate()
print("BERT Evaluation Results:")
print(results)

trainer.save_model("./bert_bayesian_model")

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.343576,0.851796,0.841091,0.842444,0.839744
2,0.443100,0.377412,0.872754,0.859967,0.884746,0.836538
3,0.173200,0.594932,0.872754,0.859038,0.890034,0.830128
4,0.173200,0.681367,0.86976,0.856198,0.883959,0.830128


BERT Evaluation Results:
{'eval_loss': 0.6813668608665466, 'eval_accuracy': 0.8697604790419161, 'eval_f1': 0.856198347107438, 'eval_precision': 0.8839590443686007, 'eval_recall': 0.8301282051282052, 'eval_runtime': 6.2328, 'eval_samples_per_second': 107.176, 'eval_steps_per_second': 13.477, 'epoch': 4.0}


In [20]:
# Repo name
repo_name = "bert_bayesian_2"

# This creates a repo under your namespace (username)
api.create_repo(repo_id=repo_name, private=False, exist_ok=True)

RepoUrl('https://huggingface.co/VridhiJain/bert_bayesian_2', endpoint='https://huggingface.co', repo_type='model', repo_id='VridhiJain/bert_bayesian_2')

In [22]:
# Push to your Hugging Face repo
trainer.push_to_hub("bert_bayesian_2")
tokenizer.push_to_hub("bert_bayesian_2")

training_args.bin:   0%|          | 0.00/5.37k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

CommitInfo(commit_url='https://huggingface.co/VridhiJain/bert_bayesian_2/commit/d04495efa7eb2c2a182716028d0799915c9a70ae', commit_message='Upload tokenizer', commit_description='', oid='d04495efa7eb2c2a182716028d0799915c9a70ae', pr_url=None, repo_url=RepoUrl('https://huggingface.co/VridhiJain/bert_bayesian_2', endpoint='https://huggingface.co', repo_type='model', repo_id='VridhiJain/bert_bayesian_2'), pr_revision=None, pr_num=None)

### RoBERTa-based Classifier - Hyperparameter Tuning (don't rerun)

In [7]:
from transformers import RobertaTokenizer, RobertaForSequenceClassification, Trainer, TrainingArguments
import numpy as np

tokenizer_roberta = RobertaTokenizer.from_pretrained("roberta-base")
train_dataset = Dataset.from_pandas(train_df[["headline", "label"]])
test_dataset = Dataset.from_pandas(test_df[["headline", "label"]])

def tokenize_roberta(batch):
    return tokenizer_roberta(batch["headline"], truncation=True, padding="max_length", max_length=128)

train_dataset = train_dataset.map(tokenize_roberta, batched=True)
test_dataset = test_dataset.map(tokenize_roberta, batched=True)
train_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])
test_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])

Map:   0%|          | 0/2668 [00:00<?, ? examples/s]

Map:   0%|          | 0/668 [00:00<?, ? examples/s]

### Grid Search

In [None]:
# define search space
learning_rates = [1e-5, 2e-5, 3e-5, 5e-5]
epochs = [3, 5]
weight_decays = [0.01, 0.001]

best_f1 = 0
best_config = {}

for lr in learning_rates:
  for num_epochs in epochs:
    for wd in weight_decays:
      print(f"\nTraining with lr={lr}, epochs={num_epochs}, weight_decay={wd}")

      training_args = TrainingArguments(
        output_dir=f"./roberta_tuned_lr{lr}_ep{num_epochs}_wd{wd}",
        eval_strategy="epoch",
        learning_rate=lr,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=16,
        num_train_epochs=num_epochs,
        weight_decay=wd,
        report_to="none",
        logging_strategy="epoch"
      )

      model = RobertaForSequenceClassification.from_pretrained("roberta-base", num_labels=2)

      trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=test_dataset,
        compute_metrics=compute_metrics
      )

      trainer.train()
      eval_results = trainer.evaluate()
      print("F1 score:", eval_results['eval_f1'])

      if eval_results['eval_f1'] > best_f1:
        best_f1 = eval_results['eval_f1']
        best_config = {
            "learning_rate": lr,
            "num_epochs": num_epochs,
            "weight_decay": wd,
            "eval_results": eval_results
        }

print("\nBest configuration:")
print(best_config)

: 

: 

### RoBERTa-based Classifier retrained w/ best hyperparameters (from grid search)

Best configuration (best F1):
- Learning rate: 2e-05
- Weight decay: 0.001
- Num epochs: 5

In [None]:
roberta_gridsearch_model = RobertaForSequenceClassification.from_pretrained("roberta-base", num_labels=2)

training_args_roberta = TrainingArguments(
    output_dir="./roberta_gridsearch_results",
    eval_strategy="epoch",
    logging_strategy="epoch",
    learning_rate=2e-05,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=5,
    weight_decay=0.001,
    report_to="none"
)

trainer_roberta = Trainer(
    model=roberta_gridsearch_model,
    args=training_args_roberta,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics
)

trainer_roberta.train()
roberta_results = trainer_roberta.evaluate()
print("RoBERTa Evaluation Results:", roberta_results)

trainer_roberta.save_model("./roberta_gridsearch_model")

### RoBERTa - Bayesian Optimization

In [None]:
def model_init():
  return RobertaForSequenceClassification.from_pretrained("roberta-base", num_labels=2)

def objective(trial):
  # hyperparameter search space
  learning_rate = trial.suggest_float("learning_rate", 1e-6, 7e-5, log=True)
  weight_decay = trial.suggest_float("weight_decay", 0.0, 0.05)
  batch_size = trial.suggest_categorical("batch_size", [8, 16])
  num_train_epochs = trial.suggest_int("num_train_epochs", 3, 6)

  args = TrainingArguments(
    output_dir=f"./roberta_bayesian_tuned_lr{learning_rate}_ep{num_train_epochs}_wd{weight_decay}",
    eval_strategy="epoch",
    learning_rate=learning_rate,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=num_train_epochs,
    weight_decay=weight_decay,
    report_to="none"
  )

  trainer = Trainer(
    model_init=model_init,
    args=args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,
  )

  trainer.train()
  eval_result = trainer.evaluate()
  return eval_result["eval_f1"]

In [None]:
# run optimization loop
study_roberta = optuna.create_study(direction="maximize")
study_roberta.optimize(objective, n_trials=15)