In [None]:
!pip install datasets
!pip install transformers
!huggingface-cli login     

In [None]:
import torch
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from datasets import load_dataset
import numpy as np
import torch.nn.functional as F
from sklearn.metrics import accuracy_score

In [None]:

# Load the title-pair dataset and carve fixed-size train/test subsets out of
# its 'train' split. A fixed seed makes the 7000/3000 partition reproducible
# across kernel restarts, so evaluation numbers stay comparable between runs.
dataset = load_dataset('yummy456/viral_news_pairs')['train'].train_test_split(
    train_size=7000,
    test_size=3000,
    seed=42,
)

In [None]:
# Tokenizer and classifier are both restored from the same pretrained checkpoint,
# so the checkpoint name is spelled out once and reused.
BERT_CHECKPOINT = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(BERT_CHECKPOINT)
# num_labels=2 requests a two-class sequence-classification model.
model = BertForSequenceClassification.from_pretrained(BERT_CHECKPOINT, num_labels=2)

In [36]:
def softmax(logits):
    """Return softmax probabilities over the last axis as a NumPy array.

    Args:
        logits: Array-like of raw scores (nested list, NumPy array or
            torch tensor). Integer inputs are promoted to the default
            floating dtype because softmax is undefined for integer tensors.

    Returns:
        numpy.ndarray with the same shape as ``logits`` whose last axis
        sums to 1.
    """
    # as_tensor avoids the copy (and the UserWarning) that torch.tensor()
    # incurs when the input is already a tensor.
    scores = torch.as_tensor(logits)
    if not torch.is_floating_point(scores):
        scores = scores.to(torch.get_default_dtype())
    # detach() drops any autograd history and cpu() moves device tensors to
    # host memory; .numpy() requires both.
    return F.softmax(scores.detach(), dim=-1).cpu().numpy()

In [37]:
# Tokenization helper applied to the dataset
def tokenize_function(examples):
    """Encode the 'title1'/'title2' columns of ``examples`` as sentence pairs.

    Every pair is padded or truncated to exactly 128 tokens and the
    tokenizer is asked to return PyTorch tensors.
    """
    first_titles = examples['title1']
    second_titles = examples['title2']
    encoding_options = dict(
        padding='max_length',
        truncation=True,
        max_length=128,
        return_tensors='pt',
    )
    return tokenizer(text=first_titles, text_pair=second_titles, **encoding_options)

# Tokenize every split of the dataset; batched=True passes groups of rows
# to tokenize_function instead of one row at a time.
tokenized_datasets = dataset.map(function=tokenize_function, batched=True)

In [38]:

# Expose only the model inputs and the label, as PyTorch tensors.
# token_type_ids is included because every example is a (title1, title2) pair:
# without it the model falls back to all-zero segment ids and cannot use its
# segment embeddings to tell the two titles apart.
tokenized_datasets.set_format(
    'torch',
    columns=['input_ids', 'token_type_ids', 'attention_mask', 'label'],
)

# Split the dataset into train and test
train_dataset = tokenized_datasets['train']
test_dataset = tokenized_datasets['test']


In [39]:
# Function to calculate accuracy
def compute_metrics(eval_pred):
    """Compute classification accuracy for one evaluation pass.

    Args:
        eval_pred: A ``(logits, labels)`` pair; the predicted class of each
            row is the index of its largest logit along the last axis.

    Returns:
        dict with a single ``'accuracy'`` entry.
    """
    logits, labels = eval_pred
    # Softmax is monotonic, so the arg-max of the raw logits is already the
    # arg-max of the probabilities; no torch round trip is needed.
    predictions = np.argmax(logits, axis=-1)
    accuracy = accuracy_score(labels, predictions)
    return {'accuracy': accuracy}

In [40]:
# Define training arguments
# transformers renamed `evaluation_strategy` to `eval_strategy`, and newer
# releases reject the old keyword. TrainingArguments is a dataclass, so its
# declared fields tell us which spelling the installed version accepts.
eval_strategy_arg = (
    'eval_strategy'
    if 'eval_strategy' in TrainingArguments.__dataclass_fields__
    else 'evaluation_strategy'
)
training_args = TrainingArguments(
    output_dir='./results',
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=10,
    learning_rate=1e-5,
    weight_decay=0.01,
    logging_dir='./logs',
    # Run evaluation at the end of every epoch.
    **{eval_strategy_arg: "epoch"}
)




In [41]:
# Wire the model, hyper-parameters, data splits and metric callback into a Trainer.
# The held-out test split doubles as the evaluation set.
trainer = Trainer(
    model,
    training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,
)

In [None]:
# Fine-tune the model with the configured Trainer
trainer.train()

# Score the fine-tuned model on the Trainer's evaluation dataset and report the metrics
eval_results = trainer.evaluate()
print(f"Evaluation results: {eval_results}")
