In [None]:
import numpy as np
import pandas as pd
from textblob import TextBlob

In [None]:
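# Precondition: `df` must already exist as a pandas DataFrame with the columns `ticker` and `news` (it is not created in this notebook's visible cells).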

In [None]:
def compute_polarity_subjectivity(text):
    """Return the TextBlob sentiment scores for a piece of text.

    Args:
        text: The text to analyze; it is handed to ``TextBlob`` unchanged.

    Returns:
        A ``(polarity, subjectivity)`` tuple taken from ``TextBlob(text).sentiment``.
    """
    sentiment = TextBlob(text).sentiment
    return sentiment.polarity, sentiment.subjectivity

In [None]:
# Score every news item once, then expand the (polarity, subjectivity) tuples
# into two columns with a single DataFrame construction. Building a separate
# pd.Series for each row inside apply() is much slower on large frames.
sentiment_scores = df['news'].map(compute_polarity_subjectivity)
df[['polarity', 'subjectivity']] = pd.DataFrame(
    sentiment_scores.tolist(),
    index=df.index,  # keep the original index so the assignment aligns row-for-row
    columns=['polarity', 'subjectivity'],
)

In [None]:
def assign_sentiment_label(polarity):
    """Convert a polarity score into a discrete sentiment label.

    Args:
        polarity: Numeric polarity score.

    Returns:
        ``-1`` when the score is below zero, ``1`` when it is above zero,
        and ``0`` otherwise.
    """
    if polarity < 0:
        return -1
    if polarity > 0:
        return 1
    return 0

In [None]:
# Derive the discrete -1/0/1 label from each row's polarity score.
polarity_scores = df['polarity']
df['sentiment_label'] = polarity_scores.map(assign_sentiment_label)

In [None]:
# Preview the first 7 rows to sanity-check the polarity, subjectivity and
# sentiment_label columns added above.
df.head(7)

In [None]:
#tokenizing and preparing data for BERT
from transformers import BertTokenizer
from sklearn.model_selection import train_test_split

# Tokenizer matching the 'bert-base-uncased' checkpoint used for fine-tuning.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

# `sentiment_label` holds -1/0/1, but a classification head created with
# num_labels=3 expects class indices in [0, 2]; a target of -1 makes the
# cross-entropy loss fail. Shift the labels so that
# -1 -> 0 (negative), 0 -> 1 (neutral), 1 -> 2 (positive).
# Subtract LABEL_OFFSET from predicted class ids to get back to -1/0/1.
LABEL_OFFSET = 1

train_texts = train_df['news'].tolist()
train_labels = (train_df['sentiment_label'] + LABEL_OFFSET).tolist()

test_texts = test_df['news'].tolist()
test_labels = (test_df['sentiment_label'] + LABEL_OFFSET).tolist()

# Truncate to at most 128 tokens and pad each set to its longest sequence.
train_encodings = tokenizer(train_texts, truncation=True, padding=True, max_length=128)
test_encodings = tokenizer(test_texts, truncation=True, padding=True, max_length=128)

In [None]:
#PyTorch dataset to pass data to BERT
import torch
from torch.utils.data import Dataset

class NewsDataset(Dataset):
    """Map-style dataset pairing tokenizer encodings with their labels.

    Args:
        encodings: Mapping from field name to a per-example indexable
            collection; must provide ``.items()``.
        labels: Per-example labels, indexable by position.
    """

    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        """Number of examples, defined by the number of labels."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return example ``idx`` as a dict of tensors, including a ``'labels'`` entry."""
        sample = {}
        for field, values in self.encodings.items():
            sample[field] = torch.tensor(values[idx])
        sample['labels'] = torch.tensor(self.labels[idx])
        return sample

# Wrap the tokenized train/test splits so they can be fed to the Trainer.
train_dataset = NewsDataset(encodings=train_encodings, labels=train_labels)
test_dataset = NewsDataset(encodings=test_encodings, labels=test_labels)

In [None]:
#fine-tuning
from transformers import BertForSequenceClassification, Trainer, TrainingArguments

import inspect

# BERT checkpoint with a 3-class sequence-classification head (num_labels=3).
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=3)

# The per-epoch evaluation option is called `eval_strategy` in newer
# transformers releases and `evaluation_strategy` in older ones; releases that
# dropped the old name raise TypeError on it. Use whichever keyword the
# installed version accepts so this cell runs on both.
eval_strategy_arg = (
    'eval_strategy'
    if 'eval_strategy' in inspect.signature(TrainingArguments.__init__).parameters
    else 'evaluation_strategy'
)

training_args = TrainingArguments(
    output_dir='./results',          # output directory
    per_device_train_batch_size=16,  # batch size for training
    per_device_eval_batch_size=16,   # batch size for evaluation
    num_train_epochs=3,              # number of passes over the training set
    weight_decay=0.01,               # strength of weight decay
    logging_dir='./logs',            # directory for storing logs
    logging_steps=10,                # logs recorded every 10 steps
    **{eval_strategy_arg: "epoch"},  # evaluate during training at each epoch
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset
)

# Fine-tune, then report the evaluation metrics on the held-out set
# (the returned dict is shown as the cell output).
trainer.train()
trainer.evaluate()