In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import pandas as pd

# Load the tokenizer and the sequence-classification model from the same
# pretrained checkpoint name, so the two can never drift apart.
MODEL_NAME = "mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)

In [None]:
# Read the ';'-separated news file and discard its 'Unnamed: 0' column
# (presumably an index column left over from an earlier to_csv -- TODO confirm).
df = pd.read_csv('news_cleaned.csv', sep=";").drop(columns=['Unnamed: 0'])

In [None]:
# Keep only the Microsoft rows. `.copy()` makes msft_df an independent frame,
# so the column assignments done on it in later cells modify msft_df itself
# rather than a slice of `df` (the pattern that triggers pandas'
# SettingWithCopyWarning).
msft_df = df[df['company'] == "Microsoft_Corporation"].copy()

In [None]:
def split_into_sentences(s):
    """Turn the stringified list stored in the 'sentences' column into a list of str.

    The column holds text such as "['First sentence.', 'Second one.']".

    Parameters
    ----------
    s : str | list | tuple | other
        Raw cell value. A list/tuple is treated as already parsed; any other
        non-string value (e.g. NaN for a missing cell) is treated as "no sentences".

    Returns
    -------
    list of str
        The individual sentences; an empty list when there are none.
    """
    import ast  # function-scope import keeps this cell self-contained

    # Already parsed (e.g. this cell was re-run): hand back a plain list.
    if isinstance(s, (list, tuple)):
        return list(s)

    # Missing values and other non-strings carry no sentences.
    if not isinstance(s, str):
        return []

    # Preferred path: parse the text as a Python literal. Unlike splitting on
    # "', '", this copes with items that were written with double quotes
    # (sentences containing an apostrophe), with escape sequences, and with "[]".
    try:
        parsed = ast.literal_eval(s.strip())
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
        parsed = None

    if isinstance(parsed, (list, tuple)):
        return [str(item) for item in parsed]

    # Fallback for text that is not a valid literal: strip the leading "['"
    # and trailing "']" and split on the item separator, as before.
    return s[2:-2].split("', '")

# Swap each raw 'sentences' string for the list produced by split_into_sentences
parsed_sentences = msft_df['sentences'].apply(split_into_sentences)
msft_df['sentences'] = parsed_sentences

In [None]:
import torch

In [None]:
# Translates this model's predicted class index into the label id used for
# FinBert, so predictions from both models share one encoding.
# Source-side meaning follows the score columns read in classify_sentences
# (index 0 -> Negative, 1 -> Neutral, 2 -> Positive); the target ids are
# presumably FinBert's 0=positive, 1=negative, 2=neutral -- TODO confirm.
DRob_mapping = {
    0: 1,  # negative
    1: 2,  # neutral
    2: 0,  # positive
}

In [None]:
def classify_sentences(sentences):
    """Run the sentiment model on every sentence and collect its outputs.

    Parameters
    ----------
    sentences : iterable of str
        Sentences to score; each one is tokenized and classified on its own.

    Returns
    -------
    tuple of four lists, each with one entry per input sentence:
        predictions : argmax class index translated through ``DRob_mapping``
        positives   : softmax probability at class index 2
        negatives   : softmax probability at class index 0
        neutrals    : softmax probability at class index 1
    An empty input yields four empty lists.
    """
    predictions = []
    positives = []
    negatives = []
    neutrals = []

    # Inference only: with autograd disabled no computation graph is built
    # for the forward passes, which saves memory and time on long inputs.
    with torch.no_grad():
        for sentence in sentences:
            # One sentence per call, truncated to the model's maximum length
            inputs = tokenizer(sentence, return_tensors="pt", truncation=True, padding=True)
            logits = model(**inputs).logits

            # Class probabilities for the single sentence in this batch
            probs = torch.nn.functional.softmax(logits, dim=-1)[0]

            # Winning class, re-labelled to the shared encoding
            predictions.append(DRob_mapping.get(logits.argmax(dim=-1).item()))
            positives.append(probs[2].item())
            negatives.append(probs[0].item())
            neutrals.append(probs[1].item())

    return predictions, positives, negatives, neutrals

# Score every row's sentence list and spread the four returned lists over
# four new columns of msft_df.
score_columns = ['Predictions', 'Positive', 'Negative', 'Neutral']


def _scores_as_series(sentence_list):
    """Wrap the 4-tuple from classify_sentences in a Series so apply() expands it into columns."""
    return pd.Series(classify_sentences(sentence_list))


msft_df[score_columns] = msft_df['sentences'].apply(_scores_as_series)

In [None]:
# Write the scored frame, row index included, to CSV.
output_path = 'msft_roberta_output.csv'
msft_df.to_csv(output_path, index=True)