In [None]:
import ast
import warnings

import pandas as pd

# Suppress all warnings
warnings.filterwarnings("ignore")

In [None]:
!pip install transformers

In [None]:
# Load the pretrained tokenizer and sequence classifier for the FinBERT checkpoint
from transformers import AutoModelForSequenceClassification, AutoTokenizer

FINBERT_CHECKPOINT = "ProsusAI/finbert"
tokenizer = AutoTokenizer.from_pretrained(FINBERT_CHECKPOINT)
model = AutoModelForSequenceClassification.from_pretrained(FINBERT_CHECKPOINT)

In [None]:
# Read the semicolon-delimited news file into a DataFrame
df = pd.read_csv('news_cleaned.csv', delimiter=";")

In [None]:
# Drop the 'Unnamed: 0' column (presumably a saved index from an earlier
# export — confirm). errors='ignore' keeps this cell idempotent: re-running it
# after the column has already been removed no longer raises KeyError.
df = df.drop(columns=['Unnamed: 0'], errors='ignore')

In [None]:
def split_into_sentences(s):
    """Turn the string form of a Python list of sentences back into a list.

    The value is parsed as a Python literal, so sentences that contain
    apostrophes (which ``repr`` wraps in double quotes) or escaped characters
    are recovered correctly, and ``"[]"`` yields an empty list.

    Args:
        s: A string such as ``"['First.', 'Second.']"``. A value that is
            already a list is returned unchanged, so applying the function
            twice is harmless. ``None`` and NaN are treated as "no sentences".

    Returns:
        list[str]: The individual sentences.

    Raises:
        TypeError: If ``s`` is neither a string, a list, ``None`` nor NaN.
    """
    # Already parsed (e.g. the cell was run a second time): nothing to do
    if isinstance(s, list):
        return s

    # Missing value: pandas represents it as None or a float NaN (NaN != NaN)
    if s is None or (isinstance(s, float) and s != s):
        return []

    try:
        parsed = ast.literal_eval(s)
    except (ValueError, SyntaxError, TypeError, RecursionError, MemoryError):
        parsed = None

    if isinstance(parsed, (list, tuple)):
        return [str(item) for item in parsed]

    # Not a well-formed list literal: fall back to the legacy heuristic of
    # stripping the outer "['" / "']" and splitting on the "', '" separator
    return s[2:-2].split("', '")

# Replace each stringified list in the 'sentences' column with the parsed list
df['sentences'] = df['sentences'].map(split_into_sentences)

In [None]:
# Keep only the AAPL rows and draw a reproducible sample of at most 100 of them.
# - random_state pins the draw so a fresh Restart & Run All scores the same rows.
# - min() avoids the ValueError .sample raises when fewer than 100 rows exist.
# - .copy() makes the sample an independent frame, so adding columns to it
#   later does not operate on a view of df.
AAPL_df = df[df['ticker'] == 'AAPL']
samples_100_AAPL = AAPL_df.sample(n=min(100, len(AAPL_df)), random_state=42).copy()

In [None]:
import torch

In [None]:
def classify_sentences(sentences):
    """Score each sentence with the module-level ``tokenizer`` and ``model``.

    Every sentence is tokenized and classified on its own. Inference runs
    under ``torch.no_grad()`` so no autograd graph is built per sentence.

    Args:
        sentences: Iterable of sentence strings.

    Returns:
        tuple[list, list, list, list]: Four parallel lists, one entry per
        sentence: ``(predictions, positives, negatives, neutrals)``.
        ``predictions`` holds the arg-max class index; the other three hold
        the softmax probabilities of class indices 0, 1 and 2 respectively.
    """
    # NOTE(review): indices 0/1/2 are treated as positive/negative/neutral —
    # confirm against model.config.id2label for the loaded checkpoint.
    predictions = []
    positives = []
    negatives = []
    neutrals = []

    # Pure inference: gradients are never needed here
    with torch.no_grad():
        for sentence in sentences:
            # Tokenize a single sentence; over-long input is truncated
            inputs = tokenizer(sentence, return_tensors="pt", truncation=True, padding=True)

            logits = model(**inputs).logits

            # Normalise the logits over the class axis into probabilities
            scores = torch.nn.functional.softmax(logits, dim=-1)

            # .item() requires exactly one row, which holds because one
            # sentence is scored per call
            predictions.append(logits.argmax(dim=-1).item())
            positives.append(scores[:, 0].item())
            negatives.append(scores[:, 1].item())
            neutrals.append(scores[:, 2].item())

    return predictions, positives, negatives, neutrals

# Score every sampled row; each of the four lists returned by
# classify_sentences is stored in its own column
score_columns = ['Predictions', 'Positive', 'Negative', 'Neutral']
per_row_scores = samples_100_AAPL['sentences'].apply(
    lambda sentence_list: pd.Series(classify_sentences(sentence_list))
)
samples_100_AAPL[score_columns] = per_row_scores

In [None]:
# Show the sampled rows together with the score columns added above
samples_100_AAPL

In [None]:
# Write the scored sample to disk, keeping the row index as the first CSV column
samples_100_AAPL.to_csv(path_or_buf='output.csv', index=True)