In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch

In [2]:
# Load the cleaned dataset whose `headline` column is scored in later cells.
df = pd.read_csv(filepath_or_buffer='final_cleaned_data.csv')

In [3]:
from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification
from scipy.special import softmax

In [4]:
# Checkpoint identifier passed to `from_pretrained`; later cells read the
# model's logits at indices 0, 1, 2 as negative / neutral / positive.
# (Plain string: the original f-string had no placeholders.)
MODEL = "cardiffnlp/twitter-roberta-base-sentiment"
# Tokenizer and classification model are both loaded from the same checkpoint
# so the token ids match what the model expects.
tokenizer = AutoTokenizer.from_pretrained(MODEL)
model = AutoModelForSequenceClassification.from_pretrained(MODEL)



In [5]:
example = 'I love hiking.'

# Run for Roberta Model
encoded_text = tokenizer(example, return_tensors='pt')
# Inference only: disable autograd so no gradient graph is built for the
# forward pass.
with torch.no_grad():
    output = model(**encoded_text)
# `.logits[0]` selects the logits of the single input from the batch
# dimension (same accessor as polarity_scores_roberta uses).
scores = output.logits[0].numpy()
scores = softmax(scores)  # Convert logits to probabilities
scores_dict = {
    'roberta_neg' : scores[0],
    'roberta_neu' : scores[1],
    'roberta_pos' : scores[2]
}
print(scores_dict)

{'roberta_neg': 0.0032610784, 'roberta_neu': 0.016913326, 'roberta_pos': 0.97982556}


In [6]:
# Define function to get RoBERTa scores
def polarity_scores_roberta(text):
    """Score a single text with the RoBERTa sentiment model.

    Relies on the module-level ``tokenizer`` and ``model``. Inputs are
    truncated to at most 512 tokens before being passed to the model.

    Parameters
    ----------
    text : str
        Text to score. Anything that is not a ``str`` (e.g. NaN from a
        missing cell) or that is empty / whitespace-only is not sent to the
        model.

    Returns
    -------
    dict
        Keys ``'roberta_neg'``, ``'roberta_neu'``, ``'roberta_pos'`` holding
        the softmax of logits 0, 1 and 2 respectively, or ``np.nan`` for all
        three keys when ``text`` is missing or blank.
    """
    if not isinstance(text, str) or text.strip() == "":  # Handle empty/missing values
        return {'roberta_neg': np.nan, 'roberta_neu': np.nan, 'roberta_pos': np.nan}

    encoded_text = tokenizer(text, return_tensors='pt', truncation=True, max_length=512)
    # Inference only: without no_grad every call would record an autograd
    # graph that is immediately thrown away.
    with torch.no_grad():
        logits = model(**encoded_text).logits[0].numpy()
    probabilities = softmax(logits)  # Convert logits to probabilities

    return {
        'roberta_neg': probabilities[0],
        'roberta_neu': probabilities[1],
        'roberta_pos': probabilities[2]
    }

In [7]:
# Apply the function to each row in the "headline" column.
# The per-row dicts are collected first and turned into a DataFrame once,
# instead of constructing a pd.Series for every row inside .apply().
score_columns = ['roberta_neg', 'roberta_neu', 'roberta_pos']
df[score_columns] = pd.DataFrame(
    [polarity_scores_roberta(headline) for headline in df['headline']],
    index=df.index,         # keep the scores aligned with their source rows
    columns=score_columns,  # fixed column set, also when df has no rows
)

df

Unnamed: 0,ticker,time,source,headline,trading_status,company_news,roberta_neg,roberta_neu,roberta_pos
0,AAPL,2024-01-01 03:07:40,NS:ASSOPR,AP Top Technology News at 3:03 a.m. EST,Non-Trading,1,0.020380,0.924047,0.055573
1,AAPL,2024-01-01 04:28:07,NS:INDEPE,January sales 2023 UK ?€? live: Today's best n...,Non-Trading,0,0.001954,0.063187,0.934858
2,AAPL,2024-01-01 04:28:18,NS:HINDUT,Iphone's new iOS update causing connectivity i...,Non-Trading,0,0.342205,0.511794,0.146001
3,AAPL,2024-01-01 11:31:26,NS:INDIAE,My wishlist: What Apple needs to fix in 2024,Non-Trading,0,0.058921,0.498487,0.442592
4,AAPL,2024-01-01 12:01:12,NS:INDIAE,"Tech News Today: iPhone 15 for Rs 66,990, ISRO...",Non-Trading,1,0.010269,0.763596,0.226135
...,...,...,...,...,...,...,...,...,...
64288,TSLA,2024-12-31 04:28:20,NS:RTRS,"Tesla to fix software for 77,650 China-made ve...",Non-Trading,0,0.128529,0.777510,0.093961
64289,TSLA,2024-12-31 09:30:00,NS:RTRS,BUZZ-Trump-ally Musk's Tesla only gainer in br...,Trading,0,0.013703,0.454222,0.532075
64290,TSLA,2024-12-31 10:14:37,NS:RTRS,Newscasts - Beijing plans to get more driverle...,Trading,1,0.013439,0.758447,0.228113
64291,TSLA,2024-12-31 10:46:55,NS:RTRS,"BUZZ-U.S. STOCKS ON THE MOVE-Comm services, Nv...",Trading,1,0.022781,0.905148,0.072071


In [19]:
# Save the scored frame. index=True is the to_csv default, spelled out here:
# the row index is written as the first column of the file.
df.to_csv(path_or_buf='sentiment_results_roberta.csv', index=True)

In [25]:
# Calculate the combined roberta score: a weighted sum of the three class
# probabilities (negative counts as -1, neutral as 0, positive as +1).
# NOTE(review): the recorded output of this cell shows a `total` column that
# no visible cell creates, and the execution counts jump (19, 25) — this
# probably came from a cell that was later removed; confirm on a fresh kernel.
sentiment_weights = {'roberta_neg': -1, 'roberta_neu': 0, 'roberta_pos': 1}
df['roberta_combined'] = sum(
    df[column] * weight for column, weight in sentiment_weights.items()
)
df

Unnamed: 0,ticker,time,source,headline,trading_status,company_news,roberta_neg,roberta_neu,roberta_pos,total,roberta_combined
0,AAPL,2024-01-01 03:07:40,NS:ASSOPR,AP Top Technology News at 3:03 a.m. EST,Non-Trading,1,0.020380,0.924047,0.055573,1.0,0.035193
1,AAPL,2024-01-01 04:28:07,NS:INDEPE,January sales 2023 UK ?€? live: Today's best n...,Non-Trading,0,0.001954,0.063187,0.934858,1.0,0.932904
2,AAPL,2024-01-01 04:28:18,NS:HINDUT,Iphone's new iOS update causing connectivity i...,Non-Trading,0,0.342205,0.511794,0.146001,1.0,-0.196204
3,AAPL,2024-01-01 11:31:26,NS:INDIAE,My wishlist: What Apple needs to fix in 2024,Non-Trading,0,0.058921,0.498487,0.442592,1.0,0.383671
4,AAPL,2024-01-01 12:01:12,NS:INDIAE,"Tech News Today: iPhone 15 for Rs 66,990, ISRO...",Non-Trading,1,0.010269,0.763596,0.226135,1.0,0.215866
...,...,...,...,...,...,...,...,...,...,...,...
64288,TSLA,2024-12-31 04:28:20,NS:RTRS,"Tesla to fix software for 77,650 China-made ve...",Non-Trading,0,0.128529,0.777510,0.093961,1.0,-0.034568
64289,TSLA,2024-12-31 09:30:00,NS:RTRS,BUZZ-Trump-ally Musk's Tesla only gainer in br...,Trading,0,0.013703,0.454222,0.532075,1.0,0.518372
64290,TSLA,2024-12-31 10:14:37,NS:RTRS,Newscasts - Beijing plans to get more driverle...,Trading,1,0.013439,0.758447,0.228113,1.0,0.214674
64291,TSLA,2024-12-31 10:46:55,NS:RTRS,"BUZZ-U.S. STOCKS ON THE MOVE-Comm services, Nv...",Trading,1,0.022781,0.905148,0.072071,1.0,0.049291
