## Sentiment scores generation using Finbert model
 - [Finbert github](https://github.com/ProsusAI/finBERT/tree/master)
 - [Huggingface link](https://huggingface.co/ProsusAI/finbert)

In [1]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from torch import nn

import numpy as np
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
pd.set_option('display.max_colwidth', None)

In [3]:
tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")



In [4]:
test_string = """Stocks"""

In [5]:
inputs = tokenizer(test_string, return_tensors='pt', padding=True)
logits = model(**inputs)[0]

# Convert logits to softmax probabilities.
probabilities = nn.functional.softmax(logits, dim=-1)

# The probability labels are 'positive', 'negative', 'neutral' (NEED TO CONFIRM)

print(probabilities.detach().numpy().reshape((3,)))

[0.04868224 0.05188207 0.8994357 ]


In [6]:
def generate_sentiment_values(text: str) -> list[float]:
    inputs = tokenizer(text, return_tensors='pt', padding=True)
    logits = model(**inputs)[0]

    # Convert logits to softmax probabilities.
    probabilities = nn.functional.softmax(logits, dim=-1)

    # The probability labels are 'positive', 'negative', 'neutral' (NEED TO CONFIRM)
    return probabilities.detach().numpy().reshape((3,))

## Finbert on NYTimes news


1. Get sentiment for each article

In [7]:
df = pd.read_csv('../data/nyt_tesla_shorter.csv')
df.head()

Unnamed: 0,timestamp,article_url,lead_paragraph,abstract,adjusted_date
0,2024-01-25 13:04:37+00:00,https://www.nytimes.com/2024/01/25/business/dealbook/tesla-shares-outlook-growth.html,Elon Musk and Tesla shareholders are at a crossroads.,Shares in Elon Musk’s electric vehicle maker fell sharply after the company delivered lackluster quarterly results and declined to give full-year guidance.,2024-01-25
1,2024-01-31 21:25:41+00:00,https://www.nytimes.com/2024/01/31/business/tesla-elon-musk-pay-package.html,A Delaware judge’s decision to void the pay package that helped make Elon Musk the world’s richest person leaves Tesla’s board with some difficult decisions to make.,"The company’s board will now decide whether to appeal the decision, change where Tesla is incorporated or negotiate a new pay package.",2024-02-01
2,2024-01-24 21:21:45+00:00,https://www.nytimes.com/2024/01/24/business/teslas-electric-vehicles-profit-price-cuts.html,"Tesla’s profit from sales of electric cars slumped in the last three months of last year because of price cuts intended to thwart increasingly intense competition, the company said on Wednesday as it warned of a tough year ahead.","A tax benefit boosted quarterly earnings, but that masked a decline in earnings from carmaking caused by intensifying competition.",2024-01-25
3,2024-01-30 22:12:10+00:00,https://www.nytimes.com/2024/01/30/business/elon-musk-tesla-pay-package.html,"Elon Musk, the chief executive of Tesla, suffered a stunning rebuke Tuesday when a Delaware judge voided the pay package that helped make him a billionaire many times over and the world’s wealthiest human being.","Shareholders had sued, arguing that Mr. Musk’s compensation — which helped make him the world’s richest person — was excessive.",2024-01-31
4,2024-01-16 14:37:28+00:00,https://www.nytimes.com/2024/01/16/business/tesla-elon-musk-stock.html,"Elon Musk, the chief executive of Tesla, stunned investors by demanding that the company’s board give him shares worth more than $80 billion if it wants him to continue developing products based on artificial intelligence.","Elon Musk, the electric car company’s chief executive, said he would “build products outside of Tesla” unless the board raises his stake to 25 percent.",2024-01-16


In [18]:
for i, row in df.iterrows():
    combined_str = f"""{str(row['lead_paragraph'])}\n{str(row['abstract'])}"""
    output = generate_sentiment_values(combined_str)
    df.at[i, 'pos_sentiment'] = output[0]
    df.at[i, 'neg_sentiment'] = output[1]
    df.at[i, 'neutral_sentiment'] = output[2]

    preamble = "Evaluate the following news on Tesla's stock price."
    combined_str = f"""<instructions>{preamble}</instructions> <news>{str(row['lead_paragraph'])}\n{str(row['abstract'])}</news>"""    
    output = generate_sentiment_values(combined_str)
    df.at[i, 'pos_sentiment_w_preamb'] = output[0]
    df.at[i, 'neg_sentiment_w_preamb'] = output[1]
    df.at[i, 'neutral_sentiment_w_preamb'] = output[2]

In [19]:
df

Unnamed: 0,timestamp,article_url,lead_paragraph,abstract,adjusted_date,pos_sentiment,neg_sentiment,neutral_sentiment,pos_sentiment_w_preamb,neg_sentiment_w_preamb,neutral_sentiment_w_preamb
0,2024-01-25 13:04:37+00:00,https://www.nytimes.com/2024/01/25/business/dealbook/tesla-shares-outlook-growth.html,Elon Musk and Tesla shareholders are at a crossroads.,Shares in Elon Musk’s electric vehicle maker fell sharply after the company delivered lackluster quarterly results and declined to give full-year guidance.,2024-01-25,0.007523,0.974638,0.017839,0.007406,0.971248,0.021345
1,2024-01-31 21:25:41+00:00,https://www.nytimes.com/2024/01/31/business/tesla-elon-musk-pay-package.html,A Delaware judge’s decision to void the pay package that helped make Elon Musk the world’s richest person leaves Tesla’s board with some difficult decisions to make.,"The company’s board will now decide whether to appeal the decision, change where Tesla is incorporated or negotiate a new pay package.",2024-02-01,0.015148,0.873508,0.111344,0.017542,0.821107,0.16135
2,2024-01-24 21:21:45+00:00,https://www.nytimes.com/2024/01/24/business/teslas-electric-vehicles-profit-price-cuts.html,"Tesla’s profit from sales of electric cars slumped in the last three months of last year because of price cuts intended to thwart increasingly intense competition, the company said on Wednesday as it warned of a tough year ahead.","A tax benefit boosted quarterly earnings, but that masked a decline in earnings from carmaking caused by intensifying competition.",2024-01-25,0.012998,0.972522,0.01448,0.013672,0.97297,0.013358
3,2024-01-30 22:12:10+00:00,https://www.nytimes.com/2024/01/30/business/elon-musk-tesla-pay-package.html,"Elon Musk, the chief executive of Tesla, suffered a stunning rebuke Tuesday when a Delaware judge voided the pay package that helped make him a billionaire many times over and the world’s wealthiest human being.","Shareholders had sued, arguing that Mr. Musk’s compensation — which helped make him the world’s richest person — was excessive.",2024-01-31,0.013649,0.934513,0.051838,0.017202,0.922987,0.059811
4,2024-01-16 14:37:28+00:00,https://www.nytimes.com/2024/01/16/business/tesla-elon-musk-stock.html,"Elon Musk, the chief executive of Tesla, stunned investors by demanding that the company’s board give him shares worth more than $80 billion if it wants him to continue developing products based on artificial intelligence.","Elon Musk, the electric car company’s chief executive, said he would “build products outside of Tesla” unless the board raises his stake to 25 percent.",2024-01-16,0.055498,0.096209,0.848293,0.040988,0.092453,0.866559
5,2024-01-05 16:34:44+00:00,https://www.nytimes.com/2024/01/05/business/tesla-china-recall.html,"The Chinese government announced on Friday that Tesla would recall practically all of the cars it has sold in the country to adjust their assisted-driving systems, less than a month after a similar recall in the United States.",The recall follows a similar move by the company in the United States to upgrade the cars’ Autopilot feature.,2024-01-06,0.243601,0.430676,0.325723,0.121804,0.390774,0.487422
6,2024-01-11 21:53:42+00:00,https://www.nytimes.com/2024/01/11/business/hertz-electric-vehicles-tesla.html,"The rental car company Hertz will be selling about one-third of the electric vehicles in its fleet after they lost value more quickly than expected, the company said on Thursday. The drop in value is a blow to the company’s efforts to replace gasoline vehicles with cars that do not produce tailpipe emissions.","The rental car company blamed the sharp drop in the value of electric vehicles and higher repair costs for its decision to sell 20,000 cars.",2024-01-12,0.008454,0.970315,0.021231,0.008601,0.969396,0.022004
7,2024-01-02 13:51:40+00:00,https://www.nytimes.com/2024/01/02/business/tesla-fourth-quarter-sales.html,Tesla sales jumped during the last three months of 2023 after the carmaker slashed prices and customers rushed to take advantage of tax breaks on electric vehicles — provisions that will be harder to come by in 2024.,The leading U.S. electric-vehicle maker also drew buyers eager to take advantage of government incentives that will be harder to get in 2024.,2024-01-02,0.904772,0.055428,0.0398,0.885809,0.053105,0.061085
8,2024-01-31 13:04:38+00:00,https://www.nytimes.com/2024/01/31/business/dealbook/musk-tesla-compensation-delaware.html,An unusual pay package that Tesla devised in 2018 helped make Elon Musk the world’s wealthiest individual.,A Delaware court ruling on his $50 billion compensation plan at Tesla raises questions about corporate governance and more.,2024-01-31,0.016874,0.884944,0.098182,0.026306,0.756185,0.21751
9,2024-01-27 10:00:27+00:00,https://www.nytimes.com/2024/01/27/business/china-hefei-ev-city-economy.html,"Ultramodern factories churn out electric cars and solar panels in Hefei, an industrial center in the heart of central China. Broad avenues link office towers and landscaped parks. Subway lines open at a brisk pace.","Hefei has led the country in making electric vehicles and other tech products, but it still has not escaped a nationwide housing crisis.",2024-01-27,0.640278,0.064174,0.295548,0.375445,0.044368,0.580187


Save to the same file first

In [26]:
df[:-182].to_csv('../data/nyt_headlines_temp.csv', index=False)

2. Group by date

In [31]:
new_df = pd.read_csv('../data/nyt_headlines.csv')

In [34]:
agg_func = {
    'pos_sentiment': 'mean',
    'neg_sentiment': 'mean',
    'neutral_sentiment': 'mean'
}
column_rename = {
    'pos_sentiment': 'mean_pos_sentiment',
    'neg_sentiment': 'mean_neg_sentiment',
    'neutral_sentiment': 'mean_neutral_sentiment'
}
grouped_by_date_df = new_df.groupby(by='adjusted_date').agg(agg_func).rename(columns=column_rename).reset_index()
grouped_by_date_df.tail()

Unnamed: 0,adjusted_date,mean_pos_sentiment,mean_neg_sentiment,mean_neutral_sentiment
1493,2024-09-25,0.159386,0.285289,0.555326
1494,2024-09-26,0.296828,0.350619,0.352554
1495,2024-09-27,0.192744,0.503622,0.303634
1496,2024-09-28,0.225603,0.226435,0.547962
1497,adjusted_date,0.019455,0.067139,0.913406


In [35]:
grouped_by_date_df.to_csv('../data/nyt_sentiment.csv', index=False)