In [3]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification # type: ignore

# The tokenizer and the sequence classifier are both loaded from the same
# pretrained checkpoint, so the identifier is kept in one place.
FINBERT_CHECKPOINT = "ProsusAI/finbert"

tokenizer = AutoTokenizer.from_pretrained(FINBERT_CHECKPOINT)
model = AutoModelForSequenceClassification.from_pretrained(FINBERT_CHECKPOINT)

In [4]:
from datasets import load_dataset # type: ignore

# Load the "clean" configuration of the sehyun66/Finnhub-News dataset.
ds = load_dataset(
    "sehyun66/Finnhub-News",
    name="clean",
)

In [5]:
import pandas as pd # type: ignore

# Turn the 'clean' entry of the loaded dataset into a DataFrame and discard
# the columns that the rest of the notebook does not read.
unused_columns = ['image', 'source', 'url', 'id']
pdds = pd.DataFrame(ds['clean']).drop(columns=unused_columns)
pdds

Unnamed: 0,datetime,related,summary,category,headline
0,1682380800000,GOOGL,Looking for stock market analysis and research...,company,Notable companies reporting after market close
1,1691366400000,JJSF,The spicy cheddar flavored popcorn is availabl...,company,Get Fired Up with Doc Popcorn's Latest Limited...
2,1681776000000,TSLA,Toyota and other Japanese carmakers are suffer...,company,Japanese carmakers’ China sales hit by sluggis...
3,1682553600000,PCAR,,company,BMO Capital Maintains Paccar (PCAR) Market Per...
4,1678752000000,RFIL,Q1 2023 RF Industries Ltd Earnings Call,company,Q1 2023 RF Industries Ltd Earnings Call
...,...,...,...,...,...
316081,1686873600000,TECH,Looking for stock market analysis and research...,company,Why Nvidia Stock Is Surging Premarket Today
316082,1673913600000,FFIV,F5 (FFIV) doesn't possess the right combinatio...,company,Earnings Preview: F5 Networks (FFIV) Q1 Earnin...
316083,1686182400000,CSIQ,Looking for stock market analysis and research...,company,Canadian Solar's CSI Energy selected to provid...
316084,1688169600000,BPMC,Looking for stock market analysis and research...,company,Week In Review: CMBG Out-Licenses Two CAR-T Ca...


In [6]:
# Interpret the raw 'datetime' values as milliseconds since the Unix epoch
# and store the parsed timestamps back in the same column.
parsed_timestamps = pd.to_datetime(pdds['datetime'], unit='ms')
pdds['datetime'] = parsed_timestamps
pdds

Unnamed: 0,datetime,related,summary,category,headline
0,2023-04-25,GOOGL,Looking for stock market analysis and research...,company,Notable companies reporting after market close
1,2023-08-07,JJSF,The spicy cheddar flavored popcorn is availabl...,company,Get Fired Up with Doc Popcorn's Latest Limited...
2,2023-04-18,TSLA,Toyota and other Japanese carmakers are suffer...,company,Japanese carmakers’ China sales hit by sluggis...
3,2023-04-27,PCAR,,company,BMO Capital Maintains Paccar (PCAR) Market Per...
4,2023-03-14,RFIL,Q1 2023 RF Industries Ltd Earnings Call,company,Q1 2023 RF Industries Ltd Earnings Call
...,...,...,...,...,...
316081,2023-06-16,TECH,Looking for stock market analysis and research...,company,Why Nvidia Stock Is Surging Premarket Today
316082,2023-01-17,FFIV,F5 (FFIV) doesn't possess the right combinatio...,company,Earnings Preview: F5 Networks (FFIV) Q1 Earnin...
316083,2023-06-08,CSIQ,Looking for stock market analysis and research...,company,Canadian Solar's CSI Energy selected to provid...
316084,2023-07-01,BPMC,Looking for stock market analysis and research...,company,Week In Review: CMBG Out-Licenses Two CAR-T Ca...


In [7]:
# Keep only the news rows whose `related` field mentions one of the tickers
# of interest and that carry both a non-empty summary and a non-empty headline.
tickers = ['AAPL', 'GOOGL', 'AMZN']

# na=False makes rows with a missing `related` value count as "no match"
# instead of leaving NaN in the mask, which pandas refuses to index with.
ticker_mask = pdds['related'].str.contains('|'.join(tickers), na=False)

# A boolean mask (rather than dropping by index label) removes exactly the
# offending rows even if index labels happen to repeat.
has_text = (pdds['summary'] != '') & (pdds['headline'] != '')

# NOTE: despite its name, `aapl` holds the rows for every ticker listed above.
aapl = pdds[ticker_mask & has_text]
aapl

Unnamed: 0,datetime,related,summary,category,headline
0,2023-04-25,GOOGL,Looking for stock market analysis and research...,company,Notable companies reporting after market close
52,2023-07-21,GOOGL,Looking for stock market analysis and research...,company,YouTube Premium raises prices in the U.S. to $...
77,2023-02-24,AMZN,Hundreds of e-commerce vendors and platforms i...,company,"Retail giants Walmart, Alibaba and Amazon draw..."
112,2023-07-28,AMZN,(Bloomberg) -- Indonesia is planning to restri...,company,Indonesia Plans Imported Goods Restrictions in...
124,2023-06-30,AAPL,American Express is in talks to take over Gold...,company,Goldman Is Looking for a Way Out of Its Partne...
...,...,...,...,...,...
315924,2023-05-17,AAPL,Looking for stock market analysis and research...,company,Apple relies on China for components despite I...
315945,2023-04-08,GOOGL,"Google Flights is out with its ""top summer des...",company,Google Flights Just Released Its Most Popular ...
316028,2023-04-18,AMZN,Looking for stock market analysis and research...,company,Amazon vs Alibaba: Which Is a Better Value?
316051,2023-08-30,GOOGL,Looking for stock market analysis and research...,company,Instagram Is Testing TikTok-Style 10-Minute Re...


In [8]:
import numpy as np # type: ignore

# Work on a plain 2-D array of the filtered rows; column 4 is the one read
# as headline text below.
headlines_array = np.array(aapl)
rango = 100  # number of headlines tokenized together in one batch
inputsArray = []

# Step through the rows in consecutive slices of `rango` rows and tokenize
# each slice as one padded, truncated batch of PyTorch tensors.
for start in range(0, len(headlines_array), rango):
    headlines_list = list(headlines_array[start:start + rango, 4])
    batch = tokenizer(headlines_list, padding=True, truncation=True, return_tensors='pt')
    inputsArray.append(batch)

i = len(inputsArray)  # number of batches that were built

In [9]:
import torch # type: ignore

# Per-headline class probabilities, accumulated across all batches in the
# same order as the tokenized inputs.
positive = []
negative = []
neutral = []

# Pure inference: switch off dropout-style training behaviour and disable
# autograd so no gradient graph is built for each forward pass (less memory,
# faster execution, identical probabilities).
model.eval()
with torch.no_grad():
    for inputs in inputsArray:
        outputs = model(**inputs)
        # Normalise the logits over the class axis so each row sums to 1.
        predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)

        # Columns are read as (positive, negative, neutral).
        # NOTE(review): confirm this order against model.config.id2label.
        positive.extend(predictions[:, 0].tolist())
        negative.extend(predictions[:, 1].tolist())
        neutral.extend(predictions[:, 2].tolist())

In [10]:
from pathlib import Path

# One row per headline: date, ticker and headline text are read from columns
# 0, 1 and 4 of `headlines_array`, next to the three probability lists.
table = {'Date': headlines_array[:, 0],
         'Stock': headlines_array[:, 1],
         'Headline': headlines_array[:, 4],
         "Positive": positive,
         "Negative": negative,
         "Neutral": neutral}

df = pd.DataFrame(table, columns=["Date", "Stock", "Headline", "Positive", "Negative", "Neutral"])
# Order chronologically and renumber the rows from 0.
df = df.sort_values("Date")
df = df.reset_index(drop=True)

# to_csv does not create missing folders, so make sure the target directory
# exists before writing the results.
output_path = Path('Datasets/Finnhub-News-2023_Sentiment.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path, index=False)
df

Unnamed: 0,Date,Stock,Headline,Positive,Negative,Neutral
0,2020-08-05,AMZN,10 Biggest Retail Companies,0.054260,0.026028,0.919712
1,2021-04-28,AAPL,Inside Intel: A Look at the Mega Chip Maker,0.066666,0.014147,0.919186
2,2021-07-28,AAPL,Why Not Every Break Of Support Is A Sell Signal,0.040128,0.099163,0.860709
3,2022-07-20,AAPL,Stock Market Today: Track Market Trends And Th...,0.042219,0.031128,0.926652
4,2022-12-01,AAPL,Best Dow Jones Stocks To Buy And Watch In Dece...,0.061930,0.027586,0.910484
...,...,...,...,...,...,...
12350,2023-08-30,GOOGL,Google AI will be able to go to meetings for y...,0.100624,0.010109,0.889267
12351,2023-08-30,GOOGL,"Google Announces Oct. 4 Event: Pixel 8 Series,...",0.034578,0.017830,0.947591
12352,2023-08-30,GOOGL,Did Google Just Accidentally Leak Official Ima...,0.018241,0.731800,0.249959
12353,2023-08-30,AAPL,Russia earns less from oil and spends more on ...,0.083143,0.308467,0.608390
