# Install Dependencies

In [40]:
# pip install bs4

In [41]:
# pip install torch torchvision torchaudio

In [42]:
# pip install transformers

In [43]:
from urllib.request import urlopen, Request
from bs4 import BeautifulSoup
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification


# Create Model

In [44]:
tokenizer = AutoTokenizer.from_pretrained('nlptown/bert-base-multilingual-uncased-sentiment')

In [45]:
model = AutoModelForSequenceClassification.from_pretrained('nlptown/bert-base-multilingual-uncased-sentiment')

In [46]:
tokens = tokenizer.encode('This is amazing, I loved it. Great!', return_tensors='pt')
result = model(tokens)
result.logits

tensor([[-2.5497, -2.9072, -1.4034,  1.4186,  4.3728]],
       grad_fn=<AddmmBackward0>)

In [47]:
int(torch.argmax(result.logits))+1

5

# Collect Reviews

In [48]:
finviz_url = 'https://finviz.com/quote.ashx?t='

In [49]:
tickers = ['AMZN','FBCG','AMD']

In [50]:
news_tables = {}

In [51]:
for ticker in tickers:
    url = finviz_url + ticker
    
    req = Request(url=url,headers={'user-agent': 'my-app'})
    response = urlopen(req)
    html = BeautifulSoup(response, 'html')
    news_table = html.find(id='news-table')
    news_tables[ticker] = news_table
#     break

In [52]:
# print(news_tables)

In [53]:
parsed_data = []

In [54]:
for ticker, news_table in news_tables.items():
    for row in news_table.findAll('tr'):
        title = row.a.text
        date_data = row.td.text.strip().split(' ')
        
        if len(date_data) == 1:
            time = date_data[0]
        else:
            date = date_data[0]
            time = date_data[1]
            
        parsed_data.append([ticker,date,time,title])

In [56]:
import numpy as np
import pandas as pd

In [57]:
df = pd.DataFrame(np.array(parsed_data),columns=['Tickers','Date','Time','News'])

In [58]:
df.head()

Unnamed: 0,Tickers,Date,Time,News
0,AMZN,Aug-12-23,12:05PM,Lina Khan: The most feared person in Big Tech ...
1,AMZN,Aug-12-23,12:04PM,Lina Khan: The most feared person in Silicon V...
2,AMZN,Aug-12-23,11:59AM,The most feared person in tech isnt in Silicon...
3,AMZN,Aug-12-23,10:15AM,Is Amazon a Buy After Its Blowout Quarter?
4,AMZN,Aug-12-23,08:00AM,3 Warren Buffett Stocks to Buy Hand Over Fist ...


In [59]:
df.tail()

Unnamed: 0,Tickers,Date,Time,News
200,AMD,Jul-31-23,05:57PM,Advanced Micro Devices Q2 Preview: Time to Buy?
201,AMD,Jul-31-23,05:42PM,"AMD, JetBlue, Starbucks earnings, manufacturin..."
202,AMD,Jul-31-23,03:26PM,"Dow futures flat, Zoom dips 17.1% after earnings"
203,AMD,Jul-31-23,02:00PM,Jim Cramers 10 S&P 500 Stock Picks for 2023
204,AMD,Jul-31-23,01:22PM,25 Richest Women in the World


In [60]:
df.iloc[0][3]

'Lina Khan: The most feared person in Big Tech is a 34-year-old in DC'

In [61]:
def sentiment_score(news):
    tokens = tokenizer.encode(news, return_tensors='pt')
    result = model(tokens)
    return int(torch.argmax(result.logits))+1

In [62]:
df['Impact'] = df['News'].apply(lambda x: sentiment_score(x[:512]))

In [63]:
df.head()

Unnamed: 0,Tickers,Date,Time,News,Impact
0,AMZN,Aug-12-23,12:05PM,Lina Khan: The most feared person in Big Tech ...,1
1,AMZN,Aug-12-23,12:04PM,Lina Khan: The most feared person in Silicon V...,4
2,AMZN,Aug-12-23,11:59AM,The most feared person in tech isnt in Silicon...,1
3,AMZN,Aug-12-23,10:15AM,Is Amazon a Buy After Its Blowout Quarter?,1
4,AMZN,Aug-12-23,08:00AM,3 Warren Buffett Stocks to Buy Hand Over Fist ...,5


In [82]:
filter1 = df['Tickers']=='AMZN'
filter2 = df['Date']=='Aug-12-23'
df['Impact'].where(filter1 & filter2).mean()

2.8333333333333335

In [83]:
df.where(filter1 & filter2)

Unnamed: 0,Tickers,Date,Time,News,Impact
0,AMZN,Aug-12-23,12:05PM,Lina Khan: The most feared person in Big Tech ...,1.0
1,AMZN,Aug-12-23,12:04PM,Lina Khan: The most feared person in Silicon V...,4.0
2,AMZN,Aug-12-23,11:59AM,The most feared person in tech isnt in Silicon...,1.0
3,AMZN,Aug-12-23,10:15AM,Is Amazon a Buy After Its Blowout Quarter?,1.0
4,AMZN,Aug-12-23,08:00AM,3 Warren Buffett Stocks to Buy Hand Over Fist ...,5.0
...,...,...,...,...,...
200,,,,,
201,,,,,
202,,,,,
203,,,,,
