In [45]:
from urllib.request import urlopen, Request

In [46]:
from bs4 import BeautifulSoup as soup

In [47]:
finviz_url = "https://finviz.com/quote.ashx?t="

In [51]:
tickers = ["AMZN", "NVDA", "LUV", "MSTR"]

portfolio = {}
for t in tickers:
    portfolio[t] = {
        "sentiment" : 1,
        "links" : {}
    }
portfolio

{'AMZN': {'sentiment': 1, 'links': {}},
 'NVDA': {'sentiment': 1, 'links': {}},
 'LUV': {'sentiment': 1, 'links': {}},
 'MSTR': {'sentiment': 1, 'links': {}}}

In [52]:
def get_page(url):
    request = Request(url=url, headers={'user-agent': 'Not-A-Browser'})
    response = urlopen(request)
    return soup(response, "html")

In [53]:

for ticker in tickers:
    url = finviz_url + ticker
    html = get_page(url)
    news = html.find(id="news-table").findAll('tr')
    for index, row in enumerate(news):
        if row.a and index < 6:
            title = row.a.get_text()
            link = row.a.get("href")
            portfolio[ticker]["links"][title] = {}
            portfolio[ticker]["links"][title]["url"] = link

portfolio

{'AMZN': {'sentiment': 1,
  'links': {'US FTC was poised to reject Amazon acquisition of iRobot -- source': {'url': 'https://finance.yahoo.com/news/us-ftc-poised-reject-amazon-164931981.html'},
   'iRobot Has a Major Cleanup After Scrapped Amazon Deal': {'url': 'https://finance.yahoo.com/m/62c49f24-4a16-3585-b7da-6123293ebce2/irobot-has-a-major-cleanup.html'},
   'Tech earnings due out this week: Amazon, Microsoft in focus': {'url': 'https://finance.yahoo.com/video/tech-earnings-due-week-amazon-164345039.html'},
   "Elon Musk is losing so much money that Jeff Bezos could beat him in the billionaires' race": {'url': 'https://qz.com/elon-musk-jeff-bezos-world-richest-person-tesla-stock-1851205323'},
   'One chart shows why Big Tech earnings are critical for the health of the market rally': {'url': 'https://finance.yahoo.com/news/one-chart-shows-why-big-tech-earnings-are-critical-for-the-health-of-the-market-rally-160444587.html'}}},
 'NVDA': {'sentiment': 1,
  'links': {'Magnificent Seve

In [54]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
from typing import Tuple 
device = "cuda:0" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert").to(device)
labels = ["positive", "negative", "neutral"]

def estimate_sentiment(news):
    if news:
        tokens = tokenizer(news, return_tensors="pt", padding=True).to(device)

        result = model(tokens["input_ids"], attention_mask=tokens["attention_mask"])[
            "logits"
        ]
        result = torch.nn.functional.softmax(torch.sum(result, 0), dim=-1)
        probability = result[torch.argmax(result)]
        sentiment = labels[torch.argmax(result)]
        return probability, sentiment
    else:
        return 0, labels[-1]

In [55]:
for t in portfolio:
    f = portfolio[t]
    for v in f["links"].values():
        url = v['url']
        count_prob = []
        results = get_page(url).findAll('p')
        for p in results:
            news = p.get_text()
            probability, sentiment = estimate_sentiment(news)
            count_prob.append(probability)
    portfolio[t]["sentiment"] = (sum(count_prob) / len(count_prob)).item()

In [61]:
[(p, portfolio[p]["sentiment"]) for p in portfolio]

[('AMZN', 0.8544131517410278),
 ('NVDA', 0.7974177598953247),
 ('LUV', 0.8351674675941467),
 ('MSTR', 0.81805819272995)]