In [1]:
from urllib.request import urlopen, Request

In [2]:
from bs4 import BeautifulSoup as soup

In [3]:
# Base URL of a Finviz quote page; a ticker symbol is appended to it to build
# the address that is fetched for each stock.
finviz_url = "https://finviz.com/quote.ashx?t="

In [4]:
# Symbols tracked by this notebook.
tickers = ["KPRX", "NRBO", "CNXA", "CMND", "MNTS", "NBY", "AUVI"]

# One record per symbol: a placeholder sentiment of 1 and an empty mapping
# of links. The comprehension builds a fresh record (and a fresh "links"
# dict) for every symbol, so no two tickers share mutable state.
portfolio = {
    symbol: {"sentiment": 1, "links": {}}
    for symbol in tickers
}
portfolio

{'KPRX': {'sentiment': 1, 'links': {}},
 'NRBO': {'sentiment': 1, 'links': {}},
 'CNXA': {'sentiment': 1, 'links': {}},
 'CMND': {'sentiment': 1, 'links': {}},
 'MNTS': {'sentiment': 1, 'links': {}},
 'NBY': {'sentiment': 1, 'links': {}},
 'AUVI': {'sentiment': 1, 'links': {}}}

In [5]:
def get_page(url, timeout=30):
    """Download ``url`` and return it parsed as a BeautifulSoup document.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait on the connection before giving up. This is
            an optional addition; existing one-argument calls keep working.

    Returns:
        The parsed document tree.

    Raises:
        urllib.error.URLError: if the request fails (``HTTPError``, a
            subclass, for error status codes).
    """
    # Every request carries an explicit user-agent header.
    request = Request(url=url, headers={'user-agent': 'Not-A-Browser'})
    # The context manager closes the connection even when reading or parsing
    # raises; previously the response object was never closed.
    with urlopen(request, timeout=timeout) as response:
        markup = response.read()
    # Name a concrete parser: the generic "html" feature makes BeautifulSoup
    # pick whichever parser happens to be installed, which triggers a
    # GuessedAtParserWarning and can yield different trees on different machines.
    return soup(markup, "html.parser")

In [6]:

# Collect headline -> article URL for every ticker from its Finviz quote page.
# Only the first MAX_NEWS_ROWS rows of the news table are inspected.
MAX_NEWS_ROWS = 6

for ticker in tickers:
    url = finviz_url + ticker
    html = get_page(url)
    news_table = html.find(id="news-table")
    if news_table is None:
        # find() returns None when the element is absent; skip this ticker
        # instead of failing with AttributeError on the missing table.
        print(f"No news table found for {ticker}; skipping")
        continue
    # Slice up front so the loop stops after the rows of interest rather than
    # enumerating the whole table and discarding everything past the limit.
    news = news_table.find_all('tr')[:MAX_NEWS_ROWS]
    for row in news:
        # Rows without an anchor carry no headline link.
        if row.a:
            title = row.a.get_text()
            link = row.a.get("href")
            # Keyed by headline text: a repeated headline overwrites the
            # earlier entry, as before.
            portfolio[ticker]["links"][title] = {"url": link}

portfolio

{'KPRX': {'sentiment': 1,
  'links': {'Why Is Kiora Pharmaceuticals (KPRX) Stock Up 39% Today?': {'url': 'https://investorplace.com/2024/02/why-is-kiora-pharmaceuticals-kprx-stock-up-39-today/'},
   'Kiora Pharmaceuticals Announces Private Placement of up to Approximately $45 Million': {'url': 'https://www.newsfilecorp.com/release/196432/Kiora-Pharmaceuticals-Announces-Private-Placement-of-up-to-Approximately-45-Million'},
   'Kiora Pharmaceuticals and Th a Open Innovation Enter Strategic Agreement to Develop and Commercialize KIO-301 for the Treatment of Inherited Retinal Diseases; Total Deal Value of up to $301 Million includes $16 Million Upfront, up to $285 Million in Clinical Development, Regulatory and Commercial Milestones, Plus Commercial Royalties': {'url': 'https://www.newsfilecorp.com/release/196399/Kiora-Pharmaceuticals-and-Tha-Open-Innovation-Enter-Strategic-Agreement-to-Develop-and-Commercialize-KIO301-for-the-Treatment-of-Inherited-Retinal-Diseases-Total-Deal-Value-of-up

In [8]:
# Load model directly
import torch
from typing import Tuple 
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Run inference on the first CUDA device when one is available, else on CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

# Tokenizer and sequence-classification model come from the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")
model = model.to(device)

# Class names, looked up by the index of the winning score in estimate_sentiment.
labels = ["positive", "negative", "neutral"]

def estimate_sentiment(news):
    """Classify ``news`` with the loaded model and return the winning class.

    Args:
        news: Text to classify. The logits are summed over dimension 0 before
            the softmax, so a batch of texts (presumably a list of strings —
            TODO confirm against callers) yields one pooled prediction.

    Returns:
        ``(probability, sentiment)`` where ``sentiment`` is the entry of
        ``labels`` with the highest softmax score and ``probability`` is that
        score as a 0-dim tensor. For falsy input (empty string or list) the
        result is ``(0, labels[-1])``.
    """
    if news:
        # Truncate to the model's position limit: without this, a text longer
        # than the limit makes the forward pass fail.
        tokens = tokenizer(
            news,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=model.config.max_position_embeddings,
        ).to(device)

        # Inference only: skip autograd bookkeeping so the returned
        # probability does not keep a computation graph alive.
        with torch.no_grad():
            result = model(tokens["input_ids"], attention_mask=tokens["attention_mask"])[
                "logits"
            ]
        # Pool the batch by summing logits, then normalise to probabilities.
        result = torch.nn.functional.softmax(torch.sum(result, 0), dim=-1)
        best = torch.argmax(result)
        probability = result[best]
        sentiment = labels[best]
        return probability, sentiment
    else:
        # Nothing to classify: zero confidence and the last label.
        return 0, labels[-1]

In [None]:
from tqdm.notebook import tqdm

# Score every ticker: classify each paragraph of each linked article with
# estimate_sentiment and store the mean returned probability as the ticker's
# "sentiment" value.
for t in tqdm(portfolio, desc="portfolio"):
    f = portfolio[t]
    # One accumulator per ticker. It used to be reset inside the link loop,
    # so only the paragraphs of the last article reached the average below.
    count_prob = []
    for v in tqdm(f["links"].values(), desc="links"):
        url = v['url']
        try:
            results = get_page(url).find_all('p')
        except (OSError, ValueError) as err:
            # OSError covers urllib's URLError/HTTPError and socket timeouts;
            # ValueError is what urllib raises for a URL without a scheme.
            # One unreachable article should not abort the whole run.
            print(f"Skipping {url}: {err}")
            continue
        for p in tqdm(results, desc="text"):
            news = p.get_text()
            probability, sentiment = estimate_sentiment(news)
            # estimate_sentiment returns a 0-dim tensor or the int 0; float()
            # handles both, so the mean no longer depends on .item().
            count_prob.append(float(probability))
    # NOTE(review): `sentiment` (the winning label) is unused, so this value
    # is the mean winning-class probability regardless of which class won —
    # a confidence measure rather than a polarity score. Confirm intent.
    if count_prob:
        portfolio[t]["sentiment"] = sum(count_prob) / len(count_prob)
    else:
        # No paragraphs were scored; NaN marks "no data" instead of raising
        # ZeroDivisionError or leaving the placeholder value in place.
        portfolio[t]["sentiment"] = float("nan")

portfolio:   0%|          | 0/7 [00:00<?, ?it/s]

links:   0%|          | 0/5 [00:00<?, ?it/s]

text:   0%|          | 0/32 [00:00<?, ?it/s]

text:   0%|          | 0/34 [00:00<?, ?it/s]

text:   0%|          | 0/35 [00:00<?, ?it/s]

text:   0%|          | 0/29 [00:00<?, ?it/s]

text:   0%|          | 0/33 [00:00<?, ?it/s]

links:   0%|          | 0/5 [00:00<?, ?it/s]

text:   0%|          | 0/28 [00:00<?, ?it/s]

text:   0%|          | 0/29 [00:00<?, ?it/s]

text:   0%|          | 0/21 [00:00<?, ?it/s]

text:   0%|          | 0/21 [00:00<?, ?it/s]

text:   0%|          | 0/26 [00:00<?, ?it/s]

links:   0%|          | 0/5 [00:00<?, ?it/s]

text:   0%|          | 0/14 [00:00<?, ?it/s]

text:   0%|          | 0/27 [00:00<?, ?it/s]

text:   0%|          | 0/19 [00:00<?, ?it/s]

text:   0%|          | 0/18 [00:00<?, ?it/s]

text:   0%|          | 0/38 [00:00<?, ?it/s]

links:   0%|          | 0/5 [00:00<?, ?it/s]

text:   0%|          | 0/34 [00:00<?, ?it/s]

text:   0%|          | 0/37 [00:00<?, ?it/s]

text:   0%|          | 0/35 [00:00<?, ?it/s]

text:   0%|          | 0/20 [00:00<?, ?it/s]

text:   0%|          | 0/16 [00:00<?, ?it/s]

links:   0%|          | 0/5 [00:00<?, ?it/s]

In [None]:
# Show the result as (ticker, sentiment) pairs in portfolio insertion order.
[(symbol, entry["sentiment"]) for symbol, entry in portfolio.items()]