In [1]:
from finvizfinance.screener.overview import Overview
import pandas as pd
import csv
import os
import warnings


warnings.filterwarnings("ignore")

# Finding "undervalued" stocks to invest in

In [2]:
def get_undervalued_stocks():
    """
    Screen Finviz for "undervalued" stocks and return their tickers.

    A stock passes the screen when it has:

    - Positive Operating Margin
    - Debt-to-Equity ratio under 1
    - Low P/B (under 1)
    - Low P/E ratio (under 15)
    - Low PEG ratio (under 1)
    - Positive Insider Transactions

    Side effect:
        When the screener returns a table, it is saved to 'out/Overview.csv'
        (the 'out' folder is created if it does not exist yet).

    Returns:
        list: ticker symbols from the screener's 'Ticker' column; an empty
        list when the screener returns no usable table.
    """
    filters_dict = {'Debt/Equity':'Under 1',
                    'PEG':'Low (<1)',
                    'Operating Margin':'Positive (>0%)',
                    'P/B':'Low (<1)',
                    'P/E':'Low (<15)',
                    'InsiderTransactions':'Positive (>0%)'}

    foverview = Overview()
    foverview.set_filter(filters_dict=filters_dict)
    df_overview = foverview.screener_view()

    # The screener may hand back no table at all when nothing matches the
    # filters; treat that as "no tickers" instead of crashing on None.
    if df_overview is None or 'Ticker' not in df_overview:
        return []

    os.makedirs('out', exist_ok=True)  # ensures the 'out' folder is ready
    df_overview.to_csv('out/Overview.csv', index=False)
    return df_overview['Ticker'].to_list()

print(get_undervalued_stocks())

['DCGO', 'HAFC', 'MTW', 'OPBK', 'STCN', 'TCBX']


# Using BERT to perform a sentiment analysis

In [4]:
import pandas as pd
import yfinance as yf
from transformers import TFBertForSequenceClassification, BertTokenizer, pipeline
from goose3 import Goose
from requests import get
import tensorflow as tf
from sklearn.model_selection import train_test_split

In [5]:
# Step 1: Prepare Training Data
# Each entry pairs one headline-style sentence with its hand-assigned integer
# sentiment label, so the text/label alignment is visible at a glance.
labelled_samples = [
    ('The stock market is doing great!', 2),
    ('The company is facing a lawsuit.', 0),
    ('The new product launch was a success.', 2),
    ('There are concerns about the CEO\'s health.', 0),
    ('The quarterly earnings exceeded expectations.', 2),
    ('The stock price fell after the earnings report.', 0),
    ('Investors are worried about the recent changes in management.', 0),
    ('The company announced a new partnership.', 2),
    ('There are rumors of a potential merger.', 1),
    ('The CEO gave a positive outlook for the future.', 2),
    ('The company\'s sales increased by 20% last quarter.', 2),
    ('The company faced a significant security breach.', 0),
    ('The economy is showing signs of recovery.', 1),
    ('The new infrastructure project was well received by the market.', 2),
    ('Investors are worried about rising inflation.', 0),
    ('The company launched an innovative product in the market.', 2),
    ('The CEO announced his retirement.', 0),
    ('The merger between the two companies was approved.', 2),
    ('The company lost a major contract bid.', 0),
    ('The stock prices surged after the acquisition news.', 2),
    ('The new marketing strategy failed to attract customers.', 0),
    ('The company received a prestigious industry award.', 2),
    ('The competitor\'s new product is outperforming in the market.', 0),
    ('The company announced layoffs due to restructuring.', 0),
    ('The quarterly report showed a decline in revenue.', 0),
    ('The company is under investigation for fraud.', 0),
    ('The new CEO is expected to bring major changes.', 2),
    ('The company reported record profits this year.', 2),
    ('The stock price remained stable despite market fluctuations.', 1),
    ('The company is expanding its operations internationally.', 2),
    ('The new software update received positive reviews.', 2),
    ('The company is struggling to keep up with competitors.', 0),
    ('The new branch opening was delayed due to regulatory issues.', 0),
    ('The company secured a lucrative government contract.', 2),
    ('The stock price dropped sharply after the CEO\'s resignation.', 0),
    ('The company is investing heavily in renewable energy.', 2),
    ('The company\'s new ad campaign went viral.', 2),
    ('The company is facing supply chain disruptions.', 0),
    ('The product recall affected the company\'s reputation.', 0),
    ('The company received a large investment from a venture capital firm.', 2),
    ('The company is expected to benefit from the new trade agreement.', 2),
    ('The new partnership is expected to drive growth.', 2),
    ('The company is facing increased competition in the market.', 0),
    ('The stock price hit a new all-time high.', 2),
    ('The company announced a dividend increase.', 2),
    ('The company is closing underperforming stores.', 0),
    ('The new product failed to meet sales expectations.', 0),
    ('The company is focusing on improving customer satisfaction.', 2),
    ('The company\'s research and development spending increased.', 2),
    ('The company is planning to launch an IPO next year.', 2),
]

# Unzip the pairs back into the column-oriented layout the DataFrame expects
data = {
    'text': [sentence for sentence, _label in labelled_samples],
    'label': [label for _sentence, label in labelled_samples],
}

df = pd.DataFrame(data)

# Fix the random seeds so "Restart Kernel -> Run All" reproduces the same
# split and the same initial weights for any newly created layers.
SEED = 42
tf.random.set_seed(SEED)

# Split the dataset into training and validation sets.
# Stratifying keeps the rare neutral class (3 of 50 samples) from being
# distributed arbitrarily between the two splits.
train_texts, val_texts, train_labels, val_labels = train_test_split(
    df['text'], df['label'],
    test_size=0.2,
    random_state=SEED,
    stratify=df['label'],
)

# Load the tokenizer
tokenizer = BertTokenizer.from_pretrained("ProsusAI/finbert")

# Tokenize the texts
train_encodings = tokenizer(train_texts.tolist(), truncation=True, padding=True)
val_encodings = tokenizer(val_texts.tolist(), truncation=True, padding=True)

# Step 2: Fine-Tune the Model
model = TFBertForSequenceClassification.from_pretrained("ProsusAI/finbert", num_labels=3)  # 3 sentiment classes

# The training data encodes sentiment as 0 = negative, 1 = neutral, 2 = positive.
# The checkpoint ships its own name<->id mapping in its config, which need not
# use the same order. Translate the data's ids into the checkpoint's ids so the
# label names stored with the saved model match what the model was trained on.
data_id2label = {0: 'negative', 1: 'neutral', 2: 'positive'}
checkpoint_label2id = {
    str(name).lower(): int(idx)
    for name, idx in (model.config.label2id or {}).items()
}
if set(data_id2label.values()) <= set(checkpoint_label2id):
    to_model_id = {
        data_id: checkpoint_label2id[name]
        for data_id, name in data_id2label.items()
    }
else:
    # The checkpoint carries no named sentiment labels: keep the data's ids
    # and record their names in the config instead.
    to_model_id = {data_id: data_id for data_id in data_id2label}
    model.config.id2label = dict(data_id2label)
    model.config.label2id = {name: data_id for data_id, name in data_id2label.items()}

# Plain integer arrays (rather than pandas Series) are what tf.data expects
train_label_ids = train_labels.map(to_model_id).astype('int32').to_numpy()
val_label_ids = val_labels.map(to_model_id).astype('int32').to_numpy()

# Convert encodings to TensorFlow datasets
train_dataset = tf.data.Dataset.from_tensor_slices((
    dict(train_encodings),
    train_label_ids
)).batch(8)

val_dataset = tf.data.Dataset.from_tensor_slices((
    dict(val_encodings),
    val_label_ids
)).batch(8)

# Define the optimizer, loss, and metrics
optimizer = tf.keras.optimizers.Adam(learning_rate=5e-5)
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
metrics = [tf.keras.metrics.SparseCategoricalAccuracy('accuracy')]

# Compile the model
model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

# Train the model
model.fit(train_dataset, epochs=3, validation_data=val_dataset)

# Save the Fine-Tuned Model
model.save_pretrained("./finetuned_finbert")
tokenizer.save_pretrained("./finetuned_finbert")





All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at ProsusAI/finbert and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3
Cause: for/else statement not yet supported
Cause: for/else statement not yet supported


Epoch 2/3
Epoch 3/3


('./finetuned_finbert\\tokenizer_config.json',
 './finetuned_finbert\\special_tokens_map.json',
 './finetuned_finbert\\vocab.txt',
 './finetuned_finbert\\added_tokens.json')

In [8]:
# Lazily-built FinBERT pipeline shared by every call, so the model is loaded
# once instead of once per ticker.
_SENTIMENT_PIPELINE = None


def _get_sentiment_pipeline():
    """Build the FinBERT text-classification pipeline on first use and reuse it afterwards."""
    global _SENTIMENT_PIPELINE
    if _SENTIMENT_PIPELINE is None:
        _SENTIMENT_PIPELINE = pipeline("text-classification", model="ProsusAI/finbert")
    return _SENTIMENT_PIPELINE


def get_ticker_news_sentiment(ticker):
    """
    Returns a Pandas dataframe of the given ticker's most recent news article headlines,
    with the overall sentiment of each article.

    Each article is downloaded, its body text is extracted, and the text is
    classified with FinBERT. Articles longer than the model's 512-token input
    are classified on their first 512 tokens.

    Args:
        ticker (string): ticker symbol to look up news for.

    Returns:
        pd.DataFrame: columns {'Date', 'Article title', 'Article sentiment'}.
        'Article sentiment' holds the predicted label, or 'NaN fetch failed' /
        'NaN no text' when the article could not be downloaded or no body
        text could be extracted. The frame has these columns even when the
        ticker has no news.
    """
    # Local import: the notebook only imports `get` from requests at the top.
    from requests import RequestException

    columns = ['Date', 'Article title', 'Article sentiment']
    news_list = yf.Ticker(ticker).get_news() or []
    extractor = Goose()
    pipe = _get_sentiment_pipeline()
    data = []
    for dic in news_list:
        title = str(dic['title'])

        # A timeout keeps one unresponsive site from hanging the whole run
        try:
            response = get(dic['link'], timeout=10)
        except RequestException:
            data.append({'Date': 'None',
                         'Article title': title,
                         'Article sentiment': 'NaN fetch failed'})
            continue

        article = extractor.extract(raw_html=response.content)
        text = article.cleaned_text or ''
        date = str(article.publish_date)

        if not text.strip():
            # Nothing to classify (e.g. paywall or unparseable page)
            sentiment = 'NaN no text'
        else:
            # The model limit is 512 *tokens*, not characters, so let the
            # tokenizer truncate instead of rejecting by character count.
            results = pipe(text, truncation=True, max_length=512)
            sentiment = results[0]['label']

        data.append({'Date': date,
                     'Article title': title,
                     'Article sentiment': sentiment})

    # Passing the columns keeps the schema stable even when there is no news
    df = pd.DataFrame(data, columns=columns)
    return df

In [9]:
def generate_csv(ticker):
    """
    Save the news-sentiment table for `ticker` to 'out/<ticker>.csv'.

    The 'out' folder is created here if missing, so this function does not
    depend on another function having created it first.

    Args:
        ticker (string): ticker symbol; also used as the CSV file name.
    """
    os.makedirs('out', exist_ok=True)
    get_ticker_news_sentiment(ticker).to_csv(f'out/{ticker}.csv', index=False)

# Write one sentiment CSV per ticker that passed the screener
undervalued = get_undervalued_stocks()
for ticker in undervalued:
    generate_csv(ticker)

All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at ProsusAI/finbert and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at ProsusAI/finbert and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at ProsusAI/finbert and are newly initialized: ['classifier']
You should probably TRAIN this model on 