In [135]:
#!pip install urllib3
#!pip install bs4

# For data manipulation
import pandas as pd
from urllib.request import urlopen, Request
import numpy as np

# To extract fundamental data
from bs4 import BeautifulSoup

In [141]:
def transform_to_millions(value):
    if pd.isna(value) or value == '-':
        return np.nan
    suffix = value[-1]  # Get the last character (M, B, etc.)
    num = float(value[:-1])  # Get the numeric part
    if suffix == 'M':
        return num * 1e6  # Convert to millions
    elif suffix == 'B':
        return num * 1e9  # Convert to billions
    else:
        return num  # Return as is if no suffix

# Functions to Parse Data from FinViz

# Initialize Pandas DataFrame to Store the Data

In [163]:
output_directory = "output/"
input_directory = "input/"
csv_name='nasdaq-2023-05-31'
df = pd.read_csv(input_directory+csv_name+".csv",index_col=0)
df.head(10)

Unnamed: 0,P/B,P/E,Forward P/E,PEG,Debt/Eq,EPS (ttm),Dividend %,ROE,ROI,EPS Q/Q,Insider Own,EPS next Y,Income,Perf Year,Perf YTD,Profit Margin,Payout,Sales
AVAV,3.69,-,44.45,-,0.27,-0.34,-,-1.40%,0.90%,-,0.90%,2.06,-8.50M,-3.66%,7.96%,-1.70%,-,487.10M
SGRP,1.17,-,4.68,-,0.87,-0.03,-,-10.80%,6.20%,45.40%,18.50%,0.25,-0.70M,-5.42%,-12.69%,-1.00%,-,261.30M
PWUP,,,,,,,,,,,,,,,,,,
SCHL,1.23,25.09,15.88,2.76,0.00,1.68,1.90%,5.20%,7.20%,-28.80%,1.10%,2.65,62.10M,11.08%,6.35%,3.70%,39.10%,1.69B
QH,0.02,-,-,-,0.13,-4.96,-,-2.70%,2.60%,125.60%,-,-,-1.90M,-50.23%,46.02%,-0.30%,-,539.70M
SABR,-,-,14.52,-,-,-1.84,-,74.90%,-7.10%,-408.40%,1.00%,0.22,-602.00M,-58.57%,-49.51%,-22.40%,-,2.69B
LMB,2.15,19.56,14.22,1.63,0.30,1.04,-,12.00%,7.30%,286.30%,0.20%,1.43,11.30M,286.83%,100.29%,2.20%,0.00%,503.00M
QSG,-,-,-,-,-,-1.17,-,-,32.10%,-,1.04%,-,-,-,-27.96%,-,-,390.70M
ENTF,,,,,,,,,,,,,,,,,,
HCTI,13.96,-,-,-,0.31,-2.27,-,-,-70.80%,-38.30%,64.68%,-,-10.90M,-67.74%,45.74%,-,-,44.70M


# Data Clearning: Further Parse the Data into Numeric Types
Remove % Sign and Convert Values to Numeric Type

In [144]:
df['Dividend %'] = df['Dividend %'].str.replace('%', '')
df['ROE'] = df['ROE'].str.replace('%', '')
df['ROI'] = df['ROI'].str.replace('%', '')
df['EPS Q/Q'] = df['EPS Q/Q'].str.replace('%', '')
df['EPS next Y'] = df['EPS next Y'].str.replace('%', '')
df['Insider Own'] = df['Insider Own'].str.replace('%', '')
df['Perf YTD'] = df['Perf YTD'].str.replace('%', '')
df['Perf Year'] = df['Perf Year'].str.replace('%', '')
df['Income'] = df['Income'].apply(lambda x: transform_to_millions(x))
df['Profit Margin'] = df['Profit Margin'].str.replace('%', '')
df = df.apply(pd.to_numeric, errors='coerce')
df

Unnamed: 0,P/B,P/E,Forward P/E,PEG,Debt/Eq,EPS (ttm),Dividend %,ROE,ROI,EPS Q/Q,Insider Own,EPS next Y,Income,Perf Year,Perf YTD,Profit Margin,Payout,Sales
AVAV,3.69,,44.45,,0.27,-0.34,,-1.4,0.9,,0.9,2.06,-8500000.0,-3.66,7.96,-1.7,,
SGRP,1.17,,4.68,,0.87,-0.03,,-10.8,6.2,45.4,18.5,0.25,-700000.0,-5.42,-12.69,-1.0,,
PWUP,,,,,,,,,,,,,,,,,,
SCHL,1.23,25.09,15.88,2.76,0.00,1.68,1.9,5.2,7.2,-28.8,1.1,2.65,62100000.0,11.08,6.35,3.7,,
QH,0.02,,,,0.13,-4.96,,-2.7,2.6,125.6,,,-1900000.0,-50.23,46.02,-0.3,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
GAN,,,,,,,,,,,,,,,,,,
SRZN,,,,,,,,,,,,,,,,,,
OCAXU,,,,,,,,,,,,,,,,,,
ACTG,,,,,,,,,,,,,,,,,,


# Filter Good Companies

### 1. Companies which are quoted at low valuations
P/E < 15 and P/B < 1

In [145]:
df_filtered = df[(df['P/E'].astype(float)<25) & (df['P/B'].astype(float) < 1)]
df_filtered = df_filtered[(df_filtered['Profit Margin'].astype(float) > 10)]
df_filtered

Unnamed: 0,P/B,P/E,Forward P/E,PEG,Debt/Eq,EPS (ttm),Dividend %,ROE,ROI,EPS Q/Q,Insider Own,EPS next Y,Income,Perf Year,Perf YTD,Profit Margin,Payout,Sales
IROQ,0.61,8.89,,,0.00,1.59,2.82,7.2,27.5,-42.0,4.8,,5100000.0,-46.37,-18.26,17.2,,
CGBD,0.81,9.84,6.94,4.92,1.26,1.40,10.71,9.1,5.4,-6.0,0.2,1.99,79400000.0,-2.75,-4.13,36.4,,
BRKL,0.63,7.17,6.61,1.20,0.15,1.19,6.33,9.1,22.0,-72.5,2.7,1.29,92600000.0,-42.98,-41.45,23.0,,
PEBO,0.90,7.13,7.30,0.89,0.08,3.71,5.91,13.2,26.9,12.8,1.8,3.62,103800000.0,-8.51,-8.99,35.0,,
SRTS,0.97,7.35,10.41,,0.00,0.38,,13.5,23.0,-112.1,8.1,0.27,6300000.0,-69.53,-63.21,16.7,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
PFIS,0.88,8.07,8.99,,0.10,5.00,4.06,11.5,25.5,-21.1,1.2,4.49,36000000.0,-25.72,-24.00,29.8,,
NAVI,0.67,4.26,5.07,0.57,21.35,3.60,4.17,16.9,0.9,-48.8,2.8,3.02,501000000.0,-7.18,-8.81,37.7,,
ASRV,0.46,7.41,,,0.00,0.38,4.24,6.2,17.3,-37.4,4.3,,6500000.0,-29.94,-28.70,12.4,,
AMAL,0.87,5.06,4.98,,0.15,2.85,2.77,14.4,24.2,56.7,26.2,2.90,88700000.0,-29.17,-38.67,24.8,,


### 2. Further filter companies which have demonstrated earning power 
EPS Q/Q > 10%

In [146]:
df_filtered = df_filtered[df_filtered['EPS next Y'].astype(float) > 5]
df_filtered

Unnamed: 0,P/B,P/E,Forward P/E,PEG,Debt/Eq,EPS (ttm),Dividend %,ROE,ROI,EPS Q/Q,Insider Own,EPS next Y,Income,Perf Year,Perf YTD,Profit Margin,Payout,Sales
NRIM,0.96,7.67,7.3,,0.05,4.92,6.36,13.0,37.4,-30.4,1.2,5.17,28300000.0,-10.94,-31.75,25.8,,
CHK,0.99,1.59,11.44,,0.2,47.6,10.53,89.0,41.4,251.8,0.1,6.6,7020000000.0,-23.19,-19.7,50.8,,
ZION,0.9,4.94,5.63,,0.14,5.85,5.67,18.8,42.2,4.6,1.0,5.13,873000000.0,-52.12,-44.45,28.5,,
PNFP,0.7,6.93,7.27,0.22,0.08,7.27,1.75,10.6,16.1,6.4,2.21,6.93,553700000.0,-40.98,-34.55,34.2,,
INDB,0.72,7.65,8.8,3.06,0.04,5.94,4.84,9.5,17.4,21.4,0.4,5.16,272000000.0,-47.67,-48.21,39.5,,


# Filtering for good debt ratio

In [117]:
df_filtered = df_filtered[df_filtered['Debt/Eq'].astype(float) < 0.8]
df_filtered

Unnamed: 0,P/B,P/E,Forward P/E,PEG,Debt/Eq,EPS (ttm),Dividend %,ROE,ROI,EPS Q/Q,Insider Own,EPS next Y,Income,Perf Year,Perf YTD,Profit Margin,Payout,Sales
NRIM,0.96,7.67,7.3,,0.05,4.92,6.36,13.0,37.4,-30.4,1.2,5.17,28.3,-10.94,-31.75,25.8,,
CHK,0.99,1.59,11.44,,0.2,47.6,10.53,89.0,41.4,251.8,0.1,6.6,7.02,-23.19,-19.7,50.8,,
ZION,0.9,4.94,5.63,,0.14,5.85,5.67,18.8,42.2,4.6,1.0,5.13,873.0,-52.12,-44.45,28.5,,
PNFP,0.7,6.93,7.27,0.22,0.08,7.27,1.75,10.6,16.1,6.4,2.21,6.93,553.7,-40.98,-34.55,34.2,,
INDB,0.72,7.65,8.8,3.06,0.04,5.94,4.84,9.5,17.4,21.4,0.4,5.16,272.0,-47.67,-48.21,39.5,,


In [159]:
df_filtered.to_csv(output_directory+csv_name+"output.csv", index=True)