In [59]:
import time
import requests
import numpy as np
import pandas as pd
import yfinance as yf
import urllib.request

from lxml import html
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support import expected_conditions as EC

pd.set_option('display.max_rows', None)

def format_currency(amount):
    """Render a number as a dollar string with thousands separators and two decimals."""
    return f'${amount:,.2f}'

# Collecting EV/EBITDA Average For Each Industry

In [78]:
# Fetch the EV/EBITDA landing page, pull the spreadsheet URL out of it,
# and save the spreadsheet to the local data directory.
link = "https://pages.stern.nyu.edu/~adamodar/New_Home_Page/datafile/vebitda.html"

# NOTE(review): verify=False turns off TLS certificate verification; kept
# from the original cell, but confirm it is still required for this host.
url = requests.get(link, verify=False, timeout=30)
# Fail loudly on 4xx/5xx instead of parsing an error page.
url.raise_for_status()

info = html.fromstring(url.content)

# The anchor's *text* (not its href) is used as the download URL.
download_links = info.xpath("/html/body/div/div[1]/p[3]/a/text()")
if not download_links:
    raise IndexError("No spreadsheet link found on the EV/EBITDA page; the page layout may have changed.")
link = download_links[0]

response = requests.get(link, verify=False, timeout=30)
# Without this check an HTML error body would be written out as vebitda.xls.
response.raise_for_status()

with open("/home/bread/Coding/Finance/src/data/vebitda.xls", "wb") as file:
    file.write(response.content)



# Collecting PE Average For Each Industry

In [79]:
# Fetch the PE-data landing page, pull the spreadsheet URL out of it,
# and save the spreadsheet to the local data directory.
link  = "https://pages.stern.nyu.edu/~adamodar/New_Home_Page/datafile/pedata.html"

# NOTE(review): verify=False turns off TLS certificate verification; kept
# from the original cell, but confirm it is still required for this host.
url = requests.get(link, verify=False, timeout=30)
# Fail loudly on 4xx/5xx instead of parsing an error page.
url.raise_for_status()

info = html.fromstring(url.content)

# The anchor's *text* (not its href) is used as the download URL.
download_links = info.xpath("/html/body/div[1]/div/p[3]/a/text()")
if not download_links:
    raise IndexError("No spreadsheet link found on the PE data page; the page layout may have changed.")
dataLink = download_links[0]

response = requests.get(dataLink, verify=False, timeout=30)
# Without this check an HTML error body would be written out as pedata.xls.
response.raise_for_status()

with open("/home/bread/Coding/Finance/src/data/pedata.xls", "wb") as file:
    file.write(response.content)



# Writing a List of Every Yfinance Info Category

In [None]:
# Dump every key of MSFT's yfinance `info` mapping to yf_info.txt, one per line.
stock = yf.Ticker("MSFT")
info = list(stock.info.keys())

# Write-only mode is enough here, and the `with` block closes the file on
# exit, so no explicit close() is needed afterwards.
with open("yf_info.txt", "w") as f:
    f.writelines(f"{key}\n" for key in info)

# Getting List of All Sectors and Industries

In [14]:
# Sector keys, for reference:
#   technology, financial-services, consumer-cyclical, healthcare,
#   communication-services, industrials, consumer-defensive, energy,
#   basic-materials, real-estate, utilities

# Load the sectors table from the local data directory.
df = pd.read_csv(filepath_or_buffer="/home/bread/Coding/Finance/src/data/sectors.csv")

# Enterprise Ratio Analysis 

In [9]:
# Build a table of enterprise multiple, quick ratio and insider holdings
# for the top companies of the "software-infrastructure" industry.
technology = yf.Industry("software-infrastructure").top_companies
tickers = technology.index.to_list()

ticker_info = []

for ticker in tickers:
    stock = yf.Ticker(ticker)
    try:
        stock_info = stock.info
        temp = [
            ticker,
            stock_info['enterpriseToEbitda'],
            stock_info['quickRatio'],
            stock_info['heldPercentInsiders'],
        ]
    except KeyError:
        # Expected best-effort case: this ticker does not expose one of the
        # three fields, so it is left out of the table.
        continue
    except Exception as exc:
        # Anything else (e.g. a failed lookup) is still skipped, but reported,
        # and KeyboardInterrupt/SystemExit are no longer swallowed.
        print(f"Skipping {ticker}: {exc!r}")
        continue
    ticker_info.append(temp)

df = pd.DataFrame(ticker_info, columns=["Ticker", "Enterprise Multiple", "Quick Ratio", "Held Percentage Insiders"])

# Scraping OpenInsider for Cluster Buys

In [28]:
# Load the OpenInsider front page in headless Chrome and turn the first
# table under the second top-level <div> into a DataFrame.
options = webdriver.ChromeOptions()
options.add_argument('--headless')  # Run in headless mode (no GUI)
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

link = "http://openinsider.com/"

# try/finally guarantees the browser process is shut down even if the page
# load fails; previously an exception here left Chrome running.
try:
    driver.get(link)
    # NOTE(review): an implicit wait only affects element lookups, and none
    # are made on the driver in this cell - kept from the original, confirm
    # whether it is still wanted.
    driver.implicitly_wait(10)
    page_source = driver.page_source
finally:
    driver.quit()

# Parsing happens on the captured HTML, so the driver is no longer needed.
info = html.fromstring(page_source)

rows = info.xpath('/html/body/div[2]/table[1]/tbody/tr')

table_data = []

for row in rows:
    columns = row.xpath('.//td')
    row_data = [column.text_content().strip() for column in columns]
    table_data.append(row_data)

df = pd.DataFrame(table_data)
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
0,5 DM,2025-01-31 17:54:39,2025-01-29,TLSI,"Trisalus Life Sciences, Inc.",Surgical & Medical Instruments & Apparatus,4,P - Purchase,$4.68,136554,7339288,+2%,"+$638,856",,,,
1,15 M,2025-01-30 21:06:32,2025-01-28,TKO,"Tko Group Holdings, Inc.",Amusement & Recreation Services,3,P - Purchase,$147.97,5363010,10700145,+100%,"+$793,583,415",,,,
2,9 D,2025-01-30 17:06:57,2025-01-29,SFD,Smithfield Foods Inc,Meat Packing Plants,9,P - Purchase,$20.00,3283233,4323233,+316%,"+$65,664,660",,,,
3,3 M,2025-01-30 17:01:32,2025-01-29,CSBB,"Csb Bancorp, Inc.",State Commercial Banks,2,P - Purchase,$38.66,4200,107020,+4%,"+$162,355",,,,
4,2,2025-01-30 17:00:08,2025-01-28,SNV,Synovus Financial Corp,National Commercial Banks,2,P - Purchase,$57.46,9180,67461,+16%,"+$527,450",,,,
5,5 D,2025-01-30 09:17:11,2025-01-29,RLI,Rli Corp,"Fire, Marine & Casualty Insurance",5,P - Purchase,$71.97,13200,540349,+3%,"+$949,974",,,,
6,3 M,2025-01-29 18:14:01,2024-11-18,XRAY,Dentsply Sirona Inc.,Dental Equipment & Supplies,2,P - Purchase,$18.05,23306,359977,+7%,"+$420,599",,,,
7,3 M,2025-01-29 17:33:13,2024-12-19,SUNS,"Sunrise Realty Trust, Inc.",Real Estate Investment Trusts,2,P - Purchase,$12.14,1106632,2981917,+59%,"+$13,429,374",,,,
8,2,2025-01-29 16:06:44,2025-01-27,FROPX,Flat Rock Opportunity Fund,Closed-End Funds,2,P - Purchase,$18.82,3985,39491,+11%,"+$75,000",,,,
9,2,2025-01-29 16:04:05,2025-01-27,ALLY,Ally Financial Inc.,State Commercial Banks,2,P - Purchase,$39.24,44734,496300,+10%,"+$1,755,571",,,,


# Comparing Recent OpenInsider Cluster Buys With Enterprise Ratios for Market

In [4]:
# Print industry and EV/EBITDA for each ticker scraped from OpenInsider.
# NOTE: relies on `df` from the OpenInsider scraping cell (column 3 holds the
# ticker symbols), so that cell must have been run most recently.
comparisonDF = pd.read_excel("/home/bread/Coding/Finance/src/data/vebitda.xls", sheet_name="Industry Averages", skiprows=8)

stocks = list(df[3])

# Show which info fields are available, using the first ticker as a sample.
temp = yf.Ticker(stocks[0]).info.keys()
print(temp)

for stock in stocks:
    temp = yf.Ticker(stock)
    try:
        stock_info = temp.info
        print(f"{stock}, {stock_info['industry']}, {stock_info['enterpriseToEbitda']}")
    except KeyError:
        # Expected best-effort case: the ticker lacks one of the two fields.
        continue
    except Exception as exc:
        # Other failures are reported instead of silently swallowed, and
        # KeyboardInterrupt/SystemExit now propagate.
        print(f"Skipping {stock}: {exc!r}")
        continue




dict_keys(['address1', 'address2', 'city', 'state', 'zip', 'country', 'phone', 'website', 'industry', 'industryKey', 'industryDisp', 'sector', 'sectorKey', 'sectorDisp', 'longBusinessSummary', 'companyOfficers', 'auditRisk', 'boardRisk', 'compensationRisk', 'shareHolderRightsRisk', 'overallRisk', 'governanceEpochDate', 'compensationAsOfEpochDate', 'maxAge', 'priceHint', 'previousClose', 'open', 'dayLow', 'dayHigh', 'regularMarketPreviousClose', 'regularMarketOpen', 'regularMarketDayLow', 'regularMarketDayHigh', 'exDividendDate', 'forwardPE', 'volume', 'regularMarketVolume', 'averageVolume', 'averageVolume10days', 'averageDailyVolume10Day', 'bid', 'ask', 'bidSize', 'askSize', 'marketCap', 'fiftyTwoWeekLow', 'fiftyTwoWeekHigh', 'priceToSalesTrailing12Months', 'fiftyDayAverage', 'twoHundredDayAverage', 'currency', 'enterpriseValue', 'profitMargins', 'floatShares', 'sharesOutstanding', 'sharesShort', 'sharesShortPriorMonth', 'sharesShortPreviousMonthDate', 'dateShortInterest', 'sharesPerce

# Getting Research Articles

In [69]:
# Query Yahoo Finance search for "tlsi" (research included) and keep the raw response.
tlsi_search = yf.Search("tlsi", include_research=True)
research = tlsi_search.response

# Show the first news entry of the response.
first_news_item = research['news'][0]
print(first_news_item)
# print(research['news'][5])

# Disabled experiment, kept for reference: pull paragraphs of an article by XPath.
# url = requests.get("https://finance.yahoo.com/news/apple-google-hit-uk-probe-105551097.html")
# info = html.fromstring(url.content)
# printing = [info.xpath(f"/html/body/div[2]/main/section/section/section/article/div/div[1]/div[3]/div[2]/p[{i}]/text()") for i in range(1, 5)]

{'uuid': 'e5af143c-0f02-312c-bcd9-c1c9ac7180bd', 'title': 'TriSalus Life Sciences Announces Preliminary Q4 and Full Year Unaudited 2024 Financial Results and Conference Call', 'publisher': 'Business Wire', 'link': 'https://finance.yahoo.com/news/trisalus-life-sciences-announces-preliminary-130000517.html', 'providerPublishTime': 1737637200, 'type': 'STORY', 'relatedTickers': ['TLSI', 'TLSIW']}


# Compile List of Stocks

In [107]:
# Page through the mid-cap stock list in headless Chrome, collect every
# table row, and save the result as mid_cap.csv.
link = "https://stockanalysis.com/list/mid-cap-stocks/"

chrome_options = Options()
chrome_options.add_argument("--headless")  # Run in headless mode

# Set up the WebDriver
driver = webdriver.Chrome(options=chrome_options)

data = []

# try/finally guarantees the browser is closed even when a wait times out
# or the page fails to load; previously such an error leaked the driver.
try:
    driver.get(link)

    # Wait for the table to be visible
    wait = WebDriverWait(driver, 10)
    wait.until(EC.visibility_of_element_located((By.XPATH, "//table/tbody")))

    while True:
        # Wait for the table rows to be present
        wait.until(EC.presence_of_all_elements_located((By.XPATH, "//table/tbody/tr")))

        # Get all rows from the table
        rows = driver.find_elements(By.XPATH, "//table/tbody/tr")

        # Collect data from each row
        for row in rows:
            columns = row.find_elements(By.TAG_NAME, "td")
            if columns:  # Ensure the row has data
                data.append([column.text for column in columns])

        # Try moving to the next page
        try:
            # Wait only for the button to be *present*: a clickability wait
            # never succeeds for a disabled button, so the last page used to
            # end in a timeout error instead of the clean exit below.
            next_button = wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="main"]/div[2]/div/div/nav/button[2]')))

            # The button counts as disabled if the element itself is disabled
            # or its class list says so (the class attribute may be missing).
            button_classes = next_button.get_attribute("class") or ""
            if not next_button.is_enabled() or "disabled" in button_classes:
                print("Reached the last page.")
                break

            # Scroll the "Next" button into view and click it using JavaScript
            driver.execute_script("arguments[0].scrollIntoView(true);", next_button)
            driver.execute_script("arguments[0].click();", next_button)

            # Wait for the next page to load
            time.sleep(2)  # Adjust this delay if necessary
        except Exception as e:
            print(f"Error clicking the 'Next' button: {e}")
            break  # If "Next" button is not found or the click fails, stop paging
finally:
    # Close the driver
    driver.quit()

# Convert to a DataFrame
if data:
    df = pd.DataFrame(data)
    # Column 0 is dropped; the remaining positional columns get readable names.
    df = df.drop(0, axis=1)
    df = df.rename(columns={1: 'Symbol', 2: 'Company Name', 3: 'Market Cap', 4: 'Stock Price', 5: 'Percent Change', 6: 'Revenue'})
    df.to_csv("/home/bread/Coding/Finance/src/data/mid_cap.csv", index=False)
    print(df)
    print(f"Total stocks scraped: {len(df)}")
else:
    print("No data collected.")

Error clicking the 'Next' button: Message: 

      Symbol                                       Company Name Market Cap  \
0       VNOM                                 Viper Energy, Inc.      9.99B   
1        AIT              Applied Industrial Technologies, Inc.      9.98B   
2       BRBR                              BellRing Brands, Inc.      9.98B   
3       HSIC                                 Henry Schein, Inc.      9.97B   
4        DBX                                      Dropbox, Inc.      9.96B   
5        DOX                                     Amdocs Limited      9.95B   
6        ALB                              Albemarle Corporation      9.90B   
7        PNW                  Pinnacle West Capital Corporation      9.89B   
8       TTEK                                   Tetra Tech, Inc.      9.86B   
9       MTSI          MACOM Technology Solutions Holdings, Inc.      9.83B   
10       AOS                            A. O. Smith Corporation      9.76B   
11        CR       

# Getting Company Data // Financials, Balance Sheet, Cash Flow, Ratios

In [80]:
import random

# Scrape the four statement tables (financials, balance sheet, cash flow,
# ratios) for TKO and turn each into a DataFrame.
dataLinks = [
    "https://stockanalysis.com/stocks/tko/financials/", 
    "https://stockanalysis.com/stocks/tko/financials/balance-sheet/", #/html/body/div/div[1]/div[2]/main/div[2]/nav[1]/ul/li[2]
    "https://stockanalysis.com/stocks/tko/financials/cash-flow-statement/", 
    "https://stockanalysis.com/stocks/tko/financials/ratios/"
]

# Absolute XPath of the statement table; tbody/thead paths are built from it.
TABLE_XPATH = "/html/body/div/div[1]/div[2]/main/div[4]/table"


def _table_to_frame(table):
    """Convert one scraped table (header row first) into a DataFrame.

    The first row becomes the column labels and is then removed from the
    data, so the remaining rows keep their original index starting at 1.
    """
    frame = pd.DataFrame(table)
    # NOTE(review): wrapping the header row in a list looks like it yields
    # single-level MultiIndex columns; kept exactly as before so downstream
    # cells see the same layout - confirm whether a flat index is wanted.
    frame.columns = [frame.iloc[0]]
    return frame.drop(index=0)


chrome_options = Options()
chrome_options.add_argument("--headless")

# Set up the WebDriver
driver = webdriver.Chrome(options=chrome_options)

data = []

# try/finally guarantees the browser is closed even if a page fails to load
# or a wait times out; previously such an error leaked the driver.
try:
    for link in dataLinks:

        print(link)

        driver.get(link)

        # Wait for the table to be visible
        WebDriverWait(driver, 5).until(EC.visibility_of_element_located((By.XPATH, f"{TABLE_XPATH}/tbody")))

        # Now get all rows from the table
        rows = driver.find_elements(By.XPATH, f"{TABLE_XPATH}/tbody/tr")

        # Get the header row (the second row of the table head)
        header_row = driver.find_element(By.XPATH, f"{TABLE_XPATH}/thead/tr[2]")

        # Extract the text from each header cell
        headers = [header.text for header in header_row.find_elements(By.TAG_NAME, "th")]

        # Collect data from each row, header first
        temp = [headers]
        for row in rows:
            columns = row.find_elements(By.TAG_NAME, "td")
            temp.append([column.text for column in columns])

        data.append(temp)
finally:
    # Close the driver
    driver.quit()

# One frame per link, in the same order as dataLinks.
financials, balanceSheet, cashFlow, ratios = (_table_to_frame(table) for table in data)

print(financials)
print(balanceSheet)
print(cashFlow)
print(ratios)

https://stockanalysis.com/stocks/tko/financials/
https://stockanalysis.com/stocks/tko/financials/balance-sheet/
https://stockanalysis.com/stocks/tko/financials/cash-flow-statement/
https://stockanalysis.com/stocks/tko/financials/ratios/
0                             Period Ending Sep 30, 2024 Dec 31, 2023  \
1                                   Revenue        2,776        1,675   
2                      Revenue Growth (YoY)      108.30%       46.91%   
3                           Cost of Revenue       880.24        514.6   
4                              Gross Profit        1,896        1,160   
5                  Selling, General & Admin        1,240       549.09   
6                        Operating Expenses        1,652       713.71   
7                          Operating Income       243.85       446.66   
8                          Interest Expense      -259.47      -239.04   
9          Earnings From Equity Investments         0.76        -0.27   
10            Currency Exchange G

In [None]:
# Scratch note (XPath): /html/body/main/div/div[4]/div/div/div/div/div[1]/div[2]/div[2]/div[2]/div[1]/table/tbody

In [81]:
# Rename the 'Period Ending' column to 'Markers' in all four statement frames.
# DataFrame.rename returns a new frame, so each result must be assigned back;
# without the assignment the rename was discarded for every frame.
financials = financials.rename(columns={'Period Ending': 'Markers'})
balanceSheet = balanceSheet.rename(columns={'Period Ending': 'Markers'})
cashFlow = cashFlow.rename(columns={'Period Ending': 'Markers'})
ratios = ratios.rename(columns={'Period Ending': 'Markers'})

# Display the renamed ratios table, as the cell did before.
ratios

Unnamed: 0,Markers,"Jan 31, 2025","Dec 31, 2023","Dec 31, 2022","Dec 31, 2021","Dec 31, 2020"
1,Market Capitalization,26513,6695,-,-,-
2,Enterprise Value,29044,14335,-,-,-
3,Last Close Price,155.21,81.58,-,-,-
4,Forward PE,62.18,28.74,-,-,-
5,PS Ratio,4.56,4.00,-,-,-
6,PB Ratio,3.11,0.76,-,-,-
7,P/FCF Ratio,40.38,15.95,-,-,-
8,P/OCF Ratio,35.48,14.29,-,-,-
9,PEG Ratio,1.87,1.87,-,-,-
10,EV/Sales Ratio,10.46,8.56,-,-,-


In [159]:
# Inspect a few yfinance info fields for FCUV, then list every available key.
stock = yf.Ticker('fcuv')

for field in ('industryKey', 'heldPercentInsiders', 'enterpriseToEbitda'):
    print(stock.info[field])
# Other fields tried earlier: 'forwardEps', 'trailingEps'

print(stock.info.keys())

scientific-technical-instruments
5.0093203
-74.956
dict_keys(['address1', 'city', 'state', 'zip', 'country', 'phone', 'website', 'industry', 'industryKey', 'industryDisp', 'sector', 'sectorKey', 'sectorDisp', 'longBusinessSummary', 'fullTimeEmployees', 'companyOfficers', 'auditRisk', 'boardRisk', 'compensationRisk', 'shareHolderRightsRisk', 'overallRisk', 'governanceEpochDate', 'compensationAsOfEpochDate', 'maxAge', 'priceHint', 'previousClose', 'open', 'dayLow', 'dayHigh', 'regularMarketPreviousClose', 'regularMarketOpen', 'regularMarketDayLow', 'regularMarketDayHigh', 'beta', 'volume', 'regularMarketVolume', 'averageVolume', 'averageVolume10days', 'averageDailyVolume10Day', 'bid', 'ask', 'bidSize', 'askSize', 'marketCap', 'fiftyTwoWeekLow', 'fiftyTwoWeekHigh', 'priceToSalesTrailing12Months', 'fiftyDayAverage', 'twoHundredDayAverage', 'currency', 'enterpriseValue', 'profitMargins', 'floatShares', 'sharesOutstanding', 'sharesShort', 'sharesShortPriorMonth', 'sharesShortPreviousMonthDat

In [147]:
# Read the sector keys from the local CSV ("Sectors" column).
sectors_frame = pd.read_csv("/home/bread/Coding/Finance/src/data/sectors.csv")
sectors = list(sectors_frame["Sectors"])

# Flatten the industry keys of every sector into a single list,
# in sector order and then in the order yfinance returns them.
industries = [
    industry_key
    for sector_key in sectors
    for industry_key in yf.Sector(sector_key).industries.index.values
]

sorted(industries)



['advertising-agencies',
 'aerospace-defense',
 'agricultural-inputs',
 'airlines',
 'airports-air-services',
 'aluminum',
 'apparel-manufacturing',
 'apparel-retail',
 'asset-management',
 'auto-manufacturers',
 'auto-parts',
 'auto-truck-dealerships',
 'banks-diversified',
 'banks-regional',
 'beverages-brewers',
 'beverages-non-alcoholic',
 'beverages-wineries-distilleries',
 'biotechnology',
 'broadcasting',
 'building-materials',
 'building-products-equipment',
 'business-equipment-supplies',
 'capital-markets',
 'chemicals',
 'coking-coal',
 'communication-equipment',
 'computer-hardware',
 'confectioners',
 'conglomerates',
 'consulting-services',
 'consumer-electronics',
 'copper',
 'credit-services',
 'department-stores',
 'diagnostics-research',
 'discount-stores',
 'drug-manufacturers-general',
 'drug-manufacturers-specialty-generic',
 'education-training-services',
 'electrical-equipment-parts',
 'electronic-components',
 'electronic-gaming-multimedia',
 'electronics-comput

In [None]:
import time
import requests
import numpy as np
import pandas as pd
import constants as c
import yfinance as yf
import seaborn as sns
import data_collection as data
import matplotlib.pyplot as plt

from lxml import html
from datetime import date
import warnings

warnings.simplefilter(action='ignore', category=FutureWarning)


class Correlation():
    """Holds a list of stock symbols and a date for a correlation analysis."""

    def __init__(self, stocks=["AAPL"], date=c.today):
        """Store the symbols and the date.

        stocks: a list of symbols, or a single value that is wrapped in a list.
        date:   stored unchanged on the instance; defaults to ``c.today``.
        """
        if isinstance(stocks, list):
            # Copy the list so instances never share the mutable default
            # (or the caller's list): mutating self.stocks on one instance
            # previously changed the default seen by every later instance.
            self.stocks = list(stocks)
        else:
            self.stocks = [stocks]
        self.date = date

    def get_correlation(self):
        """Placeholder - not implemented yet; currently returns None."""
        pass


# Build a table of closing prices (NASDAQ composite plus the first nine
# volume leaders) since 2010 and plot their correlation matrix as a heat map.
today = date.today().strftime("%Y-%m-%d")
stock = 'MCO'

ticker = yf.Ticker("^IXIC")
df = ticker.history(start='2010-01-01', end=today)
df['NASDAQ'] = df['Close']

stock_list = data.DataScraping().get_volume_leaders()

# Stop when the 10th entry is reached, i.e. only the first nine are added.
for position, symbol in enumerate(stock_list, start=1):
    if position == 10:
        break
    ticker = yf.Ticker(symbol)
    closes = ticker.history(start='2010-01-01', end=today)['Close']
    df[symbol] = closes

# Keep only the per-symbol close columns added above.
df = df.drop(columns=['Open', 'High', 'Low', 'Close', 'Volume', 'Dividends', 'Stock Splits'])
print(df)

# Correlation matrix of the remaining columns.
cor = df.corr()

# Same background colour for the figure and the axes.
fig = plt.figure(figsize=(10, 8))
ax = plt.axes()
ax.set_facecolor('#faf0e6')
fig.patch.set_facecolor('#faf0e6')

sns.heatmap(cor, annot=True, cmap="Blues")
plt.savefig('heat_map.jpg')
plt.show()
