In [24]:
import numpy as np
import pandas as pd
import requests

#Fin Data Sources
import yfinance as yf
import pandas_datareader as pdr

#Data viz
import plotly.graph_objs as go
import plotly.express as px

import time
from datetime import date

In [25]:
def get_ipo_data(target):
    headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
    }

    url = "https://stockanalysis.com/ipos/" + target + "/"

    # header is not necessary here
    response = requests.get(url, headers=headers)

    ipo_raw = pd.read_html(response.text)
    
    return ipo_raw[0]

In [26]:
df_ipo_filings = get_ipo_data('filings')
df_ipo_filings.head(20)

  ipo_raw = pd.read_html(response.text)


Unnamed: 0,Filing Date,Symbol,Company Name,Price Range,Shares Offered
0,"Apr 26, 2024",EURK,Eureka Acquisition Corp,$10.00,5000000
1,"Apr 26, 2024",HDL,Super Hi International Holding Ltd.,-,-
2,"Apr 22, 2024",DRJT,Derun Group Inc,$5.00,-
3,"Apr 19, 2024",GPAT,GP-Act III Acquisition Corp.,$10.00,25000000
4,"Apr 16, 2024",GGL,Games Global Limited,-,-
5,"Apr 16, 2024",JLJT,Jialiang Holdings Ltd,$5.00,-
6,"Apr 15, 2024",GAUZ,Gauzy Ltd.,-,-
7,"Apr 12, 2024",BOW,Bowhead Specialty Holdings Inc.,-,-
8,"Apr 5, 2024",SPHL,Springview Holdings Ltd,$4.00 - $5.00,2000000
9,"Apr 2, 2024",FFFZ,Fuxing China Group Limited,$4.00 - $4.50,2000000


In [27]:
df_ipo_filings.dtypes

Filing Date       object
Symbol            object
Company Name      object
Price Range       object
Shares Offered    object
dtype: object

# Q1

In [28]:
df_ipo_filings['Filing Date'] = pd.to_datetime(df_ipo_filings['Filing Date'])
df_ipo_filings['Shares Offered'] = pd.to_numeric(df_ipo_filings['Shares Offered'], errors='coerce').fillna(0)

In [29]:
def average_price_range(row):
    if row == "-":
        return np.NaN
    elif '-' in row:
        prices = row.replace('$','').split(' - ')
        prices = [float(item) for item in prices]

        return sum(prices)/len(prices)
    elif '$' in row:
        return float(row.replace('$',''))
    else:
        return row

In [30]:
df_ipo_filings

Unnamed: 0,Filing Date,Symbol,Company Name,Price Range,Shares Offered
0,2024-04-26,EURK,Eureka Acquisition Corp,$10.00,5000000.0
1,2024-04-26,HDL,Super Hi International Holding Ltd.,-,0.0
2,2024-04-22,DRJT,Derun Group Inc,$5.00,0.0
3,2024-04-19,GPAT,GP-Act III Acquisition Corp.,$10.00,25000000.0
4,2024-04-16,GGL,Games Global Limited,-,0.0
...,...,...,...,...,...
321,2020-01-21,GOXS,"Goxus, Inc.",$8.00 - $10.00,1500000.0
322,2020-01-21,UTXO,"UTXO Acquisition, Inc.",$10.00,5000000.0
323,2019-12-09,LOHA,Loha Co. Ltd,$8.00 - $10.00,2500000.0
324,2019-10-04,ZGHB,China Eco-Materials Group Co. Limited,$4.00,4300000.0


In [31]:
df_ipo_filings['avg_price'] = df_ipo_filings['Price Range'].apply(average_price_range).fillna(0)
df_ipo_filings

Unnamed: 0,Filing Date,Symbol,Company Name,Price Range,Shares Offered,avg_price
0,2024-04-26,EURK,Eureka Acquisition Corp,$10.00,5000000.0,10.00
1,2024-04-26,HDL,Super Hi International Holding Ltd.,-,0.0,0.00
2,2024-04-22,DRJT,Derun Group Inc,$5.00,0.0,5.00
3,2024-04-19,GPAT,GP-Act III Acquisition Corp.,$10.00,25000000.0,10.00
4,2024-04-16,GGL,Games Global Limited,-,0.0,0.00
...,...,...,...,...,...,...
321,2020-01-21,GOXS,"Goxus, Inc.",$8.00 - $10.00,1500000.0,9.00
322,2020-01-21,UTXO,"UTXO Acquisition, Inc.",$10.00,5000000.0,10.00
323,2019-12-09,LOHA,Loha Co. Ltd,$8.00 - $10.00,2500000.0,9.00
324,2019-10-04,ZGHB,China Eco-Materials Group Co. Limited,$4.00,4300000.0,4.00


In [32]:
df_ipo_filings['shares_offered_value'] = df_ipo_filings['Shares Offered'] * df_ipo_filings['avg_price']
df_ipo_filings

Unnamed: 0,Filing Date,Symbol,Company Name,Price Range,Shares Offered,avg_price,shares_offered_value
0,2024-04-26,EURK,Eureka Acquisition Corp,$10.00,5000000.0,10.00,50000000.0
1,2024-04-26,HDL,Super Hi International Holding Ltd.,-,0.0,0.00,0.0
2,2024-04-22,DRJT,Derun Group Inc,$5.00,0.0,5.00,0.0
3,2024-04-19,GPAT,GP-Act III Acquisition Corp.,$10.00,25000000.0,10.00,250000000.0
4,2024-04-16,GGL,Games Global Limited,-,0.0,0.00,0.0
...,...,...,...,...,...,...,...
321,2020-01-21,GOXS,"Goxus, Inc.",$8.00 - $10.00,1500000.0,9.00,13500000.0
322,2020-01-21,UTXO,"UTXO Acquisition, Inc.",$10.00,5000000.0,10.00,50000000.0
323,2019-12-09,LOHA,Loha Co. Ltd,$8.00 - $10.00,2500000.0,9.00,22500000.0
324,2019-10-04,ZGHB,China Eco-Materials Group Co. Limited,$4.00,4300000.0,4.00,17200000.0


In [33]:
df_ipo_fri_2023 = df_ipo_filings[(df_ipo_filings['Filing Date'].dt.dayofweek == 4) & (df_ipo_filings['Filing Date'].dt.year == 2023)]
q1_ans = df_ipo_fri_2023.groupby('Filing Date').agg(totalsum=('shares_offered_value', 'sum')).reset_index()

In [34]:
q1_ans['totalsum_M'] = round(q1_ans['totalsum']/1e6)
q1_ans.totalsum_M.sum()

285.0

# Q2

In [4]:
df_ipo_2023 = get_ipo_data("2023")
df_ipo_2024 = get_ipo_data("2024")

  ipo_raw = pd.read_html(response.text)
  ipo_raw = pd.read_html(response.text)


In [5]:
df_ipo_2024.head()

Unnamed: 0,IPO Date,Symbol,Company Name,IPO Price,Current,Return
0,"Apr 26, 2024",ZONE,"CleanCore Solutions, Inc.",$4.00,$3.20,-20.00%
1,"Apr 25, 2024",RBRK,"Rubrik, Inc.",$32.00,$34.76,9.38%
2,"Apr 25, 2024",LOAR,Loar Holdings Inc.,$28.00,$50.60,78.04%
3,"Apr 25, 2024",MRX,Marex Group plc,$19.00,$19.34,1.79%
4,"Apr 23, 2024",NCI,Neo-Concept International Group Holdings Limited,$4.00,$1.31,-63.75%


In [6]:
df_ipo = pd.concat([df_ipo_2023, df_ipo_2024], axis=0)
df_ipo = df_ipo[df_ipo['Symbol'] != 'RYZB']

In [7]:
df_ipo[df_ipo['Symbol'] == 'IROH']

Unnamed: 0,IPO Date,Symbol,Company Name,IPO Price,Current,Return
0,"Dec 27, 2023",IROH,Iron Horse Acquisitions Corp.,$10.00,$10.05,0.50%


In [9]:
ticket_list = df_ipo['Symbol'].to_list()
ticket_list

['IROH',
 'LGCB',
 'ZKH',
 'BAYA',
 'INHD',
 'AFJK',
 'GSIW',
 'FEBO',
 'CLBR',
 'ELAB',
 'RR',
 'DDC',
 'SHIM',
 'GLAC',
 'SGN',
 'HG',
 'CRGX',
 'ANSC',
 'AITR',
 'GVH',
 'LXEO',
 'PAPL',
 'ATGL',
 'MNR',
 'WBUY',
 'NCL',
 'BIRK',
 'GMM',
 'LRHC',
 'PMEC',
 'GPAK',
 'SPKL',
 'QETA',
 'MSS',
 'ANL',
 'SYRA',
 'VSME',
 'LRE',
 'TURB',
 'MDBH',
 'KVYO',
 'CART',
 'DTCK',
 'NMRA',
 'ARM',
 'SPPL',
 'NWGL',
 'SWIN',
 'IVP',
 'NNAG',
 'SRM',
 'SPGC',
 'LQR',
 'NRXS',
 'FTEL',
 'MIRA',
 'PXDT',
 'CTNT',
 'HRYU',
 'SRFM',
 'PRZO',
 'HYAC',
 'KVAC',
 'JNVR',
 'ELWS',
 'WRNT',
 'TSBX',
 'ODD',
 'APGE',
 'NETD',
 'SGMT',
 'BOWN',
 'SXTP',
 'PWM',
 'VTMX',
 'INTS',
 'SVV',
 'KGS',
 'FIHL',
 'GENK',
 'BUJA',
 'BOF',
 'AZTR',
 'CAVA',
 'ESHA',
 'ATMU',
 'ATS',
 'IPXX',
 'CWD',
 'SGE',
 'SLRN',
 'ALCY',
 'KVUE',
 'GODN',
 'TRNR',
 'AACT',
 'JYD',
 'USGO',
 'UCAR',
 'WLGS',
 'TCJH',
 'TPET',
 'GDTC',
 'VCIG',
 'GDHG',
 'ARBB',
 'ISPR',
 'MGIH',
 'MWG',
 'HSHP',
 'SFWL',
 'SYT',
 'HKIT',
 'CHSN',
 'T

In [19]:
df_iroh = yf.download(tickers='IROH', period='max', interval='1d')

[*********************100%%**********************]  1 of 1 completed


In [10]:
def download_ipo_data(ticket_list):
    dataframes = {}

    for i in ticket_list:
        df = yf.download(tickers=i, period='max', interval='1d')

        if not df.empty:
            dataframes[f'df_{i}'] = df
        else:
            print(f'No data found for {i}')

    return dataframes

In [11]:
stock_data_dfs = download_ipo_data(ticket_list)

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%*******

No data found for PTHR


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%*******

No data found for IBAC



[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['BKHA']: Exception('%ticker%: No timezone found, symbol may be delisted')
[*********************100%%**********************]  1 of 1 completed


No data found for BKHA


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%*******

In [18]:
stock_data_dfs

{'df_IROH':               Open    High     Low   Close  Adj Close  Volume  \
 Date                                                            
 2024-02-16  10.050  10.050  10.010  10.010     10.010   16700   
 2024-02-20  10.020  10.020  10.020  10.020     10.020    5200   
 2024-02-21  10.020  10.020  10.015  10.015     10.015   98600   
 2024-02-22  10.020  10.020  10.020  10.020     10.020    5600   
 2024-02-23  10.020  10.020  10.010  10.010     10.010   14800   
 2024-02-26  10.020  10.020  10.020  10.020     10.020   15300   
 2024-02-27  10.010  10.015  10.010  10.010     10.010   74200   
 2024-02-28  10.020  10.020   9.970   9.980      9.980  138200   
 2024-02-29   9.985   9.985   9.980   9.980      9.980    8100   
 2024-03-01   9.980   9.985   9.980   9.980      9.980    5000   
 2024-03-04   9.980   9.985   9.980   9.985      9.985    2100   
 2024-03-05   9.985   9.985   9.985   9.985      9.985       0   
 2024-03-06   9.985   9.985   9.980   9.980      9.980  105000   

In [39]:
import pandas as pd

# Define the columns
columns = ['symbol', 'optimal_day', 'highest_quantile']

# Create an empty list to store data rows
data_rows = []

# Assuming stock_data_dfs is a dictionary of DataFrames, one for each symbol
for symbol, df in stock_data_dfs.items():
    # Calculate growth rates
    for i in range(1, 31):
        df['growth_future_' + str(i) + 'd'] = df['Adj Close'] / df['Adj Close'].shift(-i)

    # List of growth columns names
    growth_columns = [f'growth_future_{i}d' for i in range(1, 31)]
    
    # Calculate the 75th quantile for each growth column
    quantiles_75th = df[growth_columns].quantile(0.75)
    
    # Identify the day with the highest 75th quantile
    optimal_day = quantiles_75th.idxmax()
    highest_quantile = quantiles_75th.max()

    # Append a new dictionary to the list
    data_rows.append({
        'symbol': symbol,
        'optimal_day': optimal_day,
        'highest_quantile': highest_quantile
    })

# Convert list of dictionaries to a DataFrame
q2_ans = pd.DataFrame(data_rows)


      symbol        optimal_day  highest_quantile
0    df_IROH   growth_future_6d          1.001002
1    df_LGCB  growth_future_11d          1.073686
2     df_ZKH  growth_future_28d          1.206940
3    df_BAYA  growth_future_26d          1.005958
4    df_INHD  growth_future_27d          1.630167
..       ...                ...               ...
208  df_PSBD   growth_future_6d          1.007646
209  df_CCTG  growth_future_29d          1.572460
210  df_SYNX  growth_future_10d          1.085731
211  df_SDHC  growth_future_27d          1.084823
212  df_ROMA  growth_future_29d          1.636986

[213 rows x 3 columns]


In [41]:
q2_ans[q2_ans['highest_quantile'] == q2_ans.highest_quantile.max()]

Unnamed: 0,symbol,optimal_day,highest_quantile
205,df_JL,growth_future_30d,15.063694
