# Question 1

Question 1. What is the total withdrawn IPO value (in $ millions) for the company class with the highest total withdrawal value?

In [6]:
import re
from io import StringIO

import numpy as np
import pandas as pd
import requests


# Show floats with thousands separators and two decimals in every rendered frame
pd.options.display.float_format = "{:,.2f}".format

In [7]:

def get_withdrawn() -> pd.DataFrame:
    """
    Fetch the table of withdrawn IPOs from stockanalysis.com.

    Returns
    -------
    pd.DataFrame
        The first HTML table found on the withdrawn-IPOs page. If the request
        or the parsing fails, the error is printed (not raised) and an empty
        DataFrame is returned instead.
    """
    url = "https://stockanalysis.com/ipos/withdrawn/"
    headers = {
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/58.0.3029.110 Safari/537.3'
        )
    }

    try:
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()

        # Wrap HTML text in StringIO to avoid deprecation warning
        # "Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object."
        html_io = StringIO(response.text)
        tables = pd.read_html(html_io)

        if not tables:
            # This function has no `year`; referencing one here raised NameError
            # and hid the real problem behind "Unexpected error".
            raise ValueError(f"No tables found at {url}")

        return tables[0]

    except requests.exceptions.RequestException as e:
        print(f"Request failed: {e}")
    except ValueError as ve:
        print(f"Data error: {ve}")
    except Exception as ex:
        print(f"Unexpected error: {ex}")

    # Best-effort contract: callers get an empty frame rather than an exception
    return pd.DataFrame()

In [10]:

# Summarise column dtypes and non-null counts of the scraped table.
# NOTE(review): this cell was executed as In [10], after the In [9] cell below that
# assigns `withdrawn_df`; run top-to-bottom on a fresh kernel it would fail unless
# `withdrawn_df` is already defined earlier.
withdrawn_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 4 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   Symbol          100 non-null    object
 1   Company Name    100 non-null    object
 2   Price Range     100 non-null    object
 3   Shares Offered  100 non-null    object
dtypes: object(4)
memory usage: 3.3+ KB


In [9]:

# Scrape the withdrawn-IPO table (network call) and preview the first 10 rows.
# get_withdrawn() returns an empty DataFrame on failure, so an empty preview
# means the fetch or the parse did not succeed.
withdrawn_df = get_withdrawn()
withdrawn_df.head(10)

Unnamed: 0,Symbol,Company Name,Price Range,Shares Offered
0,ODTX,"Odyssey Therapeutics, Inc.",-,-
1,UNFL,"Unifoil Holdings, Inc.",$3.00 - $4.00,2000000
2,AURN,"Aurion Biotech, Inc.",-,-
3,ROTR,"PHI Group, Inc.",-,-
4,ONE,One Power Company,-,-
5,HPOT,The Great Restaurant Development Holdings Limited,$4.00 - $6.00,1400000
6,CABR,"Caring Brands, Inc.",$4.00,750000
7,SQVI,"Sequoia Vaccines, Inc.",$8.00 - $10.00,2775000
8,SNI,Shenni Holdings Limited,$4.00 - $6.00,3000000
9,KMCM,Key Mining Corp.,$2.25,4444444


In [None]:

# Shares Offered → float: strip thousands separators, then map the dash / empty
# placeholders to NaN before casting
shares_text = withdrawn_df["Shares Offered"].astype(str).str.replace(",", "", regex=False)
missing_markers = {"–": np.nan, "-": np.nan, "—": np.nan, "": np.nan}
withdrawn_df["Shares Offered"] = shares_text.replace(missing_markers).astype(float)

# Price Range stays text here; it is parsed into numbers in a separate step
withdrawn_df["Price Range"] = withdrawn_df["Price Range"].astype(str)

withdrawn_df




Unnamed: 0,Symbol,Company Name,Price Range,Shares Offered
0,ODTX,"Odyssey Therapeutics, Inc.",-,
1,UNFL,"Unifoil Holdings, Inc.",$3.00 - $4.00,2000000.00
2,AURN,"Aurion Biotech, Inc.",-,
3,ROTR,"PHI Group, Inc.",-,
4,ONE,One Power Company,-,
...,...,...,...,...
95,FHP,"Freehold Properties, Inc.",-,
96,CHO,Chobani Inc.,-,
97,IFIT,iFIT Health & Fitness Inc.,$18.00 - $21.00,30769231.00
98,GLGX,"Gerson Lehrman Group, Inc.",-,


In [15]:


def classify_company(name: str) -> str:
    """
    Map a company name to a class, checking patterns in *priority* order:
    1) Acquisition Corp(/oration)  → Acq.Corp
    2) Inc(orporated)              → Inc
    3) Group                       → Group
    4) Ltd/Limited                 → Limited
    5) Holdings                    → Holdings
    else                           → Other

    Parameters
    ----------
    name : str
        Company name; matching is case-insensitive. Non-string values
        (e.g. NaN or None from a missing cell) are classified as "Other".

    Returns
    -------
    str
        One of "Acq.Corp", "Inc", "Group", "Limited", "Holdings", "Other".
        The first matching pattern wins, so "Foo Group, Inc." is "Inc".
    """
    # A missing name has no .lower(); treat it as unclassifiable instead of crashing
    if not isinstance(name, str):
        return "Other"

    n = name.lower()
    # use word boundaries so 'inc' doesn't match 'income'
    patterns = [
        (r"\bacquisition corp(oration)?\b", "Acq.Corp"),
        (r"\binc(?:\.|orporated)?\b",        "Inc"),
        (r"\bgroup\b",                       "Group"),
        (r"\b(ltd|limited)\b",               "Limited"),
        (r"\bholdings?\b",                   "Holdings"),
    ]
    for pat, cls in patterns:
        if re.search(pat, n):
            return cls
    return "Other"

# Tag every row with its company class, then count how many companies fall in each class
withdrawn_df["Company Class"] = withdrawn_df["Company Name"].apply(classify_company)
withdrawn_df["Company Class"].value_counts()


Company Class
Inc         51
Acq.Corp    21
Limited     17
Other        6
Group        4
Holdings     1
Name: count, dtype: int64

In [None]:


# 
def price_range_to_avg(price_range):
    """
    Convert a price-range string into a single representative price.

    Parameters
    ----------
    price_range : str or missing value
        Either a range such as "$3.00 - $4.00" (hyphen, en dash or em dash
        as separator) or a single price such as "$4.00". Dollar signs and
        thousands separators are ignored.

    Returns
    -------
    float
        The midpoint of a two-sided range, or the price itself for a single
        value. np.nan when the input is missing or cannot be parsed
        (e.g. the "-" placeholder).
    """
    if pd.isna(price_range):
        return np.nan

    text = str(price_range).replace('$', '').replace(',', '')
    # Treat en/em dashes like a hyphen so "$3.00 – $4.00" is not silently lost as NaN
    for dash in ('–', '—'):
        text = text.replace(dash, '-')
    parts = text.split('-')

    try:
        if len(parts) == 2:
            low, high = map(float, parts)
            return (low + high) / 2
        return float(parts[0])
    except ValueError:
        # float() rejects empty or non-numeric pieces, e.g. the bare "-" placeholder
        return np.nan

# Representative price per IPO: midpoint of the range, NaN where the range cannot be parsed
withdrawn_df['Avg Price'] = withdrawn_df['Price Range'].apply(price_range_to_avg)

withdrawn_df


Unnamed: 0,Symbol,Company Name,Price Range,Shares Offered,Company Class,Avg Price
26,NVL,Novelis Inc.,$18.00 - $21.00,45000000.00,Inc,19.50
97,IFIT,iFIT Health & Fitness Inc.,$18.00 - $21.00,30769231.00,Inc,19.50
99,HCG,hear.com N.V.,$17.00 - $20.00,16220000.00,Other,18.50
41,GGL,Games Global Limited,$16.00 - $19.00,14500000.00,Limited,17.50
52,TFG,"The Fortegra Group, Inc.",$15.00 - $18.00,18000000.00,Inc,16.50
...,...,...,...,...,...,...
98,GLGX,"Gerson Lehrman Group, Inc.",-,,Inc,
49,COFO,Coforge Limited,-,,Limited,
54,VNG,VNG Limited,-,21687082.00,Limited,
71,PICS,PicS Ltd.,-,,Limited,


In [34]:
# Spot-check that each parsed Avg Price matches its source Price Range
withdrawn_df[["Price Range", "Avg Price"]].head(6)

Unnamed: 0,Price Range,Avg Price
26,$18.00 - $21.00,19.5
97,$18.00 - $21.00,19.5
99,$17.00 - $20.00,18.5
41,$16.00 - $19.00,17.5
52,$15.00 - $18.00,16.5
86,$13.00 - $15.00,14.0


In [44]:


# Coerce Shares Offered to float64; anything unparseable becomes NaN
shares_offered = pd.to_numeric(withdrawn_df['Shares Offered'], errors='coerce')
withdrawn_df['Shares Offered'] = shares_offered.astype('float64')

# Deal size in $ millions = shares offered × average price / 1,000,000
deal_value_musd = withdrawn_df["Shares Offered"] * withdrawn_df["Avg Price"] / 1_000_000
withdrawn_df["Withdrawn Value"] = deal_value_musd

valued_rows = withdrawn_df["Withdrawn Value"].notna().sum()
print("Non-null value rows:", valued_rows)

withdrawn_df

Non-null value rows: 71


Unnamed: 0,Symbol,Company Name,Price Range,Shares Offered,Company Class,Avg Price,Withdrawn Value ($m),Withdrawn Value
26,NVL,Novelis Inc.,$18.00 - $21.00,45000000.00,Inc,19.50,877.50,877.50
97,IFIT,iFIT Health & Fitness Inc.,$18.00 - $21.00,30769231.00,Inc,19.50,600.00,600.00
99,HCG,hear.com N.V.,$17.00 - $20.00,16220000.00,Other,18.50,300.07,300.07
41,GGL,Games Global Limited,$16.00 - $19.00,14500000.00,Limited,17.50,253.75,253.75
52,TFG,"The Fortegra Group, Inc.",$15.00 - $18.00,18000000.00,Inc,16.50,297.00,297.00
...,...,...,...,...,...,...,...,...
98,GLGX,"Gerson Lehrman Group, Inc.",-,,Inc,,,
49,COFO,Coforge Limited,-,,Limited,,,
54,VNG,VNG Limited,-,21687082.00,Limited,,,
71,PICS,PicS Ltd.,-,,Limited,,,


In [40]:

# Total withdrawn value ($ millions) per company class; the largest total answers Question 1
withdrawn_df.groupby('Company Class')['Withdrawn Value'].sum()




Company Class
Acq.Corp   4,021.00
Group         33.79
Holdings      75.00
Inc        2,257.16
Limited      549.73
Other        767.92
Name: Withdrawn Value, dtype: float64


# Question 2

Question 2. What is the median Sharpe ratio (as of 6 June 2025) for companies that went public in the first 5 months of 2024? 

In [47]:

import pandas as pd
import requests
from io import StringIO

def get_ipos_by_year(year: int) -> pd.DataFrame:
    """
    Download the stockanalysis.com IPO listing for one calendar year.

    Parameters
    ----------
    year : int
        Year whose IPO page is requested.

    Returns
    -------
    pd.DataFrame
        First HTML table on the page; an empty DataFrame when the request or
        parsing fails (the error is printed rather than raised).
    """
    page_url = f"https://stockanalysis.com/ipos/{year}/"
    user_agent = (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/58.0.3029.110 Safari/537.3'
    )

    try:
        resp = requests.get(page_url, headers={'User-Agent': user_agent}, timeout=10)
        resp.raise_for_status()

        # read_html wants a file-like object; passing the raw HTML string is deprecated
        parsed_tables = pd.read_html(StringIO(resp.text))

        if not parsed_tables:
            raise ValueError(f"No tables found for year {year}.")

        return parsed_tables[0]

    except requests.exceptions.RequestException as e:
        print(f"Request failed: {e}")
    except ValueError as ve:
        print(f"Data error: {ve}")
    except Exception as ex:
        print(f"Unexpected error: {ex}")

    # Every failure path above falls through to an empty frame
    return pd.DataFrame()


# Scrape the 2024 IPO list and keep only listings dated before 1 June 2024
ipos_2024 = get_ipos_by_year(2024)
ipos_2024['IPO Date'] = pd.to_datetime(ipos_2024['IPO Date'], errors='coerce')

listed_before_june = ipos_2024['IPO Date'] < '2024-06-01'
ipos = ipos_2024[listed_before_june]

ipos



Unnamed: 0,IPO Date,Symbol,Company Name,IPO Price,Current,Return
148,2024-05-31,NAKA,"Kindly MD, Inc.",-,$14.22,-
149,2024-05-23,BOW,Bowhead Specialty Holdings Inc.,$17.00,$36.49,114.65%
150,2024-05-17,HDL,Super Hi International Holding Ltd.,$19.56,$18.70,-4.40%
151,2024-05-17,RFAI,RF Acquisition Corp II,$10.00,$10.60,6.00%
152,2024-05-15,JDZG,JIADE Limited,$4.00,$0.26,-93.40%
...,...,...,...,...,...,...
220,2024-01-18,CCTG,CCSC Technology International Holdings Limited,$6.00,$1.07,-82.17%
221,2024-01-18,PSBD,Palmer Square Capital BDC Inc.,$16.45,$14.50,-11.85%
222,2024-01-12,SYNX,Silynxcom Ltd.,$4.00,$2.07,-48.25%
223,2024-01-11,SDHC,Smith Douglas Homes Corp.,$21.00,$18.26,-13.05%


In [48]:

# De-duplicated ticker symbols of the filtered IPOs, used for the price download
tickers = ipos['Symbol'].unique().tolist()

tickers



['NAKA',
 'BOW',
 'HDL',
 'RFAI',
 'JDZG',
 'RAY',
 'BTOC',
 'ZK',
 'GPAT',
 'PAL',
 'SVCO',
 'NNE',
 'CCIX',
 'VIK',
 'ZONE',
 'LOAR',
 'MRX',
 'RBRK',
 'NCI',
 'MFI',
 'YYGH',
 'TRSG',
 'CDTG',
 'CTRI',
 'IBTA',
 'MTEN',
 'SUPX',
 'TWG',
 'ULS',
 'PACS',
 'MNDR',
 'CTNM',
 'MAMO',
 'ZBAO',
 'BOLD',
 'MMA',
 'UBXG',
 'IBAC',
 'AUNA',
 'BKHA',
 'LOBO',
 'RDDT',
 'ALAB',
 'INTJ',
 'RYDE',
 'LGCL',
 'SMXT',
 'VHAI',
 'DYCQ',
 'CHRO',
 'UMAC',
 'HLXB',
 'MGX',
 'TBBB',
 'TELO',
 'KYTX',
 'PMNT',
 'AHR',
 'LEGT',
 'ANRO',
 'GUTS',
 'AS',
 'FBLG',
 'AVBP',
 'BTSG',
 'HAO',
 'CGON',
 'YIBO',
 'JL',
 'SUGP',
 'JVSA',
 'KSPI',
 'CCTG',
 'PSBD',
 'SYNX',
 'SDHC',
 'ROMA']

In [50]:

import yfinance as yf

START, END = "2024-01-01", "2025-06-07"

# Step 1: one bulk request for all tickers (columns come back as a two-level index)
raw = yf.download(
    " ".join(tickers),
    start=START,
    end=END,
    group_by="column",
    threads=True,
    progress=False,
)

# Step 2: split into one frame per ticker, dropping dates where every field is NaN
returned_symbols = raw.columns.get_level_values(1)
stocks = {}
for sym in tickers:
    if sym in returned_symbols:
        stocks[sym] = raw.xs(sym, axis=1, level=1).dropna(how='all')

# Step 3: report and discard tickers that came back with no usable rows
missing = [ticker for ticker, frame in stocks.items() if frame.empty]
if missing:
    print("No price data returned for:", ", ".join(missing))
    stocks = {ticker: frame for ticker, frame in stocks.items() if not frame.empty}

# `stocks` now maps each ticker to its non-empty price frame
print(f"Loaded {len(stocks)} tickers")

YF.download() has changed argument auto_adjust default to True
Loaded 77 tickers


In [None]:

import numpy as np

def sharpe_ratio(adj_series: pd.Series, rf_daily: float = 0.0) -> float:
    """
    Annualised Sharpe ratio from a daily *Adj Close* price series.

    Parameters
    ----------
    adj_series : pd.Series
        Daily Adjusted Close prices with a DateTimeIndex.
    rf_daily   : float, optional
        Daily risk-free rate (decimal).  Set to 0.0 if you have no series.

    Returns
    -------
    float
        Annualised Sharpe ratio (√252 factor).  np.nan if there are fewer
        than two daily returns or the returns have zero volatility.
    """
    r = adj_series.pct_change().dropna()
    # The sample standard deviation is undefined for fewer than two returns
    if len(r) < 2:
        return np.nan
    mu, sigma = r.mean(), r.std()
    # A flat price series has sigma == 0; dividing would give ±inf (or NaN with
    # a runtime warning) and distort any median/mean taken over the results
    if not np.isfinite(sigma) or sigma == 0:
        return np.nan
    return (mu - rf_daily) / sigma * np.sqrt(252)

 

In [64]:
# One Sharpe ratio per ticker, skipping any frame that lacks a Close column
sharpe_by_ticker = {
    sym: sharpe_ratio(df["Close"])
    for sym, df in stocks.items()
    if "Close" in df.columns
}

sharpes = pd.Series(sharpe_by_ticker, name="Sharpe")
print(sharpes.sort_values(ascending=False).head())


AHR    2.90
RFAI   2.73
GPAT   2.25
ULS    2.25
BKHA   2.23
Name: Sharpe, dtype: float64


In [65]:

# Headline statistics over all per-ticker Sharpe ratios
median_sharpe, mean_sharpe = sharpes.median(), sharpes.mean()
n_computed = sharpes.notna().sum()

print(f"\nSharpe ratios computed for {n_computed} tickers")
print(f"Median Sharpe : {median_sharpe:.2f}")
print(f"Mean   Sharpe : {mean_sharpe:.2f}")



Sharpe ratios computed for 77 tickers
Median Sharpe : 0.21
Mean   Sharpe : 0.43
