4. Data Loader and Builder



4.1 MetaData Sourcing

In [34]:
import pandas as pd
import os

# Folder (relative to the current working directory) that holds the IPO metadata.
IPO_META_FOLDER = "IPO_Meta"
os.makedirs(IPO_META_FOLDER, exist_ok=True)  # no error if the folder already exists
# Location of the metadata CSV inside that folder.
IPO_METADATA_PATH = os.path.join(IPO_META_FOLDER, "ipo_metadata.csv")

# ===== CORRECTED DATA: 40 Mainboard IPOs =====
# The data is kept as parallel lists: within each sample, position i of the
# names, tickers and listing-dates lists describes the same IPO. The lists are
# later concatenated and combined column-wise into one DataFrame, so they must
# stay the same length and in the same order.

# HIGH SUBSCRIPTION IPOs (20)
company_names_high = [
    "Mamata Machinery", "Unimech Aerospace", "Mobikwik Systems", "Senores Pharmaceuticals",
    "Transrail Lighting", "Waaree Energies", "Tata Technologies", "Azad Engineering",
    "Protean eGov Technologies", "Motisons Jewellers", "Jyoti CNC Automation", "DOMS Industries",
    "Eternal", "Bharti Hexacom", "Suraj Estate Developers", "Paytm",
    "Bajaj Housing Finance", "Awfis Space Solutions", "Aadhar Housing Finance", "Nykaa"
]

# Ticker symbols with an exchange suffix (".NS" / ".BO"), same order as company_names_high.
tickers_high = [
    "MAMATA.NS", "UNIMECH.NS", "MOBIKWIK.NS", "SENORES.NS",
    "TRANSRAILL.NS", "WAAREEENER.NS", "TATATECH.NS", "AZAD.NS",
    "PROTEAN.BO", "MOTISONS.NS", "JYOTICNC.NS", "DOMS.NS",
    "ETERNAL.NS", "BHARTIHEXA.NS", "SURAJEST.NS", "PAYTM.NS",
    "BAJAJHFL.NS", "AWFIS.NS", "AADHARHFC.NS", "NYKAA.NS"
]

# Listing dates as "YYYY-MM-DD" strings, same order as company_names_high.
listing_dates_high = [
    "2024-12-27", "2024-12-31", "2024-12-18", "2024-12-30",
    "2024-12-27", "2024-10-28", "2023-11-30", "2024-01-19",
    "2024-02-14", "2024-12-20", "2024-01-12", "2023-12-20",
    "2021-07-23", "2024-04-12", "2024-06-14", "2021-11-18",
    "2024-09-16", "2024-05-30", "2024-05-15", "2021-11-10"
]

# LOW SUBSCRIPTION IPOs (20)
# NOTE(review): the High/Low assignment is hard-coded here; the subscription
# figures that justify it are not part of this notebook section — confirm against the source.
company_names_low = [
    "Hyundai Motor India", "Swiggy", "Sagility India", "Niva Bupa Health Insurance",
    "Ola Electric Mobility", "Delhivery", "EaseMyTrip", "Carraro India",
    "Suraksha Diagnostic", "Indo Farm Equipment", "CarTrade Tech", "Sanathan Textiles",
    "Nazara Tech", "Latent View", "Le Travenues Technology", "IRCTC",
    "Fino Payments Bank", "Indiamart Intermesh", "Stanley Lifestyles", "Akums Drugs"
]

# Ticker symbols, same order as company_names_low.
tickers_low = [
    "HYUNDAI.NS", "SWIGGY.NS", "SAGILITY.NS", "NIVABUPA.NS",
    "OLAELEC.NS", "DELHIVERY.NS", "EASEMYTRIP.NS", "CARRARO.NS",
    "SURAKSHA.NS", "INDOFARM.NS", "CARTRADE.NS", "SANATHAN.NS",
    "NAZARA.NS", "LATENTVIEW.NS", "IXIGO.NS", "IRCTC.NS",
    "FINOPB.NS", "INDIAMART.NS", "STANLEY.NS", "AKUMS.NS"
]

# Listing dates as "YYYY-MM-DD" strings, same order as company_names_low.
listing_dates_low = [
    "2024-10-22", "2024-11-13", "2024-11-11", "2024-11-11",
    "2024-08-09", "2022-05-24", "2021-03-19", "2024-12-24",
    "2024-12-06", "2024-12-31", "2021-08-20", "2024-12-27",
    "2021-03-30", "2021-11-23", "2024-06-18", "2019-10-14",
    "2021-10-29", "2019-07-04", "2024-06-28", "2024-07-31"
]

# Fail fast if the parallel lists of either sample are misaligned; a silent
# mismatch would pair a company with the wrong ticker or listing date.
assert len(company_names_high) == len(tickers_high) == len(listing_dates_high), \
    "High-subscription lists must all have the same length"
assert len(company_names_low) == len(tickers_low) == len(listing_dates_low), \
    "Low-subscription lists must all have the same length"

# Combine both samples (High rows first, then Low rows)
company_names = company_names_high + company_names_low
tickers = tickers_high + tickers_low
listing_dates = listing_dates_high + listing_dates_low
# Derive the labels from the actual sample sizes instead of a hard-coded 20,
# so adding or removing an IPO cannot shift the High/Low boundary.
subscription_category = ["High"] * len(company_names_high) + ["Low"] * len(company_names_low)

# Create DataFrame: one row per IPO, columns taken position-by-position from the lists
df_meta = pd.DataFrame({
    "company_name": company_names,
    "ticker": tickers,
    "listing_date": listing_dates,
    "subscription_category": subscription_category
})

# Write the metadata table to disk without the row index
df_meta.to_csv(IPO_METADATA_PATH, index=False)

# Summary: where the file went and how many IPOs fall in each category
print(f"✓ IPO Metadata saved to: {IPO_METADATA_PATH}")
print(f"✓ Total IPOs: {len(df_meta)}")
print(f"✓ High Subscription: {int((df_meta['subscription_category'] == 'High').sum())}")
print(f"✓ Low Subscription: {int((df_meta['subscription_category'] == 'Low').sum())}")

# Plain-text preview of both ends of the table
print("\nFirst 5 rows:", df_meta.head(), sep="\n")
print("\nLast 5 rows:", df_meta.tail(), sep="\n")

✓ IPO Metadata saved to: IPO_Meta\ipo_metadata.csv
✓ Total IPOs: 40
✓ High Subscription: 20
✓ Low Subscription: 20

First 5 rows:
              company_name         ticker listing_date subscription_category
0         Mamata Machinery      MAMATA.NS   2024-12-27                  High
1        Unimech Aerospace     UNIMECH.NS   2024-12-31                  High
2         Mobikwik Systems    MOBIKWIK.NS   2024-12-18                  High
3  Senores Pharmaceuticals     SENORES.NS   2024-12-30                  High
4       Transrail Lighting  TRANSRAILL.NS   2024-12-27                  High

Last 5 rows:
           company_name        ticker listing_date subscription_category
35                IRCTC      IRCTC.NS   2019-10-14                   Low
36   Fino Payments Bank     FINOPB.NS   2021-10-29                   Low
37  Indiamart Intermesh  INDIAMART.NS   2019-07-04                   Low
38   Stanley Lifestyles    STANLEY.NS   2024-06-28                   Low
39          Akums Drugs      

4.2 Imports & Configs

In [35]:
import pandas as pd
import numpy as np
import yfinance as yf
from datetime import datetime, timedelta
import warnings

# Silence all warnings so the notebook output stays readable
warnings.filterwarnings(action="ignore")

# Table display settings: show every column, cap rendered rows at 50
pd.options.display.max_columns = None
pd.options.display.max_rows = 50

print("Section 4.2 Loaded Successfully")



Section 4.2 Loaded Successfully


4.3 IPO MetaData Loading



In [36]:

import pandas as pd

# Load metadata from IPO_METADATA_PATH (the CSV written earlier in this notebook)
try:
    ipo_df = pd.read_csv(IPO_METADATA_PATH)
    print("IPO metadata loaded successfully.\n")
except FileNotFoundError:
    # Print a hint for the reader, then re-raise so execution stops at this cell.
    print("File not found. Please check path and load again.")
    raise

# Preview data
display(ipo_df.head())

# Validating essential columns: every name listed here must exist in the CSV header
required_cols = ["company_name", "ticker", "listing_date", "subscription_category"]


print("\nValidating required columns...\n")
missing_cols = [c for c in required_cols if c not in ipo_df.columns]

if missing_cols:
    # Report which columns are absent before aborting.
    print("Missing columns:", missing_cols)
    raise Exception("Metadata file incomplete. Fix before proceeding.")

print("All required columns present.\n")

# Convert date column into proper datetime format
ipo_df["listing_date"] = pd.to_datetime(ipo_df["listing_date"])

# Sort by listing date for cleaner processing (oldest first); the index is renumbered from 0
ipo_df = ipo_df.sort_values("listing_date").reset_index(drop=True)

print("Section 4.3 completed successfully.")
display(ipo_df.head())



IPO metadata loaded successfully.



Unnamed: 0,company_name,ticker,listing_date,subscription_category
0,Mamata Machinery,MAMATA.NS,2024-12-27,High
1,Unimech Aerospace,UNIMECH.NS,2024-12-31,High
2,Mobikwik Systems,MOBIKWIK.NS,2024-12-18,High
3,Senores Pharmaceuticals,SENORES.NS,2024-12-30,High
4,Transrail Lighting,TRANSRAILL.NS,2024-12-27,High



Validating required columns...

All required columns present.

Section 4.3 completed successfully.


Unnamed: 0,company_name,ticker,listing_date,subscription_category
0,Indiamart Intermesh,INDIAMART.NS,2019-07-04,Low
1,IRCTC,IRCTC.NS,2019-10-14,Low
2,EaseMyTrip,EASEMYTRIP.NS,2021-03-19,Low
3,Nazara Tech,NAZARA.NS,2021-03-30,Low
4,Eternal,ETERNAL.NS,2021-07-23,High


In [37]:
import os
from datetime import timedelta
import pandas as pd
import numpy as np
import yfinance as yf
import warnings
warnings.filterwarnings("ignore")

# Paths (update only if you changed folder names)
# NOTE(review): META_PATH goes through "../Notebooks/IPO_Meta", while the metadata CSV is
# written to "IPO_Meta/ipo_metadata.csv" relative to the working directory. The two only
# refer to the same file when the working directory is a folder named "Notebooks" — confirm.
META_PATH = os.path.join("..", "Notebooks", "IPO_Meta", "ipo_metadata.csv")   # <-- metadata CSV you created
# NOTE(review): despite the ".csv" suffix, OUT_PROCESSED is used as a DIRECTORY: it is
# created with os.makedirs below and every output file path is built inside it.
OUT_PROCESSED = os.path.join("..", "Notebooks", "IPO_Meta","meta_output_processed.csv")
os.makedirs(OUT_PROCESSED, exist_ok=True)

# Output files written after the main loop
OUT_DAILY_MASTER = os.path.join(OUT_PROCESSED, "df_stock_master.csv")    # per-day rows for all IPOs
OUT_POOLED_MASTER = os.path.join(OUT_PROCESSED, "df_pooled_master.csv")  # one summary row per IPO
OUT_SAMPLE1 = os.path.join(OUT_PROCESSED, "sample1_final.csv")           # "high" subscription subset
OUT_SAMPLE2 = os.path.join(OUT_PROCESSED, "sample2_final.csv")           # "low" subscription subset

# Global fetch window (broad enough to cover event windows); used for the benchmark download
GLOBAL_START = "2019-01-01"
GLOBAL_END   = "2025-02-15"
# Length of the post-listing event window. It is compared against days_from_listing,
# which is a calendar-day difference, so this is 30 calendar days, not 30 trading days.
EVENT_WINDOW_DAYS = 30  # 30-day post-listing as decided 
print("Metadata path (expected):", META_PATH)


Metadata path (expected): ..\Notebooks\IPO_Meta\ipo_metadata.csv


4.4 Data Loader Functions

In [39]:
import yfinance as yf
import pandas as pd

# 1. Fetch data for a single stock
def fetch_stock_data(ticker, start_date, end_date):
    """
    Download historical price data for one ticker via yfinance.

    Parameters
    ----------
    ticker : symbol passed straight to ``yf.download``.
    start_date, end_date : bounds forwarded as ``start`` / ``end``.

    Returns
    -------
    DataFrame with the index reset into a column, or None when nothing was
    returned or any exception occurred (the error is printed, never raised,
    so a single bad ticker cannot stop the pipeline).
    """
    try:
        prices = yf.download(ticker, start=start_date, end=end_date, progress=False, threads=False)

        has_rows = prices is not None and not prices.empty
        if has_rows:
            return prices.reset_index()

        print(f" Warning: No data found for {ticker}.")
        return None

    except Exception as err:
        print(f" Error fetching {ticker}: {err}")
        return None

 
# 2. Fetch benchmark (NIFTY)
 
def fetch_benchmark(start_date, end_date, index_ticker="^NSEI"):
    """
    Download the benchmark index series used for CAR calculations.

    Defaults to NIFTY (``^NSEI``). Returns the downloaded DataFrame with its
    index reset into a column, or None when no data came back or an exception
    was raised (the problem is printed instead of propagated).
    """
    try:
        benchmark = yf.download(index_ticker, start=start_date, end=end_date, progress=False, threads=False)

        got_data = benchmark is not None and not benchmark.empty
        if got_data:
            return benchmark.reset_index()

        print(" Warning: No benchmark data found.")
        return None

    except Exception as err:
        print(f" Error fetching benchmark: {err}")
        return None



In [40]:
#SOLUTION FOR: YFTzMissingError('possibly delisted; no timezone found')


import yfinance as yf
import pandas as pd
from datetime import timedelta

def fetch_stock_data_robust(ticker, start_date, end_date, max_retries=2):
    """
    Robust stock data fetcher with multiple fallback strategies.

    Strategy, per candidate symbol:
    1. Try ``yf.download`` up to ``max_retries`` times.
    2. If that produced nothing (empty result OR an exception), try
       ``yf.Ticker(...).history`` once.
    Candidates are the original ticker, then the same symbol with ``.NS``
    replaced by ``.BO`` (skipped when that yields the identical symbol).

    Parameters
    ----------
    ticker : symbol including exchange suffix, e.g. "ABC.NS".
    start_date, end_date : bounds forwarded to yfinance as ``start`` / ``end``.
    max_retries : number of ``yf.download`` attempts per candidate symbol.

    Returns
    -------
    DataFrame with the date index reset into a column, or None when every
    attempt failed (a single failure line is printed in that case).
    """
    # dict.fromkeys de-duplicates while preserving order: a ticker without ".NS"
    # (e.g. one already on ".BO") would otherwise be retried twice as the same symbol.
    candidates = list(dict.fromkeys([ticker, ticker.replace('.NS', '.BO')]))
    last_error = None

    for attempt_ticker in candidates:
        for _ in range(max_retries):
            try:
                # Method 1: Standard download
                df = yf.download(
                    attempt_ticker,
                    start=start_date,
                    end=end_date,
                    progress=False,
                    auto_adjust=False,  # Explicitly set to False for consistency
                    threads=False
                )
                if df is not None and not df.empty:
                    df = df.reset_index()
                    print(f"✓ Successfully fetched: {attempt_ticker}")
                    return df
            except Exception as e:
                last_error = e

        # Method 2: per-ticker history API. Tried whenever download produced nothing,
        # not only when an exception mentioning a timezone was raised.
        try:
            df = yf.Ticker(attempt_ticker).history(start=start_date, end=end_date, auto_adjust=False)
            if df is not None and not df.empty:
                # Drop any timezone from the index so the resulting date column
                # can be merged with timezone-naive benchmark dates.
                if getattr(df.index, "tz", None) is not None:
                    df.index = df.index.tz_localize(None)
                df = df.reset_index()
                print(f"✓ Fetched via .history(): {attempt_ticker}")
                return df
        except Exception as e:
            last_error = e

    # Always report a total failure, including the case where nothing raised
    # but every response was empty.
    reason = str(last_error)[:100] if last_error is not None else "no data returned"
    print(f"❌ All attempts failed for {ticker}: {reason}")
    return None


def fetch_benchmark_robust(start_date, end_date, index_ticker="^NSEI"):
    """
    Fetch a benchmark index, falling back to BSE Sensex (``^BSESN``).

    Tries ``index_ticker`` first, then ``^BSESN``. The first non-empty
    download is returned with its index reset into a column. A download that
    raises is reported and skipped; an empty download is skipped silently.

    Raises
    ------
    Exception
        When neither symbol produced any data.
    """
    for candidate in (index_ticker, "^BSESN"):  # requested index, then BSE Sensex
        try:
            quotes = yf.download(candidate, start=start_date, end=end_date, progress=False, threads=False)
            if quotes is None or quotes.empty:
                continue
            quotes = quotes.reset_index()
            print(f"✓ Benchmark loaded: {candidate}")
            return quotes
        except Exception as err:
            print(f"⚠️ {candidate} failed: {str(err)[:50]}")

    raise Exception("Could not fetch any benchmark index (NIFTY or Sensex)")



In [41]:
# Known-bad ticker symbols mapped to the symbol that should be fetched instead.
TICKER_CORRECTIONS = {
    "TRANSRAIL.NS": "TRANSRAILL.NS",  # correct symbol has a double L
    "WAAREE.NS": "WAAREEENER.NS",     # correct symbol uses the longer form
    "PROTEAN.NS": "PROTEAN.BO",       # BSE works better
}


def apply_ticker_corrections(ticker):
    """Return the corrected symbol for a known-bad ticker, else the ticker unchanged."""
    if ticker in TICKER_CORRECTIONS:
        return TICKER_CORRECTIONS[ticker]
    return ticker


4.5 Small Utility Helpers

In [42]:
def flatten_multiindex_columns_if_needed(df):
    """
    Collapse MultiIndex columns to their first level.

    The frame is modified in place (its ``columns`` attribute is reassigned)
    and also returned. Frames with ordinary columns are returned untouched.
    """
    cols = df.columns
    if not isinstance(cols, pd.MultiIndex):
        return df

    # Level 0 holds the actual column names
    df.columns = cols.get_level_values(0)
    return df

def pick_price_column_and_rename(df, target_name="Adj_Close"):
    """
    Rename the best available price column of ``df`` to ``target_name``.

    Preference order:
    1. a column named exactly "Adj Close"
    2. a column named exactly "Close"
    3. the first column whose lowercased name contains both "adj" and "close"
    4. the first column whose lowercased name contains "close"

    Returns a renamed copy of ``df``. Raises KeyError when no column matches.
    """
    # Exact names take priority
    for exact_name in ("Adj Close", "Close"):
        if exact_name in df.columns:
            return df.rename(columns={exact_name: target_name})

    # Fallback: case-insensitive substring search, adjusted close before plain close
    lowered = [(col, str(col).lower()) for col in df.columns]
    close_like = [col for col, low in lowered if "close" in low]
    adj_like = [col for col, low in lowered if "adj" in low and "close" in low]

    for matches in (adj_like, close_like):
        if matches:
            return df.rename(columns={matches[0]: target_name})

    raise KeyError(f"No price column (Adj Close / Close) found. Columns: {list(df.columns)}")

def event_window_bounds(listing_date, pre_days=7 , post_days= EVENT_WINDOW_DAYS):
    """
    Build the download window around a listing date.

    The window opens ``pre_days`` before the listing date and closes
    ``post_days + pre_days`` after it; the extra ``pre_days`` on the far side
    is a buffer so enough trading days are captured.

    Returns a ``(start, end)`` tuple of "YYYY-MM-DD" strings.
    """
    date_format = "%Y-%m-%d"
    window_open = listing_date - timedelta(days=pre_days)
    window_close = listing_date + timedelta(days=post_days + pre_days)
    return window_open.strftime(date_format), window_close.strftime(date_format)

4.6 Loading MetaData

In [44]:
# Stop early with a clear message when the metadata CSV is not where META_PATH points.
if not os.path.exists(META_PATH):
    raise FileNotFoundError(f"MetaData file not found at {META_PATH}.create it first and re-run")

ipo_meta = pd.read_csv(META_PATH)
# Strip stray whitespace from header names so the column checks below match.
ipo_meta.columns = [c.strip() for c in ipo_meta.columns]
# Accept the long column name and normalise it to "subscription_cat", the name
# used from here on (only when the short name is not already present).
if "subscription_category" in ipo_meta.columns and "subscription_cat" not in ipo_meta.columns:
    ipo_meta = ipo_meta.rename(columns={"subscription_category" : "subscription_cat"})

#Ensuring Required Columns Exist
required = ["company_name", "ticker", "listing_date", "subscription_cat"]
missing = [c for c in required if c not in ipo_meta.columns]
if missing:
    raise Exception(f"MetaData missing required column: {missing}. Add them and re-run.")

# Parse listing dates into datetimes so date arithmetic and comparisons work later.
ipo_meta["listing_date"] = pd.to_datetime(ipo_meta["listing_date"])
print("Loaded metadata. Total rows:", len(ipo_meta))

Loaded metadata. Total rows: 40


4.7 Preload Benchmark NIFTY



In [53]:
# Download the benchmark once for the whole study period; each IPO later takes a slice of it.
nifty_full = fetch_benchmark_robust(GLOBAL_START, GLOBAL_END)
# Defensive check: fetch_benchmark_robust raises on total failure rather than
# returning None, so this branch is not expected to trigger.
if nifty_full is None:
    raise Exception("Failed to fetch benchmark data (NIFTY)")

# Collapse MultiIndex columns (if any) to plain column names.
nifty_full = flatten_multiindex_columns_if_needed(nifty_full)

# If no date column is present, move the index into a column.
if "Date" not in nifty_full.columns and "date" not in nifty_full.columns:
    nifty_full = nifty_full.reset_index()

# Any column whose name contains "date" (case-insensitive) qualifies; the first one is used.
date_cols= [c for c in nifty_full.columns if "date" in c.lower()]
if not date_cols:
    raise Exception("Could not find a date column in NIFTY Data")

#price column: prefer "Adj Close", fall back to "Close"
price_col = None
if "Adj Close" in nifty_full.columns:
    price_col = "Adj Close"
elif "Close" in nifty_full.columns:
    price_col = "Close"
else: 
    raise Exception(f"No price column in NIFTY. Available: {nifty_full.columns.tolist()}")

#Selecting only needed columns and using direct assignment
nifty_full = nifty_full[[date_cols[0], price_col]].copy()
# Positional rename: first column is the date, second is the chosen price.
nifty_full.columns = ["date", "Adj_Close"]

nifty_full["date"] = pd.to_datetime(nifty_full["date"])
nifty_full = nifty_full.sort_values("date").reset_index(drop=True)

#Computation of NIFTY Returns (simple day-over-day percentage change; first row is NaN)
nifty_full["nifty_return"] = nifty_full["Adj_Close"].pct_change()

print ("NIFTY Prepared. Rows: ", len(nifty_full))

                                 

✓ Benchmark loaded: ^NSEI
NIFTY Prepared. Rows:  1512


In [55]:
#Creation of NIFTY CSV
# Written into IPO_META_FOLDER (not OUT_PROCESSED); columns are date, Adj_Close, nifty_return.
nifty_output_path = os.path.join(IPO_META_FOLDER, "nifty_final.csv")
nifty_full.to_csv(nifty_output_path, index=False)
print("NIFTY saved:", nifty_output_path)



NIFTY saved: IPO_Meta\nifty_final.csv


In [46]:
#Containers For Results
# NOTE: the main loop appends to these lists; re-run this cell before re-running
# the loop, otherwise results from the previous run are duplicated.
daily_rows = []      # one merged per-day DataFrame per successfully processed IPO
ipo_summary = []     # one dict of summary metrics per successfully processed IPO
failed_tickers = []  # dicts with company / ticker / reason for every skipped IPO

4.8 Helper Metric Functions (operate on the merged DataFrame, which carries `date` and `days_from_listing` columns)

In [47]:
def compute_car30(merged_df, listing_first_date):
    """
    Cumulative abnormal return over the post-listing event window.

    ``merged_df`` must provide 'abnormal_return' and 'days_from_listing'.
    Rows with 0 <= days_from_listing <= EVENT_WINDOW_DAYS are ordered by day
    offset and their abnormal returns (missing values counted as 0) are
    cumulated. The value on day EVENT_WINDOW_DAYS is returned when such a row
    exists, otherwise the value on the last available day in the window.
    Returns NaN when no row falls in the window.

    ``listing_first_date`` is accepted for call compatibility but is not used.
    """
    day_offsets = merged_df["days_from_listing"]
    event_rows = merged_df[day_offsets.between(0, EVENT_WINDOW_DAYS)]
    if event_rows.empty:
        return np.nan

    # Running CAR from day 0 onwards
    event_rows = event_rows.sort_values("days_from_listing")
    running_car = event_rows["abnormal_return"].fillna(0).cumsum()

    # Position of the first row sitting exactly on the final day, else the last row
    on_final_day = (event_rows["days_from_listing"] == EVENT_WINDOW_DAYS).to_numpy()
    position = int(on_final_day.argmax()) if on_final_day.any() else -1
    return float(running_car.iloc[position])

def compute_day1_return(merged_df):
    """
    Return the stock's daily return on the first trading day after listing.

    ``merged_df`` must provide 'days_from_listing' (calendar-day offset from
    the listing day) and 'ipo_return'. Because the offset counts calendar
    days, the first trading day after a Friday listing or a holiday has an
    offset greater than 1; matching the offset against exactly 1 would yield
    NaN for those IPOs. The row with the smallest offset >= 1 is used instead.

    Returns NaN when there is no row after the listing day.
    """
    after_listing = merged_df[merged_df["days_from_listing"] >= 1]
    if after_listing.empty:
        return np.nan
    # argmin returns the first position of the smallest offset, i.e. the
    # earliest post-listing trading row regardless of the frame's row order.
    first_pos = after_listing["days_from_listing"].to_numpy().argmin()
    return float(after_listing["ipo_return"].iloc[first_pos])

def compute_vol30(merged_df):
    """
    Standard deviation of the stock's daily returns after listing.

    Uses 'ipo_return' on rows with 1 <= days_from_listing <= EVENT_WINDOW_DAYS.
    Returns NaN when no row falls in that range.
    """
    day_offsets = merged_df["days_from_listing"]
    post_listing_returns = merged_df.loc[day_offsets.between(1, EVENT_WINDOW_DAYS), "ipo_return"]
    if post_listing_returns.empty:
        return np.nan
    return float(post_listing_returns.std())

4.9 Main Loop

In [48]:
# Main processing loop: for every IPO in the metadata, download prices around the
# listing date, align them with the benchmark, compute daily abnormal returns and
# summary metrics, and collect the results. Any IPO that cannot be processed is
# recorded in failed_tickers with a reason and skipped; the loop itself never aborts
# on a data problem.
for idx, meta_row in ipo_meta.iterrows():
    company = meta_row["company_name"]
    ticker = meta_row["ticker"]
    # Replace known-bad symbols before fetching
    ticker = apply_ticker_corrections(ticker)
    listing_date = meta_row["listing_date"]

    print(f"\nProcessing {company} ({ticker}) ...")

    # If listing_date is NaT, skip for now
    if pd.isna(listing_date):
        print(f" - SKIP: listing_date is NaT for {company}")
        failed_tickers.append({"company": company, "ticker": ticker, "reason": "listing_date_NaT"})
        continue

    # Build fetch window (with buffer)
    start_date, end_date = event_window_bounds(listing_date, pre_days=7, post_days=EVENT_WINDOW_DAYS)

    # Fetch stock data
    stock_df = fetch_stock_data_robust(ticker, start_date, end_date)
    if stock_df is None or stock_df.empty:
        print(f" - SKIP: No stock data for {ticker}")
        failed_tickers.append({"company": company, "ticker": ticker, "reason": "no_stock_data"})
        continue

    # Flatten MultiIndex if present
    stock_df = flatten_multiindex_columns_if_needed(stock_df)

    # Ensure Date column exists
    if "Date" not in stock_df.columns and "date" not in stock_df.columns:
        stock_df = stock_df.reset_index()
    
    # Find date column (first column whose name contains "date", case-insensitive)
    stock_date_cols = [c for c in stock_df.columns if "date" in c.lower()]
    if not stock_date_cols:
        print(f" - SKIP: No date column for {ticker}")
        failed_tickers.append({"company": company, "ticker": ticker, "reason": "no_date_column"})
        continue
    
    stock_df = stock_df.rename(columns={stock_date_cols[0]: "date"})
    # Unparseable dates become NaT instead of raising
    stock_df["date"] = pd.to_datetime(stock_df["date"], errors="coerce")
    
    if stock_df["date"].isna().all():
        print(f" - SKIP: all dates NaT for {ticker}")
        failed_tickers.append({"company": company, "ticker": ticker, "reason": "all_dates_NaT"})
        continue

    # Find and normalize price column (prefer "Adj Close", fall back to "Close")
    try:
        stock_price_col = None
        if "Adj Close" in stock_df.columns:
            stock_price_col = "Adj Close"
        elif "Close" in stock_df.columns:
            stock_price_col = "Close"
        else:
            raise KeyError("No price column")
        
        # Add ticker column before selection
        stock_df["ticker"] = ticker
        
        # Select and rename using direct assignment
        stock_df = stock_df[["date", stock_price_col, "ticker"]].copy()
        stock_df.columns = ["date", "Adj_Close", "ticker"]
        
    # Exception already covers KeyError, so any failure above lands here
    except (KeyError, Exception) as e:
        print(f" - SKIP: {ticker} has no usable price column ({e})")
        failed_tickers.append({"company": company, "ticker": ticker, "reason": "no_price_col"})
        continue

    stock_df = stock_df.sort_values("date").reset_index(drop=True)
    # Simple day-over-day return; the first row has no previous price and is NaN
    stock_df["ipo_return"] = stock_df["Adj_Close"].pct_change()

    # Align NIFTY slice for this stock window
    nifty_slice = nifty_full[(nifty_full["date"] >= stock_df["date"].min()) & 
                             (nifty_full["date"] <= stock_df["date"].max())].copy()
    if nifty_slice.empty:
        print(f" - SKIP: no overlapping NIFTY dates for {ticker}")
        failed_tickers.append({"company": company, "ticker": ticker, "reason": "no_nifty_overlap"})
        continue

    # Merge on date (left join so stock rows remain)
    # Both frames carry an "Adj_Close" column, so pandas suffixes them (_x stock, _y NIFTY).
    merged = pd.merge(stock_df, nifty_slice, on="date", how="left")

    # Compute abnormal returns (if nifty_return missing, remains NaN)
    merged["abnormal_return"] = merged["ipo_return"] - merged["nifty_return"]

    # Identify first trading date >= listing_date (listing_first_date)
    trading_dates = merged[merged["date"] >= listing_date]
    if trading_dates.empty:
        print(f" - SKIP: {ticker} has no trading rows on/after listing date {listing_date}")
        failed_tickers.append({"company": company, "ticker": ticker, "reason": "no_trading_after_listing"})
        continue
    
    listing_first_date = trading_dates["date"].min()
    # Calendar-day offset from the first trading date (weekends/holidays leave gaps)
    merged["days_from_listing"] = (merged["date"] - listing_first_date).dt.days

    # Compute CAR, CAR30, day1_return, vol30
    # "car" is a running sum over every row of merged in date order (NaN counted as 0)
    merged["car"] = merged["abnormal_return"].fillna(0).cumsum()
    car_30 = compute_car30(merged, listing_first_date)
    day1_return = compute_day1_return(merged)
    vol_30 = compute_vol30(merged)

    # Add meta columns into merged daily df (so daily master is self-contained)
    for c in ipo_meta.columns:
        if c not in merged.columns:
            merged[c] = meta_row.get(c, np.nan)

    # Append to daily_rows and pooled summary
    daily_rows.append(merged)
    ipo_summary.append({
        "company_name": company,
        "ticker": ticker,
        "listing_date": listing_date,
        "listing_first_date": listing_first_date,
        "day1_return": day1_return,
        "car_30": car_30,
        "vol_30": vol_30,
        # Carry over every remaining metadata column (e.g. subscription_cat)
        **{c: meta_row[c] for c in ipo_meta.columns if c not in ["company_name","ticker","listing_date"]}
    })

    print(f" ✓ OK: {ticker} | CAR30={car_30:.4f} | vol30={vol_30 if not np.isnan(vol_30) else 'NaN'}")


Processing Mamata Machinery (MAMATA.NS) ...
✓ Successfully fetched: MAMATA.NS
 ✓ OK: MAMATA.NS | CAR30=-0.3334 | vol30=0.06264024202847081

Processing Unimech Aerospace (UNIMECH.NS) ...
✓ Successfully fetched: UNIMECH.NS
 ✓ OK: UNIMECH.NS | CAR30=-0.0484 | vol30=0.034314914824892545

Processing Mobikwik Systems (MOBIKWIK.NS) ...
✓ Successfully fetched: MOBIKWIK.NS
 ✓ OK: MOBIKWIK.NS | CAR30=-0.0290 | vol30=0.06582373315385136

Processing Senores Pharmaceuticals (SENORES.NS) ...
✓ Successfully fetched: SENORES.NS
 ✓ OK: SENORES.NS | CAR30=-0.0362 | vol30=0.048853728356994114

Processing Transrail Lighting (TRANSRAILL.NS) ...
✓ Successfully fetched: TRANSRAILL.NS
 ✓ OK: TRANSRAILL.NS | CAR30=0.0065 | vol30=0.050415761633091555

Processing Waaree Energies (WAAREEENER.NS) ...
✓ Successfully fetched: WAAREEENER.NS
 ✓ OK: WAAREEENER.NS | CAR30=0.1518 | vol30=0.06630510393593858

Processing Tata Technologies (TATATECH.NS) ...
✓ Successfully fetched: TATATECH.NS
 ✓ OK: TATATECH.NS | CAR30=-0.

4.10 Post-Loop Assembly and Save


In [49]:
# Assemble the per-IPO results collected by the main loop and write them to disk.
if len(daily_rows) ==0:
    print("\nNo daily rows collected. Nothing to save. Check metadata or fetch functions.")
else:
    
    # Daily master: all per-day frames stacked; pooled master: one summary row per IPO
    df_stock_master = pd.concat(daily_rows, ignore_index=True)
    df_pooled_master = pd.DataFrame(ipo_summary)

    # Save both
    df_stock_master.to_csv(OUT_DAILY_MASTER, index=False)
    df_pooled_master.to_csv(OUT_POOLED_MASTER, index=False)

    print("\n" + "="*60)
    print("✓ SAVED OUTPUTS:")
    print("="*60)
    print(f"Daily master: {OUT_DAILY_MASTER} ({len(df_stock_master)} rows)")
    print(f"Pooled master: {OUT_POOLED_MASTER} ({len(df_pooled_master)} IPOs)")

    # Create sample1 & sample2 based on subscription category (case-insensitive)
    if "subscription_cat" in df_pooled_master.columns:
        # sample1 = "high" rows, sample2 = "low" rows; any other label lands in neither file
        sample1_df = df_pooled_master[df_pooled_master["subscription_cat"].str.lower() == "high"].reset_index(drop=True)
        sample2_df = df_pooled_master[df_pooled_master["subscription_cat"].str.lower() == "low"].reset_index(drop=True)
        sample1_df.to_csv(OUT_SAMPLE1, index=False)
        sample2_df.to_csv(OUT_SAMPLE2, index=False)
        print(f"Sample 1 (High): {OUT_SAMPLE1} ({len(sample1_df)} IPOs)")
        print(f"Sample 2 (Low): {OUT_SAMPLE2} ({len(sample2_df)} IPOs)")
    else:
        print("subscription_cat not present in pooled master; skipping sample split.")



✓ SAVED OUTPUTS:
Daily master: ..\Notebooks\IPO_Meta\meta_output_processed.csv\df_stock_master.csv (1014 rows)
Pooled master: ..\Notebooks\IPO_Meta\meta_output_processed.csv\df_pooled_master.csv (40 IPOs)
Sample 1 (High): ..\Notebooks\IPO_Meta\meta_output_processed.csv\sample1_final.csv (20 IPOs)
Sample 2 (Low): ..\Notebooks\IPO_Meta\meta_output_processed.csv\sample2_final.csv (20 IPOs)


4.11 Quick Diagnostics

In [50]:
print("\n" + "="*60)
print("DIAGNOSTICS:")
print("="*60)
print(f"Failed tickers: {len(failed_tickers)}")
if failed_tickers:
    print("\nFailed tickers breakdown:")
    print(pd.DataFrame(failed_tickers))
else:
    print("✓ All tickers processed successfully!")
print("="*60)


DIAGNOSTICS:
Failed tickers: 0
✓ All tickers processed successfully!


In [51]:
pip freeze > requirements.txt


Note: you may need to restart the kernel to use updated packages.
