In [1]:
import yfinance as yf
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')

print("Libraries loaded successfully")

Libraries loaded successfully


In [3]:
NIFTY_50_STOCKS = ['ADANIENT.NS', 'APOLLOHOSP.NS', 'ASIANPAINT.NS', 'AXISBANK.NS', 'BAJAJ-AUTO.NS', 'BAJFINANCE.NS', 'BAJAJFINSV.NS', 'BHARTIARTL.NS', 'BRITANNIA.NS', 'CIPLA.NS', 'COALINDIA.NS', 'DIVISLAB.NS', 'DRREDDY.NS', 'EICHERMOT.NS', 'GRASIM.NS', 'HCLTECH.NS', 'HDFCAMC.NS', 'HDFCBANK.NS', 'HEROMOTOCO.NS', 'HINDALCO.NS', 'HINDUNILVR.NS', 'ICICIBANK.NS', 'INDUSINDBK.NS', 'ITC.NS', 'JSWSTEEL.NS', 'KOTAKBANK.NS', 'LT.NS', 'M&M.NS', 'MARUTI.NS', 'NESTLEIND.NS', 'NTPC.NS', 'ONGC.NS', 'POWERGRID.NS', 'RELIANCE.NS', 'SBILIFE.NS', 'SBIN.NS', 'SUNPHARMA.NS', 'TCS.NS', 'TATACONSUM.NS', 'TATAMOTORS.NS', 'TATASTEEL.NS', 'TECHM.NS', 'TITAN.NS', 'TRENT.NS', 'ULTRACEMCO.NS', 'UPL.NS', 'WIPRO.NS', 'ZEEL.NS', 'DMART.NS', 'ADANIPORTS.NS'
]
print(f"Total Nifty 50 stocks to analyze: {len(NIFTY_50_STOCKS)}")

Total Nifty 50 stocks to analyze: 50


In [5]:
# Analysing 6 Months Data to get top 20 liquid stocks from NIFTY_50_STOCKS
end_date = datetime.now()
start_date = end_date - timedelta(days=180) 
print(f"Analyzing data from {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}")
print("="*80)
print("\nDownloading and analyzing liquidity of stocks...")

liquidity_data = []
failed_stocks = []

for idx, stock in enumerate(NIFTY_50_STOCKS, 1):
    try:
        stock_name = stock.replace('.NS', '')
        print(f"[{idx:2d}/{len(NIFTY_50_STOCKS)}] {stock_name:15s}", end=" ")
        
        data = yf.download(
            stock,
            start=start_date,
            end=end_date,
            progress=False
        )
        
        if len(data) < 50:  # Need at least 50 trading days
            print(" Insufficient data")
            failed_stocks.append(stock_name)
            continue

        close_price = data['Close']
        volume = data['Volume']
        
        # Daily turnover = Close Price × Volume (in crores)
        daily_turnover = (close_price * volume) / 10_000_000 
        
        avg_turnover = daily_turnover.mean()
        median_turnover = daily_turnover.median()
        min_turnover = daily_turnover.min()
        max_turnover = daily_turnover.max()
        
        avg_volume = volume.mean()
        
        latest_price = close_price.iloc[-1]
        
        # Price range (for position sizing reference)
        price_range = close_price.max() - close_price.min()
        
        # Bid-ask spread proxy (High-Low as % of Close)
        spread_proxy = ((data['High'] - data['Low']) / close_price * 100).mean()
        
        liquidity_data.append({
        'Stock': stock_name,
        'Avg_Turnover_Cr': float(avg_turnover),   
        'Median_Turnover_Cr': float(median_turnover),
        'Min_Turnover_Cr': float(min_turnover),
        'Max_Turnover_Cr': float(max_turnover),
        'Avg_Volume': float(avg_volume),
        'Latest_Price': float(latest_price),
        'Avg_Spread_%': float(spread_proxy),
        'Trading_Days': int(len(data))
        })
        
        print(f"Avg: ₹{avg_turnover.item():6,.0f}Cr | Med: ₹{median_turnover.item():6,.0f}Cr")
        
    except Exception as e:
        print(f"Error: {str(e)[:40]}")
        failed_stocks.append(stock_name)

print("\n" + "="*80)
print(f"Successfully analyzed: {len(liquidity_data)} stocks")
if failed_stocks:
    print(f" Failed: {failed_stocks}")

Analyzing data from 2025-06-16 to 2025-12-13

Downloading and analyzing liquidity of stocks...
[ 1/50] ADANIENT        Avg: ₹   290Cr | Med: ₹   189Cr
[ 2/50] APOLLOHOSP      Avg: ₹   288Cr | Med: ₹   248Cr
[ 3/50] ASIANPAINT      Avg: ₹   297Cr | Med: ₹   216Cr
[ 4/50] AXISBANK        Avg: ₹   759Cr | Med: ₹   644Cr
[ 5/50] BAJAJ-AUTO      Avg: ₹   319Cr | Med: ₹   287Cr
[ 6/50] BAJFINANCE      Avg: ₹   716Cr | Med: ₹   605Cr
[ 7/50] BAJAJFINSV      Avg: ₹   225Cr | Med: ₹   190Cr
[ 8/50] BHARTIARTL      Avg: ₹ 1,352Cr | Med: ₹   968Cr
[ 9/50] BRITANNIA       Avg: ₹   198Cr | Med: ₹   154Cr
[10/50] CIPLA           Avg: ₹   210Cr | Med: ₹   175Cr
[11/50] COALINDIA       Avg: ₹   200Cr | Med: ₹   172Cr
[12/50] DIVISLAB        Avg: ₹   240Cr | Med: ₹   199Cr
[13/50] DRREDDY         Avg: ₹   208Cr | Med: ₹   177Cr
[14/50] EICHERMOT       Avg: ₹   314Cr | Med: ₹   281Cr
[15/50] GRASIM          Avg: ₹   165Cr | Med: ₹   142Cr
[16/50] HCLTECH         Avg: ₹   428Cr | Med: ₹   375Cr
[17/50] H

In [7]:
liquidity_df = pd.DataFrame(liquidity_data)
# Sort by average turnover
liquidity_df = liquidity_df.sort_values('Avg_Turnover_Cr', ascending=False)

# Add rank column
liquidity_df.insert(0, 'Rank', range(1, len(liquidity_df) + 1))

print("\n" + "="*80)
print("TOP 20 MOST LIQUID NIFTY 50 STOCKS (Last 6 Months)")
print("="*80)
print("\nRanked by Average Daily Turnover\n")

top_20 = liquidity_df.head(20)

print(top_20.to_string(index=False))

print("\n" + "="*80)


TOP 20 MOST LIQUID NIFTY 50 STOCKS (Last 6 Months)

Ranked by Average Daily Turnover

 Rank      Stock  Avg_Turnover_Cr  Median_Turnover_Cr  Min_Turnover_Cr  Max_Turnover_Cr   Avg_Volume  Latest_Price  Avg_Spread_%  Trading_Days
    1   HDFCBANK      1777.579095         1647.641870              0.0      4995.550635 1.804723e+07   1001.500000      1.287369           126
    2   RELIANCE      1510.113798         1406.288177              0.0      4236.618653 1.045534e+07   1556.500000      1.424757           126
    3  ICICIBANK      1406.834711         1233.990609              0.0      3609.204393 1.007450e+07   1366.000000      1.175241           126
    4 BHARTIARTL      1352.177883          967.572986              0.0     17433.873146 6.837639e+06   2083.399902      1.538044           126
    5        TCS       884.405887          828.480519              0.0      2660.240782 2.834376e+06   3220.500000      1.434181           126
    6       SBIN       838.751513          711.803279  

In [10]:
print("\nFILTERING FOR CASH MARKET SUITABILITY")
print("="*80)

# Apply filters for cash market

filtered = liquidity_df.drop('Rank', axis=1).copy()

print("\nApplying filters...")
print(f"Starting with: {len(filtered)} stocks\n")

# Filter 1: Minimum average turnover (₹50 crore)
min_turnover = 50  # crores
filtered = filtered[filtered['Avg_Turnover_Cr'] >= min_turnover]
print(f"After turnover ≥ ₹{min_turnover}Cr filter: {len(filtered)} stocks")

# Filter 2: Minimum median turnover (consistency check)
min_median = 30  # crores
filtered = filtered[filtered['Median_Turnover_Cr'] >= min_median]
print(f"After median turnover ≥ ₹{min_median}Cr: {len(filtered)} stocks")

# Filter 3: Price range suitable for position sizing (₹300 to ₹4000)
filtered = filtered[
    (filtered['Latest_Price'] >= 300) & 
    (filtered['Latest_Price'] <= 4000)
]
print(f"After price range ₹300-₹4000: {len(filtered)} stocks")

# Filter 4: Reasonable spread (< 2% average high-low spread)
filtered = filtered[filtered['Avg_Spread_%'] < 2.0]
print(f"After spread < 2%: {len(filtered)} stocks")

# Filter 5: Minimum trading days (data quality)
filtered = filtered[filtered['Trading_Days'] >= 100]
print(f"After min 100 trading days: {len(filtered)} stocks")

print("\n" + "="*80)
print(f"FINAL FILTERED LIST: {len(filtered)} STOCKS")
print("="*80)
print("\nThese stocks meet ALL criteria for cash market stat arb:\n")

# Reset rank
filtered = filtered.reset_index(drop=True)
filtered.insert(0, 'Rank', range(1, len(filtered) + 1))

# Display filtered stocks
print(filtered[['Rank', 'Stock', 'Avg_Turnover_Cr', 'Latest_Price', 'Avg_Spread_%', 'Trading_Days']].to_string(index=False))




FILTERING FOR CASH MARKET SUITABILITY

Applying filters...
Starting with: 50 stocks

After turnover ≥ ₹50Cr filter: 50 stocks
After median turnover ≥ ₹30Cr: 50 stocks
After price range ₹300-₹4000: 35 stocks
After spread < 2%: 28 stocks
After min 100 trading days: 28 stocks

FINAL FILTERED LIST: 28 STOCKS

These stocks meet ALL criteria for cash market stat arb:

 Rank      Stock  Avg_Turnover_Cr  Latest_Price  Avg_Spread_%  Trading_Days
    1   HDFCBANK      1777.579095   1001.500000      1.287369           126
    2   RELIANCE      1510.113798   1556.500000      1.424757           126
    3  ICICIBANK      1406.834711   1366.000000      1.175241           126
    4 BHARTIARTL      1352.177883   2083.399902      1.538044           126
    5        TCS       884.405887   3220.500000      1.434181           126
    6       SBIN       838.751513    963.150024      1.351335           126
    7        M&M       811.971372   3679.600098      1.945317           126
    8   AXISBANK       759

In [11]:
import os

os.makedirs('../results', exist_ok=True)

# Save full liquidity data
liquidity_df.to_csv('../results/nifty50_liquidity_full.csv', index=False)
print("\n Full analysis saved to: results/nifty50_liquidity_full.csv")

# Save top 20
liquidity_df.head(20).to_csv('../results/top20_liquid_stocks.csv', index=False)
print(" Top 20 saved to: results/top20_liquid_stocks.csv")

# Save filtered list (recommended for stat arb)
filtered.to_csv('../results/filtered_statarb_stocks.csv', index=False)
print(f" Filtered list ({len(filtered)} stocks) saved to: results/filtered_statarb_stocks.csv")


 Full analysis saved to: results/nifty50_liquidity_full.csv
 Top 20 saved to: results/top20_liquid_stocks.csv
 Filtered list (28 stocks) saved to: results/filtered_statarb_stocks.csv


In [12]:
# Extract top 16 stocks for pair selection
num_stocks = min(16, len(filtered))
recommended_stocks = filtered.head(num_stocks)['Stock'].tolist()

print("\n" + "="*80)
print(f"YOUR FINAL STOCK LIST FOR STAT ARB ({num_stocks} stocks)")
print("="*80)
print("\nThese are the most liquid stocks meeting ALL criteria:\n")

# Create the Python list format for easy copy-paste
print("LIQUID_STOCKS = [")
for i, stock in enumerate(recommended_stocks, 1):
    print(f"    '{stock}.NS',  # Rank {i}")
print("]")

print("\n" + "="*80)
print("\n Copy this list for next step: Downloading 2-year data")
print("="*80)

# Save as Python code snippet
os.makedirs('../src', exist_ok=True)
with open('../src/stock_universe.py', 'w') as f:
    f.write("# Auto-generated stock universe based on liquidity analysis\n")
    f.write(f"# Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
    f.write(f"# Analysis period: Last 6 months\n\n")
    f.write("LIQUID_STOCKS = [\n")
    for stock in recommended_stocks:
        f.write(f"    '{stock}.NS',\n")
    f.write("]\n\n")
    f.write(f"# Total stocks: {len(recommended_stocks)}\n")
    f.write(f"# Expected pairs: {len(recommended_stocks) * (len(recommended_stocks) - 1) // 2}\n")

print("\n Stock list also saved to: src/stock_universe.py")


YOUR FINAL STOCK LIST FOR STAT ARB (16 stocks)

These are the most liquid stocks meeting ALL criteria:

LIQUID_STOCKS = [
    'HDFCBANK.NS',  # Rank 1
    'RELIANCE.NS',  # Rank 2
    'ICICIBANK.NS',  # Rank 3
    'BHARTIARTL.NS',  # Rank 4
    'TCS.NS',  # Rank 5
    'SBIN.NS',  # Rank 6
    'M&M.NS',  # Rank 7
    'AXISBANK.NS',  # Rank 8
    'KOTAKBANK.NS',  # Rank 9
    'ITC.NS',  # Rank 10
    'HCLTECH.NS',  # Rank 11
    'HINDUNILVR.NS',  # Rank 12
    'HINDALCO.NS',  # Rank 13
    'SUNPHARMA.NS',  # Rank 14
    'TITAN.NS',  # Rank 15
    'NTPC.NS',  # Rank 16
]


 Copy this list for next step: Downloading 2-year data

 Stock list also saved to: src/stock_universe.py
