In [3]:
import os
import sys
import yfinance as yf
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import pickle
import warnings
warnings.filterwarnings('ignore')

# Put the parent of the current working directory at the front of the import
# search path so the `src` package imported just below can be resolved.
# '..' is resolved against the working directory at the time this runs.
project_root = os.path.abspath('..')  
sys.path.insert(0, project_root)

# Must come after the sys.path change above.
# LIQUID_STOCKS is iterated as ticker symbols by the download loop
# (presumably strings carrying a '.NS' suffix — verify in src/stock_universe).
from src.stock_universe import LIQUID_STOCKS

# Quick sanity check that the universe was loaded.
print(f"Successfully imported {len(LIQUID_STOCKS)} stocks")

# Announce the job, then fix the download window: the 730 calendar days
# ending at the moment this runs.
banner = "=" * 80
print(f"Downloading 2 years data for {len(LIQUID_STOCKS)} stocks...")
print(banner)

end_date = datetime.now()
lookback = timedelta(days=730)
start_date = end_date - lookback

date_format = '%Y-%m-%d'
window_start = start_date.strftime(date_format)
window_end = end_date.strftime(date_format)
print(f"Date range: {window_start} to {window_end}")
print("\nThis will take 5-10 minutes...\n")

# Accumulators populated by the download loop.
failed_stocks = []  # names of tickers that errored or had too few rows
stock_data = {}     # ticker name -> dict of price/volume columns

# Download price/volume history for every ticker in LIQUID_STOCKS.
#
# Tickers whose history has more than 400 rows are stored in `stock_data`
# under their name with the '.NS' suffix removed; every other ticker (too few
# rows, or any error while downloading/processing) is appended to
# `failed_stocks`. A failure never aborts the loop.
total_stocks = len(LIQUID_STOCKS)  # loop-invariant, used only for progress output

for idx, stock in enumerate(LIQUID_STOCKS, 1):
    # Derive the display name *before* the try block so the except handler
    # always reports the ticker currently being processed. When this lived
    # inside the try, a failure on that line left `stock_name` unbound (first
    # iteration) or still holding the previous ticker's name.
    stock_name = str(stock).replace('.NS', '')
    print(f"[{idx:2d}/{total_stocks}] {stock_name:15s}", end=" ")

    try:
        data = yf.download(
            stock,
            start=start_date,
            end=end_date,
            progress=False,
        )

        # NOTE(review): recent yfinance releases appear to return MultiIndex
        # columns from download() by default, in which case data['Close'] is
        # a one-column DataFrame rather than a Series. The stored layout is
        # left as-is here; confirm what the consumers of the pickle expect.
        if len(data) > 400:  # strictly more than 400 rows of history
            stock_data[stock_name] = {
                'close': data['Close'],
                'open': data['Open'],
                'high': data['High'],
                'low': data['Low'],
                'volume': data['Volume'],
            }
            print(f"{len(data)} days")
        else:
            print(" Insufficient data")
            failed_stocks.append(stock_name)

    except Exception as e:
        # Deliberately broad: this is a best-effort batch job, so any
        # download or column-access error is reported (truncated to 40
        # characters) and the loop moves on to the next ticker.
        print(f"✗ Error: {str(e)[:40]}")
        failed_stocks.append(stock_name)

# Summarise the run: how many tickers were kept and which ones were dropped.
separator = "=" * 80
print("\n" + separator)
print(f" Successfully downloaded: {len(stock_data)} stocks")
if failed_stocks:
    print(f"✗ Failed: {failed_stocks}")

# Persist the raw download as a single pickle, creating the target folder
# on demand. The path is relative to the current working directory.
pickle_path = '../data/raw/stock_data.pkl'
os.makedirs(os.path.dirname(pickle_path), exist_ok=True)
with open(pickle_path, 'wb') as out_file:
    pickle.dump(stock_data, out_file)

print("\n Data saved to: data/raw/stock_data.pkl")
# Size is reported in decimal megabytes (bytes / 1,000,000).
size_mb = os.path.getsize(pickle_path) / 1_000_000
print(f" File size: {size_mb:.2f} MB")


Successfully imported 16 stocks
Downloading 2 years data for 16 stocks...
Date range: 2023-12-15 to 2025-12-14

This will take 5-10 minutes...

[ 1/16] HDFCBANK        494 days
[ 2/16] RELIANCE        494 days
[ 3/16] ICICIBANK       494 days
[ 4/16] BHARTIARTL      494 days
[ 5/16] TCS             494 days
[ 6/16] SBIN            494 days
[ 7/16] M&M             494 days
[ 8/16] AXISBANK        494 days
[ 9/16] KOTAKBANK       494 days
[10/16] ITC             494 days
[11/16] HCLTECH         494 days
[12/16] HINDUNILVR      494 days
[13/16] HINDALCO        494 days
[14/16] SUNPHARMA       494 days
[15/16] TITAN           494 days
[16/16] NTPC            494 days

 Successfully downloaded: 16 stocks

 Data saved to: data/raw/stock_data.pkl
 File size: 0.40 MB
