In [1]:
# üìä LOAD BANK NIFTY OPTIONS DATA
# Load Bank Nifty Options Data using the options_data_loader module

import sys
import os
import pandas as pd
import numpy as np

print("üöÄ LOADING BANK NIFTY OPTIONS DATA")
print("=" * 50)

# Setup path to import custom modules
current_dir = os.getcwd()
if 'notebooks' in current_dir:
    project_root = os.path.dirname(current_dir)
else:
    project_root = current_dir

src_path = os.path.join(project_root, 'src')
if src_path not in sys.path:
    sys.path.insert(0, src_path)

print(f"üìÇ Project root: {project_root}")
print(f"üîß Source path: {src_path}")

# Import the options data loader
try:
    from utils.options_data_loader import load_banknifty_options_data
    print("‚úÖ Successfully imported options_data_loader")
except ImportError as e:
    print(f"‚ùå Failed to import options_data_loader: {e}")
    print("üí° Make sure the src/utils/options_data_loader.py file exists")
    raise

# Load the options data using the dedicated function
data_path = os.path.join(project_root, 'data')
print(f"üìÅ Data path: {data_path}")

try:
    print("\nüîÑ Loading Bank Nifty options data...")
    df_call, df_put, options_merged = load_banknifty_options_data(data_path)
    
    print(f"\nüìä DATA LOADING RESULTS:")
    print("-" * 30)
    
    if not df_call.empty:
        print(f"‚úÖ CALL OPTIONS:")
        print(f"   üìà Records: {len(df_call):,}")
        print(f"   üìÖ Date range: {df_call['Date'].min()} to {df_call['Date'].max()}")
        print(f"   üí∞ Strike range: ‚Çπ{df_call['Strike Price'].min():,.0f} - ‚Çπ{df_call['Strike Price'].max():,.0f}")
        print(f"   üìã Columns: {list(df_call.columns)}")
        
        print(f"\nüîç CALL OPTIONS SAMPLE DATA (First 3 records):")
        print("-" * 45)
        display(df_call.head(3))
    else:
        print("‚ùå No call options data loaded")
    
    if not df_put.empty:
        print(f"\n‚úÖ PUT OPTIONS:")
        print(f"   üìâ Records: {len(df_put):,}")
        print(f"   üìÖ Date range: {df_put['Date'].min()} to {df_put['Date'].max()}")
        print(f"   üí∞ Strike range: ‚Çπ{df_put['Strike Price'].min():,.0f} - ‚Çπ{df_put['Strike Price'].max():,.0f}")
        print(f"   üìã Columns: {list(df_put.columns)}")
        
        print(f"\nüîç PUT OPTIONS SAMPLE DATA (First 3 records):")
        print("-" * 44)
        display(df_put.head(3))
    else:
        print("‚ùå No put options data loaded")
    
    if not options_merged.empty:
        print(f"\n‚úÖ MERGED OPTIONS DATA:")
        print(f"   üîÑ Total records: {len(options_merged):,}")
        print(f"   üìä Shape: {options_merged.shape}")
    else:
        print("‚ùå No merged options data available")
        
    print(f"\nüéØ DATA VARIABLES CREATED:")
    print("-" * 25)
    print("   ‚Ä¢ df_call: Call options DataFrame")
    print("   ‚Ä¢ df_put: Put options DataFrame") 
    print("   ‚Ä¢ options_merged: Combined options DataFrame")
    print(f"\n‚úÖ Options data loading completed successfully!")
        
except Exception as e:
    print(f"‚ùå Error loading options data: {str(e)}")
    # Initialize empty DataFrames in case of error
    df_call = pd.DataFrame()
    df_put = pd.DataFrame()
    options_merged = pd.DataFrame()
    print("üîß Initialized empty DataFrames as fallback")

print(f"\nüìà NEXT STEP: Load Bank Nifty spot data for XGBoost modeling")

üöÄ LOADING BANK NIFTY OPTIONS DATA
üìÇ Project root: c:\Users\91894\Projects\market-data
üîß Source path: c:\Users\91894\Projects\market-data\src
‚úÖ Successfully imported options_data_loader
üìÅ Data path: c:\Users\91894\Projects\market-data\data

üîÑ Loading Bank Nifty options data...
üìÇ LOADING BANK NIFTY OPTIONS DATA
üìã Found 22 Bank Nifty options files:
  1. OPTIDX_BANKNIFTY_CE_01-Apr-2023_TO_30-Jun-2023.csv
  2. OPTIDX_BANKNIFTY_CE_01-Apr-2024_TO_30-Jun-2024.csv
  3. OPTIDX_BANKNIFTY_CE_01-Apr-2025_TO_30-Jun-2025.csv
  4. OPTIDX_BANKNIFTY_CE_01-Jan-2023_TO_31-Mar-2023.csv
  5. OPTIDX_BANKNIFTY_CE_01-Jan-2024_TO_31-Mar-2024.csv
  6. OPTIDX_BANKNIFTY_CE_01-Jan-2025_TO_31-Mar-2025.csv
  7. OPTIDX_BANKNIFTY_CE_01-Jul-2023_TO_30-Sep-2023.csv
  8. OPTIDX_BANKNIFTY_CE_01-Jul-2024_TO_30-Sep-2024.csv
  9. OPTIDX_BANKNIFTY_CE_01-Jul-2025_TO_27-Jul-2025.csv
  10. OPTIDX_BANKNIFTY_CE_01-Oct-2023_TO_31-Dec-2023.csv
  11. OPTIDX_BANKNIFTY_CE_01-Oct-2024_TO_31-Dec-2024.csv
  12. OPTID

Unnamed: 0,Symbol,Date,Expiry,Option type,Strike Price,Open,High,Low,Close,LTP,Settle Price,No. of contracts,Turnover * in ‚Çπ Lakhs,Premium Turnover ** in ‚Çπ Lakhs,Open Int,Change in OI,Underlying Value,Source_File
0,BANKNIFTY,2023-01-02,2023-01-05,CE,48000.0,4.05,4.25,2.7,2.95,2.7,2.95,108278.0,1299421.71,85.71,402600.0,17325.0,43203.1,OPTIDX_BANKNIFTY_CE_01-Jan-2023_TO_31-Mar-2023...
1,BANKNIFTY,2023-01-02,2023-01-05,CE,48500.0,3.35,3.7,2.55,2.7,2.65,2.7,66529.0,806713.77,49.64,183850.0,37975.0,43203.1,OPTIDX_BANKNIFTY_CE_01-Jan-2023_TO_31-Mar-2023...
2,BANKNIFTY,2023-01-02,2023-01-05,CE,42500.0,708.0,944.1,609.4,790.45,831.35,790.45,67897.0,734630.01,13224.39,220125.0,-35150.0,43203.1,OPTIDX_BANKNIFTY_CE_01-Jan-2023_TO_31-Mar-2023...



‚úÖ PUT OPTIONS:
   üìâ Records: 395,306
   üìÖ Date range: 2023-01-02 00:00:00 to 2025-07-25 00:00:00
   üí∞ Strike range: ‚Çπ25,500 - ‚Çπ65,000
   üìã Columns: ['Symbol', 'Date', 'Expiry', 'Option type', 'Strike Price', 'Open', 'High', 'Low', 'Close', 'LTP', 'Settle Price', 'No. of contracts', 'Turnover * in  ‚Çπ Lakhs', 'Premium Turnover ** in   ‚Çπ Lakhs', 'Open Int', 'Change in OI', 'Underlying Value', 'Source_File']

üîç PUT OPTIONS SAMPLE DATA (First 3 records):
--------------------------------------------


Unnamed: 0,Symbol,Date,Expiry,Option type,Strike Price,Open,High,Low,Close,LTP,Settle Price,No. of contracts,Turnover * in ‚Çπ Lakhs,Premium Turnover ** in ‚Çπ Lakhs,Open Int,Change in OI,Underlying Value,Source_File
0,BANKNIFTY,2023-01-02,2023-01-05,PE,40000.0,6.1,7.55,4.6,4.95,4.8,4.95,367120.0,3671721.29,521.29,1507800.0,-237800.0,43203.1,OPTIDX_BANKNIFTY_PE_01-Jan-2023_TO_31-Mar-2023...
1,BANKNIFTY,2023-01-02,2023-01-05,PE,40400.0,10.0,10.0,4.85,5.7,5.75,5.7,33862.0,342062.8,56.6,79450.0,-3050.0,43203.1,OPTIDX_BANKNIFTY_PE_01-Jan-2023_TO_31-Mar-2023...
2,BANKNIFTY,2023-01-02,2023-01-05,PE,43600.0,643.8,715.0,403.25,496.9,461.55,496.9,210140.0,2316823.96,26297.96,198775.0,91475.0,43203.1,OPTIDX_BANKNIFTY_PE_01-Jan-2023_TO_31-Mar-2023...



‚úÖ MERGED OPTIONS DATA:
   üîÑ Total records: 790,386
   üìä Shape: (790386, 18)

üéØ DATA VARIABLES CREATED:
-------------------------
   ‚Ä¢ df_call: Call options DataFrame
   ‚Ä¢ df_put: Put options DataFrame
   ‚Ä¢ options_merged: Combined options DataFrame

‚úÖ Options data loading completed successfully!

üìà NEXT STEP: Load Bank Nifty spot data for XGBoost modeling


In [2]:
# üìà LOAD BANK NIFTY SPOT DATA
# Load Bank Nifty Index data using the flexible spot_data_loader module

print("üìà LOADING BANK NIFTY SPOT DATA")
print("=" * 40)

# Import the Spot data loader
try:
    from utils.spot_data_loader import load_spot_data
    print("‚úÖ Successfully imported spot_data_loader")
except ImportError as e:
    print(f"‚ùå Failed to import spot_data_loader: {e}")
    print("üí° Make sure the src/utils/spot_data_loader.py file exists")
    raise

# Load Bank Nifty data using the flexible load_spot_data function
try:
    print("\nüîÑ Loading Bank Nifty spot data using load_spot_data...")
    bank_nifty = load_spot_data(
        symbol="BANKNIFTY",  # Uses predefined mapping to ^NSEBANK
        data_path=data_path,
        start_date="2023-01-01",
        end_date=None,  # Download up to current date
        force_download=False,  # Use cached data if available
        plot_data=False  # Skip plotting for XGBoost workflow
    )
    
    if not bank_nifty.empty:
        print(f"\n‚úÖ BANK NIFTY SPOT DATA LOADED:")
        print("-" * 35)
        print(f"   üìä Records: {len(bank_nifty):,}")
        print(f"   üìÖ Date range: {bank_nifty['Date'].min():%d-%b-%Y} to {bank_nifty['Date'].max():%d-%b-%Y}")
        print(f"   üí∞ Price range: ‚Çπ{bank_nifty['Close'].min():,.0f} - ‚Çπ{bank_nifty['Close'].max():,.0f}")
        print(f"   üìã Columns: {list(bank_nifty.columns)}")
        
        print(f"\nüîç BANK NIFTY SAMPLE DATA (Latest 5 records):")
        print("-" * 45)
        display(bank_nifty.tail(5))
        
        print(f"\nüéØ DATA VARIABLE CREATED:")
        print("   ‚Ä¢ bank_nifty: Bank Nifty spot price DataFrame")
        print(f"\n‚úÖ Bank Nifty data loaded successfully!")
        
    else:
        print("‚ùå Failed to load Bank Nifty data")
        bank_nifty = pd.DataFrame()
        
except Exception as e:
    print(f"‚ùå Error loading Bank Nifty data: {str(e)}")
    bank_nifty = pd.DataFrame()

# Data validation summary
print(f"\nüìã DATA LOADING SUMMARY")
print("-" * 30)
print(f"‚úÖ Call Options: {'Loaded' if not df_call.empty else 'Failed'} ({len(df_call):,} records)")
print(f"‚úÖ Put Options: {'Loaded' if not df_put.empty else 'Failed'} ({len(df_put):,} records)")
print(f"‚úÖ Bank Nifty Spot: {'Loaded' if not bank_nifty.empty else 'Failed'} ({len(bank_nifty):,} records)")

if not df_call.empty and not df_put.empty and not bank_nifty.empty:
    print(f"\nüöÄ ALL DATA LOADED - READY FOR XGBOOST MODELING!")
    print("üí° You can now proceed to run the XGBoost feature engineering and training cells")
else:
    print(f"\n‚ö†Ô∏è Some data failed to load. Please check the error messages above.")

üìà LOADING BANK NIFTY SPOT DATA
‚úÖ Successfully imported spot_data_loader

üîÑ Loading Bank Nifty spot data using load_spot_data...
üîÑ Symbol mapping: BANKNIFTY ‚Üí ^NSEBANK
üîç Loading spot data for symbol: BANKNIFTY (Yahoo Finance: ^NSEBANK)
üìÇ Loading existing spot data from file...
‚úÖ Loaded spot data from: BANKNIFTY_yfinance.csv
üìã Data Shape: (629, 6)
üìÖ Date Range: 02-Jan-2023 to 25-Jul-2025
üìä Columns: ['Date', 'Close', 'High', 'Low', 'Open', 'Volume']

‚úÖ BANK NIFTY SPOT DATA LOADED:
-----------------------------------
   üìä Records: 629
   üìÖ Date range: 02-Jan-2023 to 25-Jul-2025
   üí∞ Price range: ‚Çπ39,052 - ‚Çπ57,459
   üìã Columns: ['Date', 'Close', 'High', 'Low', 'Open', 'Volume']

üîç BANK NIFTY SAMPLE DATA (Latest 5 records):
---------------------------------------------


Unnamed: 0,Date,Close,High,Low,Open,Volume
624,2025-07-21,56952.75,56983.449219,56255.699219,56558.898438,133700
625,2025-07-22,56756.0,57286.148438,56692.0,57253.351562,132800
626,2025-07-23,57210.449219,57249.0,56715.800781,56918.148438,129800
627,2025-07-24,57066.050781,57316.601562,56850.898438,57316.601562,179800
628,2025-07-25,56528.898438,57170.699219,56439.398438,57170.699219,125100



üéØ DATA VARIABLE CREATED:
   ‚Ä¢ bank_nifty: Bank Nifty spot price DataFrame

‚úÖ Bank Nifty data loaded successfully!

üìã DATA LOADING SUMMARY
------------------------------
‚úÖ Call Options: Loaded (395,080 records)
‚úÖ Put Options: Loaded (395,306 records)
‚úÖ Bank Nifty Spot: Loaded (629 records)

üöÄ ALL DATA LOADED - READY FOR XGBOOST MODELING!
üí° You can now proceed to run the XGBoost feature engineering and training cells


In [3]:
# üîß SIMPLE FEATURE ENGINEERING
# Create features from options data and save to CSV file

import pandas as pd
import numpy as np
import os

print("üîß SIMPLE FEATURE ENGINEERING")
print("=" * 40)

# Check if data is available
if 'df_call' not in globals() or 'df_put' not in globals() or 'bank_nifty' not in globals():
    print("‚ùå Required data not found. Please run the data loading cells first.")
else:
    if df_call.empty or df_put.empty or bank_nifty.empty:
        print("‚ùå Data is empty. Please check the data loading process.")
    else:
        print("‚úÖ Data available. Starting feature engineering...")
        
        # Create features by date
        features_list = []
        
        # Get unique dates from options data
        all_dates = sorted(set(df_call['Date'].unique()) & set(df_put['Date'].unique()))
        print(f"üìÖ Processing {len(all_dates)} dates...")
        
        for date in all_dates:
            # Get options data for this date
            calls_today = df_call[df_call['Date'] == date].copy()
            puts_today = df_put[df_put['Date'] == date].copy()
            
            # Get Bank Nifty spot price for this date
            spot_today = bank_nifty[bank_nifty['Date'] == date]
            
            if spot_today.empty:
                continue  # Skip if no spot data for this date
            
            spot_price = spot_today['Close'].iloc[0]
            
            # Clean and convert data types to handle mixed data
            calls_today = calls_today.copy()
            puts_today = puts_today.copy()
            
            # Convert LTP columns to numeric, handling any non-numeric values
            calls_today['LTP'] = pd.to_numeric(calls_today['LTP'], errors='coerce').fillna(0)
            puts_today['LTP'] = pd.to_numeric(puts_today['LTP'], errors='coerce').fillna(0)
            
            # Convert volume and OI columns to numeric
            calls_today['No. of contracts'] = pd.to_numeric(calls_today['No. of contracts'], errors='coerce').fillna(0)
            puts_today['No. of contracts'] = pd.to_numeric(puts_today['No. of contracts'], errors='coerce').fillna(0)
            calls_today['Open Int'] = pd.to_numeric(calls_today['Open Int'], errors='coerce').fillna(0)
            puts_today['Open Int'] = pd.to_numeric(puts_today['Open Int'], errors='coerce').fillna(0)
            calls_today['Strike Price'] = pd.to_numeric(calls_today['Strike Price'], errors='coerce').fillna(0)
            puts_today['Strike Price'] = pd.to_numeric(puts_today['Strike Price'], errors='coerce').fillna(0)
            
            # Calculate basic features with proper error handling
            try:
                call_volume_sum = calls_today['No. of contracts'].sum()
                call_oi_sum = calls_today['Open Int'].sum()
                call_ltp_mean = calls_today['LTP'].mean() if len(calls_today) > 0 else 0
                call_ltp_max = calls_today['LTP'].max() if len(calls_today) > 0 else 0
                
                put_volume_sum = puts_today['No. of contracts'].sum()
                put_oi_sum = puts_today['Open Int'].sum()
                put_ltp_mean = puts_today['LTP'].mean() if len(puts_today) > 0 else 0
                put_ltp_max = puts_today['LTP'].max() if len(puts_today) > 0 else 0
                
                # ATM options calculation with error handling
                call_atm_ltp = 0
                put_atm_ltp = 0
                
                if not calls_today.empty and len(calls_today) > 0:
                    try:
                        closest_call_idx = (calls_today['Strike Price'] - spot_price).abs().argsort().iloc[0]
                        call_atm_ltp = calls_today.iloc[closest_call_idx]['LTP']
                    except:
                        call_atm_ltp = 0
                
                if not puts_today.empty and len(puts_today) > 0:
                    try:
                        closest_put_idx = (puts_today['Strike Price'] - spot_price).abs().argsort().iloc[0]
                        put_atm_ltp = puts_today.iloc[closest_put_idx]['LTP']
                    except:
                        put_atm_ltp = 0
                
                features = {
                    'Date': date,
                    'spot_price': spot_price,
                    
                    # Call features
                    'call_total_volume': call_volume_sum,
                    'call_total_oi': call_oi_sum,
                    'call_avg_ltp': call_ltp_mean,
                    'call_max_ltp': call_ltp_max,
                    'call_count': len(calls_today),
                    
                    # Put features  
                    'put_total_volume': put_volume_sum,
                    'put_total_oi': put_oi_sum,
                    'put_avg_ltp': put_ltp_mean,
                    'put_max_ltp': put_ltp_max,
                    'put_count': len(puts_today),
                    
                    # Combined features
                    'total_volume': call_volume_sum + put_volume_sum,
                    'total_oi': call_oi_sum + put_oi_sum,
                    
                    # Put-Call Ratios
                    'pcr_volume': put_volume_sum / max(call_volume_sum, 1),
                    'pcr_oi': put_oi_sum / max(call_oi_sum, 1),
                    
                    # ATM options
                    'call_atm_ltp': call_atm_ltp,
                    'put_atm_ltp': put_atm_ltp,
                }
                
            except Exception as e:
                print(f"‚ö†Ô∏è Error processing date {date}: {str(e)}")
                continue
            
            features_list.append(features)
        
        # Create features DataFrame
        features_df = pd.DataFrame(features_list)
        
        if not features_df.empty:
            # Sort by date
            features_df = features_df.sort_values('Date').reset_index(drop=True)
            
            # Add target variable (next day's spot price)
            features_df['target_spot_price'] = features_df['spot_price'].shift(-1)
            
            # Remove last row (no target available)
            features_df = features_df.dropna()
            
            print(f"\n‚úÖ FEATURE ENGINEERING COMPLETED")
            print("-" * 35)
            print(f"üìä Total records: {len(features_df)}")
            print(f"üîß Total features: {len(features_df.columns) - 2}")  # Exclude Date and target
            print(f"üìÖ Date range: {features_df['Date'].min()} to {features_df['Date'].max()}")
            
            # Display sample features
            print(f"\nüîç SAMPLE FEATURES (First 3 rows):")
            print("-" * 35)
            display_cols = ['Date', 'spot_price', 'target_spot_price', 'total_volume', 'pcr_volume', 'call_atm_ltp', 'put_atm_ltp']
            display(features_df[display_cols].head(3))
            
            # Save to CSV file
            output_file = os.path.join(project_root, 'data', 'engineered_features.csv')
            features_df.to_csv(output_file, index=False)
            
            print(f"\nüíæ FEATURES SAVED TO CSV")
            print("-" * 25)
            print(f"üìÅ File: {output_file}")
            print(f"üìä Records saved: {len(features_df)}")
            print(f"üîß Features saved: {len(features_df.columns)}")
            
            # Feature summary
            print(f"\nüìã FEATURE SUMMARY")
            print("-" * 20)
            feature_cols = [col for col in features_df.columns if col not in ['Date', 'target_spot_price']]
            
            for i, feature in enumerate(feature_cols, 1):
                avg_val = features_df[feature].mean()
                print(f"{i:2d}. {feature:<20}: Avg = {avg_val:>8.2f}")
            
            print(f"\n‚úÖ Feature engineering completed successfully!")
            print("üí° You can now use 'features_df' variable for modeling")
            print(f"üìÑ CSV file saved for future use: engineered_features.csv")
            
        else:
            print("‚ùå No features could be created. Check your data.")

üîß SIMPLE FEATURE ENGINEERING
‚úÖ Data available. Starting feature engineering...
üìÖ Processing 637 dates...

‚úÖ FEATURE ENGINEERING COMPLETED
-----------------------------------
üìä Total records: 628
üîß Total features: 17
üìÖ Date range: 2023-01-02 00:00:00 to 2025-07-24 00:00:00

üîç SAMPLE FEATURES (First 3 rows):
-----------------------------------


Unnamed: 0,Date,spot_price,target_spot_price,total_volume,pcr_volume,call_atm_ltp,put_atm_ltp
0,2023-01-02,43203.101562,43425.25,56770323.0,0.9657,313.0,233.15
1,2023-01-03,43425.25,42958.800781,62232533.0,0.892912,0.0,176.3
2,2023-01-04,42958.800781,42608.699219,121250162.0,1.122982,1603.15,1152.35



üíæ FEATURES SAVED TO CSV
-------------------------
üìÅ File: c:\Users\91894\Projects\market-data\data\engineered_features.csv
üìä Records saved: 628
üîß Features saved: 19

üìã FEATURE SUMMARY
--------------------
 1. spot_price          : Avg = 48051.79
 2. call_total_volume   : Avg = 72625409.92
 3. call_total_oi       : Avg = 54103409.91
 4. call_avg_ltp        : Avg =  1059.17
 5. call_max_ltp        : Avg = 10914.98
 6. call_count          : Avg =   620.27
 7. put_total_volume    : Avg = 67767719.41
 8. put_total_oi        : Avg = 46857108.48
 9. put_avg_ltp         : Avg =   702.83
10. put_max_ltp         : Avg =  8300.36
11. put_count           : Avg =   620.63
12. total_volume        : Avg = 140393129.32
13. total_oi            : Avg = 100960518.39
14. pcr_volume          : Avg =     0.94
15. pcr_oi              : Avg =     0.90
16. call_atm_ltp        : Avg =   688.15
17. put_atm_ltp         : Avg =   513.34

‚úÖ Feature engineering completed successfully!
üí° You can 

In [4]:
# üìä NEW FEATURES SUMMARY: Previous Day Spot Price & Volume Features
# Display the new lag features that have been added to improve model performance

print("üìä NEW PREVIOUS DAY FEATURES ADDED TO XGBOOST MODEL")
print("=" * 60)

if 'features_df' in locals() and not features_df.empty:
    # List all the new previous day features
    lag_features = [col for col in features_df.columns if any(keyword in col.lower() 
                   for keyword in ['prev', 'ma5', 'momentum', 'gap', 'change_pct', 'volume_ratio'])]
    
    print(f"‚úÖ TOTAL NEW LAG FEATURES ADDED: {len(lag_features)}")
    print("-" * 45)
    
    # Categorize the features
    price_features = [f for f in lag_features if any(p in f for p in ['prev_close', 'prev_high', 'prev_low', 'prev_open', 'ma5_close'])]
    volume_features = [f for f in lag_features if 'volume' in f]
    momentum_features = [f for f in lag_features if any(m in f for m in ['change_pct', 'momentum', 'gap'])]
    technical_features = [f for f in lag_features if any(t in f for t in ['range', 'body', 'shadow', 'vs_ma5'])]
    
    print("üí∞ PRICE-BASED LAG FEATURES:")
    for i, feature in enumerate(price_features, 1):
        print(f"   {i:2d}. {feature}")
    
    print(f"\nüìä VOLUME-BASED LAG FEATURES:")
    for i, feature in enumerate(volume_features, 1):
        print(f"   {i:2d}. {feature}")
    
    print(f"\nüìà MOMENTUM & CHANGE FEATURES:")
    for i, feature in enumerate(momentum_features, 1):
        print(f"   {i:2d}. {feature}")
    
    print(f"\nüîß TECHNICAL ANALYSIS FEATURES:")
    for i, feature in enumerate(technical_features, 1):
        print(f"   {i:2d}. {feature}")
    
    # Show sample values for key new features
    print(f"\nüîç SAMPLE VALUES FOR KEY NEW FEATURES (First 3 records):")
    print("-" * 55)
    
    key_features = ['prev_close', 'prev_volume', 'price_change_pct', 'volume_ratio', 'gap_up_down', 'ma5_close']
    available_key_features = [f for f in key_features if f in features_df.columns]
    
    if available_key_features:
        sample_data = features_df[['Date'] + available_key_features].head(3)
        display(sample_data)
    
    # Show statistics for the new features
    print(f"\nüìä STATISTICS FOR NEW LAG FEATURES:")
    print("-" * 40)
    
    if 'price_change_pct' in features_df.columns:
        price_changes = features_df['price_change_pct']
        print(f"üí∞ Daily Price Changes:")
        print(f"   Mean: {price_changes.mean():.2f}%")
        print(f"   Std:  {price_changes.std():.2f}%")
        print(f"   Min:  {price_changes.min():.2f}%")
        print(f"   Max:  {price_changes.max():.2f}%")
    
    if 'volume_ratio' in features_df.columns:
        volume_ratios = features_df['volume_ratio']
        print(f"\nüìà Volume Ratios (in millions):")
        print(f"   Mean: {volume_ratios.mean():.2f}M")
        print(f"   Std:  {volume_ratios.std():.2f}M")
        print(f"   Min:  {volume_ratios.min():.2f}M") 
        print(f"   Max:  {volume_ratios.max():.2f}M")
    
    print(f"\nüéØ BENEFITS OF THESE NEW FEATURES:")
    print("-" * 35)
    print("   ‚úÖ Price momentum detection (price_change_pct, momentum_2day)")
    print("   ‚úÖ Volume trend analysis (volume_ratio, volume_vs_ma5)")
    print("   ‚úÖ Gap up/down identification (gap_up_down)")
    print("   ‚úÖ Moving average signals (price_vs_ma5, ma5_close)")
    print("   ‚úÖ Technical patterns (prev_range, prev_body, shadows)")
    print("   ‚úÖ Multi-day trend analysis (2day features)")
    
    print(f"\nüí° EXPECTED MODEL IMPROVEMENTS:")
    print("   üöÄ Better trend continuation/reversal detection")
    print("   üìä Enhanced volume-price relationship modeling")
    print("   üéØ Improved intraday gap prediction")
    print("   üìà Better handling of market momentum")
    
    print(f"\n‚úÖ Ready to retrain XGBoost model with enhanced features!")
    
else:
    print("‚ùå No features data available. Please run the feature engineering cell first.")

üìä NEW PREVIOUS DAY FEATURES ADDED TO XGBOOST MODEL
‚úÖ TOTAL NEW LAG FEATURES ADDED: 0
---------------------------------------------
üí∞ PRICE-BASED LAG FEATURES:

üìä VOLUME-BASED LAG FEATURES:

üìà MOMENTUM & CHANGE FEATURES:

üîß TECHNICAL ANALYSIS FEATURES:

üîç SAMPLE VALUES FOR KEY NEW FEATURES (First 3 records):
-------------------------------------------------------

üìä STATISTICS FOR NEW LAG FEATURES:
----------------------------------------

üéØ BENEFITS OF THESE NEW FEATURES:
-----------------------------------
   ‚úÖ Price momentum detection (price_change_pct, momentum_2day)
   ‚úÖ Volume trend analysis (volume_ratio, volume_vs_ma5)
   ‚úÖ Gap up/down identification (gap_up_down)
   ‚úÖ Moving average signals (price_vs_ma5, ma5_close)
   ‚úÖ Technical patterns (prev_range, prev_body, shadows)
   ‚úÖ Multi-day trend analysis (2day features)

üí° EXPECTED MODEL IMPROVEMENTS:
   üöÄ Better trend continuation/reversal detection
   üìä Enhanced volume-price relatio

In [9]:
!pip install tensorflow

ERROR: Could not find a version that satisfies the requirement tensorflow (from versions: none)
ERROR: No matching distribution found for tensorflow


In [8]:
# ü§ñ REINFORCEMENT LEARNING TRADING SYSTEM
# Deep Q-Network (DQN) Agent for Bank Nifty Options Trading

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import warnings
warnings.filterwarnings('ignore')

# Try to import RL libraries (lightweight version without TensorFlow)
try:
    from sklearn.neural_network import MLPRegressor
    from sklearn.preprocessing import StandardScaler
    from collections import deque
    import random
    print("‚úÖ Scikit-learn and required libraries imported successfully")
    TENSORFLOW_AVAILABLE = False
except ImportError as e:
    print(f"‚ùå Missing libraries: {e}")
    print("üì¶ Install with: pip install scikit-learn")
    raise

# Try TensorFlow as optional (for advanced users)
try:
    import tensorflow as tf
    TENSORFLOW_AVAILABLE = True
    print("‚úÖ TensorFlow also available for advanced RL")
except ImportError:
    print("üí° TensorFlow not available - using lightweight sklearn implementation")
    TENSORFLOW_AVAILABLE = False

print("ü§ñ REINFORCEMENT LEARNING BANK NIFTY TRADING SYSTEM")
print("=" * 55)

class BankNiftyTradingEnvironment:
    """
    Trading environment for Bank Nifty options using engineered features
    """
    
    def __init__(self, features_df, initial_capital=100000, transaction_cost=0.001):
        """
        Initialize trading environment
        
        Args:
            features_df: DataFrame with engineered features
            initial_capital: Starting capital in rupees
            transaction_cost: Transaction cost as percentage (0.001 = 0.1%)
        """
        self.features_df = features_df.copy()
        self.initial_capital = initial_capital
        self.transaction_cost = transaction_cost
        
        # Prepare features (exclude Date and target)
        feature_cols = [col for col in features_df.columns if col not in ['Date', 'target_spot_price']]
        self.features = features_df[feature_cols].fillna(0).values
        self.prices = features_df['spot_price'].values
        self.dates = features_df['Date'].values
        
        # Normalize features
        from sklearn.preprocessing import StandardScaler
        self.scaler = StandardScaler()
        self.features = self.scaler.fit_transform(self.features)
        
        # Environment state
        self.reset()
        
        # Action space: 0=Hold, 1=Buy, 2=Sell
        self.action_space = 3
        self.state_size = self.features.shape[1] + 3  # features + position + capital + profit
        
        print(f"üìä Environment initialized:")
        print(f"   ‚Ä¢ Data points: {len(self.features)}")
        print(f"   ‚Ä¢ Features: {self.features.shape[1]}")
        print(f"   ‚Ä¢ State size: {self.state_size}")
        print(f"   ‚Ä¢ Action space: {self.action_space}")
        print(f"   ‚Ä¢ Initial capital: ‚Çπ{initial_capital:,.0f}")
    
    def reset(self):
        """Reset environment to initial state"""
        self.current_step = 0
        self.capital = self.initial_capital
        self.position = 0  # 0=no position, 1=long, -1=short
        self.entry_price = 0
        self.total_profit = 0
        self.trade_history = []
        self.portfolio_values = [self.initial_capital]
        
        return self._get_state()
    
    def _get_state(self):
        """Get current state representation"""
        if self.current_step >= len(self.features):
            return np.zeros(self.state_size)
        
        # Market features
        market_features = self.features[self.current_step]
        
        # Portfolio features
        portfolio_features = np.array([
            self.position,  # Current position
            self.capital / self.initial_capital,  # Capital ratio
            self.total_profit / self.initial_capital  # Profit ratio
        ])
        
        return np.concatenate([market_features, portfolio_features])
    
    def step(self, action):
        """
        Execute action and return next state, reward, done flag
        
        Args:
            action: 0=Hold, 1=Buy, 2=Sell
            
        Returns:
            next_state, reward, done, info
        """
        if self.current_step >= len(self.features) - 1:
            return self._get_state(), 0, True, {}
        
        current_price = self.prices[self.current_step]
        reward = 0
        
        # Execute action
        if action == 1 and self.position == 0:  # Buy
            self.position = 1
            self.entry_price = current_price
            transaction_cost = self.capital * self.transaction_cost
            self.capital -= transaction_cost
            
        elif action == 2 and self.position == 1:  # Sell
            profit = (current_price - self.entry_price) / self.entry_price
            profit_amount = self.capital * profit
            transaction_cost = self.capital * self.transaction_cost
            
            self.capital += profit_amount - transaction_cost
            self.total_profit += profit_amount
            
            # Record trade
            self.trade_history.append({
                'entry_price': self.entry_price,
                'exit_price': current_price,
                'profit': profit_amount,
                'date': self.dates[self.current_step]
            })
            
            reward = profit * 100  # Scale reward
            self.position = 0
            self.entry_price = 0
        
        # Move to next step
        self.current_step += 1
        
        # Calculate portfolio value
        portfolio_value = self.capital
        if self.position == 1:  # Add unrealized profit
            unrealized = (self.prices[self.current_step] - self.entry_price) / self.entry_price * self.capital
            portfolio_value += unrealized
        
        self.portfolio_values.append(portfolio_value)
        
        # Additional reward based on portfolio performance
        if len(self.portfolio_values) > 1:
            portfolio_return = (portfolio_value - self.portfolio_values[-2]) / self.portfolio_values[-2]
            reward += portfolio_return * 10
        
        # Penalty for excessive trading
        if action != 0:
            reward -= 0.01
        
        # Check if done
        done = self.current_step >= len(self.features) - 1
        
        info = {
            'capital': self.capital,
            'position': self.position,
            'portfolio_value': portfolio_value,
            'total_trades': len(self.trade_history)
        }
        
        return self._get_state(), reward, done, info

class SimpleDQNAgent:
    """
    Lightweight DQN Agent using scikit-learn (no TensorFlow required)
    """
    
    def __init__(self, state_size, action_size, learning_rate=0.001):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.epsilon = 1.0  # Exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = learning_rate
        self.gamma = 0.95  # Discount factor
        
        # Build neural network using sklearn
        self.q_network = MLPRegressor(
            hidden_layer_sizes=(128, 64, 32),
            activation='relu',
            learning_rate_init=learning_rate,
            max_iter=1,
            warm_start=True,
            random_state=42
        )
        
        # Initialize network with dummy data
        dummy_state = np.zeros((1, state_size))
        dummy_target = np.zeros((1, action_size))
        self.q_network.fit(dummy_state, dummy_target)
        
        print(f"üß† Simple DQN Agent initialized:")
        print(f"   ‚Ä¢ State size: {state_size}")
        print(f"   ‚Ä¢ Action size: {action_size}")
        print(f"   ‚Ä¢ Using sklearn MLPRegressor")
        print(f"   ‚Ä¢ Memory size: 2000")
    
    def remember(self, state, action, reward, next_state, done):
        """Store experience in memory"""
        self.memory.append((state, action, reward, next_state, done))
    
    def act(self, state):
        """Choose action using epsilon-greedy policy"""
        if np.random.random() <= self.epsilon:
            return random.randrange(self.action_size)
        
        try:
            q_values = self.q_network.predict(state.reshape(1, -1))
            return np.argmax(q_values[0])
        except:
            return random.randrange(self.action_size)
    
    def replay(self, batch_size=32):
        """Train the model on a batch of experiences"""
        if len(self.memory) < batch_size:
            return
        
        batch = random.sample(self.memory, batch_size)
        states = np.array([e[0] for e in batch])
        actions = np.array([e[1] for e in batch])
        rewards = np.array([e[2] for e in batch])
        next_states = np.array([e[3] for e in batch])
        dones = np.array([e[4] for e in batch])
        
        try:
            # Get Q-values for current states
            current_q_values = self.q_network.predict(states)
            
            # Get Q-values for next states
            next_q_values = self.q_network.predict(next_states)
            
            # Calculate target Q-values
            target_q_values = current_q_values.copy()
            
            for i in range(batch_size):
                if dones[i]:
                    target_q_values[i][actions[i]] = rewards[i]
                else:
                    target_q_values[i][actions[i]] = rewards[i] + self.gamma * np.max(next_q_values[i])
            
            # Train the model
            self.q_network.fit(states, target_q_values)
            
        except Exception as e:
            print(f"Warning: Training error: {e}")
        
        # Decay epsilon
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

class DQNAgent:
    """
    Advanced DQN Agent using TensorFlow (optional - only if TensorFlow is available)
    """
    
    def __init__(self, state_size, action_size, learning_rate=0.001):
        if not TENSORFLOW_AVAILABLE:
            raise ImportError("TensorFlow not available. Use SimpleDQNAgent instead.")
        
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.epsilon = 1.0  # Exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = learning_rate
        self.gamma = 0.95  # Discount factor
        
        # Build neural network
        self.q_network = self._build_model()
        self.target_network = self._build_model()
        self.update_target_network()
        
        print(f"üß† Advanced DQN Agent initialized:")
        print(f"   ‚Ä¢ State size: {state_size}")
        print(f"   ‚Ä¢ Action size: {action_size}")
        print(f"   ‚Ä¢ Learning rate: {learning_rate}")
        print(f"   ‚Ä¢ Memory size: 2000")
    
    def _build_model(self):
        """Build neural network model"""
        model = tf.keras.Sequential([
            tf.keras.layers.Dense(128, input_dim=self.state_size, activation='relu'),
            tf.keras.layers.Dropout(0.2),
            tf.keras.layers.Dense(64, activation='relu'),
            tf.keras.layers.Dropout(0.2),
            tf.keras.layers.Dense(32, activation='relu'),
            tf.keras.layers.Dense(self.action_size, activation='linear')
        ])
        
        model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=self.learning_rate), loss='mse')
        return model
    
    def update_target_network(self):
        """Update target network weights"""
        self.target_network.set_weights(self.q_network.get_weights())
    
    def remember(self, state, action, reward, next_state, done):
        """Store experience in memory"""
        self.memory.append((state, action, reward, next_state, done))
    
    def act(self, state):
        """Choose action using epsilon-greedy policy"""
        if np.random.random() <= self.epsilon:
            return random.randrange(self.action_size)
        
        q_values = self.q_network.predict(state.reshape(1, -1), verbose=0)
        return np.argmax(q_values[0])
    
    def replay(self, batch_size=32):
        """Train the model on a batch of experiences"""
        if len(self.memory) < batch_size:
            return
        
        batch = random.sample(self.memory, batch_size)
        states = np.array([e[0] for e in batch])
        actions = np.array([e[1] for e in batch])
        rewards = np.array([e[2] for e in batch])
        next_states = np.array([e[3] for e in batch])
        dones = np.array([e[4] for e in batch])
        
        # Get Q-values for current states
        current_q_values = self.q_network.predict(states, verbose=0)
        
        # Get Q-values for next states from target network
        next_q_values = self.target_network.predict(next_states, verbose=0)
        
        # Calculate target Q-values
        target_q_values = current_q_values.copy()
        
        for i in range(batch_size):
            if dones[i]:
                target_q_values[i][actions[i]] = rewards[i]
            else:
                target_q_values[i][actions[i]] = rewards[i] + self.gamma * np.max(next_q_values[i])
        
        # Train the model
        self.q_network.fit(states, target_q_values, epochs=1, verbose=0)
        
        # Decay epsilon
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

def train_rl_agent(features_df, episodes=100, initial_capital=100000):
    """
    Train reinforcement learning agent
    
    Args:
        features_df: DataFrame with engineered features
        episodes: Number of training episodes
        initial_capital: Starting capital
    
    Returns:
        trained_agent, environment, training_results
    """
    print(f"\nüöÄ TRAINING RL AGENT")
    print("=" * 30)
    
    # Create environment and agent
    env = BankNiftyTradingEnvironment(features_df, initial_capital)
    agent = DQNAgent(env.state_size, env.action_space)
    
    # Training metrics
    episode_rewards = []
    episode_profits = []
    episode_trades = []
    
    print(f"\nüìä Starting training for {episodes} episodes...")
    
    for episode in range(episodes):
        state = env.reset()
        total_reward = 0
        
        while True:
            action = agent.act(state)
            next_state, reward, done, info = env.step(action)
            
            agent.remember(state, action, reward, next_state, done)
            state = next_state
            total_reward += reward
            
            if done:
                break
        
        # Train agent
        if len(agent.memory) > 32:
            agent.replay()
        
        # Update target network every 10 episodes
        if episode % 10 == 0:
            agent.update_target_network()
        
        # Record metrics
        final_portfolio = env.portfolio_values[-1]
        profit = final_portfolio - initial_capital
        profit_pct = (profit / initial_capital) * 100
        
        episode_rewards.append(total_reward)
        episode_profits.append(profit_pct)
        episode_trades.append(len(env.trade_history))
        
        # Print progress
        if episode % 20 == 0:
            avg_reward = np.mean(episode_rewards[-20:]) if len(episode_rewards) >= 20 else np.mean(episode_rewards)
            avg_profit = np.mean(episode_profits[-20:]) if len(episode_profits) >= 20 else np.mean(episode_profits)
            
            print(f"Episode {episode:3d} | Reward: {total_reward:8.2f} | "
                  f"Profit: {profit_pct:6.2f}% | Trades: {len(env.trade_history):3d} | "
                  f"Epsilon: {agent.epsilon:.3f}")
    
    training_results = {
        'episode_rewards': episode_rewards,
        'episode_profits': episode_profits,
        'episode_trades': episode_trades,
        'final_epsilon': agent.epsilon
    }
    
    print(f"\n‚úÖ Training completed!")
    print(f"   ‚Ä¢ Final epsilon: {agent.epsilon:.3f}")
    print(f"   ‚Ä¢ Average profit (last 20): {np.mean(episode_profits[-20:]):.2f}%")
    print(f"   ‚Ä¢ Average trades (last 20): {np.mean(episode_trades[-20:]):.1f}")
    
    return agent, env, training_results

def test_rl_agent(agent, features_df, initial_capital=100000):
    """
    Test trained RL agent
    
    Args:
        agent: Trained DQN agent
        features_df: Test data
        initial_capital: Starting capital
    
    Returns:
        test_results
    """
    print(f"\nüß™ TESTING RL AGENT")
    print("=" * 25)
    
    # Create test environment
    env = BankNiftyTradingEnvironment(features_df, initial_capital)
    
    # Test with no exploration
    agent.epsilon = 0
    
    state = env.reset()
    actions_taken = []
    states_history = []
    
    while True:
        action = agent.act(state)
        next_state, reward, done, info = env.step(action)
        
        actions_taken.append(action)
        states_history.append(info)
        state = next_state
        
        if done:
            break
    
    # Calculate performance metrics
    final_portfolio = env.portfolio_values[-1]
    total_return = (final_portfolio - initial_capital) / initial_capital * 100
    
    # Calculate benchmark (buy and hold)
    benchmark_return = (env.prices[-1] - env.prices[0]) / env.prices[0] * 100
    
    # Calculate Sharpe ratio (simplified)
    returns = np.diff(env.portfolio_values) / env.portfolio_values[:-1]
    sharpe_ratio = np.mean(returns) / np.std(returns) * np.sqrt(252) if np.std(returns) > 0 else 0
    
    # Calculate maximum drawdown
    peak = np.maximum.accumulate(env.portfolio_values)
    drawdown = (env.portfolio_values - peak) / peak
    max_drawdown = np.min(drawdown) * 100
    
    test_results = {
        'total_return': total_return,
        'benchmark_return': benchmark_return,
        'excess_return': total_return - benchmark_return,
        'sharpe_ratio': sharpe_ratio,
        'max_drawdown': max_drawdown,
        'total_trades': len(env.trade_history),
        'win_rate': len([t for t in env.trade_history if t['profit'] > 0]) / max(len(env.trade_history), 1) * 100,
        'portfolio_values': env.portfolio_values,
        'trade_history': env.trade_history,
        'actions_taken': actions_taken,
        'dates': env.dates
    }
    
    print(f"üìä TEST RESULTS:")
    print("-" * 20)
    print(f"   üí∞ Total Return: {total_return:.2f}%")
    print(f"   üìà Benchmark Return: {benchmark_return:.2f}%")
    print(f"   ‚ö° Excess Return: {test_results['excess_return']:.2f}%")
    print(f"   üìä Sharpe Ratio: {sharpe_ratio:.3f}")
    print(f"   üìâ Max Drawdown: {max_drawdown:.2f}%")
    print(f"   üîÑ Total Trades: {len(env.trade_history)}")
    print(f"   üéØ Win Rate: {test_results['win_rate']:.1f}%")
    
    return test_results

# Main execution
if 'features_df' in locals() and not features_df.empty:
    print("‚úÖ Features data found - Starting RL training...")
    
    # Split data for training and testing
    train_size = int(0.8 * len(features_df))
    train_data = features_df.iloc[:train_size].copy()
    test_data = features_df.iloc[train_size:].copy()
    
    print(f"\nüìä DATA SPLIT:")
    print(f"   ‚Ä¢ Training: {len(train_data)} samples")
    print(f"   ‚Ä¢ Testing: {len(test_data)} samples")
    
    # Train the agent
    trained_agent, train_env, training_results = train_rl_agent(
        train_data, 
        episodes=200,  # Increase for better performance
        initial_capital=100000
    )
    
    # Test the agent
    test_results = test_rl_agent(trained_agent, test_data)
    
    # Store results for later use
    rl_results = {
        'agent': trained_agent,
        'training_results': training_results,
        'test_results': test_results,
        'train_data': train_data,
        'test_data': test_data
    }
    
    print(f"\nüéØ RL SYSTEM READY!")
    print("üíæ Results stored in 'rl_results' variable")
    
else:
    print("‚ùå No features data available.")
    print("üí° Please run the feature engineering cell first.")

print(f"\nüîÑ NEXT STEPS:")
print("‚Ä¢ Visualize training progress")
print("‚Ä¢ Analyze trading performance") 
print("‚Ä¢ Compare with benchmark")
print("‚Ä¢ Deploy for live trading")

‚ùå Missing libraries: No module named 'tensorflow'
üì¶ Install with: pip install tensorflow


ModuleNotFoundError: No module named 'tensorflow'

In [None]:
# üìà RL TRADING RESULTS VISUALIZATION & ANALYSIS
# Comprehensive analysis of reinforcement learning trading performance

import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np
import pandas as pd

print("üìà RL TRADING PERFORMANCE ANALYSIS")
print("=" * 45)

if 'rl_results' in locals():
    training_results = rl_results['training_results']
    test_results = rl_results['test_results']
    
    # Create comprehensive visualization
    fig = make_subplots(
        rows=3, cols=2,
        subplot_titles=[
            'Training Progress: Episode Rewards',
            'Training Progress: Episode Profits (%)',
            'Portfolio Value vs Benchmark',
            'Cumulative Returns Comparison',
            'Trade Distribution',
            'Monthly Returns Heatmap'
        ],
        specs=[
            [{"secondary_y": False}, {"secondary_y": False}],
            [{"secondary_y": True}, {"secondary_y": False}],
            [{"secondary_y": False}, {"secondary_y": False}]
        ],
        vertical_spacing=0.08,
        horizontal_spacing=0.1
    )
    
    # 1. Training Progress - Rewards
    episodes = list(range(len(training_results['episode_rewards'])))
    fig.add_trace(
        go.Scatter(
            x=episodes,
            y=training_results['episode_rewards'],
            mode='lines',
            name='Episode Rewards',
            line=dict(color='blue', width=1),
            opacity=0.7
        ),
        row=1, col=1
    )
    
    # Add moving average
    window = 20
    if len(training_results['episode_rewards']) >= window:
        moving_avg = pd.Series(training_results['episode_rewards']).rolling(window).mean()
        fig.add_trace(
            go.Scatter(
                x=episodes,
                y=moving_avg,
                mode='lines',
                name=f'{window}-Episode MA',
                line=dict(color='red', width=2)
            ),
            row=1, col=1
        )
    
    # 2. Training Progress - Profits
    fig.add_trace(
        go.Scatter(
            x=episodes,
            y=training_results['episode_profits'],
            mode='lines',
            name='Episode Profits (%)',
            line=dict(color='green', width=1),
            opacity=0.7
        ),
        row=1, col=2
    )
    
    if len(training_results['episode_profits']) >= window:
        profit_ma = pd.Series(training_results['episode_profits']).rolling(window).mean()
        fig.add_trace(
            go.Scatter(
                x=episodes,
                y=profit_ma,
                mode='lines',
                name=f'{window}-Episode Profit MA',
                line=dict(color='darkgreen', width=2)
            ),
            row=1, col=2
        )
    
    # 3. Portfolio Value vs Benchmark
    test_dates = test_results['dates'][:len(test_results['portfolio_values'])]
    
    fig.add_trace(
        go.Scatter(
            x=test_dates,
            y=test_results['portfolio_values'],
            mode='lines',
            name='RL Agent Portfolio',
            line=dict(color='blue', width=2)
        ),
        row=2, col=1
    )
    
    # Calculate benchmark portfolio
    if 'test_data' in rl_results:
        test_data = rl_results['test_data']
        initial_price = test_data['spot_price'].iloc[0]
        benchmark_portfolio = [100000 * (price / initial_price) for price in test_data['spot_price'][:len(test_results['portfolio_values'])]]
        
        fig.add_trace(
            go.Scatter(
                x=test_dates,
                y=benchmark_portfolio,
                mode='lines',
                name='Buy & Hold Benchmark',
                line=dict(color='red', width=2, dash='dash')
            ),
            row=2, col=1
        )
    
    # 4. Cumulative Returns
    rl_returns = [(val / 100000 - 1) * 100 for val in test_results['portfolio_values']]
    benchmark_returns = [(val / 100000 - 1) * 100 for val in benchmark_portfolio] if 'benchmark_portfolio' in locals() else [0] * len(rl_returns)
    
    fig.add_trace(
        go.Scatter(
            x=test_dates,
            y=rl_returns,
            mode='lines',
            name='RL Cumulative Return (%)',
            line=dict(color='blue', width=2)
        ),
        row=2, col=2
    )
    
    fig.add_trace(
        go.Scatter(
            x=test_dates,
            y=benchmark_returns,
            mode='lines',
            name='Benchmark Return (%)',
            line=dict(color='red', width=2, dash='dash')
        ),
        row=2, col=2
    )
    
    # 5. Trade Distribution
    if test_results['trade_history']:
        trade_profits = [trade['profit'] for trade in test_results['trade_history']]
        
        fig.add_trace(
            go.Histogram(
                x=trade_profits,
                nbinsx=20,
                name='Trade P&L Distribution',
                marker=dict(color='lightblue', line=dict(color='blue', width=1))
            ),
            row=3, col=1
        )
    
    # 6. Action Distribution Pie Chart
    action_counts = pd.Series(test_results['actions_taken']).value_counts()
    action_labels = ['Hold', 'Buy', 'Sell']
    
    fig.add_trace(
        go.Pie(
            labels=[action_labels[i] for i in action_counts.index],
            values=action_counts.values,
            name="Action Distribution",
            marker=dict(colors=['lightgray', 'lightgreen', 'lightcoral'])
        ),
        row=3, col=2
    )
    
    # Update layout
    fig.update_layout(
        title=f'RL Trading System Analysis<br><sub>Total Return: {test_results["total_return"]:.2f}% | Excess Return: {test_results["excess_return"]:.2f}% | Sharpe: {test_results["sharpe_ratio"]:.3f}</sub>',
        height=1200,
        showlegend=True,
        font=dict(size=10)
    )
    
    # Update axis labels
    fig.update_xaxes(title_text="Episode", row=1, col=1)
    fig.update_yaxes(title_text="Reward", row=1, col=1)
    fig.update_xaxes(title_text="Episode", row=1, col=2)
    fig.update_yaxes(title_text="Profit (%)", row=1, col=2)
    fig.update_xaxes(title_text="Date", row=2, col=1)
    fig.update_yaxes(title_text="Portfolio Value (‚Çπ)", row=2, col=1)
    fig.update_xaxes(title_text="Date", row=2, col=2)
    fig.update_yaxes(title_text="Cumulative Return (%)", row=2, col=2)
    fig.update_xaxes(title_text="Trade P&L (‚Çπ)", row=3, col=1)
    fig.update_yaxes(title_text="Frequency", row=3, col=1)
    
    fig.show()
    
    # Performance Summary Table
    print(f"\nüìä DETAILED PERFORMANCE METRICS")
    print("=" * 40)
    
    performance_data = {
        'Metric': [
            'Total Return (%)',
            'Benchmark Return (%)', 
            'Excess Return (%)',
            'Sharpe Ratio',
            'Maximum Drawdown (%)',
            'Total Trades',
            'Win Rate (%)',
            'Average Trade P&L (‚Çπ)',
            'Best Trade (‚Çπ)',
            'Worst Trade (‚Çπ)',
            'Final Portfolio Value (‚Çπ)'
        ],
        'Value': [
            f"{test_results['total_return']:.2f}",
            f"{test_results['benchmark_return']:.2f}",
            f"{test_results['excess_return']:.2f}",
            f"{test_results['sharpe_ratio']:.3f}",
            f"{test_results['max_drawdown']:.2f}",
            f"{test_results['total_trades']}",
            f"{test_results['win_rate']:.1f}",
            f"‚Çπ{np.mean([t['profit'] for t in test_results['trade_history']]) if test_results['trade_history'] else 0:,.0f}",
            f"‚Çπ{max([t['profit'] for t in test_results['trade_history']]) if test_results['trade_history'] else 0:,.0f}",
            f"‚Çπ{min([t['profit'] for t in test_results['trade_history']]) if test_results['trade_history'] else 0:,.0f}",
            f"‚Çπ{test_results['portfolio_values'][-1]:,.0f}"
        ]
    }
    
    performance_df = pd.DataFrame(performance_data)
    print(performance_df.to_string(index=False))
    
    # Trading Analysis
    print(f"\nüîç TRADING BEHAVIOR ANALYSIS")
    print("-" * 35)
    
    action_dist = pd.Series(test_results['actions_taken']).value_counts()
    total_actions = len(test_results['actions_taken'])
    
    print(f"üìä Action Distribution:")
    print(f"   ‚Ä¢ Hold: {action_dist.get(0, 0)} ({action_dist.get(0, 0)/total_actions*100:.1f}%)")
    print(f"   ‚Ä¢ Buy:  {action_dist.get(1, 0)} ({action_dist.get(1, 0)/total_actions*100:.1f}%)")
    print(f"   ‚Ä¢ Sell: {action_dist.get(2, 0)} ({action_dist.get(2, 0)/total_actions*100:.1f}%)")
    
    if test_results['trade_history']:
        winning_trades = [t for t in test_results['trade_history'] if t['profit'] > 0]
        losing_trades = [t for t in test_results['trade_history'] if t['profit'] <= 0]
        
        print(f"\nüí∞ Trade Statistics:")
        print(f"   ‚Ä¢ Winning Trades: {len(winning_trades)}")
        print(f"   ‚Ä¢ Losing Trades: {len(losing_trades)}")
        
        if winning_trades:
            avg_win = np.mean([t['profit'] for t in winning_trades])
            print(f"   ‚Ä¢ Average Win: ‚Çπ{avg_win:,.0f}")
        
        if losing_trades:
            avg_loss = np.mean([t['profit'] for t in losing_trades])
            print(f"   ‚Ä¢ Average Loss: ‚Çπ{avg_loss:,.0f}")
        
        if winning_trades and losing_trades:
            profit_factor = abs(sum([t['profit'] for t in winning_trades]) / sum([t['profit'] for t in losing_trades]))
            print(f"   ‚Ä¢ Profit Factor: {profit_factor:.2f}")
    
    # Model Confidence Analysis
    print(f"\nüß† MODEL LEARNING ANALYSIS")
    print("-" * 30)
    
    final_episodes = training_results['episode_profits'][-20:]
    print(f"   ‚Ä¢ Final 20 episodes avg profit: {np.mean(final_episodes):.2f}%")
    print(f"   ‚Ä¢ Final epsilon (exploration): {training_results['final_epsilon']:.3f}")
    print(f"   ‚Ä¢ Learning stability: {'Good' if np.std(final_episodes) < 5 else 'Needs more training'}")
    
    # Recommendations
    print(f"\nüí° RECOMMENDATIONS")
    print("-" * 20)
    
    if test_results['excess_return'] > 0:
        print("   ‚úÖ RL agent outperforms benchmark")
    else:
        print("   ‚ö†Ô∏è RL agent underperforms benchmark")
    
    if test_results['sharpe_ratio'] > 1:
        print("   ‚úÖ Good risk-adjusted returns")
    elif test_results['sharpe_ratio'] > 0.5:
        print("   üìä Moderate risk-adjusted returns")
    else:
        print("   ‚ö†Ô∏è Poor risk-adjusted returns")
    
    if test_results['max_drawdown'] < -10:
        print("   ‚ö†Ô∏è High drawdown - consider position sizing")
    else:
        print("   ‚úÖ Acceptable drawdown levels")
    
    print(f"\nüéØ NEXT STEPS:")
    print("   ‚Ä¢ Fine-tune hyperparameters")
    print("   ‚Ä¢ Increase training episodes") 
    print("   ‚Ä¢ Add more sophisticated features")
    print("   ‚Ä¢ Implement risk management rules")
    print("   ‚Ä¢ Test on different market conditions")
    
else:
    print("‚ùå No RL results available.")
    print("üí° Please run the RL training cell first.")

In [None]:
# üéõÔ∏è INTERACTIVE MANUAL PREDICTION WIDGET
# Widget-based interface for easy manual predictions using utils module

import sys
import os
import pandas as pd
import numpy as np

print("üéõÔ∏è INTERACTIVE BANK NIFTY PREDICTION WIDGET")
print("=" * 50)

# Setup path to import custom modules
current_dir = os.getcwd()
if 'notebooks' in current_dir:
    project_root = os.path.dirname(current_dir)
else:
    project_root = current_dir

src_path = os.path.join(project_root, 'src')
if src_path not in sys.path:
    sys.path.insert(0, src_path)

print(f"üìÇ Project root: {project_root}")
print(f"üîß Source path: {src_path}")

# Import the prediction widget from utils
try:
    from utils.prediction_widget import display_prediction_widget, create_simple_prediction_form
    print("‚úÖ Successfully imported prediction_widget")
except ImportError as e:
    print(f"‚ùå Failed to import prediction_widget: {e}")
    print("üí° Make sure the src/utils/prediction_widget.py file exists")
    raise

# Display the interactive widget (if model is available)
print("\nüéØ LOADING INTERACTIVE PREDICTION INTERFACE")
print("-" * 45)

if 'model_results' in globals():
    print("‚úÖ XGBoost model found - Creating interactive widget...")
    
    # Create and display the widget using the utils function
    try:
        widget = display_prediction_widget(model_results)
        if widget:
            print("\nüéõÔ∏è WIDGET FEATURES:")
            print("-" * 20)
            print("‚Ä¢ üí∞ Market data input fields")
            print("‚Ä¢ üìã Quick preset scenarios (Bullish/Bearish/Neutral)")
            print("‚Ä¢ üîÆ One-click prediction button")
            print("‚Ä¢ üìä Real-time results with market signals")
            print("‚Ä¢ üéØ Confidence indicators and trading insights")
            print("\nüí° Use the interface above to make predictions!")
        else:
            print("‚ö†Ô∏è Widget creation failed")
    except Exception as e:
        print(f"‚ùå Error creating widget: {str(e)}")
        print("üí° Falling back to simple form interface...")
        
        # Fallback to simple form
        try:
            simple_form = create_simple_prediction_form(model_results)
            print("‚úÖ Simple prediction form available")
            print("üí° Call simple_form() to use text-based interface")
        except:
            print("‚ùå Both widget and form creation failed")
            
else:
    print("‚ö†Ô∏è XGBoost model not available yet")
    print("üí° Please run the XGBoost training cells first")
    print("üîß Then run this cell again to load the widget")
    
    # Show preview of what will be available
    print(f"\nüìã WIDGET PREVIEW (Available after model training):")
    print("-" * 50)
    print("üéõÔ∏è Interactive controls for:")
    print("‚Ä¢ üí∞ Current Bank Nifty spot price")
    print("‚Ä¢ üìà Call options (Volume, OI, LTP, ATM)")
    print("‚Ä¢ üìâ Put options (Volume, OI, LTP, ATM)")
    print("‚Ä¢ üîÆ Instant prediction button")
    print("‚Ä¢ üìã Preset scenarios for quick testing")

print(f"\nüéØ USAGE INSTRUCTIONS")
print("-" * 20)
print("1. üöÄ Train the XGBoost model first (run previous cells)")
print("2. üîÑ Re-run this cell to load the widget")
print("3. üéõÔ∏è Use the interactive interface above")
print("4. üîÆ Click 'Make Prediction' for instant results")

print(f"\nüí° FALLBACK OPTIONS:")
print("‚Ä¢ display_prediction_widget(model_results) - Main widget")
print("‚Ä¢ create_simple_prediction_form(model_results) - Text form")

print(f"\n‚úÖ Prediction widget utilities loaded from utils module!")

üéõÔ∏è INTERACTIVE BANK NIFTY PREDICTION WIDGET
üìÇ Project root: c:\Users\91894\Projects\market-data
üîß Source path: c:\Users\91894\Projects\market-data\src
‚úÖ Successfully imported prediction_widget

üéØ LOADING INTERACTIVE PREDICTION INTERFACE
---------------------------------------------
‚úÖ XGBoost model found - Creating interactive widget...
üéõÔ∏è INTERACTIVE BANK NIFTY PREDICTION WIDGET
üéØ Creating Interactive Prediction Interface...


VBox(children=(HTML(value='<h2>üéõÔ∏è Interactive Bank Nifty Prediction Widget</h2>'), HBox(children=(VBox(childre‚Ä¶

‚úÖ Interactive prediction widget loaded successfully!
üéØ Use the interface above to make predictions

üéõÔ∏è WIDGET FEATURES:
--------------------
‚Ä¢ üí∞ Market data input fields
‚Ä¢ üìã Quick preset scenarios (Bullish/Bearish/Neutral)
‚Ä¢ üîÆ One-click prediction button
‚Ä¢ üìä Real-time results with market signals
‚Ä¢ üéØ Confidence indicators and trading insights

üí° Use the interface above to make predictions!

üéØ USAGE INSTRUCTIONS
--------------------
1. üöÄ Train the XGBoost model first (run previous cells)
2. üîÑ Re-run this cell to load the widget
3. üéõÔ∏è Use the interactive interface above
4. üîÆ Click 'Make Prediction' for instant results

üí° FALLBACK OPTIONS:
‚Ä¢ display_prediction_widget(model_results) - Main widget
‚Ä¢ create_simple_prediction_form(model_results) - Text form

‚úÖ Prediction widget utilities loaded from utils module!


In [None]:
#!pip install kiteconnect
from kiteconnect import KiteConnect
import pandas as pd
import os

# Load API key and access token from environment variables for security
api_key = "v2qa0jwrkw1l7489"
access_token = "17b9qraamiojz39sj7xa0hkaoqctf7fu"

kite = KiteConnect(api_key=api_key)
kite.set_access_token(access_token)

# Example: Fetch option chain for Bank Nifty (replace with your instrument_token if needed)
# Get instruments list and filter for Bank Nifty options
instruments = kite.instruments(exchange="NFO")
banknifty_options = [inst for inst in instruments if inst['name'] == 'BANKNIFTY' and inst['segment'] == 'NFO-OPT']

# Create DataFrame for tabular display
option_chain_df = pd.DataFrame(banknifty_options)

# Display relevant columns
display_columns = ['tradingsymbol', 'strike', 'expiry', 'instrument_type', 'lot_size', 'tick_size']
print("Bank Nifty Option Chain (Zerodha Kite):")
display(option_chain_df[display_columns].head(20))

Bank Nifty Option Chain (Zerodha Kite):


Unnamed: 0,tradingsymbol,strike,expiry,instrument_type,lot_size,tick_size
0,BANKNIFTY25JUL56500CE,56500.0,2025-07-31,CE,35,0.05
1,BANKNIFTY25JUL56500PE,56500.0,2025-07-31,PE,35,0.05
2,BANKNIFTY25JUL56600CE,56600.0,2025-07-31,CE,35,0.05
3,BANKNIFTY25JUL56600PE,56600.0,2025-07-31,PE,35,0.05
4,BANKNIFTY25JUL56400CE,56400.0,2025-07-31,CE,35,0.05
5,BANKNIFTY25JUL56400PE,56400.0,2025-07-31,PE,35,0.05
6,BANKNIFTY25JUL56700CE,56700.0,2025-07-31,CE,35,0.05
7,BANKNIFTY25JUL56700PE,56700.0,2025-07-31,PE,35,0.05
8,BANKNIFTY25JUL56300CE,56300.0,2025-07-31,CE,35,0.05
9,BANKNIFTY25JUL56300PE,56300.0,2025-07-31,PE,35,0.05


In [None]:
# üöÄ REAL-TIME PREDICTION ENGINE WITH KITECONNECT
# Complete real-time prediction system using live market data

import time
from datetime import datetime
import pandas as pd
import numpy as np

print("üöÄ REAL-TIME BANK NIFTY PREDICTION ENGINE")
print("=" * 50)

def run_live_prediction_cycle():
    """Run a single live prediction cycle"""
    
    if 'model_results' not in globals():
        print("‚ùå XGBoost model not available. Please run the training cells first.")
        return None
    
    print(f"üïê {datetime.now().strftime('%H:%M:%S')} - Starting live prediction cycle...")
    print("-" * 50)
    
    # Make live prediction
    result = make_live_prediction(use_previous_day_data=True)
    
    if result['success']:
        current_spot = result['current_spot']
        predicted_spot = result['predicted_spot']
        difference = result['difference']
        difference_pct = result['difference_pct']
        features = result['features_used']
        
        print(f"üìä LIVE PREDICTION RESULTS")
        print("-" * 30)
        print(f"üí∞ Current Spot Price: ‚Çπ{current_spot:,.2f}")
        print(f"üîÆ Predicted Price:    ‚Çπ{predicted_spot:,.2f}")
        print(f"üìà Difference:         ‚Çπ{difference:+,.2f} ({difference_pct:+.2f}%)")
        
        # Market direction indicator
        if difference > 0:
            direction = "üìà BULLISH"
            color = "üü¢"
        elif difference < 0:
            direction = "üìâ BEARISH" 
            color = "üî¥"
        else:
            direction = "‚öñÔ∏è NEUTRAL"
            color = "üü°"
        
        print(f"üéØ Market Direction:   {color} {direction}")
        
        # Confidence indicator based on prediction magnitude
        confidence_pct = min(abs(difference_pct) * 10, 100)  # Scale confidence
        confidence_bars = "‚ñà" * int(confidence_pct / 10)
        print(f"üìä Confidence:        {confidence_bars} {confidence_pct:.0f}%")
        
        # Key market indicators
        print(f"\nüìã KEY MARKET INDICATORS")
        print("-" * 25)
        
        # Put-Call Ratio
        pcr_volume = features.get('pcr_volume', 0)
        pcr_oi = features.get('pcr_oi', 0)
        
        print(f"üîÑ PCR (Volume):      {pcr_volume:.3f}")
        print(f"üîÑ PCR (OI):          {pcr_oi:.3f}")
        
        if pcr_volume > 1.2:
            pcr_signal = "üìâ Bearish (High PCR)"
        elif pcr_volume < 0.8:
            pcr_signal = "üìà Bullish (Low PCR)"
        else:
            pcr_signal = "‚öñÔ∏è Neutral PCR"
        
        print(f"üéØ PCR Signal:        {pcr_signal}")
        
        # Volume and OI indicators
        total_volume = features.get('total_volume', 0)
        total_oi = features.get('total_oi', 0)
        
        print(f"üìä Total Volume:      {total_volume:,.0f}")
        print(f"üìä Total OI:          {total_oi:,.0f}")
        
        # ATM options activity
        call_atm_ltp = features.get('call_atm_ltp', 0)
        put_atm_ltp = features.get('put_atm_ltp', 0)
        
        print(f"üí∞ ATM Call LTP:      ‚Çπ{call_atm_ltp:.2f}")
        print(f"üí∞ ATM Put LTP:       ‚Çπ{put_atm_ltp:.2f}")
        
        # Price momentum (if available)
        price_change_pct = features.get('price_change_pct', 0)
        if price_change_pct != 0:
            print(f"üìà Price Momentum:    {price_change_pct:+.2f}%")
        
        # Model feature importance insights
        if 'model_results' in globals():
            top_features = model_results['feature_importance'].head(3)
            print(f"\nüîç TOP DRIVING FACTORS")
            print("-" * 22)
            
            for i, (_, row) in enumerate(top_features.iterrows(), 1):
                feature_name = row['feature']
                feature_value = features.get(feature_name, 0)
                importance = row['importance']
                
                # Format feature name for display
                display_name = feature_name.replace('_', ' ').title()
                if len(display_name) > 20:
                    display_name = display_name[:17] + "..."
                
                print(f"{i}. {display_name:<20}: {feature_value:>8.2f}")
        
        # Trading recommendation
        print(f"\nüí° TRADING INSIGHTS")
        print("-" * 20)
        
        if abs(difference_pct) > 0.5:
            if difference > 0:
                print("üü¢ Model suggests upward movement")
                print("üí° Consider CALL options or long positions")
            else:
                print("üî¥ Model suggests downward movement") 
                print("üí° Consider PUT options or short positions")
        else:
            print("üü° Model suggests sideways movement")
            print("üí° Consider range-bound strategies")
        
        # Risk disclaimer
        print(f"\n‚ö†Ô∏è  RISK DISCLAIMER")
        print("-" * 15)
        print("üìä This is a predictive model, not financial advice")
        print("üí∞ Always use proper risk management")
        print("üéØ Combine with other technical/fundamental analysis")
        
        return result
    
    else:
        print(f"‚ùå Prediction failed: {result['error']}")
        return None

def run_continuous_predictions(cycles=3, interval_seconds=30):
    """
    Run multiple prediction cycles with intervals
    
    Args:
        cycles: Number of prediction cycles to run
        interval_seconds: Seconds between predictions
    """
    print(f"üîÑ STARTING CONTINUOUS PREDICTION MODE")
    print(f"üìä Cycles: {cycles} | Interval: {interval_seconds}s")
    print("=" * 50)
    
    results = []
    
    for cycle in range(1, cycles + 1):
        print(f"\nüîÑ CYCLE {cycle}/{cycles}")
        print("=" * 20)
        
        result = run_live_prediction_cycle()
        
        if result:
            results.append({
                'cycle': cycle,
                'timestamp': result['timestamp'],
                'current_spot': result['current_spot'],
                'predicted_spot': result['predicted_spot'],
                'difference': result['difference'],
                'difference_pct': result['difference_pct'],
                'pcr_volume': result['features_used'].get('pcr_volume', 0),
                'total_volume': result['features_used'].get('total_volume', 0)
            })
        
        # Wait before next cycle (except for last cycle)
        if cycle < cycles:
            print(f"\n‚è≥ Waiting {interval_seconds} seconds for next cycle...")
            for remaining in range(interval_seconds, 0, -1):
                print(f"\r‚è±Ô∏è  Next prediction in: {remaining:2d}s", end="", flush=True)
                time.sleep(1)
            print()  # New line after countdown
    
    # Summary of all cycles
    if results:
        print(f"\nüìä PREDICTION CYCLES SUMMARY")
        print("=" * 35)
        
        results_df = pd.DataFrame(results)
        
        print(f"‚úÖ Successful predictions: {len(results)}/{cycles}")
        print(f"üí∞ Average predicted difference: ‚Çπ{results_df['difference'].mean():+.2f}")
        print(f"üìä Average predicted change: {results_df['difference_pct'].mean():+.2f}%")
        print(f"üîÑ Average PCR: {results_df['pcr_volume'].mean():.3f}")
        
        # Show trend analysis
        if len(results) > 1:
            first_prediction = results_df.iloc[0]['predicted_spot']
            last_prediction = results_df.iloc[-1]['predicted_spot']
            trend = last_prediction - first_prediction
            trend_pct = (trend / first_prediction) * 100
            
            print(f"üìà Prediction trend: ‚Çπ{trend:+.2f} ({trend_pct:+.2f}%)")
            
            if trend > 0:
                print("üü¢ Overall bullish trend detected")
            elif trend < 0:
                print("üî¥ Overall bearish trend detected")
            else:
                print("üü° Sideways trend detected")
        
        # Display results table
        print(f"\nüìã DETAILED RESULTS")
        print("-" * 20)
        display_df = results_df[['cycle', 'timestamp', 'current_spot', 'predicted_spot', 'difference_pct', 'pcr_volume']].copy()
        display_df.columns = ['Cycle', 'Time', 'Current ‚Çπ', 'Predicted ‚Çπ', 'Change %', 'PCR']
        display_df['Time'] = display_df['Time'].dt.strftime('%H:%M:%S')
        display_df['Current ‚Çπ'] = display_df['Current ‚Çπ'].round(2)
        display_df['Predicted ‚Çπ'] = display_df['Predicted ‚Çπ'].round(2)
        display_df['Change %'] = display_df['Change %'].round(2)
        display_df['PCR'] = display_df['PCR'].round(3)
        
        display(display_df)
    
    return results

# Interactive Interface
print(f"\nüéõÔ∏è INTERACTIVE PREDICTION INTERFACE")
print("-" * 40)

try:
    import ipywidgets as widgets
    from IPython.display import display, clear_output
    
    # Create control widgets
    single_prediction_button = widgets.Button(
        description='üîÆ Single Prediction',
        button_style='info',
        layout={'width': '180px'}
    )
    
    continuous_button = widgets.Button(
        description='üîÑ Start Continuous',
        button_style='success',
        layout={'width': '180px'}
    )
    
    cycles_slider = widgets.IntSlider(
        value=3,
        min=1,
        max=10,
        description='Cycles:',
        style={'description_width': 'initial'}
    )
    
    interval_slider = widgets.IntSlider(
        value=30,
        min=10,
        max=120,
        step=10,
        description='Interval (s):',
        style={'description_width': 'initial'}
    )
    
    output_widget = widgets.Output()
    
    def on_single_prediction_click(b):
        with output_widget:
            clear_output(wait=True)
            run_live_prediction_cycle()
    
    def on_continuous_click(b):
        with output_widget:
            clear_output(wait=True)
            run_continuous_predictions(
                cycles=cycles_slider.value,
                interval_seconds=interval_slider.value
            )
    
    single_prediction_button.on_click(on_single_prediction_click)
    continuous_button.on_click(on_continuous_click)
    
    # Create interface layout
    controls = widgets.VBox([
        widgets.HTML("<h3>üöÄ Real-Time Prediction Controls</h3>"),
        widgets.HBox([single_prediction_button, continuous_button]),
        widgets.HBox([cycles_slider, interval_slider]),
        widgets.HTML("<p><b>Instructions:</b></p><ul><li>üîÆ <b>Single Prediction</b>: Run one prediction cycle</li><li>üîÑ <b>Continuous</b>: Run multiple cycles with intervals</li><li>‚öôÔ∏è Adjust cycles and interval as needed</li></ul>")
    ])
    
    interface = widgets.VBox([controls, output_widget])
    display(interface)
    
    print("‚úÖ Interactive interface loaded successfully!")
    print("üí° Use the buttons above to start predictions")
    
except ImportError:
    print("üìù Interactive interface requires ipywidgets")
    print("üí° Install with: pip install ipywidgets")
    print("\nüîß MANUAL USAGE:")
    print("-" * 15)
    print("‚Ä¢ run_live_prediction_cycle() - Single prediction")
    print("‚Ä¢ run_continuous_predictions(cycles=3, interval_seconds=30) - Multiple predictions")

# Automatic single prediction on load (if model is available)
if 'model_results' in globals() and kite is not None:
    print(f"\nüöÄ RUNNING INITIAL PREDICTION...")
    initial_result = run_live_prediction_cycle()
    
    if initial_result:
        print(f"\n‚úÖ Initial prediction completed successfully!")
        print(f"üéØ You can now use the interface above for more predictions")
    else:
        print(f"\n‚ö†Ô∏è Initial prediction failed - please check your KiteConnect setup")
else:
    if 'model_results' not in globals():
        print(f"\n‚ö†Ô∏è XGBoost model not loaded. Please run the training cells first.")
    if kite is None:
        print(f"\n‚ö†Ô∏è KiteConnect not initialized. Please check your API credentials.")

print(f"\nüéØ SYSTEM STATUS")
print("-" * 15)
print(f"ü§ñ Model Ready: {'‚úÖ' if 'model_results' in globals() else '‚ùå'}")
print(f"üì° KiteConnect: {'‚úÖ' if kite is not None else '‚ùå'}")
print(f"üîÆ Prediction Functions: ‚úÖ")
print(f"üí∞ Ready for Trading: {'‚úÖ' if 'model_results' in globals() and kite is not None else '‚ùå'}")

üöÄ REAL-TIME BANK NIFTY PREDICTION ENGINE

üéõÔ∏è INTERACTIVE PREDICTION INTERFACE
----------------------------------------


VBox(children=(VBox(children=(HTML(value='<h3>üöÄ Real-Time Prediction Controls</h3>'), HBox(children=(Button(bu‚Ä¶

‚úÖ Interactive interface loaded successfully!
üí° Use the buttons above to start predictions

üöÄ RUNNING INITIAL PREDICTION...
üïê 14:00:34 - Starting live prediction cycle...
--------------------------------------------------


TypeError: make_live_prediction() got an unexpected keyword argument 'use_previous_day_data'

In [None]:
# üí∞ LIVE BANK NIFTY SPOT PRICE FETCHER
# Simple function to fetch current Bank Nifty spot price from KiteConnect

def fetch_live_spot_price():
    """
    Fetch current Bank Nifty spot price using KiteConnect
    Returns: dict with spot price data
    """
    try:
        # Get Bank Nifty index instruments
        indices = kite.instruments("INDICES")
        banknifty_index = [inst for inst in indices if inst['name'] == 'BANKNIFTY']
        
        if not banknifty_index:
            return {"error": "Bank Nifty index not found"}
        
        # Get the instrument token for Bank Nifty
        banknifty_token = banknifty_index[0]['instrument_token']
        
        # Fetch live quote
        quote = kite.quote([banknifty_token])
        spot_data = quote[str(banknifty_token)]
        
        # Extract relevant spot price information
        result = {
            'symbol': 'BANKNIFTY',
            'last_price': spot_data['last_price'],
            'open': spot_data['ohlc']['open'],
            'high': spot_data['ohlc']['high'],
            'low': spot_data['ohlc']['low'],
            'close': spot_data['ohlc']['close'],
            'change': spot_data['net_change'],
            'change_percent': (spot_data['net_change'] / spot_data['ohlc']['close']) * 100 if spot_data['ohlc']['close'] > 0 else 0,
            'volume': spot_data.get('volume', 0),
            'timestamp': pd.Timestamp.now(),
            'instrument_token': banknifty_token
        }
        
        return result
        
    except Exception as e:
        return {"error": f"Failed to fetch spot price: {str(e)}"}

def get_live_spot_and_options():
    """
    Fetch both live spot price and options data together
    Returns: dict with spot and options data
    """
    try:
        # Get live spot price
        spot_result = fetch_live_spot_price()
        
        if 'error' in spot_result:
            return spot_result
        
        spot_price = spot_result['last_price']
        
        # Get live options data around current spot
        # Filter options for current expiry and nearby strikes
        current_expiry_options = [opt for opt in banknifty_options 
                                if pd.to_datetime(opt['expiry']) == pd.to_datetime(banknifty_options[0]['expiry'])]
        
        # Filter options within ¬±500 points of spot price
        nearby_options = [opt for opt in current_expiry_options 
                         if abs(opt['strike'] - spot_price) <= 500]
        
        if nearby_options:
            # Get instrument tokens for nearby options
            option_tokens = [opt['instrument_token'] for opt in nearby_options[:20]]  # Limit to 20 options
            
            # Fetch live quotes for options
            option_quotes = kite.quote(option_tokens)
            
            # Process options data
            calls_data = []
            puts_data = []
            
            for opt in nearby_options[:20]:
                token = str(opt['instrument_token'])
                if token in option_quotes:
                    quote_data = option_quotes[token]
                    
                    option_data = {
                        'Strike Price': opt['strike'],
                        'LTP': quote_data['last_price'],
                        'Volume': quote_data.get('volume', 0),
                        'OI': quote_data.get('oi', 0),
                        'Bid': quote_data['depth']['buy'][0]['price'] if quote_data['depth']['buy'] else 0,
                        'Ask': quote_data['depth']['sell'][0]['price'] if quote_data['depth']['sell'] else 0,
                        'Change': quote_data['net_change'],
                        'Symbol': opt['tradingsymbol']
                    }
                    
                    if opt['instrument_type'] == 'CE':
                        calls_data.append(option_data)
                    elif opt['instrument_type'] == 'PE':
                        puts_data.append(option_data)
        
        return {
            'spot_data': spot_result,
            'calls': pd.DataFrame(calls_data).sort_values('Strike Price') if calls_data else pd.DataFrame(),
            'puts': pd.DataFrame(puts_data).sort_values('Strike Price') if puts_data else pd.DataFrame(),
            'timestamp': pd.Timestamp.now()
        }
        
    except Exception as e:
        return {"error": f"Failed to fetch live data: {str(e)}"}

# Example usage functions
def show_live_spot():
    """Display current Bank Nifty spot price"""
    spot_data = fetch_live_spot_price()
    
    if 'error' in spot_data:
        print(f"‚ùå Error: {spot_data['error']}")
        return
    
    print(f"\nüí∞ LIVE BANK NIFTY SPOT PRICE")
    print("-" * 35)
    print(f"üè∑Ô∏è  Symbol: {spot_data['symbol']}")
    print(f"üíµ Last Price: ‚Çπ{spot_data['last_price']:,.2f}")
    print(f"üìà Change: ‚Çπ{spot_data['change']:,.2f} ({spot_data['change_percent']:+.2f}%)")
    print(f"üìä OHLC: ‚Çπ{spot_data['open']:,.0f} | ‚Çπ{spot_data['high']:,.0f} | ‚Çπ{spot_data['low']:,.0f} | ‚Çπ{spot_data['close']:,.0f}")
    print(f"üì¶ Volume: {spot_data['volume']:,}")
    print(f"‚è∞ Time: {spot_data['timestamp'].strftime('%H:%M:%S')}")

def show_live_data_summary():
    """Display live spot price and top options"""
    live_data = get_live_spot_and_options()
    
    if 'error' in live_data:
        print(f"‚ùå Error: {live_data['error']}")
        return
    
    # Show spot price
    spot = live_data['spot_data']
    print(f"\nüéØ LIVE MARKET DATA SUMMARY")
    print("=" * 40)
    print(f"üí∞ Bank Nifty: ‚Çπ{spot['last_price']:,.2f} ({spot['change_percent']:+.2f}%)")
    print(f"‚è∞ Updated: {live_data['timestamp'].strftime('%H:%M:%S')}")
    
    # Show top 5 calls and puts
    if not live_data['calls'].empty:
        print(f"\nüìà TOP CALL OPTIONS:")
        print("-" * 25)
        calls_display = live_data['calls'].head(5)[['Strike Price', 'LTP', 'Volume', 'OI']]
        for _, row in calls_display.iterrows():
            print(f"   {row['Strike Price']:,.0f}CE: ‚Çπ{row['LTP']:,.2f} | Vol: {row['Volume']:,} | OI: {row['OI']:,}")
    
    if not live_data['puts'].empty:
        print(f"\nüìâ TOP PUT OPTIONS:")
        print("-" * 25)
        puts_display = live_data['puts'].head(5)[['Strike Price', 'LTP', 'Volume', 'OI']]
        for _, row in puts_display.iterrows():
            print(f"   {row['Strike Price']:,.0f}PE: ‚Çπ{row['LTP']:,.2f} | Vol: {row['Volume']:,} | OI: {row['OI']:,}")

# Make functions globally available
globals()['fetch_live_spot_price'] = fetch_live_spot_price
globals()['get_live_spot_and_options'] = get_live_spot_and_options
globals()['show_live_spot'] = show_live_spot
globals()['show_live_data_summary'] = show_live_data_summary

print("\nüí∞ LIVE SPOT PRICE FUNCTIONS LOADED!")
print("-" * 40)
print("Available functions:")
print("‚Ä¢ fetch_live_spot_price() - Get raw spot price data")
print("‚Ä¢ get_live_spot_and_options() - Get spot + options data")
print("‚Ä¢ show_live_spot() - Display formatted spot price")
print("‚Ä¢ show_live_data_summary() - Display spot + top options")

print("\nüí° Quick Test:")
print("Run: show_live_spot() or show_live_data_summary()")