In [2]:
%pip install xgboost

Collecting xgboost
  Downloading xgboost-3.1.3-py3-none-win_amd64.whl.metadata (2.0 kB)
Downloading xgboost-3.1.3-py3-none-win_amd64.whl (72.0 MB)
   ---------------------------------------- 0.0/72.0 MB ? eta -:--:--
   ---------------------------------------- 0.0/72.0 MB ? eta -:--:--
   ---------------------------------------- 0.3/72.0 MB ? eta -:--:--
   ---------------------------------------- 0.5/72.0 MB 1.2 MB/s eta 0:00:58
    --------------------------------------- 1.0/72.0 MB 1.7 MB/s eta 0:00:43
    --------------------------------------- 1.6/72.0 MB 2.1 MB/s eta 0:00:35
   - -------------------------------------- 2.1/72.0 MB 2.2 MB/s eta 0:00:33
   - -------------------------------------- 2.6/72.0 MB 2.2 MB/s eta 0:00:32
   - -------------------------------------- 3.1/72.0 MB 2.2 MB/s eta 0:00:31
   -- ------------------------------------- 3.9/72.0 MB 2.4 MB/s eta 0:00:29
   -- ------------------------------------- 5.2/72.0 MB 2.8 MB/s eta 0:00:24
   --- --------------------

In [3]:
import pandas as pd
import numpy as np
from pmdarima import auto_arima
import xgboost as xgb
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error
import os
from datetime import timedelta
import warnings
warnings.filterwarnings('ignore')

In [4]:
# Load 1-year data
df = pd.read_csv("nifty200_data/nifty200_complete.csv")
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)
df.sort_index(inplace=True)

one_year_ago = df.index[-252]
df_1year = df.loc[one_year_ago:]
print(f"üìä 1-Year data: {len(df_1year)} days")

open_cols = [col for col in df_1year.columns if col.endswith('_Open') and df_1year[col].notna().sum() > 100]
print(f"üéØ {len(open_cols)} stocks")

forecast_dir = "nifty200_xgb_arima_ensemble"
os.makedirs(forecast_dir, exist_ok=True)

tomorrow_date = df_1year.index[-1] + timedelta(days=1)

üìä 1-Year data: 252 days
üéØ 200 stocks


In [5]:
# ========================================
# ENSEMBLE PREDICTION ENGINE
# ========================================
def create_features(stock_data):
    """Lag features for XGBoost"""
    df_feat = stock_data.copy()
    df_feat = df_feat.to_frame(name='price')
    
    # 1-10 day lags
    for lag in range(1, 11):
        df_feat[f'lag_{lag}'] = df_feat['price'].shift(lag)
    
    # Rolling stats
    df_feat['ma_5'] = df_feat['price'].rolling(5).mean()
    df_feat['ma_10'] = df_feat['price'].rolling(10).mean()
    df_feat['std_5'] = df_feat['price'].rolling(5).std()
    
    # Returns
    df_feat['return_1'] = df_feat['price'].pct_change(1)
    df_feat['return_5'] = df_feat['price'].pct_change(5)
    
    return df_feat.dropna()

def xgb_predict(stock_data):
    """XGBoost prediction"""
    try:
        df_feat = create_features(stock_data)
        
        # Prepare data
        feature_cols = [col for col in df_feat.columns if col != 'price']
        X = df_feat[feature_cols]
        y = df_feat['price']
        
        # Split (80% train)
        split = int(0.8 * len(X))
        X_train, X_test = X.iloc[:split], X.iloc[split:]
        y_train, y_test = y.iloc[:split], y.iloc[split:]
        
        # XGBoost
        model = xgb.XGBRegressor(
            n_estimators=100,
            max_depth=4,
            learning_rate=0.1,
            random_state=42,
            n_jobs=-1
        )
        model.fit(X_train, y_train)
        
        # Predict tomorrow
        tomorrow_features = X.iloc[-1:].copy()
        xgb_tomorrow = model.predict(tomorrow_features)[0]
        
        return xgb_tomorrow
        
    except:
        return None

def arima_predict(stock_data):
    """ARIMA prediction"""
    try:
        model = auto_arima(
            stock_data.dropna().values,
            start_p=0, start_q=0, max_p=2, max_q=2,
            d=1, seasonal=False,
            stepwise=True,
            suppress_warnings=True,
            maxiter=20
        )
        return model.predict(n_periods=1)[0]
    except:
        return None

In [6]:
# ========================================
# MAIN ENSEMBLE LOOP
# ========================================
print("\nüîÆ XGBoost + ARIMA Ensemble...")
results = []

for i, stock_col in enumerate(open_cols):
    print(f"[{i+1:3d}/{len(open_cols)}] {stock_col}", end=" ")
    
    stock_data = df_1year[stock_col]
    
    # Get predictions
    xgb_pred = xgb_predict(stock_data)
    arima_pred = arima_predict(stock_data)
    
    last_price = stock_data.dropna().iloc[-1]
    
    if xgb_pred is not None and arima_pred is not None and last_price > 0:
        # ENSEMBLE: Average predictions
        ensemble_pred = (xgb_pred + arima_pred) / 2
        
        change_pct = ((ensemble_pred / last_price) - 1) * 100
        
        results.append({
            'Stock': stock_col.replace('Open_', ''),
            'Last_Close': round(float(last_price), 2),
            'XGBoost_Pred': round(float(xgb_pred), 2),
            'ARIMA_Pred': round(float(arima_pred), 2),
            'Ensemble_Pred': round(float(ensemble_pred), 2),
            'Change_Pct': round(float(change_pct), 2),
            'Signal': 'üü¢ BUY' if change_pct > 1.0 else 'üü° HOLD' if change_pct > -1.0 else 'üî¥ SELL'
        })
        print(f"‚úÖ {change_pct:+.2f}%")
    else:
        print("‚ùå Failed")
    
    if (i + 1) % 25 == 0:
        print(f"Progress: {i+1}/{len(open_cols)}")


üîÆ XGBoost + ARIMA Ensemble...
[  1/200] 360ONE.NS_Open ‚úÖ -0.34%
[  2/200] ABB.NS_Open ‚úÖ +1.99%
[  3/200] ABCAPITAL.NS_Open ‚úÖ -5.00%
[  4/200] ACC.NS_Open ‚úÖ +2.55%
[  5/200] ADANIENSOL.NS_Open ‚úÖ -0.88%
[  6/200] ADANIENT.NS_Open ‚úÖ -0.23%
[  7/200] ADANIGREEN.NS_Open ‚úÖ +0.21%
[  8/200] ADANIPORTS.NS_Open ‚úÖ -0.06%
[  9/200] ADANIPOWER.NS_Open ‚úÖ +2.47%
[ 10/200] ALKEM.NS_Open ‚úÖ -1.28%
[ 11/200] AMBUJACEM.NS_Open ‚úÖ +0.46%
[ 12/200] APLAPOLLO.NS_Open ‚úÖ -1.81%
[ 13/200] APOLLOHOSP.NS_Open ‚úÖ +0.36%
[ 14/200] ASHOKLEY.NS_Open ‚úÖ -11.68%
[ 15/200] ASIANPAINT.NS_Open ‚úÖ -3.67%
[ 16/200] ASTRAL.NS_Open ‚úÖ -0.43%
[ 17/200] ATGL.NS_Open ‚úÖ +3.13%
[ 18/200] AUBANK.NS_Open ‚úÖ -8.02%
[ 19/200] AUROPHARMA.NS_Open ‚úÖ -0.11%
[ 20/200] AXISBANK.NS_Open ‚úÖ -2.65%
[ 21/200] BAJAJ-AUTO.NS_Open ‚úÖ -1.55%
[ 22/200] BAJAJFINSV.NS_Open ‚úÖ -0.53%
[ 23/200] BAJAJHFL.NS_Open ‚úÖ +9.37%
[ 24/200] BAJAJHLDNG.NS_Open ‚úÖ +2.85%
[ 25/200] BAJFINANCE.NS_Open ‚úÖ +0.01%
Progress: 25/

In [7]:
# ========================================
# SAVE ENSEMBLE RESULTS
# ========================================
print(f"\nüìä Ensemble results: {len(results)} stocks")

if len(results) > 0:
    results_df = pd.DataFrame(results)
    results_df['Forecast_Date'] = tomorrow_date.strftime('%Y-%m-%d')
    
    # MAIN FILES
    results_df.to_csv(f"{forecast_dir}/ENSEMBLE_TOMORROW_NIFTY200.csv", index=False)
    results_df.nlargest(15, 'Change_Pct').to_csv(f"{forecast_dir}/ENSEMBLE_TOP15_GAINERS.csv", index=False)
    results_df.nsmallest(15, 'Change_Pct').to_csv(f"{forecast_dir}/ENSEMBLE_TOP15_LOSERS.csv", index=False)
    
    print(f"\nüéâ ENSEMBLE COMPLETE! Tomorrow: {tomorrow_date.strftime('%Y-%m-%d')}")
    print(f"üìÅ {forecast_dir}/")
    
    print("\nüî• TOP 10 ENSEMBLE GAINERS:")
    print(results_df.nlargest(10, 'Change_Pct')[['Stock', 'Ensemble_Pred', 'Change_Pct', 'Signal']].to_string(index=False))
    
    print("\nüìâ TOP 10 ENSEMBLE LOSERS:")
    print(results_df.nsmallest(10, 'Change_Pct')[['Stock', 'Ensemble_Pred', 'Change_Pct', 'Signal']].to_string(index=False))
    
    # Signal summary
    signals = results_df['Signal'].value_counts()
    print(f"\nüìà ENSEMBLE SIGNALS:")
    for signal, count in signals.items():
        print(f"  {signal}: {count}")

else:
    print("‚ùå No predictions generated")

print(f"\n‚öôÔ∏è  Ensemble: XGBoost (Tree) + ARIMA (Statistical)")


üìä Ensemble results: 200 stocks

üéâ ENSEMBLE COMPLETE! Tomorrow: 2026-01-21
üìÅ nifty200_xgb_arima_ensemble/

üî• TOP 10 ENSEMBLE GAINERS:
             Stock  Ensemble_Pred  Change_Pct Signal
     ENRIN.NS_Open        2684.77       15.77  üü¢ BUY
PREMIERENE.NS_Open         819.24       10.41  üü¢ BUY
     DIXON.NS_Open       12144.73       10.12  üü¢ BUY
       ITC.NS_Open         365.50        9.41  üü¢ BUY
  BAJAJHFL.NS_Open          99.64        9.37  üü¢ BUY
   PAGEIND.NS_Open       37150.43        8.36  üü¢ BUY
  JUBLFOOD.NS_Open         552.78        6.86  üü¢ BUY
     TRENT.NS_Open        4199.79        5.98  üü¢ BUY
     IRCTC.NS_Open         660.61        4.53  üü¢ BUY
GODFRYPHLP.NS_Open        2286.92        4.38  üü¢ BUY

üìâ TOP 10 ENSEMBLE LOSERS:
             Stock  Ensemble_Pred  Change_Pct Signal
NATIONALUM.NS_Open         313.21      -14.77 üî¥ SELL
  HINDZINC.NS_Open         578.39      -13.52 üî¥ SELL
  ASHOKLEY.NS_Open         166.86      -11.68 