In [37]:
# Import packages
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import nsepython as nse
import datetime 
import time
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from sklearn.metrics import root_mean_squared_error



In [38]:
# Fetch the list of NSE indices and the list of equity symbols in one step.
# The index call is evaluated first, then the equity call (left to right).
index_list, eq_list = nse.nse_get_index_list(), nse.nse_eq_symbols()
print(datetime.datetime.now(), ' : Index and equity symbols extracted!')


2025-08-06 12:57:19.139313  : Index and equity symbols extracted!


In [39]:
# Inputs
# History window, expressed as 'YYYY/MM/DD' strings:
# start = 1 Jan 2015, end = 6 Aug 2025.
from_date, to_date = (
    datetime.date(*year_month_day).strftime('%Y/%m/%d')
    for year_month_day in ((2015, 1, 1), (2025, 8, 6))
)
print(datetime.datetime.now(), ' : Date inputs taken!')


2025-08-06 12:57:19.144554  : Date inputs taken!


In [None]:
# Load data
# Pull the NIFTY 50 history for the configured window and keep only the date
# and the four price columns.
# NOTE(review): confirm that nse.index_history accepts 'YYYY/MM/DD' date
# strings and that the price columns come back numeric rather than as text.
df = nse.index_history(
    symbol='NIFTY 50',
    start_date=from_date,
    end_date=to_date,
)[['HistoricalDate', 'OPEN', 'HIGH', 'LOW', 'CLOSE']]
print(datetime.datetime.now(), ' : Data loaded!')

# Use the shorter name 'Date' for the date column.
df = df.rename(columns={'HistoricalDate': 'Date'})
print(datetime.datetime.now(), ' : Column renamed!')


In [None]:
# Change column datatype
# Re-type 'Date' as the Arrow-backed date32 dtype (a calendar date, no time part).
date_column = df['Date']
df['Date'] = date_column.astype('date32[pyarrow]')
print(datetime.datetime.now(), ' : Column datatype modfied!')


In [None]:
# Additional feature generation
# For every price column and every window length, add two columns holding the
# minimum and maximum of that price over a positional window that starts at the
# current row and spans up to `window` rows (fewer near the end of the frame).
# NOTE(review): the window includes the current row, so the CLOSE-derived
# columns contain the row's own CLOSE, which is later used as the target.
# Whether the window looks back or ahead in time depends on the row order
# returned by the loader -- confirm.
row_count = df.shape[0]
for window in (5, 30):
    for price_col in ('OPEN', 'HIGH', 'LOW', 'CLOSE'):
        prices = df[price_col]
        windows = [prices.iloc[start:start + window] for start in range(row_count)]
        df[f'{price_col}_{window}daylow'] = [chunk.min() for chunk in windows]
        df[f'{price_col}_{window}dayhigh'] = [chunk.max() for chunk in windows]
print(datetime.datetime.now(), ' : Additional features generated!')

In [None]:
# Date feature dropped
# Remove 'Date' so that only the price columns and derived features remain.
df = df.drop(columns='Date')
print(datetime.datetime.now(), ' : date feature dropped!')


In [None]:
# train test data generation
# Target is CLOSE; every other remaining column is a predictor.
# 20% of the rows are held out, with a fixed seed for a repeatable split.
# NOTE(review): the default split shuffles rows, so train and test rows are
# interleaved in time -- confirm this is intended for this data.
features = df.drop(columns='CLOSE')
target = df['CLOSE']
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=11,
)
print(datetime.datetime.now(), ' : Train/test data generated!')


In [None]:
# Modeling
# Fit an XGBoost regressor with default settings on the training split,
# predict the held-out rows, and summarise the prediction error.
model = XGBRegressor()
print(datetime.datetime.now(),' : XGBoost model intialised!')

model.fit(X_train,y_train)
print(datetime.datetime.now(),' : Model fitted on train data!')

y_pred = model.predict(X_test)
print(datetime.datetime.now(),' : Predictions done for test data!')

# Root-mean-squared error between actual and predicted CLOSE on the test split.
rmse = root_mean_squared_error(y_test,y_pred)
print(datetime.datetime.now(),' : RMSE performance metric calculated!')

# Build the table column-wise, one row per test sample. Passing
# [y_test, y_pred] as a list would treat each input as a ROW, so the
# 'Actual'/'Predicted' columns would never hold the per-sample values.
# y_test's index is reused so each row can be traced back to its source row.
pred_table = pd.DataFrame(
    {'Actual': y_test.to_numpy(), 'Predicted': y_pred},
    index=y_test.index,
)
print(datetime.datetime.now(),' : Prediction table generated!')

# Signed relative error per row, rounded to two decimals:
# positive when the prediction is below the actual value, negative when above.
relative_error = (pred_table['Actual'] - pred_table['Predicted']) / pred_table['Actual']
pred_table['Avg_error'] = relative_error.round(2)
print(datetime.datetime.now(),' : AVG error added to final prediction table!')


# Mean of the signed errors; over- and under-predictions offset each other here.
print(pred_table['Avg_error'].mean())