Load and inspect cleaned data

In [1]:
import pandas as pd

# Read the cleaned intraday bars from disk.
df = pd.read_csv("../data/nifty_clean_intraday.csv")

# Remove stray leading/trailing whitespace from every column label.
df.columns = list(map(str.strip, df.columns))

# Preview the first rows.
df.head()


Unnamed: 0,Datetime,Close,High,Low,Open,Volume
0,2025-10-24 03:45:00+00:00,25850.800781,25939.300781,25846.050781,25939.300781,0.0
1,2025-10-24 03:50:00+00:00,25886.0,25892.400391,25840.400391,25851.150391,0.0
2,2025-10-24 03:55:00+00:00,25886.400391,25893.300781,25859.5,25886.300781,0.0
3,2025-10-24 04:00:00+00:00,25875.400391,25887.599609,25864.5,25885.849609,0.0
4,2025-10-24 04:05:00+00:00,25914.300781,25914.699219,25862.550781,25876.150391,0.0


Numeric consistency

In [2]:
# Columns that should hold numbers; any that the frame lacks are skipped below.
numeric_cols = ["Open", "High", "Low", "Close", "Adj Close", "Volume"]

# Coerce each column that is present; unparseable entries become NaN.
for col in (name for name in numeric_cols if name in df.columns):
    df[col] = pd.to_numeric(df[col], errors="coerce")

# Keep only rows with a usable Close, then renumber the index from 0.
df = df.dropna(subset=["Close"])
df = df.reset_index(drop=True)

# Summarise dtypes and non-null counts.
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4252 entries, 0 to 4251
Data columns (total 6 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Datetime  4252 non-null   object 
 1   Close     4252 non-null   float64
 2   High      4252 non-null   float64
 3   Low       4252 non-null   float64
 4   Open      4252 non-null   float64
 5   Volume    4252 non-null   float64
dtypes: float64(5), object(1)
memory usage: 199.4+ KB


Feature construction (EMA)

In [3]:
# Exponential moving averages of Close for a short (5) and a longer (15) span.
# adjust=False selects the recursive form of the average.
for span in (5, 15):
    df[f"EMA_{span}"] = df["Close"].ewm(span=span, adjust=False).mean()

# Show the price next to both averages.
df[["Close", "EMA_5", "EMA_15"]].head()


Unnamed: 0,Close,EMA_5,EMA_15
0,25850.800781,25850.800781,25850.800781
1,25886.0,25862.533854,25855.200684
2,25886.400391,25870.489366,25859.100647
3,25875.400391,25872.126374,25861.138115
4,25914.300781,25886.18451,25867.783448


Returns calculation

In [4]:
# Simple bar-over-bar return of Close: Close[t] / Close[t-1] - 1.
df["Returns"] = df["Close"].pct_change()

# The first row has no previous bar, so its return is NaN and is dropped.
# The drop is scoped to "Returns": a blanket dropna() would also discard any
# bar with a NaN in an unrelated column (e.g. Volume), silently removing
# bars from the series that later rolling windows run over.
df = df.dropna(subset=["Returns"]).reset_index(drop=True)

# Show the price next to its return.
df[["Close", "Returns"]].head()


Unnamed: 0,Close,Returns
0,25886.0,0.001362
1,25886.400391,1.5e-05
2,25875.400391,-0.000425
3,25914.300781,0.001503
4,25903.5,-0.000417


In [5]:
# Persist the base feature table (without the index column) for later steps.
df.to_csv(path_or_buf="../data/nifty_model_features.csv", index=False)

# Confirmation message for the notebook log.
print("Feature construction completed successfully")


Feature construction completed successfully


In [6]:
# Columns created by this cell; used at the end to scope the NaN trim.
enhanced_cols = [
    "EMA_5_Slope",
    "EMA_15_Slope",
    "Rolling_Return_20",
    "Rolling_Volatility_20",
    "Volatility_Ratio",
]

# One-bar change of each EMA.
df["EMA_5_Slope"] = df["EMA_5"].diff()
df["EMA_15_Slope"] = df["EMA_15"].diff()

# Mean and standard deviation of returns over the trailing 20 bars.
df["Rolling_Return_20"] = df["Returns"].rolling(20).mean()
df["Rolling_Volatility_20"] = df["Returns"].rolling(20).std()

# Short-window (10 bars) volatility relative to long-window (50 bars) volatility.
df["Volatility_Ratio"] = (
    df["Returns"].rolling(10).std() /
    df["Returns"].rolling(50).std()
)

# Drop rows where any of the new features is undefined (window warm-up at the
# start of the frame, or an undefined ratio). The drop is scoped to the
# columns added above, so a NaN in an unrelated column does not silently
# remove a bar.
df = df.dropna(subset=enhanced_cols).reset_index(drop=True)


In [7]:
# Persist the enhanced feature table (without the index column).
df.to_csv(path_or_buf="../data/nifty_features_enhanced.csv", index=False)

# Confirmation message for the notebook log.
print("Enhanced features saved")


Enhanced features saved
