In [1]:
# ============================
# ⚙️ PHASE 3: FEATURE ENGINEERING
# ============================

import pandas as pd
import numpy as np

# ----------------------------
# Read the cleaned price table and put it in chronological order
# ----------------------------
# 'Date' is parsed to datetime on read so the sort below is by time rather
# than by string; the index is rebuilt so it runs 0..n-1 after sorting.
data = (
    pd.read_csv('../data/processed/stocks_clean.csv', parse_dates=['Date'])
    .sort_values('Date')
    .reset_index(drop=True)
)
print("‚úÖ Data loaded for feature engineering")
print("Shape:", data.shape)

# ----------------------------
# Create technical indicators for each stock
# ----------------------------
stocks = ['RELIANCE.NS', 'TCS.NS', 'HDFCBANK.NS']


def _rsi(close, window=14):
    """Return the Relative Strength Index of a price series.

    Gains and losses are averaged with a plain rolling mean over ``window``
    rows (not Wilder's exponential smoothing).

    Parameters
    ----------
    close : pandas.Series
        Price series; its index is preserved in the result.
    window : int, default 14
        Number of rows in each rolling average.

    Returns
    -------
    pandas.Series
        RSI values on the same index as ``close``. A value is NaN wherever
        its window contains an undefined price change (including the first
        row, whose diff is NaN) or where both the average gain and the
        average loss are zero. It is 100 where only the average loss is zero.
    """
    delta = close.diff()
    # clip() keeps the original index and leaves NaN as NaN, so an undefined
    # change is never counted as a "flat" day and the result aligns with the
    # frame regardless of how it is indexed.
    gain = delta.clip(lower=0)
    loss = delta.clip(upper=0).abs()
    avg_gain = gain.rolling(window=window).mean()
    avg_loss = loss.rolling(window=window).mean()
    rs = avg_gain / avg_loss
    return 100 - (100 / (1 + rs))


# Fail before touching `data` if any expected close column is absent, so the
# frame is never left with indicators for only some of the stocks.
missing_cols = [f'Close_{stock}' for stock in stocks
                if f'Close_{stock}' not in data.columns]
if missing_cols:
    raise KeyError(f"Missing close-price columns: {missing_cols}")

for stock in stocks:
    close = data[f'Close_{stock}']

    # Daily return
    data[f'{stock}_Return'] = close.pct_change()

    # 7-day and 21-day moving averages
    data[f'{stock}_MA7'] = close.rolling(window=7).mean()
    data[f'{stock}_MA21'] = close.rolling(window=21).mean()

    # Exponential moving average
    data[f'{stock}_EMA21'] = close.ewm(span=21, adjust=False).mean()

    # Rolling standard deviation (volatility)
    data[f'{stock}_STD21'] = close.rolling(window=21).std()

    # RSI (Relative Strength Index), 14-row simple-average variant.
    # The first `window` rows are NaN; they lie inside the 21-row warm-up of
    # MA21/STD21, so they add no extra NaN rows beyond those already present.
    data[f'{stock}_RSI'] = _rsi(close, window=14)

# ----------------------------
# Remove incomplete rows left behind by the rolling windows
# ----------------------------
# Any row with a NaN in any column is discarded (not only the indicator
# columns), then the index is renumbered from zero.
data = data.dropna(axis=0, how='any')
data = data.reset_index(drop=True)
print("‚úÖ Feature engineering complete.")
print("New shape:", data.shape)

# ----------------------------
# Persist the engineered feature table
# ----------------------------
output_path = '../data/processed/stocks_features.csv'
# index=False: the RangeIndex carries no information, so it is not written.
data.to_csv(path_or_buf=output_path, index=False)
print(f"üìÅ Features saved to {output_path}")

# Show the first five rows as the cell's output
data.head(n=5)

‚úÖ Data loaded for feature engineering
Shape: (1481, 21)
‚úÖ Feature engineering complete.
New shape: (1461, 39)
üìÅ Features saved to ../data/processed/stocks_features.csv


Unnamed: 0,Date,Close_HDFCBANK.NS,Close_RELIANCE.NS,Close_TCS.NS,Close_^NSEI,High_HDFCBANK.NS,High_RELIANCE.NS,High_TCS.NS,High_^NSEI,Low_HDFCBANK.NS,...,TCS.NS_MA21,TCS.NS_EMA21,TCS.NS_STD21,TCS.NS_RSI,HDFCBANK.NS_Return,HDFCBANK.NS_MA7,HDFCBANK.NS_MA21,HDFCBANK.NS_EMA21,HDFCBANK.NS_STD21,HDFCBANK.NS_RSI
0,2019-01-29,483.142303,538.356567,1682.212524,10652.200195,489.10642,547.69496,1687.557637,10690.349609,482.062161,...,1606.211577,1615.142729,30.753741,65.781201,-0.012739,494.08439,496.47943,496.25007,4.8968,32.222165
1,2019-01-30,477.765167,531.708435,1681.067139,10651.799805,483.060114,544.737715,1687.939352,10710.200195,476.191961,...,1609.551519,1621.135857,34.830087,65.327356,-0.01113,490.278809,495.211461,494.569625,6.055647,29.62447
2,2019-01-31,488.390259,545.693909,1708.810547,10830.950195,490.691395,546.82785,1713.604163,10838.049805,474.783108,...,1613.386132,1629.106283,40.899493,78.971761,0.022239,488.403687,494.668605,494.007864,6.135113,42.834774
3,2019-02-01,490.855743,555.83252,1722.258179,10893.650391,497.089925,558.078198,1726.330655,10983.450195,487.603678,...,1618.802473,1637.574637,47.259229,88.099281,0.005048,487.9022,494.429879,493.721308,6.183415,47.49099
4,2019-02-04,494.248749,574.042419,1739.481201,10912.25,496.150722,576.732769,1743.892964,10927.900391,489.024256,...,1625.970203,1646.83887,53.508179,86.508463,0.006912,487.991093,494.289552,493.769257,6.150864,46.090219
