In [1]:
# ============================
# 🎯 PHASE 4: TARGET CREATION + TRAIN/TEST SPLIT
# ============================

import os

import pandas as pd
from sklearn.model_selection import train_test_split

# ----------------------------
# Read the engineered feature table (the 'Date' column is parsed as datetime)
# ----------------------------
data = pd.read_csv(
    filepath_or_buffer='../data/processed/stocks_features.csv',
    parse_dates=['Date'],
)
print("‚úÖ Features data loaded:", data.shape)

# ----------------------------
# Tickers to build model-ready files for
# ----------------------------
stocks = [
    'RELIANCE.NS',
    'TCS.NS',
    'HDFCBANK.NS',
]

# ----------------------------
# Function to create targets and splits
# ----------------------------
def prepare_stock_data(df, stock, test_size=0.2, output_dir='../data/processed'):
    """
    Build next-day targets for one stock, split chronologically, and save a CSV.

    Two targets are derived from the ``Close_<stock>`` column:

    * ``Target_Reg`` - the closing price of the following row (next day).
    * ``Target_Cls`` - 1 if that next close is strictly greater than the
      current close, otherwise 0.

    The last row has no following close and is dropped. The first
    ``int(n * (1 - test_size))`` remaining rows are labelled ``'Train'`` and
    the rest ``'Test'`` (no shuffling). The result is written to
    ``<output_dir>/<ticker>_model_ready.csv`` where ``<ticker>`` is ``stock``
    with ``".NS"`` removed and lower-cased.

    Args:
        df: Wide feature table containing ``Close_<stock>`` and the six
            ``<stock>_*`` feature columns. It is not modified.
        stock: Ticker used to build the column names, e.g. ``'TCS.NS'``.
        test_size: Fraction of rows (after dropping the last one) assigned to
            the test split. Must lie in ``[0, 1]``.
        output_dir: Directory the CSV is written to; created if missing.

    Returns:
        DataFrame with the six feature columns followed by ``Target_Reg``,
        ``Target_Cls`` and ``Split``.

    Raises:
        ValueError: If ``test_size`` is outside ``[0, 1]``.
        KeyError: If a required column is absent from ``df``.
    """
    # An out-of-range fraction would give a negative or oversized split index
    # and silently produce wrong splits, so reject it up front.
    if not 0 <= test_size <= 1:
        raise ValueError(f"test_size must be between 0 and 1, got {test_size!r}")

    close_col = f'Close_{stock}'
    feature_cols = [
        f'{stock}_Return',
        f'{stock}_MA7',
        f'{stock}_MA21',
        f'{stock}_EMA21',
        f'{stock}_STD21',
        f'{stock}_RSI',
    ]

    missing = [col for col in [close_col, *feature_cols] if col not in df.columns]
    if missing:
        raise KeyError(f"Missing required column(s) for {stock}: {missing}")

    df = df.copy()

    # "Next day" is only meaningful in chronological order; a stable sort is a
    # no-op for input that is already ordered by date.
    if 'Date' in df.columns:
        df = df.sort_values('Date', kind='mergesort')

    # Regression target: the following row's close. Classification target:
    # 1 when that close is strictly higher than the current one.
    next_close = df[close_col].shift(-1)
    went_up = (next_close > df[close_col]).astype(int)

    # Drop the final row: it has no next-day close to predict.
    df_final = df[feature_cols].iloc[:-1].copy()
    df_final['Target_Reg'] = next_close.iloc[:-1]
    df_final['Target_Cls'] = went_up.iloc[:-1]

    # Chronological split: earliest rows train, latest rows test (no shuffle).
    n_rows = len(df_final)
    split_idx = int(n_rows * (1 - test_size))
    n_train = split_idx
    n_test = n_rows - split_idx
    df_final['Split'] = ['Train'] * n_train + ['Test'] * n_test

    # Save next to the other processed data, creating the folder if needed.
    output_dir = os.fspath(output_dir)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    file_name = f'{stock.replace(".NS","").lower()}_model_ready.csv'
    output_path = os.path.join(output_dir, file_name)
    df_final.to_csv(output_path, index=False)

    print(f"‚úÖ Saved model-ready file for {stock} ‚Üí {output_path}")
    print(f"   Train size: {n_train}, Test size: {n_test}")
    return df_final

# ----------------------------
# Build and save the model-ready file for every ticker in `stocks`
# ----------------------------
for s in stocks:
    prepare_stock_data(df=data, stock=s)


‚úÖ Features data loaded: (1461, 39)
‚úÖ Saved model-ready file for RELIANCE.NS ‚Üí ../data/processed/reliance_model_ready.csv
   Train size: 1168, Test size: 292
‚úÖ Saved model-ready file for TCS.NS ‚Üí ../data/processed/tcs_model_ready.csv
   Train size: 1168, Test size: 292
‚úÖ Saved model-ready file for HDFCBANK.NS ‚Üí ../data/processed/hdfcbank_model_ready.csv
   Train size: 1168, Test size: 292
