<a href="https://colab.research.google.com/github/daksh-l/gradient-gains/blob/main/Team_E.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import lightgbm as lgb
from sklearn.linear_model import RidgeCV
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import VotingRegressor

def prepare_data(df, is_train=True):
    """Return a copy of ``df`` with technical-indicator feature columns added.

    Args:
        df: Price data. When ``is_train`` is true it must contain the raw
            ``Open``, ``High``, ``Low``, ``Close`` and ``Volume`` columns.
            When ``is_train`` is false it must already contain the lag
            columns ``<Col>_Lag_0`` .. ``<Col>_Lag_29`` for each of those
            five columns (lag 0 is the current row).
        is_train: If true, also build ``Next_Close``, the ``TARGET_LOG_RET``
            target and the 30 lag columns per raw column, then drop every
            row that contains a NaN.

    Returns:
        A new DataFrame (the input is not modified) with the added columns
        ``Stoch_K``, ``RSI``, ``MACD``, ``BB_Pos``, ``BB_Width``,
        ``Vol_Ratio``, ``Upper_Shadow``, ``Lower_Shadow`` and ``Body_Size``.

    Raises:
        KeyError: If a required raw or lag column is missing.
    """
    df = df.copy()

    if is_train:
        df['Next_Close'] = df['Close'].shift(-1)
        df['TARGET_LOG_RET'] = np.log(df['Next_Close'] / df['Close'])

        # Build every lag column up front and attach them with one concat:
        # inserting 150 columns one at a time fragments the frame and makes
        # pandas emit a PerformanceWarning.
        lagged = pd.DataFrame({
            f'{col}_Lag_{lag}': df[col].shift(lag)
            for lag in range(30)
            for col in ('Close', 'High', 'Low', 'Volume', 'Open')
        })
        # Drop same-named columns first so pre-existing lag columns are
        # replaced (as plain assignment would do) rather than duplicated.
        df = pd.concat(
            [df.drop(columns=lagged.columns, errors='ignore'), lagged],
            axis=1,
        )

        # Removes the warm-up rows (incomplete lag history) and the last
        # row (no next close), plus any row with a NaN in another column.
        df = df.dropna()

    lags = range(30)
    P_close = df[[f'Close_Lag_{i}' for i in lags]].values
    P_high = df[[f'High_Lag_{i}' for i in lags]].values
    P_low = df[[f'Low_Lag_{i}' for i in lags]].values
    P_vol = df[[f'Volume_Lag_{i}' for i in lags]].values
    P_open = df[[f'Open_Lag_{i}' for i in lags]].values

    # Stochastic %K over the 14 most recent rows (lags 0..13).
    low_14 = np.min(P_low[:, :14], axis=1)
    high_14 = np.max(P_high[:, :14], axis=1)
    close_0 = P_close[:, 0]
    range_14 = high_14 - low_14
    # Guard against a zero range; np.where also promotes integer input to
    # float so the 1e-8 floor is not truncated to 0.
    range_14 = np.where(range_14 == 0, 1e-8, range_14)
    df['Stoch_K'] = ((close_0 - low_14) / range_14) * 100

    # RSI over 14 close-to-close changes. The lag columns run newest to
    # oldest, so reverse them before differencing to get "newer - older".
    diffs = np.diff(P_close[:, :15][:, ::-1], axis=1)
    avg_gain = np.mean(np.maximum(diffs, 0), axis=1)
    avg_loss = np.mean(np.maximum(-diffs, 0), axis=1)
    total_move = avg_gain + avg_loss
    # 100 - 100 / (1 + gain/loss) == 100 * gain / (gain + loss). This form
    # yields 100 (not 0) when there are no losses; a window with no price
    # movement at all is reported as the neutral value 50.
    df['RSI'] = 100 * np.divide(
        avg_gain,
        total_move,
        out=np.full_like(avg_gain, 0.5),
        where=total_move != 0,
    )

    # MACD-style spread, computed here from simple (not exponential) means.
    ma_12 = np.mean(P_close[:, :12], axis=1)
    ma_26 = np.mean(P_close[:, :26], axis=1)
    df['MACD'] = ma_12 - ma_26

    # Bollinger position and width from the 20-row mean and std; the 1e-8
    # terms avoid division by zero.
    ma_20 = np.mean(P_close[:, :20], axis=1)
    std_20 = np.std(P_close[:, :20], axis=1)
    df['BB_Pos'] = (close_0 - ma_20) / (2 * std_20 + 1e-8)
    df['BB_Width'] = 4 * std_20 / (ma_20 + 1e-8)

    # Current volume relative to its 5-row mean.
    vol_ma_5 = np.mean(P_vol[:, :5], axis=1) + 1e-8
    df['Vol_Ratio'] = P_vol[:, 0] / vol_ma_5

    # Candlestick geometry of the current row.
    body_top = np.maximum(P_close[:, 0], P_open[:, 0])
    body_bot = np.minimum(P_close[:, 0], P_open[:, 0])
    df['Upper_Shadow'] = P_high[:, 0] - body_top
    df['Lower_Shadow'] = body_bot - P_low[:, 0]
    df['Body_Size'] = P_close[:, 0] - P_open[:, 0]

    return df

# --- Load the raw CSVs and derive the indicator features -------------------
train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('test.csv')

train_final = prepare_data(train_df, is_train=True)
test_final = prepare_data(test_df, is_train=False)

# Indicator columns fed to the models.
features = [
    'Stoch_K',
    'RSI',
    'MACD',
    'BB_Pos',
    'BB_Width',
    'Vol_Ratio',
    'Upper_Shadow',
    'Lower_Shadow',
    'Body_Size',
]

# --- Design matrices -------------------------------------------------------
X = train_final[features].values
y = train_final['TARGET_LOG_RET'].values
X_test = test_final[features].values

# Fit the scaler on the training rows only, then apply it to both sets.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)
X_test_scaled = scaler.transform(X_test)

# --- Models: ridge (weight 0.4) blended with LightGBM (weight 0.6) ---------
lgbm_params = {
    'n_estimators': 500,
    'learning_rate': 0.05,
    'num_leaves': 31,
    'random_state': 42,
    'force_col_wise': True,
    'verbose': -1,
}
ridge = RidgeCV(alphas=[0.1, 1.0, 10.0, 100.0])
lgbm = lgb.LGBMRegressor(**lgbm_params)

ensemble = VotingRegressor(
    estimators=[('ridge', ridge), ('lgbm', lgbm)],
    weights=[0.4, 0.6],
)
ensemble.fit(X_scaled, y)

# --- Predict the log return and convert it back to a price -----------------
pred_log_ret = ensemble.predict(X_test_scaled)
current_price = test_final['Close_Lag_0'].values
final_pred = np.exp(pred_log_ret) * current_price

# --- Write the submission file ---------------------------------------------
submission = pd.DataFrame({'ID': test_df['ID'], 'TARGET': final_pred})
submission.to_csv('submission.csv', index=False)

  # NOTE(review): the lines below repeat the lag-column assignments from the
  # loop inside prepare_data, but at module level: they are indented with no
  # enclosing block, and no visible code defines `lag` in this scope. This
  # looks like notebook-export residue rather than intended code -- confirm
  # and remove.
  df[f'High_Lag_{lag}'] = df['High'].shift(lag)
  df[f'Low_Lag_{lag}'] = df['Low'].shift(lag)
  df[f'Volume_Lag_{lag}'] = df['Volume'].shift(lag)
  df[f'Open_Lag_{lag}'] = df['Open'].shift(lag)
  df[f'Close_Lag_{lag}'] = df['Close'].shift(lag)
  df[f'High_Lag_{lag}'] = df['High'].shift(lag)
  df[f'Low_Lag_{lag}'] = df['Low'].shift(lag)
  df[f'Volume_Lag_{lag}'] = df['Volume'].shift(lag)
  df[f'Open_Lag_{lag}'] = df['Open'].shift(lag)
  df[f'Close_Lag_{lag}'] = df['Close'].shift(lag)
  df[f'High_Lag_{lag}'] = df['High'].shift(lag)
  df[f'Low_Lag_{lag}'] = df['Low'].shift(lag)
  df[f'Volume_Lag_{lag}'] = df['Volume'].shift(lag)
  df[f'Open_Lag_{lag}'] = df['Open'].shift(lag)
  df[f'Close_Lag_{lag}'] = df['Close'].shift(lag)
  df[f'High_Lag_{lag}'] = df['High'].shift(lag)
  df[f'Low_Lag_{lag}'] = df['Low'].shift(lag)
  df[f'Volume_Lag_{lag}'] = df['Volume'].shift(lag)
  df[f'Open_Lag_{lag}'] = df['Open'].shift(lag)
  df[f'Close_Lag_{lag}'] = df['Close'].shift(lag)
  df[f'High_Lag_{lag}'] 