In [None]:
!pip uninstall numpy scipy -y

In [None]:
!pip install numpy==1.26.4 scipy==1.13.1
# !pip install numpy scipy

In [None]:
!pip install smartapi-python
!pip install logzero
!pip install pyotp

In [None]:
!pip install smartapi-python --upgrade

In [None]:
!pip install pandas-ta

In [None]:
!pip install optuna

***Object-oriented pipeline: API client, feature engineering, models and strategy***

In [None]:
import optuna
optuna.logging.set_verbosity(optuna.logging.ERROR)
import json
import urllib.request
import pandas as pd
from SmartApi.smartConnect import SmartConnect
from pyotp import TOTP
from scipy.stats import zscore
import numpy as np
import pandas_ta as ta
import lightgbm as lgb
from sklearn.metrics import classification_report, confusion_matrix, f1_score
from sklearn.model_selection import TimeSeriesSplit
import matplotlib.pyplot as plt
from sklearn.impute import SimpleImputer
from sklearn.metrics import r2_score, mean_squared_error
from datetime import datetime
from dateutil.relativedelta import relativedelta
from sklearn.impute import KNNImputer

In [None]:
class AngelOneAPI:
  """Wrapper around the SmartAPI client.

  Handles session login, download of the instrument master file, token/name
  lookup for symbols ending in "-EQ", and candle download into a DataFrame.
  """

  def __init__(self, api_key, user_id, password, totp_key):
    """Store the credentials and build the SmartConnect client from `api_key`."""
    self.api_key = api_key
    self.user_id = user_id
    self.password = password
    self.totp_key = totp_key
    self.client = SmartConnect(api_key=self.api_key)
    self.session_data = None      # set by login()
    self.instrument_list = None   # set by fetch_instruments()

  def login(self):
    """Generate a session using the current TOTP code and keep the response in `session_data`."""
    totp = TOTP(self.totp_key).now()
    self.session_data = self.client.generateSession(self.user_id, self.password, totp)

  def fetch_instruments(self, url="https://margincalculator.angelbroking.com/OpenAPI_File/files/OpenAPIScripMaster.json"):
    """Download the instrument master JSON from `url` into `instrument_list`."""
    # The context manager closes the HTTP response even if reading/parsing fails.
    with urllib.request.urlopen(url) as response:
      self.instrument_list = json.loads(response.read())

  def _find_equity(self, field, value, exchange):
    """Return the first instrument on `exchange` whose symbol ends in "-EQ" and whose `field` equals `value`, else None."""
    return next(
        (
            instrument for instrument in self.instrument_list
            if instrument[field] == value
            and instrument["exch_seg"] == exchange
            and instrument["symbol"].split('-')[-1] == "EQ"
        ),
        None,
    )

  def token_lookup(self, ticker, exchange="NSE"):
    """Return the token of the "-EQ" instrument named `ticker` on `exchange`, or None if absent."""
    match = self._find_equity("name", ticker, exchange)
    return None if match is None else match["token"]

  def symbol_lookup(self, token, exchange="NSE"):
    """Return the name of the "-EQ" instrument with `token` on `exchange`, or None if absent."""
    match = self._find_equity("token", token, exchange)
    return None if match is None else match["name"]

  def get_candle_data(self, symbol, from_date, to_date, interval="ONE_DAY", exchange="NSE"):
    """Fetch candles for `symbol` and return them as a DataFrame indexed by date.

    Returns None (after printing a message) when the token cannot be resolved
    or when the client response carries no usable "data" rows.
    Columns of the returned frame: open, high, low, close, volume.
    """
    token = self.token_lookup(symbol, exchange)
    if not token:
        print(f"Token not found for {symbol}")
        return None

    params = {
        "exchange": exchange,
        "symboltoken": str(token),
        "interval": interval,
        "fromdate": from_date,
        "todate": to_date
    }

    data = self.client.getCandleData(params)
    # Guard against a None / non-dict response so it is reported like an empty one
    # instead of raising TypeError on the membership test.
    rows = data.get("data") if isinstance(data, dict) else None
    if not rows:
      print(f"No data returned for {symbol}")
      return None

    df = pd.DataFrame(rows, columns=["datetime", "open", "high", "low", "close", "volume"])
    # Keep only the calendar date of each candle and use it as the index.
    df['date'] = pd.to_datetime(df["datetime"]).dt.date
    return df.drop(columns='datetime').set_index('date')


class Dataset:
  def __init__(self, df):
    self.df = df.copy()

  def add_volume_indicators(self):
    self.df['trade_vol_inr'] = self.df[['close', 'volume']].prod(axis=1).div(1e3)
    self.df['z_trade_vol_inr'] = zscore(self.df['trade_vol_inr'])
    self.df['z_trade_vol_rank'] = self.df['z_trade_vol_inr'].rolling(window=21).mean().rank(ascending=False)

  def add_rsi(self, length=14):
    self.df['rsi'] = ta.rsi(self.df['close'], length=length)

  def Compute_ADX(self, period = 14):
    adx_df = ta.adx(self.df['high'], self.df['low'], self.df['close'], length = period)
    if adx_df is not None:
      adx_df = adx_df.rename(columns = {f'ADX_{period}':'adx', f'DMP_{period}':'+di', f'DMN_{period}':'-di'})
      self.df = pd.concat([self.df, adx_df], axis = 1)
    else:
      print(f"ADX could not be computed for period {period}. Not enough data.")

  def Compute_natr_atr(self, period = 14):
    natr = ta.natr(self.df['high'], self.df['low'], self.df['close'], length = period)
    atr = ta.atr(self.df['high'], self.df['low'], self.df['close'], length=period)
    self.df['natr'] = natr
    self.df['atr'] = atr

  def super_trend(self, period = 14, multiplier = 3):
    self.df = self.df.sort_index()
    high, low, close = self.df['high'], self.df['low'], self.df['close']
    hl2 = (high + low) / 2
    atr = ta.atr(high, low, close, length = period)

    upper_band = hl2 + (multiplier*atr)
    lower_band = hl2 - (multiplier*atr)

    supertrend = np.full(len(self.df), np.nan)
    trend_dir = np.full(len(self.df), np.nan)

    # Check if atr calculation resulted in NaN for the initial period
    if not np.isnan(upper_band.iloc[period]):
        supertrend[period] = upper_band.iloc[period]
        trend_dir[period] = -1
    else:
        # Handle cases where initial ATR is NaN
        supertrend[period] = np.nan
        trend_dir[period] = np.nan


    for i in range(period + 1, len(self.df)):
      if not np.isnan(supertrend[i-1]): # Add this check
          if trend_dir[i-1] == 1:
            if close.iloc[i] > supertrend[i-1]:
              supertrend[i] = max(lower_band.iloc[i], supertrend[i-1])
              trend_dir[i] = 1
            else:
              supertrend[i] = upper_band.iloc[i]
              trend_dir[i] = -1
          else:
            if close.iloc[i] < supertrend[i-1]:
              supertrend[i] = min(upper_band.iloc[i], supertrend[i-1])
              trend_dir[i] = -1
            else:
              supertrend[i] = lower_band.iloc[i]
              trend_dir[i] = 1
      else: # If previous supertrend was NaN, current is also NaN
          supertrend[i] = np.nan
          trend_dir[i] = np.nan


    self.df['supertrend'] = supertrend
    self.df['trend_dir'] = trend_dir

  def hist_returns(self):
    by_sym = self.df['close']
    for t in [1, 3, 5, 21]:
      col = f'r{t:02}'
      self.df[col] = by_sym.pct_change(t)


  def fwd_returns(self):
    for t in [1, 3, 5, 21]:
        col = f'r{t:02}'
        fwd_col = f'{col}_fwd'
        self.df[fwd_col] = self.df[col].shift(-t)

        # Interpolate + extrapolate in-place
        self.df[fwd_col] = self.df[fwd_col].interpolate(method='linear', limit_direction='both')




  def features(self):
    self.df['daily_return'] = self.df['close'].pct_change()

    self.df['returns_ma05'] = self.df['daily_return'].transform(
        lambda x: x.shift(1).rolling(5).mean()
    )
    self.df['volatility_21'] = self.df['daily_return'].transform(
        lambda x: x.shift(1).rolling(21).std()
    )

    self.df['ema_12'] = self.df['close'].transform(lambda x: x.ewm(span=12, adjust=False).mean())
    self.df['ema_26'] = self.df['close'].transform(lambda x: x.ewm(span=26, adjust=False).mean())
    self.df['macd'] = self.df['ema_12'] - self.df['ema_26']

    self.df['bbw'] = self.df['close'].transform(
        lambda x: (x.shift(1).rolling(20).mean() + 2 * x.shift(1).rolling(20).std())
                  - (x.shift(1).rolling(20).mean() - 2 * x.shift(1).rolling(20).std())
    )

    self.df["rolling_vol_adj_return"] = self.df['daily_return'] / self.df["atr"]

    self.df['rolling_sharpe_10'] = self.df['daily_return'].transform(
        lambda x: x.shift(1).rolling(10).apply(lambda y: np.mean(y) / (np.std(y) + 1e-6))
    )

    self.df['cum_return'] = self.df['daily_return'].cumsum()

    self.df['max_drawdown_21'] = self.df['cum_return'].transform(
        lambda x: (x - x.shift(1).rolling(21).max()) / (x.shift(1).rolling(21).max() + 1e-6)
    )

    self.df['volume_zscore'] = self.df['volume'].transform(
        lambda x: (x.shift(1).rolling(20).mean() - x.shift(1).rolling(100).mean()) / (x.shift(1).rolling(100).std())
    )

    self.df['log_return_1d'] = self.df['close'].transform(lambda x: np.log(x / x.shift(1)))

    self.df['atr_percent'] = self.df['atr'] / self.df['close']


  def outliers(self):
    outliers = self.df[self.df.r01 > 1].index.unique()
    self.df = self.df.drop(outliers)

  def create_labels(self, long_threshold=0.25, short_threshold=0.25, transaction_cost=0.0003):
    returns = self.df['r01_fwd']
    q_long = returns.quantile(1 - long_threshold)
    q_short = returns.quantile(short_threshold)

    self.df['labels'] = 1  # Neutral
    self.df.loc[returns > q_long, 'labels'] = 2  # Long
    self.df.loc[returns < q_short, 'labels'] = 0


  @staticmethod
  def remap_labels(series):
    return series.map({-1: 0, 0: 1, 1: 2})

  def get_result(self):
    self.df = self.df.dropna()
    # print(self.df)
    return self.df



#class for Classification
class LGBTimeSeriesRegressor():
  """Three-class LightGBM classifier (0 = short, 1 = neutral, 2 = long) tuned with Optuna.

  Despite its name the class predicts the `labels` column. Typical call
  order: features_split -> train_test_split -> run_optuna ->
  train_final_model -> validation / training.
  """

  # Columns that are never used as model inputs.
  _NON_FEATURES = ('labels', 'open', 'close', 'low', 'high', 'volume')

  def __init__(self, df):
    """Keep a private copy of the prepared frame."""
    self.df = df.copy()
    self.best_params = None  # filled by run_optuna()

  def features_split(self):
    """Build X (features) and y (`labels`).

    Raw OHLCV columns, the label, and every forward-looking `*_fwd` column are
    excluded; leaving any `*_fwd` column in X would leak future returns.
    """
    features = [
        col for col in self.df.columns
        if col not in self._NON_FEATURES and not str(col).endswith('_fwd')
    ]
    self.X = self.df[features]
    self.y = self.df['labels']
    return self.X, self.y

  def train_test_split(self, train_years=7, val_years=1, test_years=1):
    """Split X/y into consecutive train, validation and test windows counted back from the last index date.

    Returns (X_train, X_val, X_test, y_test); all six pieces are also stored on self.
    """
    last_date = self.X.index.max()

    test_start = last_date - pd.DateOffset(years=test_years) + pd.DateOffset(days=1)
    val_start = test_start - pd.DateOffset(years=val_years)
    train_start = val_start - pd.DateOffset(years=train_years)

    train_mask = (self.X.index >= train_start) & (self.X.index < val_start)
    val_mask = (self.X.index >= val_start) & (self.X.index < test_start)
    test_mask = (self.X.index >= test_start)

    self.X_train, self.y_train = self.X[train_mask], self.y[train_mask]
    self.X_val, self.y_val = self.X[val_mask], self.y[val_mask]
    self.X_test, self.y_test = self.X[test_mask], self.y[test_mask]

    return self.X_train, self.X_val, self.X_test, self.y_test

  def calc_weights(self, y):
    """Return per-row weights total / (n_classes * class_count), so each class carries equal total weight."""
    class_counts = y.value_counts()
    total = sum(class_counts)
    n_classes = len(class_counts)
    weight_by_class = {cls: total / (n_classes * count) for cls, count in class_counts.items()}
    return y.map(weight_by_class)

  def f1_custom(self, preds, data):
    """LightGBM custom metric: macro F1 of the arg-max class (higher is better)."""
    labels = data.get_label()
    # assumes preds arrives as (n_rows, n_classes) — TODO confirm for the installed LightGBM version
    preds = preds.argmax(axis=1)
    return 'f1_macro', f1_score(labels, preds, average='macro'), True

  @staticmethod
  def _complete_rows(X, y):
    """Return X and y restricted to rows where neither has a missing value, keeping them aligned."""
    keep = X.notna().all(axis=1) & y.notna()
    return X[keep], y[keep]

  def objective(self, trial):
    """Optuna objective: mean macro F1 over 5 purged time-series folds of the training window."""
    # NOTE(review): 'min_child_samples'/'min_data_in_leaf' look like aliases of one
    # setting, and 'class_weight' looks like a scikit-learn wrapper option rather
    # than an lgb.train parameter — confirm against the LightGBM parameter docs.
    params = {
        'objective': 'multiclass',
        'num_class': 3,
        'metric': 'custom',
        'boosting_type': 'gbdt',
        'num_leaves': trial.suggest_int('num_leaves', 32, 512),
        'learning_rate': trial.suggest_float('learning_rate', 0.002, 0.1, log = True),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'min_child_samples':trial.suggest_int('min_child_samples', 10, 100),
        'subsample': trial.suggest_float('subsample', 0.5, 1),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1),
        'reg_alpha': trial.suggest_float('reg_alpha', 0, 10),
        'reg_lambda': trial.suggest_float('reg_lambda', 0, 10),
        'min_split_gain': trial.suggest_float('min_split_gain', 0.01, 0.1),
        'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 250, 300),
        'random_state':42,
        'n_jobs': -1,
        'verbosity':-1,
        'class_weight': trial.suggest_categorical('class_weight', ['balanced', None]),
        'feature_fraction_bynode': trial.suggest_float('feature_fraction_bynode', 0.5, 1.0),
    }

    tscv = TimeSeriesSplit(n_splits=5)
    scores = []

    for train_idx, val_idx in tscv.split(self.X_train):
      # Purge the 30 rows before the validation fold to limit look-ahead leakage.
      train_idx = train_idx[train_idx <= val_idx[0] - 30]

      # Drop incomplete rows jointly so features and labels stay row-aligned.
      X_train_fold, y_train_fold = self._complete_rows(
          self.X_train.iloc[train_idx], self.y_train.iloc[train_idx])
      X_val_fold, y_val_fold = self._complete_rows(
          self.X_train.iloc[val_idx], self.y_train.iloc[val_idx])

      weights = self.calc_weights(y_train_fold)
      dtrain = lgb.Dataset(X_train_fold, y_train_fold, weight=weights)
      dval = lgb.Dataset(X_val_fold, y_val_fold, reference=dtrain)

      model = lgb.train(params,
                        dtrain,
                        valid_sets=[dtrain, dval],
                        feval=self.f1_custom,
                        callbacks=[
                            lgb.early_stopping(50, verbose=False),
                            lgb.log_evaluation(0)
                        ])

      preds = model.predict(X_val_fold).argmax(axis=1)
      scores.append(f1_score(y_val_fold, preds, average='macro'))

    return np.mean(scores)

  def run_optuna(self, n_trials=20):
    """Run the study (maximising macro F1), remember the best parameters on self and return them."""
    self.study = optuna.create_study(direction='maximize')
    self.study.optimize(self.objective, n_trials=n_trials, show_progress_bar=False, catch=(Exception,))
    self.best_params = self.study.best_params
    return self.best_params

  def _final_params(self, best_params=None):
    """Merge tuned parameters with the fixed multiclass settings without mutating the input dict."""
    tuned = best_params if best_params is not None else getattr(self, 'best_params', None)
    if tuned is None:
      raise RuntimeError("No tuned parameters available: call run_optuna() first or pass best_params.")
    return {
        **tuned,
        'objective': 'multiclass',
        'num_class': 3,
        'metric': 'custom',
        'verbosity': -1
    }

  def train_final_model(self, best_params=None):
    """Fit the final model on the training window with early stopping on the validation window.

    `best_params` defaults to the parameters stored by run_optuna(), so the
    method no longer depends on a notebook-level global of the same name.
    """
    params = self._final_params(best_params)

    train_data = lgb.Dataset(self.X_train, self.y_train, weight=self.calc_weights(self.y_train))
    val_data = lgb.Dataset(self.X_val, self.y_val, reference=train_data)

    self.final_model = lgb.train(params,
                                  train_data,
                                  valid_sets=[train_data, val_data],
                                  feval=self.f1_custom,
                                  num_boost_round=1000,
                                  callbacks=[
                                      lgb.early_stopping(50, verbose = False),
                                      lgb.log_evaluation(0)
                                  ])
    return self.final_model

  def adjust_predictions(self, pred_prob):
    """Turn class probabilities into class ids by taking the most probable class per row."""
    return np.argmax(pred_prob, axis=1)

  def validation(self):
    """Return the final model's class probabilities for the validation window."""
    return self.final_model.predict(self.X_val)

  def training(self):
    """Return the final model's class probabilities for the training window."""
    return self.final_model.predict(self.X_train)

class Strategy():
  """Turns class probabilities into positions and scores them against `r01_fwd` returns."""

  def __init__(self, df, val_probs, train_probs, X_train, X_val, X_test, y_test, final_model, regressor, transaction_cost):
    """Store a copy of the data frame plus the model, its probabilities and the split frames."""
    self.df = df.copy()
    self.val_probs = val_probs
    self.train_probs = train_probs
    self.X_train = X_train
    self.X_val = X_val
    self.X_test = X_test
    self.y_test = y_test
    self.transaction_cost = transaction_cost
    self.final_model = final_model
    self.regressor = regressor

  def simulate_strategy(self, pred_probs, true_returns, cost_per_trade):
    """Simulate position-weighted returns net of costs.

    Positions: 1.2 when P(long) > 0.4, else -1.2 when P(short) > 0.35, else
    P(long) - P(short); clipped to [-1.5, 1.5]. The per-trade cost is scaled
    by the mean 20-row rolling standard deviation of `true_returns`.

    Returns (strategy_returns, mean/std ratio, share of rows with positive return).
    The ratio is 0 when the returns have zero standard deviation.
    """
    volatility = true_returns.rolling(window = 20).std()
    # With fewer than 20 rows this mean is NaN and the cost term turns every return into NaN.
    scaled_cost = cost_per_trade * volatility.mean()

    short_prob, neutral_prob, long_prob = pred_probs.T
    raw_positions = np.where(long_prob > 0.4, 1.2,
                             np.where(short_prob > 0.35, -1.2, long_prob - short_prob))
    positions = np.clip(raw_positions, -1.5, 1.5)

    # Turnover is measured from a flat (zero) starting position.
    trades = np.abs(np.diff(positions, prepend=0))
    strategy_returns = positions*true_returns - trades*scaled_cost

    spread = strategy_returns.std()
    sharpe = strategy_returns.mean()/spread if spread != 0 else 0
    return strategy_returns, sharpe, (strategy_returns > 0).mean()

  def valid_simulate(self):
    """Simulate on the validation window; returns (strategy_returns, sharpe, win_rate)."""
    val_returns = self.df.loc[self.X_val.index, 'r01_fwd']
    return self.simulate_strategy(self.val_probs, val_returns, self.transaction_cost)

  def training_simulate(self):
    """Simulate on the training window; returns (strategy_returns, sharpe, win_rate)."""
    train_returns = self.df.loc[self.X_train.index, 'r01_fwd']
    return self.simulate_strategy(self.train_probs, train_returns, self.transaction_cost)

  def testing(self):
    """Predict on the test window, simulate the strategy and return the predicted class ids.

    The simulated Sharpe ratio and win rate are kept on `self.test_metrics`.
    """
    test_probs = self.final_model.predict(self.X_test)
    test_preds = self.regressor.adjust_predictions(test_probs)

    test_returns = self.df.loc[self.X_test.index, 'r01_fwd']
    _, test_sharpe, test_win_rate = self.simulate_strategy(test_probs, test_returns, self.transaction_cost)
    self.test_metrics = {'sharpe': test_sharpe, 'win_rate': test_win_rate}

    return test_preds



#Class for regression
class LGBTimeSeriesRegressor_Regression():
  """LightGBM regressor for a forward-return target, tuned with Optuna on a time-ordered split.

  Typical call order: features_split -> train_test_split -> run_optuna ->
  train_final_model -> evaluate_model -> generate_strategy_results.
  """

  # Columns that are never used as model inputs.
  _NON_FEATURES = ('open', 'close', 'low', 'high', 'volume', 'labels')

  def __init__(self, df, target_col='r01_fwd', cost_per_trade=0.0003):
    """Keep a private copy of the frame and the target / cost settings."""
    self.df = df.copy()
    self.target_col = target_col
    self.cost_per_trade = cost_per_trade
    self.features = []
    self.final_model = None
    self.study = None
    self.best_params = None
    self.test_results = None
    self.metrics = {}  # dataset_name -> metrics dict, filled by evaluate_model()

  def features_split(self):
    """Build X (features) and y (`target_col`).

    Raw OHLCV columns, `labels`, the target itself and every forward-looking
    `*_fwd` column are excluded: keeping the target (or any other forward
    return) in X lets the model read the answer directly.
    """
    blocked = set(self._NON_FEATURES) | {self.target_col}
    self.features = [
        col for col in self.df.columns
        if col not in blocked and not str(col).endswith('_fwd')
    ]
    self.X = self.df[self.features]
    self.y = self.df[self.target_col]
    return self.X, self.y

  def train_test_split(self, train_years=7, val_years=1, test_years=1):
    """Split into consecutive train / validation / test windows counted back from the last index date.

    Infinite feature values are replaced by NaN, training rows with a missing
    target are dropped, and remaining gaps are filled with training-set
    medians. Returns (X_train, X_val, X_test, y_test).
    """
    date_index = self.X.index
    last_date = date_index.max()

    # Window boundaries, walking backwards from the most recent date.
    test_start = last_date - pd.DateOffset(years=test_years) + pd.DateOffset(days=1)
    val_start = test_start - pd.DateOffset(years=val_years)
    train_start = val_start - pd.DateOffset(years=train_years)

    train_mask = (date_index >= train_start) & (date_index < val_start)
    val_mask = (date_index >= val_start) & (date_index < test_start)
    test_mask = (date_index >= test_start)

    self.X_train, self.y_train = self.X[train_mask], self.y[train_mask]
    self.X_val, self.y_val = self.X[val_mask], self.y[val_mask]
    self.X_test, self.y_test = self.X[test_mask], self.y[test_mask]

    # Treat infinities as missing so the imputer can handle them.
    self.X_train = self.X_train.replace([np.inf, -np.inf], np.nan)
    self.X_val = self.X_val.replace([np.inf, -np.inf], np.nan)
    self.X_test = self.X_test.replace([np.inf, -np.inf], np.nan)

    # Training rows without a target cannot be learned from.
    has_target = ~self.y_train.isna()
    self.X_train, self.y_train = self.X_train[has_target], self.y_train[has_target]

    # Fit the imputer on the training window only, then apply it to the others.
    imputer = SimpleImputer(strategy='median')
    self.X_train = pd.DataFrame(imputer.fit_transform(self.X_train), index=self.X_train.index, columns=self.X_train.columns)
    self.X_val = pd.DataFrame(imputer.transform(self.X_val), index=self.X_val.index, columns=self.X_val.columns)
    self.X_test = pd.DataFrame(imputer.transform(self.X_test), index=self.X_test.index, columns=self.X_test.columns)

    return self.X_train, self.X_val, self.X_test, self.y_test

  def rmse(self, y_true, y_pred):
    """Root mean squared error."""
    return np.sqrt(mean_squared_error(y_true, y_pred))

  def compute_sharpe_ratio(self, y_true, y_pred):
    """Mean/std of returns earned by trading in the direction of the prediction sign (0 if std is 0)."""
    returns = y_true * np.sign(y_pred)
    return returns.mean() / returns.std() if returns.std() != 0 else 0

  def compute_strategy_win_rate(self, y_true, y_pred):
    """Share of rows whose sign-following return, net of `cost_per_trade`, is positive."""
    signal = np.sign(y_pred)
    strategy_returns = y_true * signal - self.cost_per_trade
    return (strategy_returns > 0).mean()

  def compute_accuracy(self, y_true, y_pred):
    """Share of rows where prediction and outcome have the same sign."""
    return np.mean(np.sign(y_true) == np.sign(y_pred))

  def objective(self, trial):
    """Optuna objective: mean RMSE over 5 time-series folds of the training window."""
    params = {
        'objective': 'regression',
        'metric': 'rmse',
        'boosting_type': 'gbdt',
        'num_leaves': trial.suggest_int('num_leaves', 32, 512),
        'learning_rate': trial.suggest_float('learning_rate', 0.001, 0.01, log=True),
        'max_depth': trial.suggest_int('max_depth', 5, 10),
        'min_child_samples': trial.suggest_int('min_child_samples', 10, 100),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'reg_alpha': trial.suggest_float('reg_alpha', 0.0, 10.0),
        'reg_lambda': trial.suggest_float('reg_lambda', 0.0, 10.0),
        'random_state': 42,
        'verbosity': -1,
        'n_jobs': -1
    }

    tscv = TimeSeriesSplit(n_splits=5)
    rmses = []

    for train_idx, val_idx in tscv.split(self.X_train):
        X_fold_val = self.X_train.iloc[val_idx]
        y_fold_val = self.y_train.iloc[val_idx]

        lgb_train = lgb.Dataset(self.X_train.iloc[train_idx], self.y_train.iloc[train_idx])
        # Tie the validation set to the training set so both share one feature binning.
        lgb_val = lgb.Dataset(X_fold_val, y_fold_val, reference=lgb_train)

        model = lgb.train(params, lgb_train, valid_sets=[lgb_val],
                          callbacks=[lgb.early_stopping(100, verbose=False), lgb.log_evaluation(0)])
        rmses.append(self.rmse(y_fold_val, model.predict(X_fold_val)))

    return np.mean(rmses)

  def run_optuna(self, n_trials=15):
    """Run the study (minimising RMSE), store and return the best parameters."""
    self.study = optuna.create_study(direction='minimize')
    self.study.optimize(self.objective, n_trials=n_trials, show_progress_bar=False)
    self.best_params = self.study.best_trial.params
    return self.best_params

  def train_final_model(self, use_val=True):
    """Fit the final model with the tuned parameters.

    use_val=True  : train on the training window, early-stop on the validation window.
    use_val=False : train on training + validation together; there is no held-out
                    set, so no early stopping is requested.
    """
    params = {
            **self.best_params,
            'objective': 'regression',
            'metric': 'rmse',
            'verbosity': -1
        }

    if use_val:
        train_data = lgb.Dataset(self.X_train, self.y_train)
        val_data = lgb.Dataset(self.X_val, self.y_val, reference=train_data)
        self.final_model = lgb.train(params, train_data, valid_sets=[val_data],
                                      callbacks=[lgb.early_stopping(100, verbose=False), lgb.log_evaluation(0)])
    else:
        X_trainval = pd.concat([self.X_train, self.X_val])
        y_trainval = pd.concat([self.y_train, self.y_val])
        train_data = lgb.Dataset(X_trainval, y_trainval)
        # NOTE(review): the previous dummy validation set (the training data itself)
        # is expected to leave early stopping inert — confirm for the installed LightGBM.
        self.final_model = lgb.train(params, train_data, callbacks=[lgb.log_evaluation(0)])
    return self.final_model

  def evaluate_model(self, X, y, dataset_name=""):
    """Predict on X, record RMSE / R^2 / Sharpe / win rate under `dataset_name`, and return the predictions."""
    preds = self.final_model.predict(X)
    self.metrics[dataset_name] = {
        'rmse': self.rmse(y, preds),
        'r2': r2_score(y, preds),
        'sharpe': self.compute_sharpe_ratio(y, preds),
        'win_rate': self.compute_strategy_win_rate(y, preds),
    }
    return preds

  def generate_strategy_results(self, y_preds, y_true, X):
    """Build a frame of predicted vs. actual returns with the sign-based position and its return."""
    y_true = y_true.loc[X.index]
    X = X.loc[y_true.index]
    self.test_results = pd.DataFrame({
        'Predicted_Return': y_preds,
        'Actual_Return': y_true
    }, index=X.index)
    # Position is +1 / -1 / 0 following the sign of the predicted return.
    self.test_results['Position'] = np.sign(self.test_results['Predicted_Return'])
    self.test_results['Strategy_Return'] = self.test_results['Actual_Return'] * self.test_results['Position']
    return self.test_results

In [None]:
import os

# Credentials are read from the environment so they never need to be typed
# into (and saved with) the notebook. Each falls back to an empty string.
api_key = os.environ.get("ANGEL_API_KEY", "")
user_id = os.environ.get("ANGEL_USER_ID", "")
password = os.environ.get("ANGEL_PASSWORD", "")
key = os.environ.get("ANGEL_TOTP_KEY", "")

angel = AngelOneAPI(api_key, user_id, password, key)
angel.login()
angel.fetch_instruments()

stocks_with_long_signals = []
stocks_with_short_signals = []

# Universe to scan. Every entry needs its own trailing comma: adjacent string
# literals are silently concatenated by Python.
ticker_list = [
    'ABB', 'ADANIENT', 'ADANIPORTS', 'ADANIPOWER', 'AMBUJACEM', 'APOLLOHOSP', 'ASIANPAINT', 'AXISBANK', 'BAJAJ-AUTO',
    'BAJFINANCE', 'BAJAJFINSV', 'BAJAJHLDNG', 'BAJAJHFL', 'BANKBARODA', 'BEL', 'BPCL', 'BHARTIARTL', 'BOSCHLTD', 'BRITANNIA',
    'CGPOWER', 'CANBK', 'CHOLAFIN', 'CIPLA', 'COALINDIA', 'DLF', 'DABUR', 'DIVISLAB', 'DRREDDY', 'EICHERMOT', 'ETERNAL',
    'GAIL', 'GODREJCP', 'GRASIM', 'HCLTECH', 'HDFCBANK', 'HDFCLIFE', 'HAVELLS', 'HEROMOTOCO', 'HINDALCO', 'HAL', 'HINDUNILVR', 'HYUNDAI', 'ICICIBANK',
    'ICICIGI', 'ICICIPRULI', 'ITC', 'INDHOTEL', 'IOC', 'IRFC', 'INDUSINDBK', 'NAUKRI', 'INFY', 'INDIGO', 'JSWENERGY', 'JSWSTEEL',
    'JINDALSTEL', 'KOTAKBANK', 'LTIM', 'LT', 'LICI', 'M&M', 'MARUTI', 'NTPC', 'NESTLEIND', 'ONGC', 'PIDILITIND',
    'PFC', 'POWERGRID', 'PNB', 'RECLTD', 'RELIANCE', 'SBILIFE', 'MOTHERSON', 'SHREECEM', 'SHRIRAMFIN', 'SIEMENS', 'SBIN', 'SUNPHARMA', 'TVSMOTOR',
    'TCS', 'TATACONSUM', 'TATAMOTORS', 'TATAPOWER', 'TATASTEEL', 'TECHM', 'TITAN', 'TORNTPHARM', 'TRENT', 'ULTRACEMCO', 'UNITDSPR', 'VBL', 'VEDL', 'WIPRO', 'ZYDUSLIFE'
]
df_dict = {}
results = {}

# The ten-year history is requested as two five-year windows. The window
# boundaries are the same for every ticker, so they are computed once.
end_time = datetime.now().replace(hour=15, minute=30, second=0, microsecond=0)
mid_time = (end_time - relativedelta(years=5)).replace(hour=15, minute=30)
start_time = (end_time - relativedelta(years=10)).replace(hour=9, minute=15)
end_start = mid_time.replace(hour=9, minute=15)

start_str = start_time.strftime("%Y-%m-%d %H:%M")
mid_str = mid_time.strftime("%Y-%m-%d %H:%M")
end_start_str = end_start.strftime("%Y-%m-%d %H:%M")
end_str = end_time.strftime("%Y-%m-%d %H:%M")

for ticker in ticker_list:
  df1 = angel.get_candle_data(ticker, start_str, mid_str)
  df2 = angel.get_candle_data(ticker, end_start_str, end_str)

  # get_candle_data returns None when the token or the data is missing;
  # skip such tickers instead of aborting the whole scan.
  if df1 is None or df2 is None:
    print(f"Skipping {ticker}: candle data unavailable for the full window")
    continue

  df = pd.concat([df1, df2])
  df.index = pd.to_datetime(df.index, errors='coerce')
  # Both windows include the mid-point day; keep a single row per date.
  df = df[~df.index.duplicated(keep='last')].sort_index()

  analyzer = Dataset(df)
  analyzer.add_volume_indicators()
  analyzer.add_rsi()
  analyzer.Compute_ADX()
  analyzer.Compute_natr_atr()
  analyzer.super_trend()
  analyzer.hist_returns()
  analyzer.fwd_returns()
  analyzer.features()
  analyzer.outliers()
  analyzer.create_labels()
  df_dict[ticker] = analyzer.get_result()

  # --- Classification model -------------------------------------------------
  LGB = LGBTimeSeriesRegressor(df_dict[ticker])
  X, y = LGB.features_split()
  X_train, X_val, X_test, y_test = LGB.train_test_split()
  best_params = LGB.run_optuna()
  final_model = LGB.train_final_model()
  val_prob = LGB.validation()
  train_prob = LGB.training()

  strategy = Strategy(df_dict[ticker], val_prob, train_prob, X_train, X_val, X_test, y_test, final_model, LGB, 0.0003)
  strategy.valid_simulate()
  strategy.training_simulate()
  y_preds_classification = strategy.testing()

  # --- Regression model -----------------------------------------------------
  reg = LGBTimeSeriesRegressor_Regression(df_dict[ticker], target_col='r01_fwd')
  X, y = reg.features_split()
  reg.train_test_split()
  reg.run_optuna(n_trials=20)
  reg.train_final_model(use_val=True)
  reg.evaluate_model(reg.X_train, reg.y_train, "Train")
  reg.evaluate_model(reg.X_val, reg.y_val, "Validation")
  reg.evaluate_model(reg.X_test, reg.y_test.dropna(), "Test")
  reg.train_final_model(use_val=False)
  y_test_preds = reg.evaluate_model(reg.X_test, reg.y_test.dropna(), "Test (retrained)")
  strategy_df = reg.generate_strategy_results(y_test_preds, reg.y_test, reg.X_test)
  y_preds_regression = strategy_df['Position']

  # --- Combine: act only when both models agree on the direction ------------
  y_pred_classifcation = pd.Series(y_preds_classification, index=y_preds_regression.index)

  agreement = np.where(
        (y_preds_regression == 1) & (y_pred_classifcation == 2), 1,
        np.where((y_preds_regression == -1) & (y_pred_classifcation == 0), -1, 0)
    )

  # The signal reported for a ticker is the agreement on the most recent row.
  print()
  print(f"The result for {ticker}")
  print(agreement[-1])
  if agreement[-1] == -1:
    stocks_with_short_signals.append(ticker)
  elif agreement[-1] == 1:
    stocks_with_long_signals.append(ticker)
  print()
  print()

print(stocks_with_short_signals)
print()
print(stocks_with_long_signals)