## Strategy
How the Strategy module will be used:

It will take a:
- df (the start and the ending date will be provided within the dataframe)
- type of objective function to use ('Sharpe Ratio', 'Multiple', ... any metrics)


It will contain methods:
- that will perform dynamic universe selection
- that will contain the trading strategy (will take the parameters as input)
- that will optimize for the best parameters given the objective function (will call the trading strategy method)\
-> Make sure to enforce the use of discrete parameters (by using an integer space (not real))\
-> Use BayesOptCV (cross validation, not Bayesian Optimization)
- that will perform the walk forward analysis (from sklearn.model_selection import TimeSeriesSplit)

It will output the strategy return column, position, cumulative return, trades, sessions, cumulative session return.


======================================================

Practical Recommendations
Low-Dimensional Problems (<5 dimensions):

init_points: 5–10
n_iter: 10–30
Moderate-Dimensional Problems (5–10 dimensions):

init_points: 10–15
n_iter: 30–50
High-Dimensional Problems (>10 dimensions):

Bayesian optimization might struggle due to the curse of dimensionality. Consider alternatives like random search or evolutionary algorithms if dimensions are very high.

======================================================

The key is to perform separate walk-forward analysis for both the strategies and the rebalancing process

---

In [8]:
import requests
import json
import math
import pandas as pd
import numpy as np
import datetime as dt
import matplotlib.pyplot as plt
from qgridnext import show_grid
from datetime import datetime, timedelta
import sys  
import os
import pandas_ta as ta
import sklearn as sk

# Ensure the directories are in the system path
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..', '..', 'Data_Management'))) #We have a double .. as we are in the Strategy subfolder
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..', '..', 'Universe_Selection')))
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..', '..', 'Signal_Generation')))
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..', '..', 'Risk_Management')))

# Import the modules
from data import Data
from calculations import Calculations, Metrics
from coarse import Coarse_1 as Coarse
from fine import Fine_1 as Fine
from entry_signal import Trend_Following, Mean_Reversion
from tail_risk import Stop_Loss, Take_Profit
from manage_trade import Manage_Trade
from position import Position

                                close  creturns      high  log_return  \
date                coin                                                
2020-12-31 19:00:00 BTCUSDT  29331.69       NaN  29600.00         NaN   
                    ETHUSDT    728.91       NaN    749.00         NaN   
2021-01-01 19:00:00 BTCUSDT  32178.33  1.097050  33300.00    0.092625   
                    ETHUSDT    774.56  1.062628    787.69    0.060745   
2021-01-02 19:00:00 BTCUSDT  33000.05  1.125065  34778.11    0.025216   
                    ETHUSDT    978.28  1.342114   1011.07    0.233501   
2021-01-03 19:00:00 BTCUSDT  31988.71  1.090585  33600.00   -0.031126   
                    ETHUSDT   1041.43  1.428750   1162.97    0.062554   
2021-01-04 19:00:00 BTCUSDT  33949.53  1.157435  34360.00    0.059492   
                    ETHUSDT   1099.56  1.508499   1134.60    0.054315   
2021-01-05 19:00:00 BTCUSDT  36769.36  1.253571  36939.21    0.079790   
                    ETHUSDT   1208.42  1.657845   1

In [None]:
# Single run of the pipeline with one hand-picked parameter set: load the data,
# call the coarse/fine universe filters, generate Supertrend signals, apply the
# stop-loss and take-profit steps, compute positions and allocations, then
# refresh the derived columns.
# NOTE(review): the saved output of this cell is
# "AttributeError: 'Stop_Loss' object has no attribute 'indicator_length'",
# so the run stopped in one of the Stop_Loss steps below — confirm which one.
symbols = ['BTCUSDT', 'ETHUSDT']
start_time = dt.datetime(2024, 1, 1)
end_time = dt.datetime(2024, 2, 1)
timeframes = ['1w', '1d', '4h', '1h', '30m','15m', '5m', '1m']
index = 3 #It is better to choose the highest frequency for the backtest to be able to downsample
# index 3 selects '1h' from `timeframes`.
timeframe = timeframes[index]
data = Data(symbols, timeframe, start_time, end_time).df

###### To Optimize ######
#All parameters:
all_frequency = ['1W', '1D', '4h','1h', '30min','15min', '5min', '1min'] #All possible frequencies for the resampling
low_freq_index = 1 #The index of the lowest frequency for the resampling
# Resolves to '1D'; in this cell it is only referenced by the commented-out
# downsample call further down.
low_freq = all_frequency[low_freq_index] #The lowest frequency for the resampling
max_dollar_allocation = 10000
std_window = 2
mean_window = 2
ema_window = 2
high_freq_index = 3 #The index of the highest frequency for the resampling
# Resolves to '1h'; not referenced again in this cell.
high_freq = all_frequency[high_freq_index] #The highest frequency for the resampling
# Passed to tf.supertrend_signals below.
str_length = 10
str_mult = 3
# Passed to Position below.
_min_pos = 0
_max_pos = 1
# Stop-loss settings, passed to Stop_Loss / apply_stop_loss below.
sl_type = 'atr'
sl_ind_length = 14
sl_ind_mult = 3
sl_signal_only = True
fixed_sl = True
# Take-profit settings, passed to Take_Profit / apply_take_profit below.
tp_type = 'rr'
tp_mult = 2
# Defined but not passed to Take_Profit in this cell.
tp_ind_length = 0
tp_signal_only = True
fixed_tp = True
max_perc_risk = 0.01




#Downsample the data
cal = Calculations()
# df = cal.downsample(data, low_freq)

#Perform coarse analysis and filtering
# With the downsample call disabled, the filters run directly on `data`.
coarse = Coarse()
df = coarse.volume_flag(data, max_dollar_allocation)
df = coarse.sort_by_volume(df)
df = coarse.sort_by_std(df, std_window, mean_window)
fine = Fine()
df = fine.above_ema(df, ema_window)

#Join the universe selection data with high frequency data

#Generate a signal
tf = Trend_Following()

# From here on the working frame is `_df`; `df` keeps the filtered universe data.
_df = tf.supertrend_signals(df, str_length, str_mult)

pos = Position(_df, _min_pos, _max_pos)
_df = pos.initialize_position()
sl = Stop_Loss(_df, sl_type, sl_ind_length, sl_ind_mult, sl_signal_only)
_df = sl.apply_stop_loss(fixed_sl, plot = True)
tp = Take_Profit(_df, tp_type, tp_mult, tp_signal_only)
_df = tp.apply_take_profit(fixed_tp, plot = True)

# presumably combines the stop-loss and take-profit exit columns into one
# 'exit_signal' column — verify against Calculations.merge_cols
_df = cal.merge_cols(_df, common = 'exit_signal', use_clip = True)
_df = pos.calculate_position(_df)

mt = Manage_Trade(_df)
_df = mt.erw_actual_allocation(max_perc_risk, max_dollar_allocation)

#########################

_df = cal.update_all(_df)
# Last expression of the cell: displays the resulting frame in the notebook.
_df

AttributeError: 'Stop_Loss' object has no attribute 'indicator_length'

In [4]:
# Display the `session` column of `_df` (notebook cell output).
_df.session

date                        
2024-01-01 09:00:00  BTCUSDT     0.0
                     ETHUSDT     0.0
2024-01-01 10:00:00  BTCUSDT     0.0
                     ETHUSDT     0.0
2024-01-01 11:00:00  BTCUSDT     0.0
                                ... 
2024-01-31 16:00:00  ETHUSDT     7.0
2024-01-31 17:00:00  BTCUSDT    14.0
                     ETHUSDT     7.0
2024-01-31 18:00:00  BTCUSDT    14.0
                     ETHUSDT     8.0
Name: session, Length: 1460, dtype: float64

---

## Dynamic Universe Selection Strategy

current_universe = {}\
max_positions = 4
```pseudocode
for each row:

	if len(universe) < max_positions:

		current coins = coins at the current index
		available_coins = current_coins - universe => All coins not in the universe

		filter = above_ema, volume_rank < 50 (could be optimized), std_rank < 4 (should be FINAL Constant),	 entry_signal.shift() == 1
		potential_coins = available coins with applied filter => Potential coins that could be added to the universe
		potential_coins = potential_coins.sort(based on std_rank)
	
		missing_positions = max_positions - len(universe)
		to_be_added = potential_coins[:missing_positions]

		universe = universe + to_be_added

	for each coin in the current row:
		if coin is in universe:
			df[(time, coin), 'in_universe'] = True => mark it as part of the universe

	return df = df[df['in_universe']]

```



In [33]:
# Coins currently held in the universe. This has to be a real assignment:
# a bare annotation (`current_universe: set()`) binds no name, so the loop
# would only work if the notebook kernel already held the variable.
current_universe: set = set()
for time_index in df.index.get_level_values('time').unique():
    # Coins that have a row at this timestamp.
    current_coins = df.loc[time_index].index.get_level_values('coin').unique()
    # Candidates are the coins present now that are not already held.
    available_coins = set(current_coins) - current_universe
    print(available_coins)

{'Coin_7', 'Coin_8', 'Coin_6', 'Coin_1', 'Coin_3', 'Coin_4', 'Coin_2', 'Coin_0', 'Coin_5', 'Coin_9'}
{'Coin_7', 'Coin_8', 'Coin_6', 'Coin_1', 'Coin_3', 'Coin_4', 'Coin_2', 'Coin_0', 'Coin_5', 'Coin_9'}
{'Coin_7', 'Coin_8', 'Coin_6', 'Coin_1', 'Coin_3', 'Coin_4', 'Coin_2', 'Coin_0', 'Coin_5', 'Coin_9'}
{'Coin_7', 'Coin_8', 'Coin_6', 'Coin_1', 'Coin_3', 'Coin_4', 'Coin_2', 'Coin_0', 'Coin_5', 'Coin_9'}


Timestamp('2024-01-04 00:00:00')

In [37]:
max_positions: int = 4

# Rows of the still-available coins at `time_index`. Both names must already
# exist in the notebook session (presumably left by the loop cell above —
# confirm).
temp_df = df.loc[(time_index, list(available_coins)), :].copy()

# A coin qualifies only when every screen passes: `above_ema` is set, volume
# rank is under 50, std rank is under 4 and the entry signal equals 1.
# The entry signal is used as-is here; no shift is applied.
filter_condition = (
    temp_df['above_ema']
    & temp_df['volume_rank'].lt(50)
    & temp_df['std_rank'].lt(4)
    & temp_df['entry_signal'].eq(1)
)
print(temp_df)
filter_condition

                   above_ema  volume_rank  std_rank  entry_signal
time       coin                                                  
2024-01-04 Coin_7       True           14         2             1
           Coin_8      False           95         3             0
           Coin_6      False          100         5             1
           Coin_1       True           90         1             1
           Coin_3      False           34         5             1
           Coin_4      False           74         3             1
           Coin_2      False           44         3             1
           Coin_0       True           18         3             1
           Coin_5      False           62         1             0
           Coin_9      False           48         1             1


time        coin  
2024-01-04  Coin_7     True
            Coin_8    False
            Coin_6    False
            Coin_1    False
            Coin_3    False
            Coin_4    False
            Coin_2    False
            Coin_0     True
            Coin_5    False
            Coin_9    False
dtype: bool

In [2]:
def create_test_df(num_times: int = 5, num_coins: int = 10, seed=None) -> pd.DataFrame:
    """Create a random (time, coin) multi-index DataFrame for testing.

    The index is the product of ``num_times`` consecutive daily timestamps
    starting at 2024-01-01 and the coins ``Coin_0 .. Coin_{num_coins - 1}``.

    Args:
        num_times: Number of daily timestamps. ``0`` yields an empty frame.
        num_coins: Number of coins per timestamp.
        seed: Optional integer seed. When given, the fixture is reproducible.
            When ``None``, the global ``np.random`` state is used, so an
            earlier ``np.random.seed(...)`` call still controls the output.

    Returns:
        A DataFrame with columns ``above_ema`` (bool), ``volume_rank``
        (1..99), ``std_rank`` (1..9), ``entry_signal`` (0/1) and
        ``position`` (0/1).

    Raises:
        ValueError: If ``num_times`` is negative (raised by ``pd.date_range``).
    """
    # Same draw order as before, so seeding the global state reproduces
    # the exact frames the unseeded version produced.
    rng = np.random if seed is None else np.random.RandomState(seed)

    times = pd.date_range('2024-01-01', periods=num_times, freq='D')
    coins = [f"Coin_{i}" for i in range(num_coins)]
    index = pd.MultiIndex.from_product([times, coins], names=['time', 'coin'])

    n_rows = len(index)
    df = pd.DataFrame(index=index)
    df['above_ema'] = rng.choice([True, False], size=n_rows)
    df['volume_rank'] = rng.randint(1, 100, size=n_rows)
    df['std_rank'] = rng.randint(1, 10, size=n_rows)
    df['entry_signal'] = rng.randint(0, 2, size=n_rows)  # 0 or 1
    df['position'] = rng.randint(0, 2, size=n_rows)
    return df

# Build a small fixture (4 timestamps x 8 coins) and display it.
test_df = create_test_df(num_coins=8, num_times=4)
test_df

Unnamed: 0_level_0,Unnamed: 1_level_0,above_ema,volume_rank,std_rank,entry_signal,position
time,coin,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2024-01-01,Coin_0,True,80,2,0,0
2024-01-01,Coin_1,False,55,7,1,1
2024-01-01,Coin_2,True,67,1,0,1
2024-01-01,Coin_3,False,80,8,0,1
2024-01-01,Coin_4,False,36,8,0,0
2024-01-01,Coin_5,True,87,3,0,1
2024-01-01,Coin_6,True,43,4,1,0
2024-01-01,Coin_7,True,3,7,1,1
2024-01-02,Coin_0,True,72,2,0,0
2024-01-02,Coin_1,True,72,1,1,0


In [8]:
import pandas as pd
import numpy as np
from typing import Set, List
import pandas as pd
from typing import List

def update_universe(
    df: pd.DataFrame,
    max_positions: int = 4,
    max_volume_rank: int = 50,
    max_std_rank: int = 10,
) -> tuple:
    """Track a dynamic universe of coins over time and flag its members.

    Timestamps (index level 0) are visited in order of first appearance.
    At each timestamp:

    1. Held coins whose ``position`` is 0 at that timestamp are dropped.
    2. If fewer than ``max_positions`` coins are held, the coins present at
       that timestamp and not held are screened (``above_ema`` set,
       ``volume_rank < max_volume_rank``, ``std_rank < max_std_rank``,
       ``entry_signal == 1``). The free slots are filled with the
       lowest-``std_rank`` survivors; ties keep their row order.
    3. Every held coin that has a row at that timestamp is flagged.

    Args:
        df: Frame indexed by (time, coin) with columns ``above_ema``,
            ``volume_rank``, ``std_rank``, ``entry_signal`` and ``position``.
            It is modified in place: an ``in_universe`` column holding the
            unlagged membership flags is added or overwritten.
        max_positions: Maximum number of coins held at once.
        max_volume_rank: Exclusive upper bound on ``volume_rank``.
        max_std_rank: Exclusive upper bound on ``std_rank``.

    Returns:
        A tuple ``(in_universe, current_universe)``. ``in_universe`` is the
        membership flag lagged by one observation per coin, so each coin's
        first row is NaN. ``current_universe`` is the set of coins held after
        the last timestamp.
    """
    current_universe = set()
    df['in_universe'] = False

    for time_index in df.index.get_level_values(0).unique():
        # Rows of this timestamp, indexed by coin.
        snapshot = df.loc[time_index]
        present = set(snapshot.index)

        # Drop held coins whose position is closed at this timestamp. Held
        # coins without a row here are left untouched.
        closed = {
            coin for coin in current_universe
            if coin in present and snapshot.loc[coin, 'position'] == 0
        }
        current_universe -= closed

        missing_positions = max_positions - len(current_universe)
        if missing_positions > 0:
            candidates = snapshot[~snapshot.index.isin(current_universe)]
            eligible = candidates[
                candidates['above_ema']
                & (candidates['volume_rank'] < max_volume_rank)
                & (candidates['std_rank'] < max_std_rank)
                & (candidates['entry_signal'] == 1)
            ]
            # Slice the sorted index directly: converting to a set first
            # would throw the std_rank ordering away.
            ranked = eligible.sort_values(by='std_rank', kind='stable')
            current_universe.update(ranked.index[:missing_positions])

        # Only flag coins that have a row at this timestamp; a list-based
        # .loc lookup raises KeyError on missing labels.
        members = [coin for coin in snapshot.index if coin in current_universe]
        if members:
            df.loc[(time_index, members), 'in_universe'] = True

    # Lag per coin rather than by a fixed row count, so the result is also
    # correct when some coins are missing at some timestamps.
    lagged = df.groupby(level=1)['in_universe'].shift(1)
    return lagged, current_universe


# update_universe returns the shifted membership flags plus the final set of
# held coins; store the flags on the frame, then show both results.
lagged_flags, current_universe = update_universe(test_df)
test_df['in_universe'] = lagged_flags

print(current_universe)
test_df

{'Coin_7', 'Coin_6'}


Unnamed: 0_level_0,Unnamed: 1_level_0,above_ema,volume_rank,std_rank,entry_signal,position,in_universe
time,coin,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2024-01-01,Coin_0,True,80,2,0,0,
2024-01-01,Coin_1,False,55,7,1,1,
2024-01-01,Coin_2,True,67,1,0,1,
2024-01-01,Coin_3,False,80,8,0,1,
2024-01-01,Coin_4,False,36,8,0,0,
2024-01-01,Coin_5,True,87,3,0,1,
2024-01-01,Coin_6,True,43,4,1,0,
2024-01-01,Coin_7,True,3,7,1,1,
2024-01-02,Coin_0,True,72,2,0,0,False
2024-01-02,Coin_1,True,72,1,1,0,False


---

### Objective Function