# A reinforcement learning approach to improve the performance of the Avellaneda-Stoikov market making algorithm

This paper presents a new approach to market making based on deep reinforcement learning that performs better than the benchmarks on several performance and inventory-management metrics.
Our main contribution is a different approach to the agent's actions, which configure the agent's behavior on top of the Avellaneda-Stoikov algorithm.
A second contribution is the way our benchmark is genetically optimized over the test periods.
The third is the way we reduce the number of states of the reinforcement learning algorithm using feature importance.
Our high-frequency trading simulator, which uses L2 tick data, and our algorithms are openly accessible in a GitHub repository [1].
We use real data to simulate and test our strategies over a 30-day period.
The first day is reserved for initial training and parameter tuning of the benchmark; the next 30 days are used to show results.
During testing, one of the benchmark algorithms is re-optimized every 5 complete days.


In [None]:
%matplotlib inline
!git pull
# import os;os.environ['LAMBDA_PARQUET_TICK_DB']=rf'X:\\'
session_name='CryptoMM'
import time
import datetime
import tqdm
import seaborn as sns
from trading_algorithms.compare_trading_algorithms import CompareTradingAlgorithms
sns.set_theme()

# Index:
* [Algorithm configuration](#algo_configuration)
* [Backtest configuration](#backtest_configuration)
* [Genetic Algorithm Parameter Tuning Benchmarks](#parameter_tuning)
* [Train DQN](#train)
    * [Memory Replay](#memory)
* [Test](#test)
* [Results](#results)
  * [Ratios](#ratios)
    * [Sharpe](#sharpe)
    * [Sortino](#sortino)
    * [Pnl Map](#pnl_map)
    * [Max Drawdown](#dd)

## Algorithm configuration <a class="anchor" id="algo_configuration"></a>

In [None]:
# Global run switches for the whole notebook. Later cells read these names
# directly, so this cell must be executed before any of them.
start=time.time()
start_notebook=time.time()
from notebooks.lambda_imports import *
# NOTE(review): matplotlib >= 3.6 renames this style to 'seaborn-v0_8' — confirm
# against the installed matplotlib version.
plt.style.use('seaborn')

# dill.load_session(session_name)
# Benchmark parameter tuning (genetic algorithm) switches.
PARAMETER_TUNING_BENCHMARK_INITIAL=False
PARAMETER_TUNING_BENCHMARK_INITIAL_WITH_BEST=False#small fine tuning from best parameters
PARAMETER_TUNING_BENCHMARK=False#parameter tuning every parameter_tuning_every_n_days

# Which agents are trained in the training cell.
TRAIN_DQN=True
TRAIN_CONTANT_DQN=True

TRAIN_DQN_LSTM=False

# Passed as clean_initial_experience to the matching train(...) call.
clean_initial_experience=True
clean_initial_experience_CONSTANT_DQN=True
clean_initial_experience_LSTM=False

#experiments configuration
INCLUDE_RNN=False
INCLUDE_ALPHA_CONSTANT=True
VS_MULTIPLES_AS=True


# When False, "calculateTt" is set to 0 in the algorithm parameter dictionaries.
CALCULATE_Tt=False
# Exploration probability passed to the RL agents' test(...) calls.
TEST_EXPLORE_PROB=0.01
TEST_ITERATION_PREDICT_PERIOD_TRAIN = IterationsPeriodTime.FOUR_HOURS#NOTE(review): presumably retrains the predict network every four hours during testing - confirm
TEST_ITERATION_TARGET_PERIOD_TRAIN = IterationsPeriodTime.END_OF_SESSION#train at the end of session to avoid losing testing time
parameter_tuning_every_n_days=5#no tuning is run during testing when PARAMETER_TUNING_BENCHMARK is False
elapsed = time.time()-start

In [None]:
seed_number=28220
np.random.seed(seed_number)
import random
random.seed(seed_number)

In [None]:

# Session hours passed to every algorithm as "firstHour" / "lastHour"; the
# backtest windows are padded by one hour on each side of this range.
FIRST_HOUR=7.0
LAST_HOUR=19.0

# Order size passed to every algorithm as "quantity".
QUANTITY=0.0001
# Base value for "positionMultiplier" (the GA searches between base/1.25 and base*1.25).
# The misspelling in the name is kept because other cells reference it.
POSITION_MULITPLIER_BASE=1.0/QUANTITY
timeHorizonSecondsDQN=15

# Class-level settings applied to every Algorithm created afterwards.
Algorithm.DELAY_MS=65#65 ms; the original note calls it slippage - NOTE(review): presumably simulated order latency, confirm
Algorithm.FEES_COMMISSIONS_INCLUDED=True
Algorithm.MULTITHREAD_CONFIGURATION=MultiThreadConfiguration.multithread

In [None]:
#PARAMETER TUNING CONFIG
ga_configuration_population_initial = 30
ga_configuration_generations_initial=5
ga_configuration_simultaneous_initial=15

In [None]:
#TRAIN CONFIG
iterations_train=25
#only for initial filling memory
fill_memory_max_iterations=3
algos_per_iteration = 10
simultaneous_algos=10


In [None]:
instrument_pk='btcusdt_coinbase'

equity_column_score_enum=ScoreEnum.total_pnl

testing_days=10

start_date_experiment=datetime.datetime(year=2022,month=10,day=25)#datetime.datetime.today()
start_date_experiment = start_date_experiment.replace(hour=0, minute=0,second=0,microsecond=0)
start_date_experiment

In [None]:
def get_time(reference_date,business_days,hour):
    """Return the datetime `business_days` days before `reference_date`, at `hour`:00:00.

    Despite the parameter name, the offset is counted in calendar days
    (weekends are not skipped); the name is kept for backward compatibility.

    Args:
        reference_date: datetime the offset is subtracted from.
        business_days: number of calendar days to go back.
        hour: hour of the day (0-23) set on the result; minutes, seconds and
            microseconds are zeroed.

    Returns:
        A datetime on the shifted date at the requested hour.
    """
    # Plain datetime arithmetic is enough here: the previous version imported
    # pandas' BDay on every call without using it and round-tripped the value
    # through a POSIX timestamp, which only added a local-timezone dependency.
    shifted = reference_date - datetime.timedelta(days=business_days)
    return shifted.replace(hour=hour, minute=0, second=0, microsecond=0)


In [None]:
# Encode the CALCULATE_Tt switch as the 0/1 value stored under "calculateTt".
calculateTt = 1 if CALCULATE_Tt else 0

# Lower bounds of the genetic-algorithm search space for Avellaneda-Stoikov.
AS_parameters_min = {
    "riskAversion": 0.01,
    'windowTick': 5,
    'minutesChangeK': 1.0,
    'spreadMultiplier': 0.75,
    'positionMultiplier': POSITION_MULITPLIER_BASE / 1.25,
    'kDefault': -1,
    'calculateTt': calculateTt,
}
# Upper bounds of the same search space (kDefault and calculateTt are fixed).
AS_parameters_max = {
    "riskAversion": 0.9,
    'windowTick': 25,
    'minutesChangeK': 10.0,
    'spreadMultiplier': 5.0,
    'positionMultiplier': POSITION_MULITPLIER_BASE * 1.25,
    'kDefault': -1,
    'calculateTt': calculateTt,
}


In [None]:
#from feature importance
# most_significant_state_columns=[
# 'ask_price_0',
#  'ask_price_7',
#  'ask_price_9',
#  'ask_qty_0',
#  'ask_qty_6',
#  'ask_qty_9',
#  'bid_price_0',
#  'bid_price_7',
#  'bid_price_9',
#  'bid_qty_0',
#  'bid_qty_7',
#  'high_1',
#  'last_close_price_1',
#  'last_close_price_2',
#  'last_close_price_5',
#  'last_close_price_6',
#  'last_close_price_9',
#  'last_close_qty_0',
#  'microprice_0',
#  'microprice_5',
#  'microprice_6',
#  'microprice_9',
#  'midprice_0',
#  'midprice_2',
#  'midprice_3',
#  'midprice_4',
#  'midprice_5',
#  'midprice_7',
#  'midprice_8',
#  'spread_0',
#  'spread_7',
#  'spread_9'
# ]
most_significant_state_columns=[]

strategies_benchmark={}

## Backtest Configuration<a class="anchor" id="backtest_configuration"></a>

In [None]:


# start_date_train=datetime.datetime(year=2022, day=8, month=1,hour=int(FIRST_HOUR-1))#datetime.datetime(year=2020, day=8, month=12,hour=int(FIRST_HOUR-1))
# end_date_train=datetime.datetime(year=2022, day=8, month=1,hour=int(LAST_HOUR+1))

start_date_train=get_time(start_date_experiment,testing_days+2,int(FIRST_HOUR-1))#datetime.datetime(year=2021, day=6, month=9,hour=0)
end_date_train=get_time(start_date_experiment,testing_days+2,int(LAST_HOUR+1))#datetime.datetime(year=2021, day=6, month=9,hour=23)
start_date_train

In [None]:
def parameter_tuning_benchmark(benchmark_algorithm,start_date,end_date,best_parameter_tuning_dict=None):
    """Tune `benchmark_algorithm` with the genetic algorithm and apply the best parameters.

    The search space is bounded by the module-level ``AS_parameters_min`` and
    ``AS_parameters_max`` and candidates are scored with ``ScoreEnum.sharpe``.

    Args:
        benchmark_algorithm: algorithm exposing ``parameter_tuning(...)`` and
            ``set_parameters(...)``.
        start_date: start of the tuning window, forwarded to ``parameter_tuning``.
        end_date: end of the tuning window, forwarded to ``parameter_tuning``.
        best_parameter_tuning_dict: when not None, the fine-tuning budget
            (population 25, 5 generations, 25 simultaneous runs) is used instead
            of the ``ga_configuration_*_initial`` settings. The contents of the
            dictionary are not read.

    Returns:
        Tuple ``(benchmark_algorithm, best_param_dict)`` with the algorithm
        already configured with the best parameters found.
    """
    # Pick the GA budget once. Previously the fine-tuning values were stored in
    # locals that were never read, so the *_initial settings were always used.
    if best_parameter_tuning_dict is None:
        population = ga_configuration_population_initial
        generations = ga_configuration_generations_initial
        max_simultaneous = ga_configuration_simultaneous_initial
    else:
        population = 25
        generations = 5
        max_simultaneous = 25

    # NOTE(review): GAConfiguration is used as a class, not an instance, so
    # these assignments modify class attributes shared by every user of it.
    ga_configuration = GAConfiguration
    ga_configuration.score_column = ScoreEnum.sharpe
    ga_configuration.population = population
    ga_configuration.generations = generations
#     ga_configuration.decay is going to be exponential sigma
    ga_configuration.crossover_prob = 0.6

    best_param_dict, _summary_df = benchmark_algorithm.parameter_tuning(
        instrument_pk=instrument_pk,
        start_date=start_date,
        end_date=end_date,
        parameters_min=AS_parameters_min,
        parameters_max=AS_parameters_max,
        generations=generations,
        ga_configuration=ga_configuration,
        max_simultaneous=max_simultaneous,
    )
    benchmark_algorithm.set_parameters(best_param_dict)
    return benchmark_algorithm,best_param_dict

    
    

In [None]:
# constant spreads benchmarks
parameters_default_constant = {
    "quantityLimit": (QUANTITY*10),
    "quantity": (QUANTITY),
    "firstHour": (FIRST_HOUR),
    "lastHour": (LAST_HOUR),
    "level":(1),
    "seed":seed_number,
}
algorithm_info_constant='constant_spread'
benchmark_constant_spread = ConstantSpread(algorithm_info=algorithm_info_constant,parameters=parameters_default_constant)
strategies_benchmark[benchmark_constant_spread]=False

algorithm_info_linear_constant='linear_constant_spread'
benchmark_linear_constant_spread = LinearConstantSpread(algorithm_info=algorithm_info_linear_constant,parameters=parameters_default_constant)
strategies_benchmark[benchmark_linear_constant_spread]=False

benchmark_constant_spread_train = benchmark_constant_spread.test(
        instrument_pk=instrument_pk,
        start_date=start_date_train,
        end_date=end_date_train,
    )

CompareTradingAlgorithms.get_final_metrics(output_test=benchmark_constant_spread_train,equity_column_score_enum=equity_column_score_enum)
          

benchmark_linear_constant_spread_train = benchmark_linear_constant_spread.test(
        instrument_pk=instrument_pk,
        start_date=start_date_train,
        end_date=end_date_train,
    )

CompareTradingAlgorithms.get_final_metrics(output_test=benchmark_linear_constant_spread_train,equity_column_score_enum=equity_column_score_enum)
           


# AVELLANEDA STOIKOV - BENCHMARK
parameters_default_as = {
    # Avellaneda default
    "riskAversion": (0.9),

    "windowTick": (25),
    "minutesChangeK": (1),
    "quantity": (QUANTITY),
    "firstHour": (FIRST_HOUR),
    "lastHour": (LAST_HOUR),
    
    "kDefault": (-1),    
    "positionMultiplier": POSITION_MULITPLIER_BASE,
    "spreadMultiplier": (1.0),
    "seed":seed_number,
}
if not CALCULATE_Tt:
    parameters_default_as["calculateTt"]=0

algorithm_info_as='avellaneda_stoikov'
benchmark = AvellanedaStoikov(algorithm_info=algorithm_info_as,parameters=parameters_default_as)
print(f'AvellanedaStoikov with params {parameters_default_as}')
#### before results
benchmark_before_pt = benchmark.test(
        instrument_pk=instrument_pk,
        start_date=start_date_train,
        end_date=end_date_train,
    )
CompareTradingAlgorithms.get_final_metrics(output_test=benchmark_before_pt,equity_column_score_enum=equity_column_score_enum)        

## Genetic Algorithm Parameter Tuning Benchmarks <a class="anchor" id="parameter_tuning"></a>

In [None]:
#from parameter tuning -> will be used if PARAMETER_TUNING_BENCHMARK_INITIAL is False
# {'riskAversion': 0.01,
#  'windowTick': 25.0,
#  'minutesChangeK': 10.0,
#  'quantity': 0.0001,
#  'firstHour': 7.0,
#  'lastHour': 19.0,
#  'kDefault': -1.0,
#  'positionMultiplier': 8000.0,
#  'spreadMultiplier': 5.0,
#  'seed': 28220.0,
#  'calculateTt': 0.0,
#  'aDefault': -1.0,
#  'sigmaDefault': -1.0,
#  'spreadCalculation': 'Avellaneda',
#  'kCalculation': 'Alridge'}

best_avellaneda_param_dict={
     'riskAversion':  0.01,
     'windowTick': 25.0,
     'minutesChangeK': 10.0,#with k_default!=-1 doesn't make sense
    
     'quantity': QUANTITY,
     'kDefault': -1.0,
    
     'positionMultiplier': 8000,
     'spreadMultiplier': 5.0,
     'firstHour': FIRST_HOUR,
     'lastHour': LAST_HOUR,
     "seed":seed_number,
}


if not CALCULATE_Tt:
    best_avellaneda_param_dict["calculateTt"]=0


In [None]:
if PARAMETER_TUNING_BENCHMARK_INITIAL:
    print('launching parameter tuning ')
    benchmark,best_avellaneda_param_dict=parameter_tuning_benchmark(benchmark,start_date=start_date_train,end_date=end_date_train)
    print(f'finished parameter tuning with score {best_avellaneda_param_dict}')
elif PARAMETER_TUNING_BENCHMARK_INITIAL_WITH_BEST:
    print('launching parameter tuning with best dict ')
    benchmark,best_avellaneda_param_dict=parameter_tuning_benchmark(benchmark,start_date=start_date_train,end_date=end_date_train,best_parameter_tuning_dict=best_avellaneda_param_dict)
    print(f'finished parameter tuning with score {best_avellaneda_param_dict}')    
else:
    print(f'Parameter tuning loaded with params {best_avellaneda_param_dict}')
print(f'AvellanedaStoikov with params {best_avellaneda_param_dict}')
benchmark.set_parameters(best_avellaneda_param_dict)

In [None]:
best_avellaneda_param_dict

In [None]:
#set actions based on best params benchmark
# skewPricePctAction= [0.,0.05,-0.05,-0.1,0.1]
skewPricePctAction=[0.]
best_risk_aversion=best_avellaneda_param_dict['riskAversion']
best_window_tick=int(best_avellaneda_param_dict['windowTick'])
riskAversionAction= [0.01,.1,0.2,0.9]
# riskAversionAction=[max(min(best_risk_aversion/10,1.0),0.01),best_risk_aversion,max(min(best_risk_aversion*1.25,1.0),0.01),max(min(best_risk_aversion*1.5,1.0),0.01)] # [0.01,.1,0.2,0.9]#
riskAversionAction=list(set(riskAversionAction))
# riskAversionAction= [0.5, 0.01, 0.05,0.1,0.25]#list(set([0.01,best_risk_aversion/2,best_risk_aversion,0.9]))
windowsTickAction = list(set([int(best_window_tick),int(best_window_tick/2),int(best_window_tick*2)]))#[best_avellaneda_param_dict['window_tick']]

In [None]:
riskAversionAction

In [None]:
benchmark_after_pt=benchmark.test(
        instrument_pk=instrument_pk,
        start_date=start_date_train,
        end_date=end_date_train,
    )
CompareTradingAlgorithms.get_final_metrics(output_test=benchmark_after_pt,equity_column_score_enum=equity_column_score_enum)

In [None]:
#DQN Avellaneda stoikov
parameters_default_dqn = {
    # Q
    "skewPricePctAction": skewPricePctAction,
    "riskAversionAction": riskAversionAction,
    "windowsTickAction": windowsTickAction,
    "isRNN":False,
    
    "minPrivateState": (-1),
    "maxPrivateState": (-1),
    "numberDecimalsPrivateState":(4),
    "horizonTicksPrivateState": (5),#(5),
    
    "minMarketState": (-1),
    "maxMarketState": (-1),
    "numberDecimalsMarketState": (7),
    "horizonTicksMarketState": (1),#(10),
    
    "minCandleState": (-1),
    "maxCandleState": (-1),
    "numberDecimalsCandleState": (3),
    "horizonCandlesState": (3),#(5),
    
    "horizonMinMsTick": (0),    
    "scoreEnum": ScoreEnum.asymmetric_dampened_pnl,
    "timeHorizonSeconds": (timeHorizonSecondsDQN),
    "epsilon": (0.1),  # probability of explore=> random action
    "discountFactor": 0.0,  # next state prediction reward discount
    "learningRate": 1.0,  # 0.25 in phd? new values reward multiplier
    "momentumNesterov": 0.0,#momentum nesterov nn
    "learningRateNN": 1.0E-4,#on nn
    # Avellaneda default
    "riskAversion": (0.5),#will be override
    "windowTick": (10),#will be override
    "minutesChangeK": (1),#will be override
    "quantity": (QUANTITY),    
    "positionMultiplier": POSITION_MULITPLIER_BASE,
    "kDefault": (-1),
    "spreadMultiplier": (1.0),
    "firstHour": (FIRST_HOUR),
    "lastHour": (LAST_HOUR),
    #DQN
    "maxBatchSize": 3000,##~300 iterations per day
    "batchSize": 32,
    "trainingPredictIterationPeriod": TEST_ITERATION_PREDICT_PERIOD_TRAIN,  # train only at the end,offline
    "trainingTargetIterationPeriod": TEST_ITERATION_TARGET_PERIOD_TRAIN,  # train at the end,offline
    "epoch": 30,
    "stateColumnsFilter": most_significant_state_columns,
#     "stateColumnsFilter": [],
    "l1":0.,
    "l2":0.,
    "seed":seed_number,
    
}
if not CALCULATE_Tt:
    parameters_default_dqn["calculateTt"]=0

algorithm_info_dqn='alpha_as_1'
avellaneda_dqn = AlphaAvellanedaStoikov(algorithm_info=algorithm_info_dqn,parameters=parameters_default_dqn)#using new implementation
avellaneda_dqn.set_parameters(best_avellaneda_param_dict)#same optimization as benchmark
strategies_benchmark[avellaneda_dqn]=False

if INCLUDE_RNN:
    parameters_default_rnn_dqn = copy.copy(parameters_default_dqn)
    parameters_default_rnn_dqn['isRNN']=True

    algorithm_info_rnndqn='alpha_as_2'
    avellaneda_rnn_dqn = AlphaAvellanedaStoikov(algorithm_info=algorithm_info_rnndqn,parameters=parameters_default_rnn_dqn)
    avellaneda_rnn_dqn.set_parameters(best_avellaneda_param_dict)#same optimization as benchmark
    strategies_benchmark[avellaneda_rnn_dqn]=False
    
if INCLUDE_ALPHA_CONSTANT:
    parameters_default_constant_dqn = copy.copy(parameters_default_dqn)
    parameters_default_constant_dqn['levelAction']= [1, 2, 3, 4]#1 is best level , 4 is the fourth level of the depth
    parameters_default_constant_dqn['skewLevelAction']=[0, 1, -1]#add to ask , discount to bid
    parameters_default_constant_dqn['stateColumnsFilter']=[]#no feature importance!
    
    algorithm_info_alpha_dqn='alpha_cs'
    constant_spread_dqn = AlphaConstantSpread(algorithm_info=algorithm_info_alpha_dqn,parameters=parameters_default_constant_dqn)
    strategies_benchmark[constant_spread_dqn]=False

    
    

In [None]:
best_risk_aversion


In [None]:
best_window_tick

In [None]:
# Build the list of Avellaneda-Stoikov benchmarks the RL agents are compared with.
# With VS_MULTIPLES_AS one benchmark is created per (riskAversion, windowTick)
# pair taken from the agent's action lists; otherwise a single benchmark with
# the best parameters is created.
# NOTE: this cell rebinds the module-level name `benchmark`; after it runs,
# `benchmark` refers to the last instance created here, not to the instance
# configured in the parameter-tuning cells.
benchmark_avellanedas=[]
if VS_MULTIPLES_AS:
    for risk_aversion in riskAversionAction:
        for windows_tick in windowsTickAction:
    #     for windows_tick in [int(best_window_tick)]:
            # Start from the best parameters and override only the two varied ones.
            parameters=copy.copy(best_avellaneda_param_dict)
            parameters['riskAversion']=risk_aversion
            parameters['windowTick']=windows_tick        
            algorithm_info_as=f'avellaneda_stoikov__{risk_aversion}_{windows_tick}'

            benchmark = AvellanedaStoikov(algorithm_info=algorithm_info_as,parameters=parameters_default_as)
            benchmark.set_parameters(parameters)#same optimization as benchmark
            benchmark_avellanedas.append(benchmark)

else:
    # Single benchmark: the name is derived from the best parameters themselves.
    parameters=copy.copy(best_avellaneda_param_dict)
    risk_aversion=parameters['riskAversion']
    windows_tick=int(parameters['windowTick'])    
    algorithm_info_as=f'avellaneda_stoikov__{risk_aversion}_{windows_tick}'
    benchmark = AvellanedaStoikov(algorithm_info=algorithm_info_as,parameters=parameters_default_as)
    benchmark.set_parameters(parameters)#same optimization as benchmark
    benchmark_avellanedas.append(benchmark)
    
        
# Register every Avellaneda-Stoikov variant in strategies_benchmark with the value
# True (the constant-spread and RL strategies were registered with False).
for algo in benchmark_avellanedas:
    strategies_benchmark[algo]=True
    print(algo.algorithm_info)

## Train DQN <a class="anchor" id="train"></a>

### DQN

In [None]:
avellaneda_dqn.parameters

In [None]:
clean_initial_experience

In [None]:
# Train the enabled RL agents on the training window and report the elapsed time.
start=time.time()

# Arguments shared by every train(...) call below; only the agent and its
# clean_initial_experience flag differ between calls.
_train_kwargs = dict(
    instrument_pk=instrument_pk,
    start_date=start_date_train,
    end_date=end_date_train,
    iterations=iterations_train,
    algos_per_iteration=algos_per_iteration,
    simultaneous_algos=simultaneous_algos,
    fill_memory_max_iterations=fill_memory_max_iterations,
    plot_training=True,
)

if TRAIN_DQN:
    output_train_list=avellaneda_dqn.train(
        clean_initial_experience=clean_initial_experience,
        **_train_kwargs
    )

if INCLUDE_RNN and TRAIN_DQN_LSTM:
    output_train_list=avellaneda_rnn_dqn.train(
        clean_initial_experience=clean_initial_experience_LSTM,
        **_train_kwargs
    )

if INCLUDE_ALPHA_CONSTANT and TRAIN_CONTANT_DQN:
    output_train_list=constant_spread_dqn.train(
        clean_initial_experience=clean_initial_experience_CONSTANT_DQN,
        **_train_kwargs
    )

elapsed=time.time()-start
print('train of %d iterations finished in %.2f minutes'%(iterations_train,elapsed/60))

if TRAIN_DQN:
    # Raw string: the Windows path contains backslash sequences (\j, \C, \P, ...)
    # that are invalid escapes in a normal string literal. The resulting value is
    # unchanged. NOTE(review): the path is specific to one machine.
    memory_replay_out=avellaneda_dqn.get_memory_replay_df(r"E:\javif\Coding\Python\market_making_fw\python_lambda\output\memoryReplay_AvellanedaDQN_avellaneda_stoikov_dqn_mm_0.csv")
    if memory_replay_out is not None:
        # A bare expression inside an `if` is not displayed by the notebook,
        # so print the summary explicitly.
        print(memory_replay_out.describe())

### Memory replay<a class="anchor" id="memory"></a>

In [None]:
memory_replay_df=avellaneda_dqn.get_memory_replay_df()
memory_replay_df

In [None]:
# Extract the reward columns of the memory replay. Without a memory replay an
# empty placeholder frame is used so the following cells still run.
if memory_replay_df is None:
    rewards = pd.DataFrame(columns=['one'])
else:
    reward_columns, state_columns = [], []
    for column in memory_replay_df.columns:
        if 'reward' in column:
            reward_columns.append(column)
        elif 'next' not in column:
            # Neither a reward nor a next-state column: treat it as a state column.
            state_columns.append(column)
    rewards = memory_replay_df[reward_columns]
rewards

In [None]:
rewards=rewards.replace(0.00,np.nan)
rewards

In [None]:
rewards.describe()

In [None]:
# Heatmap of the reward columns. Plotting is best-effort: a failure is reported
# instead of stopping the notebook, but it is no longer silently swallowed and
# KeyboardInterrupt is no longer caught.
try:
    fig, ax = plt.subplots(figsize=(10,7))
    ax = sns.heatmap(rewards,ax=ax)
except Exception as e:
    print(f"could not plot rewards heatmap: {e}")

## Test <a class="anchor" id="test"></a>

In [None]:
explore_prob=TEST_EXPLORE_PROB#reserve some randomness to improve on testing

trainingPredictIterationPeriod=TEST_ITERATION_PREDICT_PERIOD_TRAIN
trainingTargetIterationPeriod=TEST_ITERATION_TARGET_PERIOD_TRAIN

counter_plt_results=0
wining_counter_constant=0
wining_counter=0

final_results=None
files=[]

best_avellaneda_param_dict

In [None]:
def launch_test(start_date_test,end_date_test,is_parameter_tuning_benchmark:bool=False,start_date_train=None,end_date_train=None,best_parameter_tuning_dict:dict=None):
    """Backtest every strategy on one test window and collect the outputs.

    Strategies are run in this order: linear constant spread, constant spread,
    every entry of ``benchmark_avellanedas``, ``avellaneda_dqn`` and, when
    enabled, ``avellaneda_rnn_dqn`` (INCLUDE_RNN) and ``constant_spread_dqn``
    (INCLUDE_ALPHA_CONSTANT).

    Args:
        start_date_test: start of the test window.
        end_date_test: end of the test window.
        is_parameter_tuning_benchmark: when True, the module-level ``benchmark``
            is re-tuned on the training window before testing and the resulting
            parameters are also applied to ``avellaneda_dqn``.
        start_date_train: start of the tuning window (only used when tuning).
        end_date_train: end of the tuning window (only used when tuning).
        best_parameter_tuning_dict: forwarded to ``parameter_tuning_benchmark``.

    Returns:
        Tuple ``(strategies_test_output, best_avellaneda_param_dict)``: a dict
        mapping each algorithm object to the result of its ``test(...)`` call,
        and the tuned parameter dict (None when no tuning was run).
    """
    strategies_test_output={}
    best_avellaneda_param_dict=None
    if is_parameter_tuning_benchmark:
        # `benchmark` is the module-level instance. The loop below used to reuse
        # that name as its loop variable, which made it local to this function
        # and raised UnboundLocalError on these lines.
        # NOTE(review): the tuned parameters are applied only to `benchmark` and
        # `avellaneda_dqn`, not to the other entries of benchmark_avellanedas.
        _,best_avellaneda_param_dict=parameter_tuning_benchmark(benchmark,start_date=start_date_train,end_date=end_date_train,best_parameter_tuning_dict=best_parameter_tuning_dict)
        #set new best parameter tuning
        benchmark.set_parameters(best_avellaneda_param_dict)
        avellaneda_dqn.set_parameters(best_avellaneda_param_dict)#to be fair optimization is going to optimize avellaneda

    # Arguments common to every test(...) call.
    backtest_window=dict(
        instrument_pk=instrument_pk,
        start_date=start_date_test,
        end_date=end_date_test,
    )

    ## constant spread benchmarks
    strategies_test_output[benchmark_linear_constant_spread]=benchmark_linear_constant_spread.test(**backtest_window)
    strategies_test_output[benchmark_constant_spread]=benchmark_constant_spread.test(**backtest_window)

    ## Avellaneda-Stoikov benchmarks
    for as_benchmark in benchmark_avellanedas:
        strategies_test_output[as_benchmark]=as_benchmark.test(**backtest_window)

    ## RL agents: same window plus the exploration / online-training settings
    rl_algorithms=[avellaneda_dqn]
    if INCLUDE_RNN:
        rl_algorithms.append(avellaneda_rnn_dqn)
    if INCLUDE_ALPHA_CONSTANT:
        rl_algorithms.append(constant_spread_dqn)
    for rl_algorithm in rl_algorithms:
        strategies_test_output[rl_algorithm]=rl_algorithm.test(
            explore_prob=explore_prob,
            trainingPredictIterationPeriod=trainingPredictIterationPeriod,
            trainingTargetIterationPeriod=trainingTargetIterationPeriod,
            **backtest_window
        )

    return strategies_test_output,best_avellaneda_param_dict

### Execute test

In [None]:
# Files produced during testing; they are attached to the final e-mail and
# deleted afterwards.
files_append = []
# testing_days=3
print(start_date_experiment)
# Midnight of each test day, oldest first: from `testing_days` days before the
# experiment date down to 2 days before it.
days_test = [
    get_time(start_date_experiment, day, 0)
    for day in range(testing_days, 1, -1)
]
days_test

In [None]:
from backtest.train_launcher import clean_gpu_memory


# Run the backtest day by day. Each day is tested over its padded session
# window; the window of the previous day becomes the tuning window for the next.
start_date_train = None
end_date_train = None
day_counter = 0
CompareTradingAlgorithms.reset_global_compare()
for day in tqdm.tqdm(days_test):

#     clean_gpu_memory()
    start_date = day + datetime.timedelta(hours=int(FIRST_HOUR-1))
    end_date = day + datetime.timedelta(hours=int(LAST_HOUR+1))

    # Tuning is only considered once a previous day exists and every
    # parameter_tuning_every_n_days days.
    tuning_day = start_date_train is not None and day_counter % parameter_tuning_every_n_days == 0
    if tuning_day:
        print(f'Parameter tuning benchmark on day {day_counter} with data from {start_date_train} to {end_date_train}')
        strategies_test_output, best_parameter_dict = launch_test(
            start_date,
            end_date,
            is_parameter_tuning_benchmark=PARAMETER_TUNING_BENCHMARK,
            start_date_train=start_date_train,
            end_date_train=end_date_train,
            best_parameter_tuning_dict=best_parameter_dict,
        )
    else:
        strategies_test_output, best_parameter_dict = launch_test(start_date, end_date)
        best_parameter_dict = best_avellaneda_param_dict  # initially

    # Individual plots for alpha_as1 and, when enabled, alpha_as2 and alpha_cs.
    rl_algorithms = [avellaneda_dqn]
    if INCLUDE_RNN:
        rl_algorithms.append(avellaneda_rnn_dqn)
    if INCLUDE_ALPHA_CONSTANT:
        rl_algorithms.append(constant_spread_dqn)
    for rl_algorithm in rl_algorithms:
        algorithm_output = strategies_test_output[rl_algorithm]
        name_output = list(algorithm_output.keys())[0]
        raw_trade_pnl_df = algorithm_output[name_output]
        if raw_trade_pnl_df is not None:
            rl_algorithm.plot_trade_results(raw_trade_pnl_df=raw_trade_pnl_df)
            rl_algorithm.plot_params(raw_trade_pnl_df=raw_trade_pnl_df)

    # Compare all strategies for this day and add the results to the global comparison.
    compare_trading_algorithms = CompareTradingAlgorithms(
        strategies_benchmark=strategies_benchmark,
        strategies_test_output=strategies_test_output,
    )
    results_output_dict = compare_trading_algorithms.get_results(equity_column_score_enum=equity_column_score_enum)
    CompareTradingAlgorithms.add_results_compare(results_output_dict)

    results_compare_df, fig = compare_trading_algorithms.plot_equity_curve(
        plot_equity_column_score_enum=equity_column_score_enum,
        metrics_equity_column_score_enum=equity_column_score_enum,
    )

    # Saving is best-effort; the files are only registered when both were written.
    try:
        results_compare_path = rf"compare_df_{day_counter}.csv"
        results_compare_plot_path = rf"compare_df_{day_counter}.png"
        results_compare_df.to_csv(results_compare_path)
        fig.savefig(results_compare_plot_path)
    except Exception as e:
        print(rf"ERROR saving files of  {start_date}  {e}")
    else:
        files_append.extend([results_compare_path, results_compare_plot_path])

    # Today's window is the tuning window for the next iteration.
    start_date_train, end_date_train = start_date, end_date
    day_counter += 1


## Results  <a class="anchor" id="results"></a>

In [None]:
# Sharpe ratio comparison across the test days.
score_enum = ScoreEnum.sharpe

results_compare_df = CompareTradingAlgorithms.get_results_compare(score_enum=score_enum)
# Index the results by test day.
results_compare_df['date'] = days_test
results_compare_df.set_index('date', inplace=True)
score_df, fig = CompareTradingAlgorithms.plot_results_compare(
    score_enum=score_enum,
    results_compare_df=results_compare_df,
)
# Saving is best-effort; the files are only registered when both were written.
try:
    ratio_compare_path = rf"ratio_compare_{score_enum}.csv"
    ratio_compare_plot_path = rf"ratio_compare_{score_enum}.png"
    score_df.to_csv(ratio_compare_path)
    fig.savefig(ratio_compare_plot_path)
except Exception as e:
    print(rf"ERROR saving files of  {score_enum}  {e}")
else:
    files_append.extend([ratio_compare_path, ratio_compare_plot_path])

score_df

In [None]:
# Sortino ratio comparison across the test days.
score_enum = ScoreEnum.sortino

results_compare_df = CompareTradingAlgorithms.get_results_compare(score_enum=score_enum)
# Index the results by test day.
results_compare_df['date'] = days_test
results_compare_df.set_index('date', inplace=True)
score_df, fig = CompareTradingAlgorithms.plot_results_compare(
    score_enum=score_enum,
    results_compare_df=results_compare_df,
)
# Saving is best-effort; the files are only registered when both were written.
try:
    ratio_compare_path = rf"ratio_compare_{score_enum}.csv"
    ratio_compare_plot_path = rf"ratio_compare_{score_enum}.png"
    score_df.to_csv(ratio_compare_path)
    fig.savefig(ratio_compare_plot_path)
except Exception as e:
    print(rf"ERROR saving files of  {score_enum}  {e}")
else:
    files_append.extend([ratio_compare_path, ratio_compare_plot_path])

score_df

In [None]:
# Maximum drawdown comparison across the test days.
score_enum = ScoreEnum.max_dd

results_compare_df = CompareTradingAlgorithms.get_results_compare(score_enum=score_enum)
# Index the results by test day.
results_compare_df['date'] = days_test
results_compare_df.set_index('date', inplace=True)
score_df, fig = CompareTradingAlgorithms.plot_results_compare(
    score_enum=score_enum,
    results_compare_df=results_compare_df,
)
# Saving is best-effort; the files are only registered when both were written.
try:
    ratio_compare_path = rf"ratio_compare_{score_enum}.csv"
    ratio_compare_plot_path = rf"ratio_compare_{score_enum}.png"
    score_df.to_csv(ratio_compare_path)
    fig.savefig(ratio_compare_plot_path)
except Exception as e:
    print(rf"ERROR saving files of  {score_enum}  {e}")
else:
    files_append.extend([ratio_compare_path, ratio_compare_plot_path])
score_df

In [None]:
# PnL-to-MAP comparison across the test days.
score_enum = ScoreEnum.pnl_to_map

results_compare_df = CompareTradingAlgorithms.get_results_compare(score_enum=score_enum)
# Index the results by test day.
results_compare_df['date'] = days_test
results_compare_df.set_index('date', inplace=True)
score_df, fig = CompareTradingAlgorithms.plot_results_compare(
    score_enum=score_enum,
    results_compare_df=results_compare_df,
)
# Saving is best-effort; the files are only registered when both were written.
try:
    ratio_compare_path = rf"ratio_compare_{score_enum}.csv"
    ratio_compare_plot_path = rf"ratio_compare_{score_enum}.png"
    score_df.to_csv(ratio_compare_path)
    fig.savefig(ratio_compare_plot_path)
except Exception as e:
    print(rf"ERROR saving files of  {score_enum}  {e}")
else:
    files_append.extend([ratio_compare_path, ratio_compare_plot_path])

score_df

In [None]:
elapsed_total_hours = (time.time()-start_notebook)/3600
elapsed_total_hours

In [None]:
# save_notebook_session(session_name)
send_email(recipient='javifalces@gmail.com',subject=rf'[{datetime.datetime.today()}] CryptoMM finished in {elapsed_total_hours:.1f} hours ',body='',file_append=files_append)
print(rf"email sent!")

In [None]:
# Delete the temporary CSV/PNG files that were attached to the e-mail.
# A file that is already gone is skipped, so re-running this cell does not
# fail on files removed by a previous run.
for file in files_append:
    try:
        os.remove(file)
    except FileNotFoundError:
        continue
print(rf"all temp files deleted")