In [None]:
import warnings
# Suppress every UserWarning for the rest of the session so library warnings
# do not flood the cell outputs of the long-running loops below.
warnings.filterwarnings(action='ignore', category=UserWarning)

In [None]:
# Root folder of the order-book data; read as a global by cov_matrix and prepare_X_Y.
data_directory = '/usr/src/app/Data'
# pandas resampling frequency applied to the merged order book (one-second buckets).
# NOTE(review): the name is misspelled ("freqency") but other cells reference it
# under this exact spelling, so it must be renamed everywhere at once.
sampling_freqency = '1S'

In [None]:
import os
def get_files_name(directory,cboe=True):
    """Return a sorted list of identifiers for the data files found in `directory`.

    Args:
        directory: Folder to list.
        cboe: When true, every entry whose name has at least three
            underscore-separated parts contributes its third part.
            When false, every entry whose last dot-separated part is ``csv``
            contributes ``directory + '/' + entry``.

    Returns:
        The collected strings in ascending order.
    """
    entries = os.listdir(directory)
    if not cboe:
        collected = [directory+'/'+entry for entry in entries if entry.split('.')[-1]=='csv']
    else:
        collected = []
        for entry in entries:
            parts = entry.split('_')
            if len(parts) >= 3:
                collected.append(parts[2])
    return sorted(collected)

In [None]:
from pathlib import PosixPath
import pandas as pd
import itertools , os
import lightgbm as lgb
from sklearn.metrics import mean_squared_error
from tqdm import tqdm
import matplotlib.pyplot as plt
import matplotlib.pyplot as plt
import datetime
import numpy as np
import zipfile
def load_orderbook(path,level,cboe = True):
    """Load one order-book file into a DataFrame indexed by timestamp.

    Args:
        path: For ``cboe`` input, a zip archive containing a ``.lob`` member;
            otherwise a CSV file with ``Gmt time``, ``Ask``, ``Bid``,
            ``AskVolume`` and ``BidVolume`` columns.
        level: Deepest book level to keep (``cboe`` input only). Rows with a
            missing ``BV{level}`` or ``AV{level}`` are dropped and columns of
            deeper levels are removed.
        cboe: Selects the zip/``.lob`` reader (truthy) or the CSV reader (falsy).

    Returns:
        DataFrame indexed by datetime with ``BP*``/``BV*``/``AP*``/``AV*`` columns.

    Raises:
        FileNotFoundError: If the archive holds no ``.lob`` member.
    """
    if cboe:
        # Column layout of a .lob row: timestamp, five (bid price, bid volume)
        # pairs, a column named 'M', five (ask price, ask volume) pairs and a
        # final unnamed column. 'M' and the unnamed column are discarded below.
        column_names = (
            ['Date']
            + [name for i in range(1, 6) for name in (f'BP{i}', f'BV{i}')]
            + ['M']
            + [name for i in range(1, 6) for name in (f'AP{i}', f'AV{i}')]
            + ['']
        )
        with zipfile.ZipFile(path, 'r') as z:
            lob_members = [member for member in z.namelist() if member.endswith('.lob')]
            if not lob_members:
                raise FileNotFoundError(f"no '.lob' member found in archive {path!r}")
            # If several members match, the last one wins (unchanged behaviour),
            # but only that one is parsed.
            with z.open(lob_members[-1]) as f:
                df = pd.read_csv(f, header=None, names=column_names)
        df = df.iloc[:, :-1].drop(columns=['M'])
        df = df.dropna(subset=[f'BV{level}',f'AV{level}'])
        # The last character of each remaining column name is its book level.
        df = df.drop(columns=[c for c in df.columns[1:] if int(c[-1:])>level])
        df = df.set_index('Date')
        df.index = pd.to_datetime(df.index)
    else:
        df = pd.read_csv(path)
        df['Gmt time'] = pd.to_datetime(df['Gmt time'],format='%d.%m.%Y %H:%M:%S.%f')
        # Rename to the same level-1 column names the cboe reader produces.
        df = df.rename(columns={'Ask':'AP1','Bid':'BP1','AskVolume':'AV1','BidVolume':'BV1'})
        df = df.set_index('Gmt time')
    return df

In [None]:
def merge_orderbook(date, directory, currencies_list , level = 5 , cboe = True):
    """Load the order books of several currencies for one date and align them in time.

    Each currency's columns are prefixed with ``"<currency>_"``, the frames are
    outer-joined on their timestamp index, gaps are forward-filled, and the
    result is trimmed to the window in which every currency has data.

    Args:
        date: Date token of the file name (cboe), or the file name itself
            relative to ``directory`` (non-cboe).
        directory: Data root folder.
        currencies_list: Currencies to load; must not be empty.
        level: Book depth forwarded to ``load_orderbook``.
        cboe: Selects the cboe zip layout or the plain CSV layout.

    Returns:
        The merged, forward-filled DataFrame.

    Raises:
        ValueError: If ``currencies_list`` is empty.
    """
    if not currencies_list:
        raise ValueError('currencies_list must contain at least one currency')

    books = {}
    for c in currencies_list:
        if cboe:
            books[c] = load_orderbook(f'{directory}/{c}/tks_ny_{date}_{c}.zip',level,cboe)
        else:
            # NOTE(review): in this mode every currency reads the same file — confirm intended.
            books[c] = load_orderbook(f'{directory}/{date}',level,cboe)

    # Overlap window: latest first tick to earliest last tick across currencies.
    start_time = max(books[c].index.min() for c in currencies_list)
    end_time = min(books[c].index.max() for c in currencies_list)

    final_df = None
    for c in currencies_list:
        prefixed = books[c].add_prefix(f'{c}_')
        if final_df is None:
            final_df = prefixed
        else:
            final_df = pd.merge(final_df, prefixed, left_index=True, right_index=True, how='outer')

    # .ffill() replaces the deprecated fillna(method='ffill') with identical results.
    return final_df.ffill().loc[start_time:end_time]

In [None]:
def cov_matrix(dates_list,input_currencies,period=20):
    """Covariance matrix of percentage mid-price changes across currencies.

    For every date the level-1 cboe books under the global ``data_directory``
    are merged, each currency's mid price ``(BP1 + AP1) / 2`` is turned into a
    percentage change over ``period`` rows, and rows with a missing value are
    dropped. The per-date frames are stacked and their covariance is returned.

    Args:
        dates_list: Date tokens understood by ``merge_orderbook``.
        input_currencies: Currencies to include; also the row/column labels of the result.
        period: Row lag passed to ``pct_change``.

    Returns:
        DataFrame holding the covariance matrix (empty when ``dates_list`` is empty).
    """
    per_date_changes = []
    for date in dates_list:
        merged_df = merge_orderbook(date, data_directory ,input_currencies , 1 , True)
        changes = pd.DataFrame({
            curr: ((merged_df[curr+'_BP1']+merged_df[curr+'_AP1'])/2).pct_change(periods=period) * 100
            for curr in input_currencies
        })
        per_date_changes.append(changes.dropna())

    if not per_date_changes:
        return pd.DataFrame().cov()
    # Concatenate once instead of growing a frame inside the loop.
    return pd.concat(per_date_changes, axis=0).cov()

In [None]:
import numpy as np
# import pandas_ta as ta

def prepare_X_Y(dates_list, input_currencies ,target_currency , memory_horizon , forecast_horizon ,step = 1 , cboe = True , only_market_price = False):
    """Build lagged features and forward-looking targets for ``target_currency``.

    For every date the level-1 books are merged (global ``data_directory``),
    resampled to the global ``sampling_freqency`` and forward-filled. Three base
    features are derived from the target currency's columns:

    * ``mid_change`` - first difference of the log mid price,
    * ``diff_sma``   - log mid price minus log of its ``memory_horizon``-row rolling mean,
    * ``OBI``        - ``(AV1 - BV1) / (AV1 + BV1)``.

    Each base feature is replicated for lags ``0, step, ..., (memory_horizon-1)*step``
    as ``"<feature>_lag<k>"``.

    NOTE(review): only the target currency's columns feed the features;
    ``input_currencies`` only determines which books are merged (and therefore
    the common time window).

    Args:
        dates_list: Date tokens understood by ``merge_orderbook``.
        input_currencies: Currencies merged together with the target.
        target_currency: Currency whose prices produce features and targets.
        memory_horizon: Rolling-mean window and number of lags (must be >= 1).
        forecast_horizon: Look-ahead of the target, in seconds.
        step: Row spacing between consecutive lags.
        cboe: Forwarded to ``merge_orderbook``.
        only_market_price: When true, skip feature building and return only the
            resampled bid/ask frame of the target currency.

    Returns:
        ``only_market_price`` true: DataFrame with the target's ``BP1``/``AP1``.
        Otherwise a tuple ``(X, target, return, mid)`` where ``X`` is the
        feature DataFrame, ``target`` the future-minus-current mid price,
        ``return`` that difference divided by the current mid price and ``mid``
        the current mid price (all three as 1-D numpy arrays aligned with ``X``).

    Raises:
        ValueError: If ``memory_horizon`` is smaller than 1.
    """
    if memory_horizon < 1:
        raise ValueError('memory_horizon must be at least 1')

    bid_col, ask_col = target_currency+'_BP1', target_currency+'_AP1'
    bid_vol_col, ask_vol_col = f'{target_currency}_BV1', f'{target_currency}_AV1'
    horizon = datetime.timedelta(seconds=forecast_horizon)

    X = []
    _target = []
    _return = []
    _mid = []
    for date in dates_list:
        merged_df = merge_orderbook(date, data_directory ,list(set(input_currencies + [target_currency])) , 1 , cboe)
        # .ffill() replaces the deprecated fillna(method='ffill').
        merged_df = merged_df.resample(sampling_freqency).mean().ffill()
        target = merged_df[[bid_col, ask_col]]

        if only_market_price:
            _target.append(target)
            continue

        # Build the base features in a fresh frame rather than assigning onto a
        # column-filtered slice of merged_df.
        mid_price = (merged_df[ask_col] + merged_df[bid_col]) / 2
        sma = mid_price.rolling(memory_horizon).mean()
        base_features = pd.DataFrame({
            'mid_change': np.log(mid_price).diff(),
            'diff_sma': np.log(mid_price) - np.log(sma),
            'OBI': (merged_df[ask_vol_col] - merged_df[bid_vol_col]) / (merged_df[ask_vol_col] + merged_df[bid_vol_col]),
        }).dropna()

        # Collect the lagged copies and concatenate once.
        lagged_frames = []
        for lag in range(memory_horizon):
            lagged_df = base_features.shift(lag*step)
            lagged_df.columns = [f"{col}_lag{lag*step}" for col in base_features.columns]
            lagged_frames.append(lagged_df)
        result_df = pd.concat(lagged_frames, axis=1).dropna()

        # Keep only rows whose timestamp + horizon still lies inside the price frame.
        shifted_index = target.index - horizon
        result_df = result_df.loc[shifted_index[0]:shifted_index[-1]]
        future_index = result_df.index + horizon

        current_mid = target.loc[result_df.index].mean(axis=1).to_numpy()
        future_mid = target.loc[future_index].mean(axis=1).to_numpy()
        target_ = future_mid - current_mid
        return_ = target_ / current_mid

        X.append(result_df)
        _target.append(target_)
        _return.append(return_)
        _mid.append(current_mid)
    if only_market_price:
        return pd.concat(_target,axis=0)

    _target = np.concatenate(_target)
    _return = np.concatenate(_return)
    _mid = np.concatenate(_mid)
    X=pd.concat(X,axis=0)
    return X , _target , _return , _mid

In [None]:
def model_batch_training(X_train,Y_train,threshold,batch_size=10000,initial_model=None):
    """Train or incrementally update a 3-class LightGBM model on thresholded targets.

    Targets are mapped to class labels without touching the caller's array:
    ``|y| < threshold`` -> 1 (flat), ``y <= -threshold`` -> 0 (down),
    ``y >= threshold`` -> 2 (up).

    The first fit (no model yet) runs 400 boosting rounds at learning rate 0.02;
    every later fit continues from the current model for 25 rounds at 0.003.

    Args:
        X_train: Feature matrix (row-sliceable with ``X_train[i:j]``).
        Y_train: Continuous targets aligned with ``X_train``.
        threshold: Magnitude below which a target counts as "flat".
        batch_size: Rows per incremental fit; ``-1`` fits on all rows at once.
        initial_model: Existing booster to continue from, or ``None``.

    Returns:
        The trained booster (``initial_model`` unchanged if there were no rows to batch).

    Raises:
        ValueError: If ``batch_size`` is neither ``-1`` nor a positive integer.
    """
    if batch_size != -1 and batch_size < 1:
        raise ValueError('batch_size must be -1 (single batch) or a positive integer')

    params = {
    'objective': 'multiclassova',  # Multi-class classification
    'metric': 'multi_logloss',  # Common metric for multi-class classification
    'num_class': 3,             # Number of classes
    'is_unbalance' : True,
    'boosting_type': 'gbdt',
    'num_leaves': 63,
    'learning_rate': 0.02,
    'feature_fraction': 1,
    'force_col_wise': True,
    'verbose' : -1
    }

    # np.where allocates a new array, so the caller's Y_train is left intact.
    labels = np.sign(np.where(np.abs(Y_train) < threshold, 0, Y_train)) + 1

    def _fit(features, batch_labels, current_model):
        # One training step: fresh fit when no model exists, otherwise a short fine-tune.
        lgb_train = lgb.Dataset(features, label = batch_labels)
        if current_model is None:
            return lgb.train(params, lgb_train, num_boost_round=400)
        fine_tune_params = dict(params, learning_rate=0.003)
        return lgb.train(fine_tune_params, lgb_train, num_boost_round=25, init_model=current_model)

    model = initial_model
    if batch_size == -1:
        return _fit(X_train, labels, model)

    for i in range(0, len(labels), batch_size):
        model = _fit(X_train[i:i+batch_size], labels[i:i+batch_size], model)
    return model

In [None]:
#######################################################################
#######################################################################
#####################   Portfolio Optimizer  ##########################
#######################################################################
#######################################################################
from pypfopt.efficient_frontier import EfficientFrontier

# --- Strategy configuration -------------------------------------------------
memory_horizon = 60        # rolling window and number of lags passed to prepare_X_Y
memory_horizon_step = 1    # row spacing between consecutive lags
forecast_horizon = 20      # look-ahead of the target, in seconds
rebalance_time = 5         # rebalance every N rows of the prediction frame
pair_currencies = ['USDCAD','EURUSD','GBPUSD']
spread = 3                 # trading cost, applied as spread/1_000_000 of the mid price
model_name = {5:"20_5",20:"60_20",60:"240_4_60",240:"960_10_240"}  # forecast horizon -> model folder suffix

# --- Data split: list the directory once, then slice train / test -----------
all_files_list = get_files_name(f'{data_directory}/{pair_currencies[0]}')
train_files_list = all_files_list[-50:-22]
test_files_list = all_files_list[-22:]

# --- Simulation state --------------------------------------------------------
indexs = []
model = {}
threshold = {}
last_features = {}
last_target = {}
df_total = pd.DataFrame()

free_balance = 1000000
equity = []  # Track equity over time
# Start flat in every traded pair (derived from pair_currencies so the two cannot drift apart).
current_positions = {currency: 0 for currency in pair_currencies}

cov_mat = cov_matrix(train_files_list,pair_currencies,3600)

# Load one pre-trained model per pair and derive its class threshold from the
# standard deviation of the training targets.
for currency in pair_currencies:
    model[currency] = lgb.Booster(model_file=f'/usr/src/app/models_{model_name[forecast_horizon]}/{currency}_model.txt')
    # Named throwaways instead of `_`/`__`, which would shadow IPython's output history.
    X_train , Y_train , _unused_return , _unused_mid = prepare_X_Y(train_files_list, [currency], currency, memory_horizon, forecast_horizon , memory_horizon_step , cboe =True)
    threshold[currency] = Y_train.std()

buy_cost_factor = 1+spread/1000000
sell_cost_factor = 1-spread/1000000

for _days in tqdm(range(0,len(test_files_list))):
    df={}
    for currency in pair_currencies:
        features , target , _unused_return , mid = prepare_X_Y(test_files_list[_days:_days+1], [currency], currency, memory_horizon, forecast_horizon ,memory_horizon_step ,cboe=True)
        # Walk-forward update: before predicting day N, fine-tune on day N-1.
        if _days>0:
            model[currency] = model_batch_training(last_features[currency],last_target[currency],threshold[currency],-1,model[currency])
        last_features[currency] = features
        last_target[currency] = target

        prob_predictions = model[currency].predict(features)
        # Heuristic expected return: class-probability difference (up minus down),
        # weighted by the squared mean of both directional probabilities, scaled
        # by threshold / mid price * 10000.
        ExcpectReturn_ = ((prob_predictions[:,0] + prob_predictions[:,2])/2)**2 * \
                (-1 * prob_predictions[:,0] + prob_predictions[:,2] ) * threshold[currency] / mid  * 10000
        df[currency] = pd.DataFrame({f'{currency}_ExpectedReturn':ExcpectReturn_,f'{currency}_midPrice':mid},index=features.index)

    # Keep only timestamps for which every pair has a prediction.
    df = pd.concat([df[curr] for curr in df],axis=1).dropna()
    for i in range(0, len(df), rebalance_time):
        current_data = df.iloc[i]
        prices = current_data[[f'{curr}_midPrice' for curr in pair_currencies]]
        expected_returns = current_data[[f'{curr}_ExpectedReturn' for curr in pair_currencies]]
        ef = EfficientFrontier(expected_returns, cov_mat, weight_bounds=(-1, 1))
        weights = ef.max_quadratic_utility()
        # Normalise by gross exposure so the absolute weights sum to one.
        total_weight = sum(abs(weight) for weight in weights.values())
        total_equity = sum(current_positions[asset] * prices[f'{asset}_midPrice'] for asset in current_positions) + free_balance
        # Weights are matched to pairs by order, not by key.
        target_positions = {asset: total_equity * weight / prices[f'{asset}_midPrice'] / total_weight for asset, weight in zip(pair_currencies,weights.values())}
        # Last rebalance of the day: go flat.
        if i+rebalance_time>=len(df):
            target_positions = {asset: 0 for asset in target_positions}
        trades = {asset: target_positions[asset] - current_positions.get(asset, 0) for asset in target_positions}
        for asset in trades:
            # Buys pay mid * (1 + cost); sells receive mid * (1 - cost).
            if trades[asset]>0:
                free_balance -= prices[f'{asset}_midPrice'] *buy_cost_factor* trades[asset]
            else :
                free_balance -= prices[f'{asset}_midPrice'] *sell_cost_factor* trades[asset]

        for asset in trades:
            current_positions[asset] = target_positions[asset]
        # Equity is recorded as valued just before this rebalance's trades.
        equity.append(total_equity)
        indexs.append(df.index[i])
equity_history = pd.Series(equity, index=indexs)

def print_in_box_with_centered_title(messages, title):
    """Print ``title`` and ``messages`` inside an ASCII box.

    The box is as wide as the longest message and is widened when the title
    would otherwise overflow it. An empty ``messages`` prints a box containing
    only the title.

    Args:
        messages: Iterable of strings, printed left-aligned one per row.
        title: String centred in the header row.
    """
    messages = list(messages)
    # The title row offers max_length + 2 characters, hence the "- 2".
    max_length = max([len(message) for message in messages] + [len(title) - 2, 0])
    border = "+" + "-" * (max_length + 4) + "+"  # Adjust for padding

    print(border)
    # Center the title within the box
    print(f"| {title.center(max_length + 2)} |")
    print(border)  # Separator after title

    # Print each message left-aligned
    for message in messages:
        print(f"| {message.ljust(max_length)}   |")
    print(border)

# --- Performance summary of the portfolio simulation -------------------------
initial_money = 1000000  # starting cash of the simulation above

start_date = equity_history.index[0]
end_date = equity_history.index[-1]
duration_days = (end_date - start_date).days
duration_years = duration_days / 365.25

# Per-calendar-day return: last equity of the day relative to the first.
daily_returns = equity_history.resample('D').apply(lambda x: (x.iloc[-1] / x.iloc[0]) - 1 if not x.empty else np.nan).dropna()
# Explicit .iloc: integer [] on a DatetimeIndex Series relies on deprecated positional fallback.
total_return = equity_history.iloc[-1] / equity_history.iloc[0] - 1
# duration_days counts calendar days, so annualise with calendar years
# (the same convention as PlayBackMarket_backtest); undefined for a zero-length span.
annual_return = (1 + total_return) ** (1 / duration_years) - 1 if duration_years > 0 else np.nan
annual_volatility = daily_returns.std() * np.sqrt(255)

# Max drawdown: i = position of the deepest trough below the running peak,
# j = position of the peak preceding it. i == 0 means equity never fell below a peak.
equity_values = equity_history.to_numpy()
i = int(np.argmax(np.maximum.accumulate(equity_values) - equity_values))
j = int(np.argmax(equity_values[:i])) if i != 0 else 0

title = "=== Trading Performance Summary ==="
messages = [
    f"Start Date : {start_date}",
    f"End Date : {end_date}",
    f"PnL: ${equity_history.iloc[-1]-initial_money:.2f}",
    f"Return: {total_return*100:.2f}%",
    f"Annual Return: {annual_return*100:.2f}%",
    f"Annual Return Volatility: {annual_volatility*100:.2f}",
    # 0.05 is the hard-coded annual risk-free rate.
    f"Sharp Ratio: {(annual_return-0.05)/annual_volatility:.2f}",
    f"DrawDown: {(equity_history.iloc[i]/equity_history.iloc[j]-1)*100:.2f}%" if i != 0 else "DrawDown: 0%",
    f"Initial Money: ${initial_money/1000000:.2f}M",
]

# Print the messages in a box with the title centered
print_in_box_with_centered_title(messages, title)
plt.figure(figsize=(10, 6))
plt.plot(equity_history, label='Equity Curve', color='blue')
plt.xticks(rotation='vertical')
if i != 0:
    plt.plot([equity_history.index[i], equity_history.index[j]], [equity_history.iloc[i], equity_history.iloc[j]], 'o-', color='Red', markersize=10, label='Max Drawdown')
plt.title('Trading Performance Overview')
plt.xlabel('Time')
plt.ylabel('Equity')
plt.grid(True, which='both', linestyle='--', linewidth=0.5)
plt.legend()
plt.tight_layout()

In [None]:
def PlayBackMarket_backtest(model, input_LOB_files , market_price_files , threshold, spread  , confidence_level=0, trading_mode=1 , position_size = 0.2 , initial_money=1000 , CBOE_market_price= False ,print_result = False):
    """Replay the market day by day and simulate a single-instrument strategy driven by ``model``.

    For every day the features are rebuilt with ``prepare_X_Y`` (using the
    globals ``input_currencies``, ``target_currency``, ``memory_horizon`` and
    ``forecast_horizon``), the model is fine-tuned on the previous day, and a
    decision is taken every ``forecast_horizon`` rows. Any open position is
    closed at the end of each day.

    Trading modes (behaviour when the model predicts "flat" or is not confident):
        1: close the position whenever confidence drops below
           ``confidence_level`` or the predicted direction is 0.
        2: never close on a predicted direction of 0.
        3: close on a predicted direction of 0 (also when confidence is low).

    Args:
        model: Booster exposing ``predict``; updated via ``model_batch_training``.
        input_LOB_files: Date tokens of the order-book files to replay.
        market_price_files: CSV price files, one per day (used only when
            ``CBOE_market_price`` is false).
        threshold: Class threshold forwarded to ``model_batch_training``.
        spread: Cost parameter; ASK = mid * (1 + spread/1e6), BID = mid / (1 + spread/1e6).
        confidence_level: Minimum top-class probability to act on a signal.
        trading_mode: 1, 2 or 3, see above.
        position_size: Fraction of ``initial_money`` committed per position.
        initial_money: Starting capital.
        CBOE_market_price: When true, execution prices come from the cboe
            order-book archive under the global ``data_directory``.
        print_result: When true, print a summary box and plot the equity curve.

    Returns:
        ``(equity_history, trades)``: equity as a time-indexed Series and the
        executed trades as a DataFrame with ``time``, ``size`` and ``price``.
    """
    # Sorted copies: do not reorder the caller's lists in place.
    input_LOB_files = sorted(input_LOB_files)
    if not CBOE_market_price:
        market_price_files = sorted(market_price_files)

    # `position` is the signed entry notional: > 0 long (contracts * entry ASK),
    # < 0 short (-contracts * entry BID), 0 flat.
    position = 0
    position_contract = 1
    pnl=0
    equity_points = []
    indexs = []
    trade_log = []
    win_trades_cnt = 0
    trades_cnt = 0
    money = initial_money
    last_features = None
    last_target = None
    backtest_duration_hour = 0

    def _close_position(timestamp, ask, bid, reverse=False):
        # Realise the PnL of the open position at the current quotes; with
        # reverse=True immediately open the opposite side (logged as one +-2 trade).
        nonlocal position, pnl, win_trades_cnt
        if position < 0:
            realized = -1 * position_contract * ask - position
            side, price = 1, ask
        else:
            realized = position_contract * bid - position
            side, price = -1, bid
        pnl += realized
        if realized > 0:
            win_trades_cnt += 1
        if reverse:
            position = side * position_contract * price
            trade_log.append({"time":timestamp,"size":2*side,"price":price})
        else:
            position = 0
            trade_log.append({"time":timestamp,"size":side,"price":price})

    def _open_position(timestamp, direction, ask, bid):
        # Enter a new position from flat: short at the bid, long at the ask.
        nonlocal position
        if direction == -1:
            position = -1 * position_contract * bid
            trade_log.append({"time":timestamp,"size":-1,"price":bid})
        else:
            position = 1 * position_contract * ask
            trade_log.append({"time":timestamp,"size": 1,"price":ask})

    for _days in tqdm(range(0,len(input_LOB_files))):
        features , target , _unused_return , _unused_mid = prepare_X_Y(input_LOB_files[_days:_days+1], input_currencies, target_currency, memory_horizon, forecast_horizon ,cboe=True)
        # Walk-forward update: fine-tune on the previous day before trading this one.
        if _days>0:
            model = model_batch_training(last_features,last_target,threshold,-1,model)
        last_features = features
        last_target = target
        if not CBOE_market_price:
            market_price_data = load_orderbook(market_price_files[_days],1,False).resample(sampling_freqency).first().ffill()
            # NOTE(review): presumably converts GMT broker timestamps to the
            # order-book files' time zone — confirm the 5-hour offset.
            market_price_data.index = market_price_data.index - pd.Timedelta(hours=5, minutes=0)
        else:
            market_price_data = load_orderbook(f'{data_directory}/{target_currency}/tks_ny_{input_LOB_files[_days]}_{target_currency}.zip',1,True).resample(sampling_freqency).first().ffill()

        start_time = max([features.index.min() , market_price_data.index.min()])
        end_time   = min([features.index.max() , market_price_data.index.max()])

        backtest_duration_hour += (end_time-start_time).total_seconds()/3600
        features = features.loc[start_time:end_time]
        market_price_data = market_price_data.loc[start_time:end_time]

        position_contract = int(initial_money * position_size / market_price_data['AP1'].iloc[0])

        # NOTE(review): prices and features are matched by row position, which
        # assumes both frames share the same gap-free time grid — confirm.
        for i in range(0,len(features)-forecast_horizon,forecast_horizon):
            mid = (market_price_data['AP1'].iloc[i] + market_price_data['BP1'].iloc[i])/2
            ASK = mid * (1+spread/1000000)
            BID = mid / (1+spread/1000000)
            money = max(initial_money + pnl,0)
            equity_points.append(money)
            indexs.append(features.index[i])
            if money == 0 :
                # Capital exhausted: stop trading for the rest of this day.
                break
            now = features.index[i]
            prob_predictions = model.predict(features.iloc[i:i+1])
            low_confidence = bool(np.max(prob_predictions, axis=1)[0] < confidence_level)
            # Class index 0/1/2 maps to direction -1/0/+1.
            predicted_direction = np.argmax(prob_predictions, axis=1)[0] - 1
            against_position = (predicted_direction == 1 and position < 0) or \
                               (predicted_direction == -1 and position > 0)

            if low_confidence:
                # Never open or reverse on a weak signal; only decide whether to close.
                if trading_mode == 1:
                    should_close = position != 0
                else:
                    should_close = against_position or \
                        (predicted_direction == 0 and position != 0 and trading_mode == 3)
                if should_close:
                    _close_position(now, ASK, BID)
            elif against_position:
                trades_cnt+=1
                _close_position(now, ASK, BID, reverse=True)
            elif predicted_direction == 0 and position != 0 and trading_mode != 2:
                _close_position(now, ASK, BID)
            elif predicted_direction != 0 and position == 0:
                trades_cnt+=1
                _open_position(now, predicted_direction, ASK, BID)

        # End of day: flatten any open position at the last available quote.
        mid = (market_price_data['AP1'].iloc[-1] + market_price_data['BP1'].iloc[-1])/2
        ASK = mid * (1+spread/1000000)
        BID = mid / (1+spread/1000000)
        if position !=0 :
            _close_position(features.index[-1], ASK, BID)
            money = max(initial_money + pnl,0)
            equity_points.append(money)
            indexs.append(features.index[-1])

    equity_history = pd.Series(equity_points, index=indexs)
    trades = pd.DataFrame(trade_log)
    if print_result and not equity_history.empty:
        def print_in_box_with_centered_title(messages, title):
            max_length = max(len(message) for message in messages)
            border = "+" + "-" * (max_length + 4) + "+"  # Adjust for padding

            print(border)
            # Center the title within the box
            title_formatted = f"| {title.center(max_length + 2)} |"
            print(title_formatted)
            print("+" + "-" * (max_length + 4) + "+")  # Separator after title

            # Print each message left-aligned
            for message in messages:
                print(f"| {message.ljust(max_length)}   |")
            print(border)

        start_date = equity_history.index[0]
        end_date = equity_history.index[-1]
        duration_days = (end_date - start_date).days

        # Explicit .iloc: integer [] on a DatetimeIndex Series relies on deprecated positional fallback.
        total_return = (equity_history.iloc[-1] / equity_history.iloc[0] - 1)
        # Annualisation is undefined for a backtest shorter than one day.
        annual_return = (1 + total_return) ** (365.25 / duration_days) - 1 if duration_days > 0 else float('nan')
        if trades.empty:
            avg_holding_time = pd.NaT
        else:
            # Time since the previous trade, taken at trades that bring the net size back to zero.
            avg_holding_time = trades['time'].diff()[trades['size'].cumsum()==0].mean()
        equity_changes = equity_history.diff()
        gains = equity_changes[equity_changes>0]
        losses = equity_changes[equity_changes<0]
        profit_factor = -1*gains.sum()/losses.sum()
        rr = -1*gains.mean()/losses.mean()
        # i = deepest trough below the running peak, j = the peak before it
        # (i == 0 means equity never fell below a previous peak).
        equity_values = equity_history.to_numpy()
        i = int(np.argmax(np.maximum.accumulate(equity_values) - equity_values))
        j = int(np.argmax(equity_values[:i])) if i != 0 else 0
        win_rate = win_trades_cnt/trades_cnt*100 if trades_cnt else 0.0

        title = "=== Trading Performance Summary ==="
        messages = [
            f"Start Date : {start_date}",
            f"End Date : {end_date}",
            f"Total Trades: {trades_cnt}",
            f"Win Rate: {win_rate:.2f}%",
            f"PnL: ${equity_history.iloc[-1]-initial_money:.2f}",
            f"Profit Factor: {profit_factor:.2f}",
            f"Return: {total_return*100:.2f}%",
            f"Annual Return: {annual_return*100:.2f}%",
            f"DrawDown: {(equity_history.iloc[i]/equity_history.iloc[j]-1)*100:.2f}%" if i != 0 else "DrawDown: 0%",
            f"Riward/Risk : {rr:.2f}",
            f"Avg Holding Time: {avg_holding_time}",
            f"Position Size: {position_size*100:.2f}% of initial money",
            f"Initial Money: ${initial_money/1000000:.2f}M",
            f"Backtest Duration: {backtest_duration_hour:.2f} hours"
        ]

        # Print the messages in a box with the title centered
        print_in_box_with_centered_title(messages, title)
        plt.figure(figsize=(10, 6))
        plt.plot(equity_history, label='Equity Curve', color='blue')
        plt.xticks(rotation='vertical')
        if i != 0:
            plt.plot([equity_history.index[i], equity_history.index[j]], [equity_history.iloc[i], equity_history.iloc[j]], 'o-', color='Red', markersize=10, label='Max Drawdown')
        plt.title('Trading Performance Overview')
        plt.xlabel('Time')
        plt.ylabel('Equity')
        plt.grid(True, which='both', linestyle='--', linewidth=0.5)
        plt.legend()
        plt.tight_layout()
    return  equity_history , trades

In [None]:
#######################################################################
#####################   Model Trainer  ################################
#######################################################################

# for pair in ['AUDCAD','AUDNZD','AUDUSD','CADJPY','CHFJPY','EURAUD','EURCAD','EURCHF','EURGBP','EURJPY','EURNZD','EURUSD','USDCAD']:
#     input_currencies = [pair]
#     target_currency = pair
#     memory_horizon = 96  # 10 second snapshot LOB for feature
#     forecast_horizon = 240  # predict next 5 second
#     train_files_list = get_files_name(f'{data_directory}/{target_currency}')[-50:-22]
#     test_files_list = get_files_name(f'{data_directory}/{target_currency}')[-22:]
#     X_train , Y_train , _ , __= prepare_X_Y(train_files_list, input_currencies, target_currency, memory_horizon, forecast_horizon , cboe =True)
#     threshold = Y_train.std() / 2
#     model = model_batch_training(X_train , Y_train ,threshold , -1)
#     model.save_model(f'/usr/src/app/models_20_5/{target_currency}_model.txt')

In [None]:
# Globals read by PlayBackMarket_backtest when it calls prepare_X_Y.
input_currencies = ['USDCAD']
target_currency = 'USDCAD'
memory_horizon = 60  # rolling window and number of lagged feature rows
memory_horizon_step = 1
forecast_horizon = 20 # look-ahead of the target, in seconds

# broker_price_files = get_files_name('/content/broker_price/EURUSD',False)
broker_price_files = None

# The portfolio cell stores one model / threshold per currency in dicts, while
# the backtest expects a single booster and a scalar: pick the traded pair's
# entry, and still accept a plain booster / scalar.
backtest_model = model[target_currency] if isinstance(model, dict) else model
backtest_threshold = threshold[target_currency] if isinstance(threshold, dict) else threshold

equity_history , trades = PlayBackMarket_backtest(backtest_model , test_files_list, broker_price_files ,backtest_threshold , \
                                                  spread = 25 , confidence_level = 0.8 , trading_mode = 3 , \
                                                  position_size = 0.2 , initial_money = 1000000 , print_result = True , CBOE_market_price = True)

In [None]:
# `model` is a per-currency dict after the portfolio cell; plot the booster of the traded pair.
lgb.plot_importance(model[target_currency] if isinstance(model, dict) else model, importance_type="gain", figsize=(7,20), title="LightGBM Feature Importance (Gain)")

In [None]:
# `model` is a per-currency dict after the portfolio cell; plot the booster of the traded pair.
lgb.plot_importance(model[target_currency] if isinstance(model, dict) else model, importance_type="split", figsize=(7, 20), title="LightGBM Feature Importance (Split)")