In [1]:
import pickle
import pandas as pd

In [2]:
import quantlib.general_utils as general_utils
import numpy as np

from sklearn.ensemble import RandomForestClassifier
import pickle

from sklearn.model_selection import train_test_split

In [3]:
file = "./data/oan_ohlcv.xlsx"
historical_data = pd.read_excel(file)

strat_df = pd.read_excel("./backtests/oan_LBMOM.xlsx")

In [5]:
strat_df

Unnamed: 0,date,capital,strat scalar,HK33_HKD units,HK33_HKD w,FR40_EUR units,FR40_EUR w,JP225_USD units,JP225_USD w,CN50_USD units,...,USB30Y_USD w,IN50_USD units,IN50_USD w,nominal,leverage,daily pnl,nominal ret,capital ret,cum ret,drawdown
0,2022-05-24,10000.000000,2,0,0.0,0.000000,0.000000,0.000000,0.000000,0,...,0,0.843979,1,13601.985867,1.360199,,,,,
1,2022-05-25,10137.146570,2,0,0.0,0.000000,0.000000,0.000000,0.000000,0,...,0,0.781652,1,12724.511221,1.255236,137.146570,0.010083,0.013715,1.013715,0.000000
2,2022-05-26,10268.698585,2,0,0.0,0.000000,0.000000,0.000000,0.000000,0,...,0,0.669054,1,11004.135072,1.071619,131.552014,0.010338,0.012977,1.026870,0.000000
3,2022-05-29,10402.977762,2,0,0.0,0.000000,0.000000,0.000000,0.000000,0,...,0,0.661160,1,11006.991454,1.058062,134.279177,0.012203,0.013077,1.040298,0.000000
4,2022-05-30,10258.844885,2,0,0.0,0.000000,0.000000,0.000000,0.000000,0,...,0,0.670146,1,11010.499556,1.073269,-144.132877,-0.013095,-0.013855,1.025884,-0.013855
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
88,2023-04-23,10076.497462,2,0,0.0,0.048397,0.060131,0.031831,0.134622,0,...,0,0.000000,0,6742.629421,0.669144,-1.753650,-0.000373,-0.000174,1.007650,-0.039148
89,2023-04-24,10003.452659,2,0,0.0,0.048956,0.066213,0.031365,0.145938,0,...,0,0.000000,0,6078.936173,0.607684,-73.044803,-0.010833,-0.007249,1.000345,-0.046113
90,2023-04-25,9983.978827,2,0,0.0,0.048895,0.066663,0.031409,0.146811,0,...,0,0.000000,0,6026.162120,0.603583,-19.473832,-0.003203,-0.001947,0.998398,-0.047970
91,2023-04-26,10068.267708,2,0,0.0,0.048742,0.081803,0.029526,0.170412,0,...,0,0.000000,0,4952.300041,0.491872,84.288881,0.013987,0.008442,1.006827,-0.039933


In [4]:
def unit_val_change(from_prod, val_change, historical_data, date):
    is_denominated = len(from_prod.split("_")) == 2
    if not is_denominated:
        return val_change #assume USD denominated, e.g. AAPL
    elif is_denominated and from_prod.split("_")[1] == "USD":
        return val_change #USD denominated, e.g. AAPL_USD, EUR_USD
    else:
        #e.g. HK33_HKD, USD_JPY (X_Y)
        #take delta price * (Y_USD) = price change in USD terms
        return val_change * historical_data.loc[date, "{}_USD close".format(from_prod.split("_")[1])]

#how much is 1 contract `worth`?
def unit_dollar_value(from_prod, historical_data, date):
    is_denominated = len(from_prod.split("_")) == 2
    if not is_denominated:
        return historical_data.loc[date, "{} close".format(from_prod)] #e.g. AAPL units is worth the price of 1 AAPL unit
    if is_denominated and from_prod.split("_")[0] == "USD":
        return 1 #e.g. USD_JPY unit is worth 1 USD!
    if is_denominated and not from_prod.split("_")[0] == "USD":
        #e.g.HK33_HKD, EUR_USD, (X_Y)
        #then you want to take the price change in the denominated currency, which is unit_price * Y_USD
        unit_price = historical_data.loc[date, "{} close".format(from_prod)]
        fx_inst = "{}_{}".format(from_prod.split("_")[1], "USD")
        fx_quote = 1 if fx_inst == "USD_USD" else historical_data.loc[date, "{} close".format(fx_inst)]
        return unit_price * fx_quote

#we set the leverage cap for 2 reasons
#1. Prevent relative allocations from throwing off statistical relevance of performance results (reducing variance)
#2. Meet margin requirements by the brokerage specification, where relevant
def set_leverage_cap(portfolio_df, instruments, date, idx, nominal_tot, leverage_cap, historical_data):
    leverage = nominal_tot / portfolio_df.loc[idx, "capital"]
    if leverage > leverage_cap:
        new_nominals = 0
        leverage_scalar = leverage_cap / leverage
        for inst in instruments:
            newpos = portfolio_df.loc[idx, "{} units".format(inst)] * leverage_scalar
            portfolio_df.loc[idx, "{} units".format(inst)] = newpos
            if newpos != 0:
                new_nominals += abs(newpos * unit_dollar_value(inst, historical_data, date))
        return new_nominals
    else:
        return nominal_tot

#get some statistics from the portfolio df
def kpis(df):
    portfolio_df = df.copy()
    portfolio_df["cum ret"] = (1 + portfolio_df["capital ret"]).cumprod()
    portfolio_df["drawdown"] = portfolio_df["cum ret"] / portfolio_df["cum ret"].cummax() - 1
    sharpe = portfolio_df["capital ret"].mean() / portfolio_df["capital ret"].std() * np.sqrt(253)
    drawdown_max = portfolio_df["drawdown"].min() * 100
    volatility = portfolio_df["capital ret"].std() * np.sqrt(253) * 100 #annualised percent vol
    return portfolio_df, sharpe, drawdown_max, volatility


def classifier_data_transform(historical_data, strat_df):
    df_ = pd.concat([historical_data, strat_df], axis=1)
    X_process = df_[historical_data.columns]
    X_process = X_process.replace([np.nan, np.inf, -np.inf], 0)
    X =  X_process.values
    #1 means negative pnl
    y = (df_['daily pnl'].tail(len(X)) < 0).astype(int).values
    return X, y


def run_strategy_classifier(historical_data, strat_df, run_live_classifier):
    # date already index
    #hist_data = historical_data.set_index('date')
    #strat_data = strat_df.set_index('date')
    filename = 'classifier_model.sav'
    if run_live_classifier:
        # full train
        full_train_length = len(strat_df)
        X, y = classifier_data_transform(historical_data.head(full_train_length-1), strat_df.head(full_train_length-1))
        #retrain classifier
        clf = pickle.load(open(filename, 'rb'))
        # new classifier
        #clf = RandomForestClassifier(random_state=0)
        clf.fit(X, y)
        pickle.dump(clf, open(filename, 'wb'))
    if run_live_classifier == False:
        # only load pre-trained
        clf = pickle.load(open(filename, 'rb'))

    
    X, y = classifier_data_transform(historical_data.tail(1), strat_df.tail(1))
    
    # if RF thinks the strategy will lose money for the latest data gathered
    if clf.predict(X[0].reshape(1,-1)) == [0]:
        # If clf predicts 0, then clf thinks we WON'T lose money
        return True
    else:
        return False

In [5]:
run_strategy_classifier(historical_data, strat_df, False)

TypeError: float() argument must be a string or a number, not 'Timestamp'

False

NameError: name 'X' is not defined

Unnamed: 0_level_0,capital,strat scalar,HK33_HKD units,HK33_HKD w,FR40_EUR units,FR40_EUR w,JP225_USD units,JP225_USD w,CN50_USD units,CN50_USD w,...,USB30Y_USD w,IN50_USD units,IN50_USD w,nominal,leverage,daily pnl,nominal ret,capital ret,cum ret,drawdown
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-04-26,10068.267708,2,0,0.0,0.048742,0.081803,0.029526,0.170412,0,0,...,0,0.0,0,4952.300041,0.491872,84.288881,0.013987,0.008442,1.006827,-0.039933
2023-04-27,10089.862979,2,0,0.0,0.049129,0.081532,0.031308,0.181807,0,0,...,0,0.0,0,4979.374436,0.493503,21.595271,0.004361,0.002145,1.008986,-0.037873
