In [1]:
import os
import warnings

import numpy as np
import pandas as pd
import plotly.express as px
warnings.filterwarnings("ignore")

In [2]:
# Column names referenced throughout the notebook
exchange_contract_col = 'exchange-contract'  # derived key built in preprocess(): exchange + contract
exchange_col = 'exchange'
contract_col = 'contract'
basis_col = 'basis'
timestamp_col = 'timestamp'
current_btc_price_col = 'current_btc_price'
# Constants
n_minutes = 5  # passed to preprocess(): only rows whose minute-of-hour is a multiple of this are kept


In [3]:
#Functions
def filter_every_n_min(input_df,n,timestamp_col):
    """Keep only the rows whose timestamp lands on a minute that is a multiple of ``n``.

    The timestamp column is parsed as epoch milliseconds and floored to the
    minute before filtering. The filter is anchored at minute 0 of every hour
    (``minute % n == 0``), not at the first row's minute, so values of ``n``
    that do not divide 60 give uneven spacing across hour boundaries.

    Parameters
    ----------
    input_df : pandas.DataFrame
        Source rows. Not modified.
    n : int
        Sampling step in minutes.
    timestamp_col : str
        Name of the column holding epoch-millisecond timestamps.

    Returns
    -------
    pandas.DataFrame
        An independent copy of the matching rows, with ``timestamp_col``
        converted to minute-floored datetimes.
    """
    df = input_df.copy()
    # 'min' is the supported minute alias; the legacy 'T' alias is deprecated in pandas >= 2.2.
    df[timestamp_col] = pd.to_datetime(df[timestamp_col],unit='ms').dt.floor('min')
    time_mask = df[timestamp_col].dt.minute % n == 0
    # Explicit copy so callers can add columns to the result without SettingWithCopyWarning.
    return df[time_mask].copy()
def preprocess(df,n_min):
    """Down-sample the raw frame and normalise the columns used downstream.

    Steps:
      1. keep one row per ``n_min`` minutes via ``filter_every_n_min``;
      2. add the combined exchange+contract key column;
      3. strip ``$``, ``,``, ``(`` and ``)`` from the basis column and cast it to float.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw rows containing the notebook's timestamp, exchange, contract and basis columns.
    n_min : int
        Sampling step in minutes, forwarded to ``filter_every_n_min``.

    Returns
    -------
    pandas.DataFrame
        A new, filtered frame with the extra key column and a float basis column.
    """
    # Work on an explicit copy so the column assignments below never hit a view of the input.
    df = filter_every_n_min(df,n_min,timestamp_col).copy()
    #Add Exchange-contract column
    df[exchange_contract_col] = df[exchange_col]+df[contract_col]
    #Clean columns
    # Raw string: '\$' and '\(' are invalid escapes in a normal literal (SyntaxWarning on Python 3.12+).
    # NOTE(review): parentheses are simply removed; if the source uses "(x)" accounting
    # notation for negatives, the sign is lost here -- confirm against the raw data.
    df[basis_col] = df[basis_col].replace(r'[\$,\(\)]', '', regex=True).astype(float)
    return df
def get_percent_change(df,timestamp_col,exchange_contract_col,target_col):
    """Return the row-over-row percent change of ``target_col`` per exchange-contract pair.

    The long frame is reshaped to wide form (one row per timestamp, one column
    per exchange-contract value) and ``pct_change`` is applied down each column.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format rows; each (timestamp, exchange-contract) pair must be unique
        for the reshape to succeed.
    timestamp_col, exchange_contract_col, target_col : str
        Column names for the row key, the column key and the values.

    Returns
    -------
    pandas.DataFrame
        Wide frame of percent changes, without the first timestamp row.
    """
    keyed_values = df.set_index([timestamp_col, exchange_contract_col])[target_col]
    wide_values = keyed_values.unstack([exchange_contract_col])
    changes = wide_values.pct_change()
    # The first row has no predecessor, so it is all-NaN; drop it.
    return changes.iloc[1:]

In [4]:
# Load the raw CSV, drop exact duplicate rows, then down-sample and clean it.
df = preprocess(pd.read_csv('data/whole.csv').drop_duplicates(), n_minutes)
df

Unnamed: 0,exchange,contract,price,oi,basis,timestamp,current_btc_price,exchange-contract
83,OKX,BTC-USDT-230203,"$22,585.90",$1.5m,31.28,2023-01-25 02:25:00,22553.33,OKXBTC-USDT-230203
86,OKX,BTC-USDT-230203,"$22,620.90",$1.5m,43.78,2023-01-25 02:30:00,22577.36,OKXBTC-USDT-230203
91,OKX,BTC-USDT-230203,"$22,589.30",$1.5m,29.00,2023-01-25 02:35:00,22555.35,OKXBTC-USDT-230203
96,OKX,BTC-USDT-230203,"$22,615.80",$1.5m,38.15,2023-01-25 02:40:00,22581.96,OKXBTC-USDT-230203
101,OKX,BTC-USDT-230203,"$22,617.40",$1.5m,35.43,2023-01-25 02:45:00,22580.23,OKXBTC-USDT-230203
...,...,...,...,...,...,...,...,...
286579,Kraken,FI_XBTUSD_230630,"$23,400.00",$9.2m,323.36,2023-01-24 08:30:00,23078.39,KrakenFI_XBTUSD_230630
286584,Kraken,FI_XBTUSD_230630,"$23,400.00",$9.2m,341.43,2023-01-24 08:35:00,23055.56,KrakenFI_XBTUSD_230630
286589,Kraken,FI_XBTUSD_230630,"$23,400.00",$9.2m,334.88,2023-01-24 08:40:00,23062.76,KrakenFI_XBTUSD_230630
286594,Kraken,FI_XBTUSD_230630,"$23,400.00",$9.2m,320.08,2023-01-24 08:45:00,23077.79,KrakenFI_XBTUSD_230630


In [5]:
# Percent change of the basis for every exchange-contract pair (wide frame).
percent_change_df = get_percent_change(
    df,
    timestamp_col=timestamp_col,
    exchange_contract_col=exchange_contract_col,
    target_col=basis_col,
)
# Percent change of the BTC price, read from a single exchange-contract column.
# NOTE(review): presumably current_btc_price is the same for every pair at a given
# timestamp, so any one column would do -- confirm against the data.
btc_price_percent_change = get_percent_change(
    df,
    timestamp_col=timestamp_col,
    exchange_contract_col=exchange_contract_col,
    target_col=current_btc_price_col,
)['BinanceBTCUSDT_230331']

In [6]:
def calc_correlation(df,btc_price_percent_change):
    """Spearman rank correlation of every column of ``df`` with the BTC series.

    Parameters
    ----------
    df : pandas.DataFrame
        One column per series to correlate. Not modified.
    btc_price_percent_change : pandas.Series
        Reference series, aligned to ``df`` by index.

    Returns
    -------
    pandas.Series
        One correlation value per column of ``df``.
    """
    return df.copy().corrwith(btc_price_percent_change, method="spearman")

In [7]:
def calc_profit_factor(df):
    """Ratio of the number of winning trades to the number of losing trades.

    Despite the name, this is a ratio of *counts* (values > 0 over values < 0),
    not gross profit over gross loss. Zeros and NaNs count as neither.

    Parameters
    ----------
    df : pandas.Series or pandas.DataFrame
        Trade results; positive means win, negative means loss.

    Returns
    -------
    scalar or pandas.Series
        A scalar for Series input, one value per column for DataFrame input.
        With zero losing trades the division yields inf (or NaN when there are
        no winners either).
    """
    wins = (df > 0).sum()
    losses = (df < 0).sum()
    return wins / losses
def calc_weighted_average(df):
    """Share of total absolute movement that came from winning trades.

    Computes ``sum(positive values) / (sum(positive values) + |sum(negative values)|)``.

    Parameters
    ----------
    df : pandas.Series or pandas.DataFrame
        Trade results; positive means profit, negative means loss.

    Returns
    -------
    scalar or pandas.Series
        A scalar for Series input, one value per column for DataFrame input.
    """
    # Sum of all profitable trades
    gross_profit = df[df > 0].sum()
    # Magnitude of the sum of all losing trades
    gross_loss = abs(df[df < 0].sum())
    return gross_profit / (gross_profit + gross_loss)

In [8]:
from scipy.stats import spearmanr 

In [9]:
def window_predict(windowed_btc,apply_corr,pct_thresh,top_n,pct_change_df=None):
    """Predict the direction of the last BTC move in a window and return the signed result.

    All rows of the window except the last are treated as history. Each
    exchange-contract column is scored by how often its (optionally
    sign-corrected, thresholded) percent change had the same sign as the
    *next* BTC percent change. The ``top_n`` best-scoring columns then vote,
    using their most recent historical value, on the direction of the final
    BTC move in the window.

    Parameters
    ----------
    windowed_btc : pandas.Series
        One rolling window of BTC percent changes, as passed by
        ``Series.rolling(...).apply`` (index labels must exist in ``pct_change_df``).
    apply_corr : bool
        If True, flip each column by the sign of its Pearson correlation with
        the historical BTC changes before scoring and voting.
    pct_thresh : float
        Historical moves with absolute value not above this are ignored when scoring.
    top_n : int
        Number of best-scoring columns allowed to vote.
    pct_change_df : pandas.DataFrame, optional
        Per-contract percent changes. Defaults to the notebook-level
        ``percent_change_df`` so existing calls keep working.

    Returns
    -------
    float
        ``+x`` if the vote matched the sign of the last BTC change ``x``,
        ``-x`` otherwise (ties and empty votes predict "down").
    """
    if pct_change_df is None:
        # Backward-compatible default: the frame built at notebook level.
        pct_change_df = percent_change_df
    windowed_pct_change_df = pct_change_df.loc[windowed_btc.index]

    # Everything but the last row is history; the last BTC value is the outcome to predict.
    hist_windowed_pct_change_df = windowed_pct_change_df.iloc[:-1,:]
    # .iloc keeps the access positional; plain [-1] on a non-integer index relies on a
    # deprecated fallback in pandas 2.x.
    hist_windowed_btc = windowed_btc.iloc[:-1]
    live_windowed_btc = windowed_btc.iloc[-1]

    if apply_corr:
        correlation = hist_windowed_pct_change_df.corrwith(hist_windowed_btc,method = "pearson")
        # Sign of the correlation (+1 / -1). A zero or undefined correlation gives NaN,
        # which removes that column from scoring and voting below.
        mul_correlation = correlation/correlation.abs()
    else:
        mul_correlation = 1

    #Correlation
    corr_win_pct_change_df = hist_windowed_pct_change_df.multiply(mul_correlation,axis=1)

    #Mask
    thresh_win_pct_change_df = corr_win_pct_change_df.where(corr_win_pct_change_df.abs()>pct_thresh)

    #Simulate Trade: pair each signal with the BTC change of the following row
    simulate_trade_df = thresh_win_pct_change_df.multiply(hist_windowed_btc.shift(-1),axis=0)

    #Calculate Hit rate (fraction of non-NaN simulated trades that were positive)
    hit_rate = simulate_trade_df.apply(lambda x: x.dropna().gt(0).mean(),axis=0)

    #Get the top columns
    top_cols = hit_rate.nlargest(top_n).index.to_list()

    #Get predictions from the most recent historical row of the selected columns
    predictors_df = hist_windowed_pct_change_df.iloc[-1][top_cols]

    votes = predictors_df.multiply(mul_correlation).dropna().gt(0).value_counts()
    final_prediction = 1 if votes.get(True, 0) > votes.get(False, 0) else -1

    return final_prediction*live_windowed_btc #win +1 or lose -1

In [10]:
# interval = 24 #Use the past hour
# btc_price_percent_change.rolling(interval).apply(window_predict)

In [11]:
# top_n = 5
# pct_thresh = 0.12
# apply_corr = True
# predictions = btc_price_percent_change.rolling(interval).apply(lambda x:window_predict(x,apply_corr,pct_thresh,top_n))

In [12]:
# Number of best-scoring exchange-contract columns that vote in window_predict
top_n = 9

# Thresholds to sweep. The leading 0.0 is the baseline strategy that runs
# without correlation correction; the rest run with it.
# pct_change_thresholds = [0.08]
pct_change_thresholds = [0.0, *np.arange(0.0,0.1,0.02)]

# One flag per threshold: False for the baseline, True for every swept threshold
list_apply_corr = [False] + [True]*(len(pct_change_thresholds)-1)

# Human-readable strategy labels, one per (threshold, flag) pair
strategies = ['No correlation correction'] + [str(x) for x in pct_change_thresholds[1:]]


In [13]:
# intervals = list(range(12,12*24,12))
# intervals = [36]

In [14]:
# Rolling-window lengths to evaluate, in rows of the down-sampled series
intervals = [6,12,24,36,48]

# DataFrame.to_csv does not create missing directories, so make sure the
# output folder exists before the (slow) sweep starts.
os.makedirs('results', exist_ok=True)

final_metrics = []
for interval in intervals:
    print(interval)
    metrics = []
    # One strategy per (threshold, correlation-correction flag) pair
    for pct_thresh,apply_corr in zip(pct_change_thresholds,list_apply_corr):
        print(pct_thresh)
        # Each rolling window yields one signed trade result (see window_predict)
        predictions = btc_price_percent_change.rolling(interval).apply(lambda x:window_predict(x,apply_corr,pct_thresh,top_n))
        hit_rate = predictions.dropna().gt(0).mean()
        actual_profit = predictions.sum()
        profit_factor = calc_profit_factor(predictions)
        weighted_average = calc_weighted_average(predictions)
        metrics.append([hit_rate,actual_profit,profit_factor,weighted_average])
    metrics_df = pd.DataFrame(metrics,columns=['Hit Rate', 'Actual Profit','Profit Factor','Weighted Average'])
    metrics_df['interval'] = interval
    metrics_df.index = strategies

    # Per-interval results are also written to disk
    metrics_df.to_csv(f'results/{interval}.csv')

    final_metrics.append(metrics_df)



6
0.0
0.0
0.02
0.04
0.06
0.08
12
0.0
0.0
0.02
0.04
0.06
0.08
24
0.0
0.0
0.02
0.04
0.06
0.08
36
0.0
0.0
0.02
0.04
0.06
0.08
48
0.0
0.0
0.02
0.04
0.06
0.08


In [15]:
# Stack the per-interval metric tables into one frame (the strategy labels stay as the index)
final_df = pd.concat(final_metrics)

In [16]:
# Display the combined metrics table
final_df

Unnamed: 0,Hit Rate,Actual Profit,Profit Factor,Weighted Average,interval
No correlation correction,0.491958,-0.040602,0.968341,0.488688,6
0.0,0.47421,-0.070826,0.901899,0.480267,6
0.02,0.477537,-0.060742,0.914013,0.483076,6
0.04,0.470882,-0.044358,0.889937,0.487641,6
0.06,0.478647,0.0184,0.918085,0.505127,6
0.08,0.478647,-0.005528,0.918085,0.49846,6
No correlation correction,0.478575,-0.088704,0.917823,0.475235,12
0.0,0.488592,-0.114693,0.955386,0.467979,12
0.02,0.479688,-0.102099,0.921925,0.471495,12
0.04,0.483027,-0.083212,0.934338,0.476768,12


In [17]:
# Persist the combined metrics; the strategy labels are written as the unnamed index column
final_df.to_csv('final.csv')

In [18]:
# Move the strategy labels from the index into a "Strategy" column for plotting.
# Guarded so the cell can be re-run: resetting the index a second time would raise
# ValueError because the "Strategy" column already exists.
if "Strategy" not in final_df.columns:
    final_df = final_df.rename_axis("Strategy").reset_index()

In [19]:
# Hit rate per strategy, with one animation frame per rolling interval
px.bar(
    data_frame=final_df,
    x='Strategy',
    y='Hit Rate',
    animation_frame='interval',
)

In [20]:
# Summed trade result per strategy, with one animation frame per rolling interval
px.bar(
    data_frame=final_df,
    x='Strategy',
    y='Actual Profit',
    animation_frame='interval',
)

In [21]:
# Win/loss count ratio per strategy, with one animation frame per rolling interval
px.bar(
    data_frame=final_df,
    x='Strategy',
    y='Profit Factor',
    animation_frame='interval',
)