In [1]:
import pandas as pd
import numpy as np
import plotly.express as px

In [2]:
#Columns
exchange_contract_col = 'exchange-contract'
exchange_col = 'exchange'
contract_col = 'contract'
basis_col = 'basis'
timestamp_col = 'timestamp'
current_btc_price_col = 'current_btc_price'
#Constants
n_minutes = 5


In [3]:
#Functions
def filter_every_n_min(df,n,timestamp_col):
    df[timestamp_col] = pd.to_datetime(df[timestamp_col],unit='ms').dt.floor('T')
    time_mask = df[timestamp_col].dt.minute % n == (df[timestamp_col][0].minute % n)
    return df[time_mask]
def preprocess(df,n_min):
    df = filter_every_n_min(df,n_min,timestamp_col)
    #Add Exchange-contract column
    df[exchange_contract_col] = df[exchange_col]+df[contract_col]
    #Clean columns
    df[basis_col] = df[basis_col].replace('[\$,\(\)]', '', regex=True).astype(float)
    return df
def get_percent_change(df,timestamp_col,exchange_contract_col,target_col):
    #Get the percent change of all the Exchange Contract pairs
    percent_change_df = df.set_index([timestamp_col, exchange_contract_col])[target_col].unstack([exchange_contract_col]).pct_change()
    percent_change_df = percent_change_df.iloc[1:]
    return percent_change_df

In [4]:
df = pd.read_csv('data/whole.csv')
df = preprocess(df,n_minutes)
df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[exchange_contract_col] = df[exchange_col]+df[contract_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[basis_col] = df[basis_col].replace('[\$,\(\)]', '', regex=True).astype(float)


Unnamed: 0,exchange,contract,price,oi,basis,timestamp,current_btc_price,exchange-contract
0,BitMEX,XBTUSDTH23,"$22,891.50",$1.1m,288.59,2023-01-25 14:21:00,22591.17,BitMEXXBTUSDTH23
1,OKX,BTC-USDT-230203,"$22,639.40",$1.8m,36.49,2023-01-25 14:21:00,22591.17,OKXBTC-USDT-230203
2,Deribit,BTC-24FEB23,"$22,720.00",$15.3m,117.09,2023-01-25 14:21:00,22591.17,DeribitBTC-24FEB23
3,Binance,BTCUSD_230331,"$22,818.70",$97.4m,215.79,2023-01-25 14:21:00,22591.17,BinanceBTCUSD_230331
4,OKX,BTC-USDT-230331,"$22,799.40",$34.9m,196.49,2023-01-25 14:21:00,22591.17,OKXBTC-USDT-230331
...,...,...,...,...,...,...,...,...
286575,Kraken,FI_XBTUSD_230630,"$23,400.00",$9.2m,316.24,2023-01-24 08:26:00,23087.46,KrakenFI_XBTUSD_230630
286580,Kraken,FI_XBTUSD_230630,"$23,400.00",$9.2m,333.71,2023-01-24 08:31:00,23063.72,KrakenFI_XBTUSD_230630
286585,Kraken,FI_XBTUSD_230630,"$23,400.00",$9.2m,338.72,2023-01-24 08:36:00,23060.67,KrakenFI_XBTUSD_230630
286590,Kraken,FI_XBTUSD_230630,"$23,400.00",$9.2m,326.63,2023-01-24 08:41:00,23074.75,KrakenFI_XBTUSD_230630


In [5]:
#Get the percent change of all the Exchange Contract pairs
percent_change_df = get_percent_change(df,timestamp_col,exchange_contract_col,basis_col)
btc_price_percent_change = get_percent_change(df,timestamp_col,exchange_contract_col,current_btc_price_col)['BinanceBTCUSDT_230331']

In [6]:
def calc_correlation(df,btc_price_percent_change):
    percent_change_df = df.copy()
    correlation = percent_change_df.corrwith(btc_price_percent_change,method = "spearman")
    return correlation

In [7]:
def calc_profit_factor(df):
    trades_df = df.copy()

    #calculates the number of winning trades
    num_winning_trades = trades_df.gt(0).sum()

    #calculates the number of losing trades
    num_losing_trades = trades_df.lt(0).sum()

    profit_factor = num_winning_trades/num_losing_trades

    return profit_factor

In [8]:
from scipy.stats import spearmanr 

In [9]:
def window_predict(windowed_btc,apply_corr,pct_thresh,top_n):
    windowed_pct_change_df = percent_change_df.loc[windowed_btc.index]


    hist_windowed_pct_change_df  = windowed_pct_change_df.iloc[:-1,:] 
    hist_windowed_btc = windowed_btc[:-1]

    live_windowed_pct_change_df = windowed_pct_change_df.iloc[-1,:]
    live_windowed_btc = windowed_btc[-1]


    # correlation = calc_correlation(hist_windowed_pct_change_df,hist_windowed_btc)
    # correlation = hist_windowed_pct_change_df.corrwith(hist_windowed_btc,method = "pearson")
    correlation = hist_windowed_pct_change_df.apply(lambda x:spearmanr(x,hist_windowed_btc).correlation)
    if apply_corr:
        mul_correlation = correlation/correlation.abs()
    else:
        mul_correlation = 1

    #Correlation
    corr_win_pct_change_df = hist_windowed_pct_change_df.multiply(mul_correlation,axis=1)

    #Mask
    thresh_win_pct_change_df = corr_win_pct_change_df.where(corr_win_pct_change_df.abs()>pct_thresh)

    #Simulate Trade
    simulate_trade_df = thresh_win_pct_change_df.multiply(hist_windowed_btc,axis=0)

    #Calculate Hit rate
    hit_rate = simulate_trade_df.apply(lambda x: x.dropna().gt(0).mean(),axis=0)

    #Calculate actual profit
    actual_profit = simulate_trade_df.sum()

    #Calculate Profit Factor
    profit_factor = calc_profit_factor(simulate_trade_df)



    #Get the top columns
    top_cols = hit_rate.nlargest(top_n).index.to_list()

    #Get predictions on the live data


    predictors_df = live_windowed_pct_change_df[top_cols]

    votes = predictors_df.multiply(mul_correlation).dropna().gt(0).value_counts()
    final_prediction = 1 if votes.get(True, 0) > votes.get(False, 0) else -1

    trade = final_prediction*live_windowed_btc #win +1 or lose -1
    return 1

In [10]:
interval = 24 #Use the past hour
# btc_price_percent_change.rolling(interval).apply(window_predict)

In [11]:
# top_n = 5
# pct_thresh = 0.12
# apply_corr = True
# predictions = btc_price_percent_change.rolling(interval).apply(lambda x:window_predict(x,apply_corr,pct_thresh,top_n))

In [12]:
top_n = 7
#Loop using thresholds
pct_change_thresholds = list(np.arange(0.0,0.2,0.02))
list_apply_corr = [True]*len(pct_change_thresholds)

#Add the strategy with no correlation correction
pct_change_thresholds.insert(0,0.0)
list_apply_corr.insert(0,False)


#Strategies
strategies = [str(x) for x in pct_change_thresholds]
strategies[0] = 'No correlation correction'


In [13]:
intervals = list(range(12,12*24,12))

In [14]:
len(intervals)

23

In [15]:
(10*23)/60

3.8333333333333335

In [16]:
# intervals = [12,24,36,48,72]

final_metrics = []
for interval in intervals:
    print(interval)
    metrics = []
    for pct_thresh,apply_corr in zip(pct_change_thresholds,list_apply_corr):
        predictions = btc_price_percent_change.rolling(interval).apply(lambda x:window_predict(x,apply_corr,pct_thresh,top_n))
        hit_rate = predictions.dropna().gt(0).mean()
        actual_profit = predictions.sum()
        profit_factor = calc_profit_factor(predictions)
        metrics.append([hit_rate,actual_profit,profit_factor])
    metrics_df = pd.DataFrame(metrics,columns=['Hit Rate', 'Actual Profit','Profit Factor'])
    metrics_df['interval'] = interval
    metrics_df.index = strategies

    metrics_df.to_csv(f'results/{str(interval)}.csv')

    final_metrics.append(metrics_df)
    



12


  profit_factor = num_winning_trades/num_losing_trades
