In [1]:
import pandas as pd

In [2]:
# Path of the input CSV (relative, so it is resolved against the working directory).
file_loc = "a.csv"
# Raw input table; the cells below add and overwrite columns on this frame.
df = pd.read_csv(filepath_or_buffer=file_loc)

# Utils 

In [3]:
def mask(df, threshold):
    """
    Reduce values to their sign, blanking out everything at or below the threshold.

    Entries whose absolute value is strictly greater than ``threshold`` become
    +1.0 or -1.0 (value divided by its own absolute value); all other entries
    become NaN. The input is not modified.

    Args:
    df: pandas DataFrame or Series of numeric values
    threshold: numeric cut-off applied to the absolute values

    Returns:
    Object of the same shape as ``df`` holding +1.0 / -1.0 / NaN.
    """
    above_threshold = df.abs().gt(threshold)
    # where() keeps entries that pass the test and turns the rest into NaN
    kept = df.where(above_threshold)
    # x / |x| collapses each surviving entry to its sign
    return kept.div(kept.abs())

In [4]:
# Column names used throughout the notebook, kept in one place.

# Columns read from the input frame
timestamp_col = 'timestamp'
exchange_col = 'exchange'
contract_col = 'contract'
basis_col = 'basis'
current_btc_price_col = 'current_btc_price'

# Column created during cleaning (exchange + contract)
exchange_contract_col = 'exchange-contract'

# Clean Data

In [5]:
# Composite key per instrument: exchange name concatenated with contract name.
df[exchange_contract_col] = df[exchange_col] + df[contract_col]

# Strip currency formatting ("$", ",", "(" and ")") from basis, then convert to float.
# The pattern is a raw string: "\$", "\(" and "\)" are invalid escape sequences in a
# regular string literal and trigger a SyntaxWarning on Python 3.12+.
# NOTE(review): if parentheses mark negative amounts (accounting style), removing
# them silently drops the sign -- confirm against the raw data.
df[basis_col] = df[basis_col].replace(r'[\$,\(\)]', '', regex=True).astype(float)


# Necessary Dataframes

In [6]:

# Percent change of the basis for every exchange-contract pair.
# Pivot to one column per pair with timestamp as the row index, then take the
# row-over-row percent change. The index uses timestamp_col (not a hard-coded
# 'timestamp') so this stays in sync with the BTC price cell below.
percent_change_df = (
    df.set_index([timestamp_col, exchange_contract_col])[basis_col]
    .unstack([exchange_contract_col])
    .pct_change()
    .iloc[1:]  # the first row has no previous row, so its percent change is null
)
percent_change_df.head()

exchange-contract,BinanceBTCUSDT_230331,BinanceBTCUSD_230331,BinanceBTCUSD_230630,BitMEXXBTF23,BitMEXXBTG23,BitMEXXBTH23,BitMEXXBTM23,BitMEXXBTU23,BitMEXXBTUSDTH23,BitMEXXBTUSDTM23,...,KrakenFI_XBTUSD_230331,KrakenFI_XBTUSD_230630,OKXBTC-USD-230120,OKXBTC-USD-230127,OKXBTC-USD-230331,OKXBTC-USD-230630,OKXBTC-USDT-230120,OKXBTC-USDT-230127,OKXBTC-USDT-230331,OKXBTC-USDT-230630
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1674113580180,0.008932,-0.020552,0.003598,0.127729,-0.015171,0.021603,-0.267461,0.015884,-0.0121,0.012295,...,0.007351,0.003834,-0.484026,0.039183,0.007346,-0.008182,0.046875,-0.174105,-0.061616,0.004261
1674113640134,-0.010444,-0.012899,-0.019827,0.225557,-0.030806,0.042111,-0.268857,0.031137,-0.024494,0.024188,...,0.014533,0.007606,0.721362,0.075089,-0.040329,-0.028928,-0.571092,-0.130466,0.013423,0.00845
1674113700242,-0.002143,0.010237,0.010317,0.260664,-0.044936,0.057232,-0.519868,0.042768,-0.035498,0.033448,...,0.020288,0.010691,0.593525,0.098921,-0.001949,-0.001458,-0.384615,0.204415,0.028057,-0.046393
1674113760227,0.009771,-0.012995,-0.014791,0.007519,-0.001802,0.001969,-0.041463,0.001491,-0.001409,0.001177,...,0.000723,0.000385,-0.674944,-0.31042,0.00794,0.01614,0.720238,0.004073,-0.038588,0.000453
1674113820196,0.00101,0.017454,-0.000345,-0.006841,0.001649,-0.24738,0.03952,-0.001365,0.001289,-0.001078,...,-0.000662,-0.000352,-0.038194,-0.004351,-0.00071,0.000323,-0.019031,0.033469,0.017758,-0.000415


In [7]:
# Percent change of the BTC price per timestamp.
# After pivoting there is one current_btc_price column per exchange-contract pair;
# per the original note they all carry the same value, so a single column
# (BinanceBTCUSDT_230331) is picked as the reference series.
btc_price_percent_change = (
    df.set_index([timestamp_col, exchange_contract_col])[current_btc_price_col]
    .unstack([exchange_contract_col])
    .pct_change()['BinanceBTCUSDT_230331']
    .iloc[1:]  # drop the first row, which has no previous value to compare with
)
btc_price_percent_change.head()

timestamp
1674113580180   -0.000001
1674113640134   -0.000376
1674113700242   -0.000115
1674113760227    0.000177
1674113820196    0.000015
Name: BinanceBTCUSDT_230331, dtype: float64

# Correlation

In [8]:
def calc_correlation(df, price_percent_change):
    """
    Correlate every column of ``df`` with a reference price-change series.

    Args:
    df: pandas DataFrame of percent changes, one column per instrument
    price_percent_change: pandas Series of reference percent changes; it is
        aligned with ``df`` on the index by ``corrwith``

    Returns:
    pandas Series indexed by the columns of ``df`` with one correlation
    coefficient per column.
    """
    # Correlate against the argument that was passed in. The previous version
    # read the notebook-level global `btc_price_percent_change` instead, so the
    # parameter was silently ignored.
    return df.corrwith(price_percent_change)


In [9]:
# Correlation of each pair's basis percent change with the BTC price percent change
# (pandas' default correlation, via calc_correlation).
correlation = calc_correlation(
    df=percent_change_df,
    price_percent_change=btc_price_percent_change,
)

In [10]:
# Keep only the pairs whose |correlation| exceeds the threshold.
# What survives is the sign of the correlation (+1 / -1); everything else is NaN.
correlation_thresh = 0.2
masked_correlation = mask(df=correlation, threshold=correlation_thresh)
masked_correlation

exchange-contract
BinanceBTCUSDT_230331     1.0
BinanceBTCUSD_230331      NaN
BinanceBTCUSD_230630      NaN
BitMEXXBTF23              NaN
BitMEXXBTG23              NaN
BitMEXXBTH23              NaN
BitMEXXBTM23              NaN
BitMEXXBTU23             -1.0
BitMEXXBTUSDTH23          NaN
BitMEXXBTUSDTM23         -1.0
BitMEXXBTUSDTU23         -1.0
DeribitBTC-20JAN23        NaN
DeribitBTC-24FEB23        NaN
DeribitBTC-27JAN23        NaN
DeribitBTC-29DEC23       -1.0
DeribitBTC-29SEP23       -1.0
DeribitBTC-30JUN23       -1.0
DeribitBTC-31MAR23       -1.0
HuobiDMBTC230120          NaN
HuobiDMBTC230127          NaN
HuobiDMBTC230331          NaN
KrakenFI_XBTUSD_230127   -1.0
KrakenFI_XBTUSD_230331   -1.0
KrakenFI_XBTUSD_230630   -1.0
OKXBTC-USD-230120         NaN
OKXBTC-USD-230127         1.0
OKXBTC-USD-230331         1.0
OKXBTC-USD-230630         1.0
OKXBTC-USDT-230120        NaN
OKXBTC-USDT-230127        NaN
OKXBTC-USDT-230331        NaN
OKXBTC-USDT-230630        NaN
dtype: float64

In [11]:
# Apply the sign mask column-wise: negatively correlated pairs are flipped,
# pairs below the threshold become all-NaN.
# dropna then removes every column that contains any NaN, which includes the
# masked-out pairs.
masked_percent_change_df = percent_change_df.mul(masked_correlation).dropna(axis="columns")
masked_percent_change_df

exchange-contract,BinanceBTCUSDT_230331,BitMEXXBTU23,BitMEXXBTUSDTM23,BitMEXXBTUSDTU23,DeribitBTC-29DEC23,DeribitBTC-29SEP23,DeribitBTC-30JUN23,DeribitBTC-31MAR23,KrakenFI_XBTUSD_230127,KrakenFI_XBTUSD_230331,KrakenFI_XBTUSD_230630,OKXBTC-USD-230127,OKXBTC-USD-230331,OKXBTC-USD-230630
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1674113580180,0.008932,-0.015884,-0.012295,-0.004331,-0.003030,-0.003217,0.018355,-0.009657,-0.016917,-0.007351,-0.003834,0.039183,0.007346,-0.008182
1674113640134,-0.010444,-0.031137,-0.024188,-0.008587,-0.006016,-0.006387,0.000830,-0.019047,-0.033130,-0.014533,-0.007606,0.075089,-0.040329,-0.028928
1674113700242,-0.002143,-0.042768,-0.033448,-0.012059,-0.008469,-0.008988,0.008306,-0.026472,-0.045417,-0.020288,-0.010691,0.098921,-0.001949,-0.001458
1674113760227,0.009771,-0.001491,-0.001177,-0.000433,-0.000305,-0.000324,-0.000591,-0.000938,-0.001580,-0.000723,-0.000385,-0.310420,0.007940,0.016140
1674113820196,0.001010,0.001365,0.001078,0.000397,0.000280,0.000297,0.000542,0.000859,0.001446,0.000662,0.000352,-0.004351,-0.000710,0.000323
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1674138600136,-0.001353,0.043935,0.032431,0.015413,0.012326,0.017307,0.029885,0.039694,0.088636,0.044700,0.023525,-0.111363,-0.023942,-0.015984
1674138660030,0.058990,0.139920,0.102054,0.047663,0.037999,0.053623,0.093797,0.125854,0.296121,0.142467,0.007734,0.260474,0.059087,0.022930
1674138720170,-0.002665,-0.058212,-0.040668,-0.017909,-0.014134,-0.020275,-0.037037,-0.051518,-0.150538,-0.059448,-0.026453,-0.254335,0.013872,0.005476
1674138780049,-0.022875,0.053831,0.038241,0.017216,0.013639,0.019446,0.034949,0.047944,0.128037,0.054910,0.025219,0.500775,-0.024271,0.016065


# Hit rate 

In [12]:
def calc_hit_rate(df, price_change):
    """
    Fraction of rows in which each column of ``df`` has the same sign as ``price_change``.

    Every row of ``df`` is multiplied by the matching ``price_change`` value
    (aligned on the index). A row counts as a hit when that product is strictly
    positive. Rows where the product is zero or NaN count as misses and still
    enter the denominator.

    Args:
    df: A pandas DataFrame of signed values (e.g. masked percent changes)
    price_change: A pandas Series with the price change for each row of ``df``

    Returns:
    A pandas Series with one hit rate (between 0 and 1) per column of ``df``.
    """
    same_direction = df.mul(price_change, axis=0) > 0
    # mean of a boolean frame = share of True values per column
    return same_direction.mean()

In [13]:
# Share of timestamps where each (sign-adjusted) pair moved in the same direction as BTC.
hit_rate = calc_hit_rate(
    df=masked_percent_change_df,
    price_change=btc_price_percent_change,
)
hit_rate

exchange-contract
BinanceBTCUSDT_230331     0.672986
BitMEXXBTU23              0.829384
BitMEXXBTUSDTM23          0.838863
BitMEXXBTUSDTU23          0.838863
DeribitBTC-29DEC23        0.819905
DeribitBTC-29SEP23        0.827014
DeribitBTC-30JUN23        0.736967
DeribitBTC-31MAR23        0.706161
KrakenFI_XBTUSD_230127    0.815166
KrakenFI_XBTUSD_230331    0.827014
KrakenFI_XBTUSD_230630    0.803318
OKXBTC-USD-230127         0.447867
OKXBTC-USD-230331         0.535545
OKXBTC-USD-230630         0.651659
dtype: float64

# Actual Profit

In [14]:
def calc_actual_profit(df, btc_price_percent_change, actual_profit_thresh=0):
    """
    Simulate one trade per row and column and return the per-trade results.

    Each value of ``df`` is reduced to a position of +1 or -1 (or NaN when its
    absolute value does not exceed ``actual_profit_thresh``) via ``mask``. The
    position is then multiplied by the BTC price change of the same row: same
    sign means profit, different sign means loss.

    Args:
    df: A pandas DataFrame of signals, one column per instrument
    btc_price_percent_change: A pandas Series with the BTC price change for each row of ``df``
    actual_profit_thresh: Numeric threshold; values between -thresh and thresh are masked out (default 0)

    Returns:
    A pandas DataFrame of per-row trade results with the same shape as ``df``.
    The results are NOT summed here; callers take ``.sum()`` to get the total
    profit per column.
    """
    # +1 / -1 position per cell, NaN where the signal is inside the threshold band
    positions = mask(df, threshold=actual_profit_thresh)
    # Row-wise multiplication by the BTC move gives the result of each simulated trade
    return positions.multiply(btc_price_percent_change, axis=0)
    

In [15]:
# Per-row simulated trades for every retained pair (default threshold of 0).
trades_df = calc_actual_profit(
    df=masked_percent_change_df,
    btc_price_percent_change=btc_price_percent_change,
)
# Total profit per pair = sum of its individual trade results.
actual_profit = trades_df.sum()
actual_profit

exchange-contract
BinanceBTCUSDT_230331     0.062280
BitMEXXBTU23              0.100568
BitMEXXBTUSDTM23          0.104209
BitMEXXBTUSDTU23          0.104209
DeribitBTC-29DEC23        0.097609
DeribitBTC-29SEP23        0.097455
DeribitBTC-30JUN23        0.071780
DeribitBTC-31MAR23        0.058887
KrakenFI_XBTUSD_230127    0.095808
KrakenFI_XBTUSD_230331    0.098495
KrakenFI_XBTUSD_230630    0.086493
OKXBTC-USD-230127         0.004692
OKXBTC-USD-230331         0.020890
OKXBTC-USD-230630         0.056590
dtype: float64

# Profit Ratio

In [16]:
def calc_profit_ratio(trades_df):
    """
    Ratio of the average winning trade to the average losing trade.

    A trade is a win when its value is greater than 0 and a loss when it is
    less than 0. For a DataFrame the computation is done column by column.

    Args:
    trades_df: A pandas DataFrame containing the trades.

    Returns:
    A pandas Series with one profit ratio per column of ``trades_df``:
    (total gain / number of wins) / (|total loss| / number of losses).
    """
    is_win = trades_df.gt(0)
    is_loss = trades_df.lt(0)

    # Multiplying by a boolean mask zeroes out the trades that are not selected.
    gains = trades_df.mul(is_win).sum()
    # Everything that is not a win; zeros contribute nothing to the sum.
    losses = abs(trades_df.mul(~is_win).sum())

    average_gain = gains / is_win.sum()
    average_loss = losses / is_loss.sum()
    return average_gain / average_loss

In [17]:
def calc_profit_factor(df):
    """
    Number of winning trades divided by number of losing trades, per column.

    A trade is a win when its value is greater than 0 and a loss when it is
    less than 0; zeros and NaN count as neither.

    NOTE(review): "profit factor" often denotes gross profit / gross loss; this
    function computes a ratio of trade counts -- confirm the intended metric.

    Args:
    df: A pandas DataFrame containing the trades.

    Returns:
    A pandas Series with one win/loss count ratio per column of ``df``.
    """
    wins = (df > 0).sum()
    losses = (df < 0).sum()
    return wins / losses

In [19]:
# TODO: profit factor = number of winning trades / number of losing trades
# TODO: use the distribution of the signals to choose the threshold

NameError: name 'num_of_winning' is not defined

In [22]:
# Average winning trade relative to average losing trade, per pair.
profit_ratio = calc_profit_ratio(trades_df=trades_df)
profit_ratio

exchange-contract
BinanceBTCUSDT_230331     1.543642
BitMEXXBTU23              2.479029
BitMEXXBTUSDTM23          2.950742
BitMEXXBTUSDTU23          2.950742
DeribitBTC-29DEC23        2.243006
DeribitBTC-29SEP23        2.117636
DeribitBTC-30JUN23        1.440678
DeribitBTC-31MAR23        1.222487
KrakenFI_XBTUSD_230127    2.114583
KrakenFI_XBTUSD_230331    2.238510
KrakenFI_XBTUSD_230630    1.549460
OKXBTC-USD-230127         1.328736
OKXBTC-USD-230331         1.232179
OKXBTC-USD-230630         1.501603
dtype: float64

In [18]:
# Count of winning trades relative to count of losing trades, per pair.
profit_factor = calc_profit_factor(df=trades_df)
profit_factor

exchange-contract
BinanceBTCUSDT_230331     2.088235
BitMEXXBTU23              4.929577
BitMEXXBTUSDTM23          5.283582
BitMEXXBTUSDTU23          5.283582
DeribitBTC-29DEC23        4.613333
DeribitBTC-29SEP23        4.847222
DeribitBTC-30JUN23        2.827273
DeribitBTC-31MAR23        2.442623
KrakenFI_XBTUSD_230127    4.467532
KrakenFI_XBTUSD_230331    4.847222
KrakenFI_XBTUSD_230630    4.134146
OKXBTC-USD-230127         0.814655
OKXBTC-USD-230331         1.158974
OKXBTC-USD-230630         1.883562
dtype: float64

In [32]:
# One summary table: a column per metric, a row per exchange-contract pair.
# The mapping keys become the column labels. Pairs that were dropped by the
# correlation mask have a correlation value only, so their other metrics are NaN.
final_df = pd.concat(
    {
        "correlation": correlation,
        "hit_rate": hit_rate,
        "actual_profit": actual_profit,
        "profit_ratio": profit_ratio,
        "profit_factor": profit_factor,
    },
    axis=1,
)
# Transposed for display: metrics as rows, pairs as columns.
final_df.T

exchange-contract,BinanceBTCUSDT_230331,BinanceBTCUSD_230331,BinanceBTCUSD_230630,BitMEXXBTF23,BitMEXXBTG23,BitMEXXBTH23,BitMEXXBTM23,BitMEXXBTU23,BitMEXXBTUSDTH23,BitMEXXBTUSDTM23,...,KrakenFI_XBTUSD_230331,KrakenFI_XBTUSD_230630,OKXBTC-USD-230120,OKXBTC-USD-230127,OKXBTC-USD-230331,OKXBTC-USD-230630,OKXBTC-USDT-230120,OKXBTC-USDT-230127,OKXBTC-USDT-230331,OKXBTC-USDT-230630
correlation,0.542855,0.107161,-0.090558,-0.094357,-0.079787,-0.173745,-0.055976,-0.482838,0.100177,-0.86441,...,-0.707465,-0.221631,0.031187,0.310264,0.256955,0.460762,-0.108999,0.106827,0.116602,-0.014804
hit_rate,0.672986,,,,,,,0.829384,,0.838863,...,0.827014,0.803318,,0.447867,0.535545,0.651659,,,,
actual_profit,0.06228,,,,,,,0.100568,,0.104209,...,0.098495,0.086493,,0.004692,0.02089,0.05659,,,,
profit_ratio,1.543642,,,,,,,2.479029,,2.950742,...,2.23851,1.54946,,1.328736,1.232179,1.501603,,,,
profit_factor,2.088235,,,,,,,4.929577,,5.283582,...,4.847222,4.134146,,0.814655,1.158974,1.883562,,,,
