In [1]:
import pandas as pd
import numpy as np
import random

import quantlib
import quantlib.indicators_cal as indicators_cal
import quantlib.diagnostics_utils as diagnostics_utils
from quantlib.crypto_data_utils import get_symbols_from_df

import warnings
warnings.filterwarnings('ignore')

In [2]:
origin_df = pd.read_excel("../crypto_historical_4h.xlsx", engine="openpyxl", index_col='open_time')

In [3]:
"""
In this notebook, we want to generalize the framework to explore the factor effect in crypto trading

Steps to do this task:
1. Read the crypto data, whatever the format, we still might change the format later
2. Filter data at the very beginning could be not appropriate, because there are always listings and unlistings
3. Take the index: datetime as a important thing to track trades
4. We should have a function to calculate factor, for example:
    def get_momentum(origin_df):
        return {token:{
            "score": score,
            "rank": rank
        }}
5. Before making every trade, calculate the sum of scores
6. Choose symbols to trade accordingly

"""
instruments = get_symbols_from_df(origin_df)
instruments

['BTCUSDT',
 'ETHUSDT',
 'BCHUSDT',
 'XRPUSDT',
 'EOSUSDT',
 'LTCUSDT',
 'TRXUSDT',
 'ETCUSDT',
 'LINKUSDT',
 'XLMUSDT',
 'ADAUSDT',
 'XMRUSDT',
 'DASHUSDT',
 'ZECUSDT',
 'XTZUSDT',
 'ATOMUSDT',
 'ONTUSDT',
 'IOTAUSDT',
 'BATUSDT',
 'VETUSDT',
 'NEOUSDT',
 'QTUMUSDT',
 'IOSTUSDT',
 'THETAUSDT',
 'ALGOUSDT',
 'ZILUSDT',
 'KNCUSDT',
 'ZRXUSDT',
 'COMPUSDT',
 'OMGUSDT',
 'DOGEUSDT',
 'SXPUSDT',
 'KAVAUSDT',
 'BANDUSDT',
 'RLCUSDT',
 'WAVESUSDT',
 'MKRUSDT',
 'SNXUSDT',
 'DOTUSDT',
 'DEFIUSDT',
 'YFIUSDT',
 'BALUSDT',
 'CRVUSDT',
 'RUNEUSDT',
 'SUSHIUSDT',
 'SRMUSDT',
 'EGLDUSDT',
 'SOLUSDT',
 'ICXUSDT',
 'STORJUSDT',
 'BLZUSDT',
 'UNIUSDT',
 'AVAXUSDT',
 'FTMUSDT',
 'HNTUSDT',
 'ENJUSDT',
 'FLMUSDT',
 'TOMOUSDT',
 'RENUSDT',
 'KSMUSDT',
 'NEARUSDT',
 'AAVEUSDT',
 'FILUSDT',
 'RSRUSDT',
 'LRCUSDT',
 'MATICUSDT',
 'OCEANUSDT',
 'CVCUSDT',
 'BELUSDT',
 'CTKUSDT',
 'AXSUSDT',
 'ALPHAUSDT',
 'ZENUSDT',
 'SKLUSDT',
 'GRTUSDT',
 '1INCHUSDT',
 'CHZUSDT',
 'SANDUSDT',
 'ANKRUSDT',
 'BTSUSDT',
 'LI

In [16]:
look_back = 50
ignore = 5
symbol = "BTCUSDT"

class MomentumFactor:
    """
    @Params:
        df: the original df contains all symbol information
        index: the index that a trading event should happend
        look_back: look back period to calculate the momentum
        ingore: int, to prevent short term reversing, ignore the most recent klines
    """
    def __init__(self, look_back, ignore):
        self.look_back = look_back
        self.ignore = ignore
        
    def calculate(self, df, symbol):
        close_prices = df["{} close".format(symbol)]
        momentum_value = close_prices.shift(self.ignore) / close_prices.shift(self.look_back+self.ignore) - 1
        return momentum_value
    
class MaxBarFactor:
    def __init__(self, look_back, ignore):
        self.look_back = look_back
        self.ignore = ignore

    def calculate(self, df, symbol):
        rets = df["{} % ret".format(symbol)]
        max_bar_ret = rets.rolling(window=self.look_back+self.ignore).apply(lambda x: max(x[-self.look_back-self.ignore:-self.ignore]))
        return max_bar_ret
    
max_bar = MomentumFactor(look_back=62, ignore=5)
max_bar.calculate(origin_df, symbol)



open_time
2022-07-01 12:00:00         NaN
2022-07-01 16:00:00         NaN
2022-07-01 20:00:00         NaN
2022-07-02 00:00:00         NaN
2022-07-02 04:00:00         NaN
                         ...   
2023-05-08 04:00:00   -0.001735
2023-05-08 08:00:00    0.001839
2023-05-08 12:00:00   -0.000940
2023-05-08 16:00:00   -0.024618
2023-05-08 20:00:00   -0.049626
Name: BTCUSDT close, Length: 1869, dtype: float64

In [5]:
"""
Every time we trigger trade function, we calculate all factors for all symbols
"""

def calculate_factor_scores(df, target_symbols, factors, trade_index):
    score_dict = {}
    for symbol in target_symbols:
        factor_scores = sum([factor.calculate(df, symbol)[trade_index] for factor in factors])
        score_dict[symbol] = factor_scores
    return score_dict

def trade(df, target_symbols, factors, trade_index):
    """
    @Params:
        factors: a list of factors that we want to use
        trade_index: the index that the trade is happening
    """
    score_dict = calculate_factor_scores(df, target_symbols, factors, trade_index)
    print (score_dict)

momentum = MomentumFactor(look_back, ignore)
factors = [momentum]
#trade(origin_df, instruments, factors, -1)
momentum.calculate(origin_df, "OPUSDT")

open_time
2022-07-01 12:00:00         NaN
2022-07-01 16:00:00         NaN
2022-07-01 20:00:00         NaN
2022-07-02 00:00:00         NaN
2022-07-02 04:00:00         NaN
                         ...   
2023-05-08 04:00:00   -0.110000
2023-05-08 08:00:00   -0.125744
2023-05-08 12:00:00   -0.138211
2023-05-08 16:00:00   -0.134530
2023-05-08 20:00:00   -0.170466
Name: OPUSDT close, Length: 1869, dtype: float64

In [17]:
from collections import defaultdict

def get_rankings(factor_dict, n_ranks=5):
    """
    According to factor scores in factor_dict, generate rankings for each factor
    Returns:
        dict(rank: [symbol1, symbol2])
    """
    sorted_symbols = sorted(factor_dict, key=factor_dict.get, reverse=True)

    # Calculate the rank for each symbol
    num_symbols = len(sorted_symbols)
    rank_size = num_symbols // n_ranks + (num_symbols % n_ranks > 0)
    ranks_dict = defaultdict(list)
    for i, symbol in enumerate(sorted_symbols):
        rank = i // rank_size
        ranks_dict[rank].append(symbol)

    return ranks_dict

def is_rebalancing(i, rebalance_period):
    return i!=1 and i%rebalance_period==1

def get_rank_porfolio(df, target_symbols, factor_specs, trade_index):

    rank_pofolio = []
    for i, (factor, n_ranks) in enumerate(factor_specs):
        # Filter available trading tokens
        max_look_back = factor.look_back
        if trade_index - max_look_back <= 0:
            return None
        active_symbols = get_target_symbols(df.iloc[trade_index - max_look_back])
        target_symbols = set(target_symbols).intersection(active_symbols)

        # Calculate scores for every symbol
        factor_dict = {}
        for symbol in target_symbols:
            factor_dict[symbol] = factor.calculate(df, symbol)[trade_index]
        rank_dict = get_rankings(factor_dict, n_ranks)
        rank_pofolio.append(rank_dict)

    return rank_pofolio
    

def insert_record(dt, rank_df, rank_portfolio, factor_specs):
    """
    rank_df: DataFrame with columns for each rank and each instrument, as well as columns for the forward returns of each instrument
    rank_portfolio: list of rank dictionaries, where each rank dictionary maps ranks to a list of symbols

    """
    # Create a new row for the rank details
    new_row = pd.DataFrame(index=[dt])

    # Combine ranks
    rank_nums = [x[1] for x in factor_specs]
    rank_combinations = get_all_rank_combinations(rank_nums)

    for agg_rank in rank_combinations:
        rank_to_string = [str(rank) for rank in agg_rank]
        col_name = f"rank_{'_'.join(rank_to_string)}"
        symbols = select_intersection_symbols(agg_rank, rank_portfolio)
        new_row[col_name] = ','.join(symbols)
    
    # Concatenate the new row to the rank_df DataFrame
    rank_df = pd.concat([rank_df, new_row])
    return rank_df

def select_intersection_symbols(agg_rank, rank_portfolio):
    """Find target intersection symbols for every aggregation rank"""
    symbol_candidates = []
    for i, rank in enumerate(agg_rank):
        symbols = rank_portfolio[i][rank]
        symbol_candidates.append(symbols)

    rank1 = set(symbol_candidates[0])
    target_symbols = rank1.intersection(*symbol_candidates[1:])
    return target_symbols

def get_all_rank_combinations(rank_nums):
    if not rank_nums:
        return []
    result = [[]]
    for n in rank_nums:
        new_result = []
        for i in range(n):
            for r in result:
                new_result.append(r + [i])
        result = new_result
    return result

def analyse_factor_rank():
    pass

def trade_with_ranks():
    pass

def get_target_symbols(row):
    target_symbols = []
    for inst in instruments:
        if row[f"{inst} active"] == True:
            target_symbols.append(inst)

    return target_symbols

def _add_fwd_ret(df, period):
    all_symbols = get_symbols_from_df(df)
    for symbol in all_symbols:
        df["{} {} fwd ret".format(symbol, period)] = df["{} close".format(symbol)].shift(-period) / df["{} close".format(symbol)] - 1

    return df

def lookup_fwd_ret_for_symbols(df, dt, rank_symbols, rebalance_period, weights=None):
    """For specific dt and symbols, take the average of forward returns"""
    rank_symbols = rank_symbols.split(',')
    total_fwd_ret = 0
    for symbol in rank_symbols:
        fwd_ret = df.loc[dt, f"{symbol} {rebalance_period} fwd ret"]
        total_fwd_ret += fwd_ret

    avg_fwd_ret = round(total_fwd_ret / len(rank_symbols), 4)
    return avg_fwd_ret
    
    

def lookup_fwd_ret(df, rank_df, rebalance_period, weights=None):
    """Combine forward returns for the rank dataframe"""
    rank_cols = rank_df.columns
    for dt in rank_df.index:
        for ranks in rank_cols:
            rank_symbols = rank_df.loc[dt, ranks]
            fwd_ret = lookup_fwd_ret_for_symbols(df, dt, rank_symbols, rebalance_period, weights)
            rank_df.loc[dt, f"{ranks} fwd_ret"] = fwd_ret

    return rank_df
            
    

def backtest(df, rebalance_period, factor_specs):
    # rank_df, every row should contains the rank details for that time
    
    df = _add_fwd_ret(df, rebalance_period)
    rank_df = pd.DataFrame()

    for i, dt in enumerate(df.index):
        print (i)
        if is_rebalancing(i, rebalance_period):
            target_symbols = get_target_symbols(df.iloc[i])
            rank_porfolio = get_rank_porfolio(df, target_symbols, factor_specs, trade_index=i)
            if not rank_porfolio:
                continue
            rank_df = insert_record(dt, rank_df, rank_porfolio, factor_specs)

    rank_df = lookup_fwd_ret(df, rank_df, rebalance_period, weights=None)
    
    fwd_rets_cols = rank_df[[x for x in rank_df.columns if "fwd_ret" in x]]
    fwd_rets = fwd_rets_cols.mean().reset_index()
    print (fwd_rets)
    return rank_df
    #trade_with_ranks(rank_df)

momentum1 = MomentumFactor(look_back=62, ignore=5)
max_bar = MaxBarFactor(look_back=62, ignore=5)
factor_specs = [(momentum1, 5), (max_bar, 4)]
backtest(origin_df, rebalance_period=15, factor_specs=factor_specs)
#get_all_rank_combinations([5,5,5,5])

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106


KeyboardInterrupt: 