In [1]:
import pandas as pd
import numpy as np
import random

import quantlib
import quantlib.indicators_cal as indicators_cal
import quantlib.diagnostics_utils as diagnostics_utils

import warnings
warnings.filterwarnings('ignore')

In [2]:
def get_instruments_from_df(df):
    """
    Return the unique instrument symbols encoded in the column labels of `df`.

    The text before the first space of every column label is taken as the
    symbol (e.g. "BTCUSDT close" -> "BTCUSDT"); only symbols containing "USD"
    are kept.

    @Params:
        df: DataFrame whose column labels start with the symbol name

    Returns:
        list of symbols, without duplicates, in order of first appearance
    """
    seen = set()
    instruments = []
    for col in df.columns:
        # A non-string label cannot carry a "<SYMBOL> <field>" name; skip it
        # instead of failing on .split
        if not isinstance(col, str):
            continue
        inst = col.split(" ")[0]
        # The set gives constant-time de-duplication; the list keeps the order
        if "USD" in inst and inst not in seen:
            seen.add(inst)
            instruments.append(inst)
    return instruments

# Load the historical data; rows are indexed by the 'open_time' column.
# NOTE(review): the file name suggests 4-hour klines, and later cells read columns
# named "<SYMBOL> close" / "<SYMBOL> active" — confirm against the spreadsheet.
origin_df = pd.read_excel("../crypto_historical_4h.xlsx", engine="openpyxl", index_col='open_time')

In [3]:
"""
In this notebook, we want to generalize the framework for exploring factor effects in crypto trading.

Steps for this task:
1. Read the crypto data in whatever format it currently has; the format may still change later.
2. Filtering the data at the very beginning may not be appropriate, because symbols are continually listed and delisted.
3. Treat the datetime index as the key for tracking trades.
4. We should have a function that calculates each factor, for example:
    def get_momentum(origin_df):
        return {token:{
            "score": score,
            "rank": rank
        }}
5. Before making every trade, calculate the sum of scores
6. Choose symbols to trade accordingly

"""
# Collect every symbol that appears in the column labels of the loaded frame.
instruments = get_instruments_from_df(origin_df)
# Bare expression: shown as the cell output.
instruments

['BTCUSDT',
 'ETHUSDT',
 'BCHUSDT',
 'XRPUSDT',
 'EOSUSDT',
 'LTCUSDT',
 'TRXUSDT',
 'ETCUSDT',
 'LINKUSDT',
 'XLMUSDT',
 'ADAUSDT',
 'XMRUSDT',
 'DASHUSDT',
 'ZECUSDT',
 'XTZUSDT',
 'ATOMUSDT',
 'ONTUSDT',
 'IOTAUSDT',
 'BATUSDT',
 'VETUSDT',
 'NEOUSDT',
 'QTUMUSDT',
 'IOSTUSDT',
 'THETAUSDT',
 'ALGOUSDT',
 'ZILUSDT',
 'KNCUSDT',
 'ZRXUSDT',
 'COMPUSDT',
 'OMGUSDT',
 'DOGEUSDT',
 'SXPUSDT',
 'KAVAUSDT',
 'BANDUSDT',
 'RLCUSDT',
 'WAVESUSDT',
 'MKRUSDT',
 'SNXUSDT',
 'DOTUSDT',
 'DEFIUSDT',
 'YFIUSDT',
 'BALUSDT',
 'CRVUSDT',
 'RUNEUSDT',
 'SUSHIUSDT',
 'SRMUSDT',
 'EGLDUSDT',
 'SOLUSDT',
 'ICXUSDT',
 'STORJUSDT',
 'BLZUSDT',
 'UNIUSDT',
 'AVAXUSDT',
 'FTMUSDT',
 'HNTUSDT',
 'ENJUSDT',
 'FLMUSDT',
 'TOMOUSDT',
 'RENUSDT',
 'KSMUSDT',
 'NEARUSDT',
 'AAVEUSDT',
 'FILUSDT',
 'RSRUSDT',
 'LRCUSDT',
 'MATICUSDT',
 'OCEANUSDT',
 'CVCUSDT',
 'BELUSDT',
 'CTKUSDT',
 'AXSUSDT',
 'ALPHAUSDT',
 'ZENUSDT',
 'SKLUSDT',
 'GRTUSDT',
 '1INCHUSDT',
 'CHZUSDT',
 'SANDUSDT',
 'ANKRUSDT',
 'BTSUSDT',
 'LI

In [4]:
# Default parameters handed to MomentumFactor in the cells below.
look_back = 50  # number of rows the momentum return is measured over
ignore = 5  # number of most recent rows skipped by the factor
# NOTE(review): no visible code reads this module-level value (the functions
# below use their own local `symbol` loop variables) — confirm it is still needed.
symbol = "BTCUSDT"

class MomentumFactor:
    """
    Price momentum: the return over `look_back` rows, measured after skipping
    the `ignore` most recent rows.

    @Params:
        look_back: int, look back period used to calculate the momentum
        ignore: int, to prevent short term reversing, ignore the most recent klines
    """
    def __init__(self, look_back, ignore):
        self.ignore = ignore
        self.look_back = look_back

    def calculate(self, df, symbol):
        """
        Return a Series aligned with `df` holding, for every row t,
        close[t - ignore] / close[t - ignore - look_back] - 1,
        where close is the "<symbol> close" column of `df`.
        """
        column = "{} close".format(symbol)
        recent = df[column].shift(self.ignore)
        past = df[column].shift(self.look_back + self.ignore)
        return recent / past - 1


In [5]:
"""
Every time we trigger the trade function, we calculate all factors for all symbols.
"""

def calculate_factor_scores(df, target_symbols, factors, trade_index):
    """
    Sum the scores of all factors for each symbol at one row position.

    @Params:
        df: DataFrame handed unchanged to every factor.calculate
        target_symbols: iterable of symbols to score
        factors: a list of factor objects exposing calculate(df, symbol),
            which returns a Series
        trade_index: int, row position of the trade; negative values count
            from the end (-1 is the last row)

    Returns:
        dict(symbol: summed score); the sum is 0 when `factors` is empty
    """
    score_dict = {}
    for symbol in target_symbols:
        # .iloc makes the lookup explicitly positional. Plain [] is a label
        # lookup: it raises KeyError for -1 on an integer index and only works
        # on other index types through a deprecated positional fallback.
        score_dict[symbol] = sum(
            factor.calculate(df, symbol).iloc[trade_index] for factor in factors
        )
    return score_dict

def trade(df, target_symbols, factors, trade_index):
    """
    Compute the combined factor score of every target symbol at `trade_index`
    and print the resulting dict.

    @Params:
        df: the DataFrame passed on to calculate_factor_scores
        target_symbols: the symbols to score
        factors: a list of factors that we want to use
        trade_index: the index that the trade is happening
    """
    print(calculate_factor_scores(df, target_symbols, factors, trade_index))

# Score every instrument with a single momentum factor, using -1 as the trade index.
momentum = MomentumFactor(look_back, ignore)
factors = [momentum]
trade(origin_df, instruments, factors, -1)

{'BTCUSDT': -0.0800451256670427, 'ETHUSDT': -0.0905873786407767, 'BCHUSDT': -0.05651467058914594, 'XRPUSDT': -0.08372183372183373, 'EOSUSDT': -0.12049180327868858, 'LTCUSDT': -0.07460621541081303, 'TRXUSDT': 0.006854531607006997, 'ETCUSDT': -0.07859066222445354, 'LINKUSDT': -0.05033779308517694, 'XLMUSDT': -0.09704880817253114, 'ADAUSDT': -0.07422149379536414, 'XMRUSDT': -0.016747906511686117, 'DASHUSDT': -0.11883447428471117, 'ZECUSDT': -0.07662178702570388, 'XTZUSDT': -0.08695652173913049, 'ATOMUSDT': -0.09348791317217564, 'ONTUSDT': -0.028327512611563743, 'IOTAUSDT': -0.08235294117647052, 'BATUSDT': -0.08979885057471249, 'VETUSDT': -0.10351639668115376, 'NEOUSDT': -0.08828699264940876, 'QTUMUSDT': -0.0887864823348693, 'IOSTUSDT': -0.07055511568575712, 'THETAUSDT': -0.04257079808753217, 'ALGOUSDT': -0.14240506329113922, 'ZILUSDT': 0.17125587642713236, 'KNCUSDT': -0.08245954692556634, 'ZRXUSDT': -0.0794036061026352, 'COMPUSDT': -0.083276138183482, 'OMGUSDT': -0.14074595355383535, 'DOG

In [8]:
from collections import defaultdict

def get_rankings(factor_dict, n_ranks=5):
    """
    According to factor scores in factor_dict, split the symbols into ranks.

    Symbols are sorted by score, highest first, and cut into consecutive
    buckets of ceil(len / n_ranks) symbols; rank 0 holds the highest scores.
    Because the bucket size is rounded up, the last bucket may be smaller and
    fewer than `n_ranks` buckets may be produced.

    Symbols whose score is missing (NaN / None) are left out of the ranking:
    NaN compares as neither greater nor smaller than any number, so keeping
    it would make the sort order arbitrary.

    @Params:
        factor_dict: dict(symbol: score)
        n_ranks: int >= 1, the number of ranks requested

    Returns:
        defaultdict(list) mapping rank -> [symbol1, symbol2]

    Raises:
        ValueError: if n_ranks is smaller than 1
    """
    if n_ranks < 1:
        raise ValueError("n_ranks must be a positive integer, got {!r}".format(n_ranks))

    # Keep only the symbols that have a usable score
    scored = {symbol: score for symbol, score in factor_dict.items() if not pd.isna(score)}
    sorted_symbols = sorted(scored, key=scored.get, reverse=True)

    # Bucket size: ceil(num_symbols / n_ranks)
    num_symbols = len(sorted_symbols)
    rank_size = num_symbols // n_ranks + (num_symbols % n_ranks > 0)

    ranks_dict = defaultdict(list)
    for i, symbol in enumerate(sorted_symbols):
        ranks_dict[i // rank_size].append(symbol)

    return ranks_dict

def is_rebalancing(i, rebalance_period):
    """
    Tell whether row position `i` is a rebalancing point: any position whose
    remainder modulo `rebalance_period` is 1, except position 1 itself.
    """
    if i == 1:
        return False
    return i % rebalance_period == 1

def get_rank_porfolio(df, target_symbols, factor_specs, trade_index):
    """
    Rank the target symbols separately for every factor at one row position.

    @Params:
        df: DataFrame handed unchanged to every factor.calculate
        target_symbols: iterable of symbols to rank
        factor_specs: list of (factor, n_ranks) pairs; factor exposes
            calculate(df, symbol) returning a Series, n_ranks is passed to
            get_rankings
        trade_index: int, row position at which the scores are read

    Returns:
        list with one get_rankings result per factor, in factor_specs order
    """
    rank_pofolio = []
    for factor, n_ranks in factor_specs:
        # Calculate scores for every symbol. .iloc reads the row by position;
        # plain [] would treat trade_index as an index label.
        factor_dict = {
            symbol: factor.calculate(df, symbol).iloc[trade_index]
            for symbol in target_symbols
        }
        rank_pofolio.append(get_rankings(factor_dict, n_ranks))

    return rank_pofolio
    

def insert_record(dt, rank_df, rank_portfolio, factor_specs):
    """
    Append one row, labelled `dt`, that lists the symbols of every combined rank.

    For each combination of per-factor rank indices a column named
    "rank_<r1>_<r2>..." is filled with the comma-joined symbols returned by
    select_symbols for that combination.

    @Params:
        dt: index label of the new row
        rank_df: DataFrame of the rows inserted so far (may be empty)
        rank_portfolio: list of rank dictionaries, where each rank dictionary
            maps ranks to a list of symbols
        factor_specs: list of specs whose second element is the number of
            ranks of that factor

    Returns:
        a new DataFrame: rank_df with the new row appended

    NOTE(review): the original note says "2 factors maximum"; nothing in this
    function enforces that limit.
    """
    # Combine ranks
    rank_nums = [spec[1] for spec in factor_specs]
    rank_combinations = get_all_rank_combinations(rank_nums)

    record = {}
    for agg_rank in rank_combinations:
        # Rank indices are ints, str.join only accepts strings
        col_name = f"rank_{'_'.join(str(rank) for rank in agg_rank)}"
        symbols = select_symbols(agg_rank, rank_portfolio)
        record[col_name] = ','.join(symbols)

    # Build the row in one go, then concatenate it to the rank_df DataFrame
    new_row = pd.DataFrame(record, index=[dt])
    return pd.concat([rank_df, new_row])

def select_symbols(agg_rank=None, rank_portfolio=None):
    """
    Return the symbols that sit in rank agg_rank[i] of factor i for every factor.

    @Params:
        agg_rank: sequence of rank indices, one per factor
        rank_portfolio: list of rank dictionaries (rank -> list of symbols),
            one per factor, in the same order as agg_rank

    Returns:
        list of symbols present in all selected ranks, in the order of the
        first factor's rank list; [] when nothing is selected or an argument
        is missing/empty

    NOTE(review): the intersection is the assumed meaning of a combined rank —
    confirm this is the intended selection rule.
    """
    if not agg_rank or not rank_portfolio:
        return []

    # .get avoids creating empty entries when rank_dict is a defaultdict
    selected = [
        rank_dict.get(rank, [])
        for rank, rank_dict in zip(agg_rank, rank_portfolio)
    ]
    other_factors = [set(symbols) for symbols in selected[1:]]
    return [
        symbol for symbol in selected[0]
        if all(symbol in symbols for symbols in other_factors)
    ]

def get_all_rank_combinations(rank_nums):
    """
    Enumerate every combination of rank indices, one index per factor.

    @Params:
        rank_nums: list of ints, the number of ranks of each factor

    Returns:
        list of lists; entry k of a combination lies in range(rank_nums[k]).
        The first factor's index varies fastest, e.g. [2, 2] gives
        [[0, 0], [1, 0], [0, 1], [1, 1]]. An empty `rank_nums` gives [].
    """
    combos = [[]] if rank_nums else []
    for size in rank_nums:
        # Extend every combination built so far with each rank of this factor
        combos = [prefix + [rank] for rank in range(size) for prefix in combos]
    return combos

def analyse_factor_rank():
    """Placeholder, not implemented yet: takes no arguments and returns None."""
    pass

def trade_with_ranks():
    """Placeholder, not implemented yet: takes no arguments and returns None."""
    pass

def get_target_symbols(row):
    """
    Pick the instruments flagged as active in `row`.

    Reads the module-level `instruments` list and keeps, in that order, every
    symbol whose "<symbol> active" entry in `row` equals True.
    """
    return [inst for inst in instruments if row[f"{inst} active"] == True]

def _add_fwd_ret(df, period):
    """
    Add a "<symbol> <period> fwd ret" column for every symbol found in `df`.

    The forward return of row t is close[t + period] / close[t] - 1, read from
    the "<symbol> close" column. `df` is modified in place and also returned.
    """
    for symbol in get_instruments_from_df(df):
        close_col = "{} close".format(symbol)
        fwd_col = "{} {} fwd ret".format(symbol, period)
        df[fwd_col] = df[close_col].shift(-period) / df[close_col] - 1
    return df

def backtest(df, rebalance_period, factor_specs):
    """
    Build the rank table: one row per rebalancing point, holding the rank
    details for that time.

    @Params:
        df: DataFrame with all symbol information; forward-return columns are
            added to it by _add_fwd_ret
        rebalance_period: int, spacing of the rebalancing points and horizon
            of the forward returns
        factor_specs: list of (factor, n_ranks) pairs

    Returns:
        rank_df, as accumulated by insert_record
    """
    df = _add_fwd_ret(df, rebalance_period)
    rank_df = pd.DataFrame()

    for position, timestamp in enumerate(df.index):
        if not is_rebalancing(position, rebalance_period):
            continue
        # Only symbols flagged active on this row take part in the ranking
        active_symbols = get_target_symbols(df.iloc[position])
        portfolio = get_rank_porfolio(df, active_symbols, factor_specs, trade_index=position)
        rank_df = insert_record(timestamp, rank_df, portfolio, factor_specs)

    # TODO: trade_with_ranks(rank_df)
    return rank_df

# Two momentum factors with different windows; each spec is (factor, number of ranks).
momentum1 = MomentumFactor(look_back, ignore)
momentum2 = MomentumFactor(look_back=20, ignore=2)
factor_specs = [(momentum1, 5), (momentum2, 2)]
# The full backtest is currently disabled; only the rank-combination helper is exercised below.
#backtest(origin_df, rebalance_period=20, factor_specs=factor_specs)
get_all_rank_combinations([5,5,5,5])

[[0, 0, 0, 0],
 [0, 0, 0, 1],
 [0, 0, 0, 2],
 [0, 0, 0, 3],
 [0, 0, 0, 4],
 [0, 0, 1, 0],
 [0, 0, 1, 1],
 [0, 0, 1, 2],
 [0, 0, 1, 3],
 [0, 0, 1, 4],
 [0, 0, 2, 0],
 [0, 0, 2, 1],
 [0, 0, 2, 2],
 [0, 0, 2, 3],
 [0, 0, 2, 4],
 [0, 0, 3, 0],
 [0, 0, 3, 1],
 [0, 0, 3, 2],
 [0, 0, 3, 3],
 [0, 0, 3, 4],
 [0, 0, 4, 0],
 [0, 0, 4, 1],
 [0, 0, 4, 2],
 [0, 0, 4, 3],
 [0, 0, 4, 4],
 [0, 1, 0, 0],
 [0, 1, 0, 1],
 [0, 1, 0, 2],
 [0, 1, 0, 3],
 [0, 1, 0, 4],
 [0, 1, 1, 0],
 [0, 1, 1, 1],
 [0, 1, 1, 2],
 [0, 1, 1, 3],
 [0, 1, 1, 4],
 [0, 1, 2, 0],
 [0, 1, 2, 1],
 [0, 1, 2, 2],
 [0, 1, 2, 3],
 [0, 1, 2, 4],
 [0, 1, 3, 0],
 [0, 1, 3, 1],
 [0, 1, 3, 2],
 [0, 1, 3, 3],
 [0, 1, 3, 4],
 [0, 1, 4, 0],
 [0, 1, 4, 1],
 [0, 1, 4, 2],
 [0, 1, 4, 3],
 [0, 1, 4, 4],
 [0, 2, 0, 0],
 [0, 2, 0, 1],
 [0, 2, 0, 2],
 [0, 2, 0, 3],
 [0, 2, 0, 4],
 [0, 2, 1, 0],
 [0, 2, 1, 1],
 [0, 2, 1, 2],
 [0, 2, 1, 3],
 [0, 2, 1, 4],
 [0, 2, 2, 0],
 [0, 2, 2, 1],
 [0, 2, 2, 2],
 [0, 2, 2, 3],
 [0, 2, 2, 4],
 [0, 2, 3, 0],
 [0, 2, 3,