In [1]:
import pandas as pd
import numpy as np
import random

import quantlib
import quantlib.indicators_cal as indicators_cal
import quantlib.diagnostics_utils as diagnostics_utils

import warnings
warnings.filterwarnings('ignore')

In [2]:
def get_instruments_from_df(df):
    instruments = []
    for col in df.columns:
        inst = col.split(" ")[0]
        if "USD" in inst and inst not in instruments:
            instruments.append(inst)
    return instruments

origin_df = pd.read_excel("../crypto_historical_4h.xlsx", engine="openpyxl", index_col='open_time')

In [3]:
"""
In this notebook, we want to generalize the framework to explore the factor effect in crypto trading

Steps to do this task:
1. Read the crypto data, whatever the format, we still might change the format later
2. Filter data at the very beginning could be not appropriate, because there are always listings and unlistings
3. Take the index: datetime as a important thing to track trades
4. We should have a function to calculate factor, for example:
    def get_momentum(origin_df):
        return {token:{
            "score": score,
            "rank": rank
        }}
5. Before making every trade, calculate the sum of scores
6. Choose symbols to trade accordingly

"""
instruments = get_instruments_from_df(origin_df)
instruments

['BTCUSDT',
 'ETHUSDT',
 'BCHUSDT',
 'XRPUSDT',
 'EOSUSDT',
 'LTCUSDT',
 'TRXUSDT',
 'ETCUSDT',
 'LINKUSDT',
 'XLMUSDT',
 'ADAUSDT',
 'XMRUSDT',
 'DASHUSDT',
 'ZECUSDT',
 'XTZUSDT',
 'ATOMUSDT',
 'ONTUSDT',
 'IOTAUSDT',
 'BATUSDT',
 'VETUSDT',
 'NEOUSDT',
 'QTUMUSDT',
 'IOSTUSDT',
 'THETAUSDT',
 'ALGOUSDT',
 'ZILUSDT',
 'KNCUSDT',
 'ZRXUSDT',
 'COMPUSDT',
 'OMGUSDT',
 'DOGEUSDT',
 'SXPUSDT',
 'KAVAUSDT',
 'BANDUSDT',
 'RLCUSDT',
 'WAVESUSDT',
 'MKRUSDT',
 'SNXUSDT',
 'DOTUSDT',
 'DEFIUSDT',
 'YFIUSDT',
 'BALUSDT',
 'CRVUSDT',
 'RUNEUSDT',
 'SUSHIUSDT',
 'SRMUSDT',
 'EGLDUSDT',
 'SOLUSDT',
 'ICXUSDT',
 'STORJUSDT',
 'BLZUSDT',
 'UNIUSDT',
 'AVAXUSDT',
 'FTMUSDT',
 'HNTUSDT',
 'ENJUSDT',
 'FLMUSDT',
 'TOMOUSDT',
 'RENUSDT',
 'KSMUSDT',
 'NEARUSDT',
 'AAVEUSDT',
 'FILUSDT',
 'RSRUSDT',
 'LRCUSDT',
 'MATICUSDT',
 'OCEANUSDT',
 'CVCUSDT',
 'BELUSDT',
 'CTKUSDT',
 'AXSUSDT',
 'ALPHAUSDT',
 'ZENUSDT',
 'SKLUSDT',
 'GRTUSDT',
 '1INCHUSDT',
 'CHZUSDT',
 'SANDUSDT',
 'ANKRUSDT',
 'BTSUSDT',
 'LI

In [14]:
look_back = 50
ignore = 5
symbol = "BTCUSDT"

class MomentumFactor:
    """
    @Params:
        df: the original df contains all symbol information
        index: the index that a trading event should happend
        look_back: look back period to calculate the momentum
        ingore: int, to prevent short term reversing, ignore the most recent klines
    """
    def __init__(self, look_back, ignore):
        self.look_back = look_back
        self.ignore = ignore
        
    def calculate(self, df, symbol):
        close_prices = df["{} close".format(symbol)]
        momentum_value = close_prices.shift(self.ignore) / close_prices.shift(self.look_back+self.ignore) - 1
        return momentum_value


In [15]:
"""
Every time we trigger trade function, we calculate all factors for all symbols
"""

def calculate_factor_scores(df, target_symbols, factors, trade_index):
    score_dict = {}
    for symbol in target_symbols:
        factor_scores = sum([factor.calculate(df, symbol)[trade_index] for factor in factors])
        score_dict[symbol] = factor_scores
    return score_dict

def trade(df, target_symbols, factors, trade_index):
    """
    @Params:
        factors: a list of factors that we want to use
        trade_index: the index that the trade is happening
    """
    score_dict = calculate_factor_scores(df, target_symbols, factors, trade_index)
    print (score_dict)

momentum = MomentumFactor(look_back, ignore)
factors = [momentum]
#trade(origin_df, instruments, factors, -1)
momentum.calculate(origin_df, "OPUSDT")

open_time
2022-07-01 12:00:00         NaN
2022-07-01 16:00:00         NaN
2022-07-01 20:00:00         NaN
2022-07-02 00:00:00         NaN
2022-07-02 04:00:00         NaN
                         ...   
2023-04-23 00:00:00   -0.071299
2023-04-23 04:00:00   -0.123951
2023-04-23 08:00:00   -0.124019
2023-04-23 12:00:00   -0.123948
2023-04-23 16:00:00   -0.081236
Name: OPUSDT close, Length: 1778, dtype: float64

In [13]:
from collections import defaultdict

def get_rankings(factor_dict, n_ranks=5):
    """
    According to factor scores in factor_dict, generate rankings for each factor
    Returns:
        dict(rank: [symbol1, symbol2])
    """
    sorted_symbols = sorted(factor_dict, key=factor_dict.get, reverse=True)

    # Calculate the rank for each symbol
    num_symbols = len(sorted_symbols)
    rank_size = num_symbols // n_ranks + (num_symbols % n_ranks > 0)
    ranks_dict = defaultdict(list)
    for i, symbol in enumerate(sorted_symbols):
        rank = i // rank_size
        ranks_dict[rank].append(symbol)

    return ranks_dict

def is_rebalancing(i, rebalance_period):
    return i!=1 and i%rebalance_period==1

def get_rank_porfolio(df, target_symbols, factor_specs, trade_index):

    rank_pofolio = []
    for i, (factor, n_ranks) in enumerate(factor_specs):
        # Filter available trading tokens
        max_look_back = factor.look_back
        if trade_index - max_look_back <= 0:
            return None
        active_symbols = get_target_symbols(df.iloc[trade_index - max_look_back])
        target_symbols = set(target_symbols).intersection(active_symbols)

        # Calculate scores for every symbol
        factor_dict = {}
        for symbol in target_symbols:
            factor_dict[symbol] = factor.calculate(df, symbol)[trade_index]
        rank_dict = get_rankings(factor_dict, n_ranks)
        rank_pofolio.append(rank_dict)

    return rank_pofolio
    

def insert_record(dt, rank_df, rank_portfolio, factor_specs):
    """
    rank_df: DataFrame with columns for each rank and each instrument, as well as columns for the forward returns of each instrument
    rank_portfolio: list of rank dictionaries, where each rank dictionary maps ranks to a list of symbols

    """
    # Create a new row for the rank details
    new_row = pd.DataFrame(index=[dt])

    # Combine ranks
    rank_nums = [x[1] for x in factor_specs]
    rank_combinations = get_all_rank_combinations(rank_nums)

    for agg_rank in rank_combinations:
        rank_to_string = [str(rank) for rank in agg_rank]
        col_name = f"rank_{'_'.join(rank_to_string)}"
        symbols = select_intersection_symbols(agg_rank, rank_portfolio)
        new_row[col_name] = ','.join(symbols)
    
    # Concatenate the new row to the rank_df DataFrame
    rank_df = pd.concat([rank_df, new_row])
    return rank_df

def select_intersection_symbols(agg_rank, rank_portfolio):
    """Find target intersection symbols for every aggregation rank"""
    symbol_candidates = []
    for i, rank in enumerate(agg_rank):
        symbols = rank_portfolio[i][rank]
        symbol_candidates.append(symbols)

    rank1 = set(symbol_candidates[0])
    target_symbols = rank1.intersection(*symbol_candidates[1:])
    return target_symbols

def get_all_rank_combinations(rank_nums):
    if not rank_nums:
        return []
    result = [[]]
    for n in rank_nums:
        new_result = []
        for i in range(n):
            for r in result:
                new_result.append(r + [i])
        result = new_result
    return result

def analyse_factor_rank():
    pass

def trade_with_ranks():
    pass

def get_target_symbols(row):
    target_symbols = []
    for inst in instruments:
        if row[f"{inst} active"] == True:
            target_symbols.append(inst)

    return target_symbols

def _add_fwd_ret(df, period):
    all_symbols = get_instruments_from_df(df)
    for symbol in all_symbols:
        df["{} {} fwd ret".format(symbol, period)] = df["{} close".format(symbol)].shift(-period) / df["{} close".format(symbol)] - 1

    return df

def _lookup_fwd_ret(df, rank_df, weights=None):
    pass
    

def backtest(df, rebalance_period, factor_specs):
    # rank_df, every row should contains the rank details for that time
    
    df = _add_fwd_ret(df, rebalance_period)
    rank_df = pd.DataFrame()

    for i, dt in enumerate(df.index):
        if is_rebalancing(i, rebalance_period):
            target_symbols = get_target_symbols(df.iloc[i])
            rank_porfolio = get_rank_porfolio(df, target_symbols, factor_specs, trade_index=i)
            rank_df = insert_record(dt, rank_df, rank_porfolio, factor_specs)
            #rank_df = _lookup_fwd_ret(df, rank_df, weights=None)
            

    return rank_df
    #trade_with_ranks(rank_df)

momentum1 = MomentumFactor(look_back, ignore)
momentum2 = MomentumFactor(look_back=20, ignore=2)
factor_specs = [(momentum1, 5), (momentum2, 2)]
backtest(origin_df, rebalance_period=20, factor_specs=factor_specs)
#get_all_rank_combinations([5,5,5,5])

Unnamed: 0,rank_0_0,rank_1_0,rank_2_0,rank_3_0,rank_4_0,rank_0_1,rank_1_1,rank_2_1,rank_3_1,rank_4_1
2022-07-05 00:00:00,"KNCUSDT,VETUSDT,THETAUSDT,EOSUSDT,ALGOUSDT,BTC...","DOTUSDT,EGLDUSDT,STORJUSDT,COMPUSDT,SXPUSDT,KA...","RENUSDT,HNTUSDT,NEARUSDT,KSMUSDT,OCEANUSDT,ENJ...",,,,,"ZENUSDT,UNFIUSDT,SKLUSDT,AXSUSDT,BELUSDT,1INCH...","LINAUSDT,MTLUSDT,MASKUSDT,HBARUSDT,REEFUSDT,MA...","FTTUSDT,PEOPLEUSDT,1000XECUSDT,ENSUSDT,JASMYUS..."
2022-07-08 08:00:00,"KNCUSDT,VETUSDT,ATOMUSDT,ZRXUSDT,NEOUSDT,EOSUS...","STORJUSDT,SUSHIUSDT,CRVUSDT,ENJUSDT,AVAXUSDT,D...","RENUSDT,NEARUSDT,AXSUSDT,1INCHUSDT,LITUSDT,CTK...","ATAUSDT,CELRUSDT,LINAUSDT,C98USDT,RAYUSDT,BAKE...","FTTUSDT,GMTUSDT,OPUSDT,ENSUSDT,IMXUSDT,APEUSDT...","ZECUSDT,QTUMUSDT,TRXUSDT,ZILUSDT,THETAUSDT,ADA...","DOTUSDT,EGLDUSDT,YFIUSDT,KAVAUSDT,HNTUSDT,ICXU...","KSMUSDT,OCEANUSDT,SKLUSDT,FILUSDT,RSRUSDT,ANKR...","MANAUSDT,COTIUSDT,MTLUSDT,MASKUSDT,NKNUSDT,CHR...","KLAYUSDT,ANCBUSD,CTSIUSDT,ARPAUSDT,PEOPLEUSDT,..."
2022-07-11 16:00:00,"ZENUSDT,RENUSDT,SUSHIUSDT,COMPUSDT,NEOUSDT,UNF...","EOSUSDT,JASMYUSDT,BTCUSDT,ANTUSDT,DEFIUSDT,CHZ...","TRXUSDT,QTUMUSDT,COTIUSDT,SXPUSDT,ARPAUSDT,100...","HNTUSDT,ICXUSDT,XEMUSDT,ADAUSDT,IOSTUSDT,XRPUS...","DOTUSDT,KAVAUSDT,MASKUSDT,BTCDOMUSDT,LINKUSDT,...","CELRUSDT,GMTUSDT,CRVUSDT,AVAXUSDT,IOTXUSDT,DYD...","YFIUSDT,LINAUSDT,ENJUSDT,SOLUSDT,ENSUSDT,SANDU...","KNCUSDT,MANAUSDT,NEARUSDT,EGLDUSDT,FTTUSDT,ZRX...","ATAUSDT,STORJUSDT,KSMUSDT,MTLUSDT,CTSIUSDT,THE...","BELUSDT,RSRUSDT,CTKUSDT,ANCBUSD,OCEANUSDT,RVNU..."
2022-07-15 00:00:00,"RENUSDT,COMPUSDT,HBARUSDT,ARUSDT,CRVUSDT,AVAXU...","NEARUSDT,EGLDUSDT,FTTUSDT,ALGOUSDT,BTCUSDT,CTK...","ZECUSDT,TRXUSDT,YFIUSDT,LINAUSDT,KSMUSDT,OCEAN...","MANAUSDT,ATOMUSDT,RSRUSDT,ENSUSDT,CELOUSDT,ALI...","KNCUSDT,BALUSDT,ENJUSDT,LRCUSDT,APEUSDT,WOOUSD...","ZENUSDT,UNFIUSDT,BTCDOMUSDT,BANDUSDT,BAKEUSDT,...","STORJUSDT,NEOUSDT,ARPAUSDT,EOSUSDT,IOTAUSDT,TO...","DUSKUSDT,DOTUSDT,QTUMUSDT,HNTUSDT,MTLUSDT,MASK...","THETAUSDT,PEOPLEUSDT,AXSUSDT,BELUSDT,1INCHUSDT...","ATAUSDT,ANCBUSD,KAVAUSDT,SXPUSDT,DOGEUSDT,GALA..."
2022-07-18 08:00:00,"BTSUSDT,COTIUSDT,SUSHIUSDT,KSMUSDT,CTSIUSDT,NK...","CELRUSDT,RENUSDT,FTTUSDT,OCEANUSDT,NEOUSDT,BAL...","ZENUSDT,QTUMUSDT,DOTUSDT,RVNUSDT,BANDUSDT,1000...","ATAUSDT,LINAUSDT,SXPUSDT,FILUSDT,AXSUSDT,IOTXU...","C98USDT,MASKUSDT,UNFIUSDT,FTMUSDT,ENSUSDT,SFPUSDT","EGLDUSDT,COMPUSDT,SNXUSDT,CRVUSDT,RAYUSDT,AAVE...","KNCUSDT,ZECUSDT,TRXUSDT,DEFIUSDT,LINKUSDT,ONEU...","RLCUSDT,NEARUSDT,ATOMUSDT,HNTUSDT,EOSUSDT,XTZU...","YFIUSDT,KAVAUSDT,MTLUSDT,ICXUSDT,ZILUSDT,ADAUS...","MANAUSDT,STORJUSDT,ZRXUSDT,GMTUSDT,THETAUSDT,E..."
...,...,...,...,...,...,...,...,...,...,...
2023-04-07 16:00:00,"KNCUSDT,STORJUSDT,LINAUSDT,RSRUSDT,INJUSDT,ANT...","VETUSDT,MTLUSDT,EOSUSDT,1000XECUSDT,1INCHUSDT,...","MANAUSDT,NEARUSDT,OCEANUSDT,UNFIUSDT,GMXUSDT,F...","KAVAUSDT,CVXUSDT,BALUSDT,TOMOUSDT,CELOUSDT,SAN...","TLMUSDT,RENUSDT,YFIUSDT,GMTUSDT,BANDUSDT,XRPUS...","RNDRUSDT,SXPUSDT,DOGEUSDT,ANKRUSDT,IOTXUSDT,CH...","THETAUSDT,ZILUSDT,DEFIUSDT,BELUSDT,PEOPLEUSDT,...","DOTUSDT,LITUSDT,BCHUSDT,QTUMUSDT,RVNUSDT,HFTUS...","ICPUSDT,COMPUSDT,ENSUSDT,BTCUSDT,LUNA2USDT,AST...","EGLDUSDT,MASKUSDT,IDUSDT,AGIXUSDT,KSMUSDT,NEOU..."
2023-04-11 00:00:00,"BELUSDT,PEOPLEUSDT,INJUSDT,JASMYUSDT,ANTUSDT,F...","KNCUSDT,ZECUSDT,NEARUSDT,OCEANUSDT,CVXUSDT,ONE...","RLCUSDT,RENUSDT,QTUMUSDT,NEOUSDT,BALUSDT,ADAUS...","ICPUSDT,ZENUSDT,JOEUSDT,RNDRUSDT,COMPUSDT,ASTR...","HOOKUSDT,CELRUSDT,COTIUSDT,STXUSDT,MTLUSDT,MAS...","KLAYUSDT,ATAUSDT,LINAUSDT,CRVUSDT,ICXUSDT,ENJU...","TLMUSDT,MANAUSDT,TRXUSDT,KAVAUSDT,SUSHIUSDT,SN...","BLUEBIRDUSDT,STORJUSDT,XEMUSDT,DEFIUSDT,AVAXUS...","DOTUSDT,THETAUSDT,ALGOUSDT,ENSUSDT,CTKUSDT,KSM...","OMGUSDT,MAGICUSDT,EGLDUSDT,HFTUSDT,ZRXUSDT,GMT..."
2023-04-14 08:00:00,"ICPUSDT,NEARUSDT,IDUSDT,INJUSDT,JASMYUSDT,BTCU...","VETUSDT,LITUSDT,KSMUSDT,CRVUSDT,AVAXUSDT,GALAU...","MANAUSDT,TLMUSDT,DOTUSDT,COTIUSDT,BALUSDT,BCHU...","ZENUSDT,BLUEBIRDUSDT,CHRUSDT,LINKUSDT,MATICUSD...","ICXUSDT,DOGEUSDT,RSRUSDT,RDNTUSDT,IOTXUSDT,GRT...","ATAUSDT,KAVAUSDT,NKNUSDT,FILUSDT,AUDIOUSDT,ACH...","MTLUSDT,NEOUSDT,ARPAUSDT,CFXUSDT,BTCDOMUSDT,BA...","KNCUSDT,QTUMUSDT,SUSHIUSDT,STXUSDT,ENJUSDT,STG...","RENUSDT,EGLDUSDT,COMPUSDT,ALGOUSDT,CTKUSDT,HBA...","STORJUSDT,LINAUSDT,THETAUSDT,MASKUSDT,SXPUSDT,..."
2023-04-17 16:00:00,"ICPUSDT,MAGICUSDT,IDUSDT,INJUSDT,JASMYUSDT,CHR...","TLMUSDT,RENUSDT,NEARUSDT,AGIXUSDT,BELUSDT,LITU...","ZENUSDT,BLUEBIRDUSDT,ATAUSDT,ZRXUSDT,C98USDT,N...","MANAUSDT,QTUMUSDT,ICXUSDT,CELOUSDT,DEFIUSDT,AX...","CHZUSDT,SXPUSDT,FOOTBALLUSDT","ASTRUSDT,CVXUSDT,IMXUSDT,SOLUSDT","JOEUSDT,GMXUSDT,LUNA2USDT,ENSUSDT,APTUSDT,BLZU...","KLAYUSDT,ZECUSDT,DOTUSDT,KAVAUSDT,COMPUSDT,GAL...","EGLDUSDT,THETAUSDT,MASKUSDT,1INCHUSDT,BTCUSDT,...","KNCUSDT,STORJUSDT,LINAUSDT,MTLUSDT,EOSUSDT,PEO..."
