In [1]:
import argparse
from datetime import timedelta
# from backtest import backtest
from data.data import get_all_price
import pandas as pd
from comb import find_best
from collections import deque, OrderedDict


coins = ['BTC','ETH','BNB','XRP','DOT','UNI','BCH','LTC','SOL','LINK','MATIC','XLM','ETC','THETA']
coins, data = get_all_price(coins, '1d', match=False)

get_all_prices create batch tasks: [['BTC', 'ETH', 'BNB', 'XRP'], ['DOT', 'UNI', 'BCH', 'LTC'], ['SOL', 'LINK', 'MATIC'], ['XLM', 'ETC', 'THETA']]
invalid_coins: {'UNI', 'SOL', 'DOT'}, data size not valid


In [2]:

def split_data(data, coins, freq='30D'):
    df = data[coins[0]]
    dates = pd.date_range(df.iloc[0].opentime, df.iloc[-1].opentime, freq=freq).to_pydatetime()
    splits = []
    for x, y in zip(dates, dates[1:]):
        split = {}
        for coin in coins:
            df = data[coin]
            split[coin] = df[(df['opentime'] >=x) & (df['opentime']<y)]
        splits.append(split)
    return splits

def split_data_rotate(data, coins, freq='30D', rotate_days=180):
    # rotate_days 整个回看周期是多长时间
    # freq 每次迭代window move 的时间
    df = data[coins[0]]
    dates = pd.date_range(df.iloc[0].opentime, df.iloc[-1].opentime, freq=freq).to_pydatetime()
    splits = []
    for x, y in zip(dates, dates[1:]):
        split = {}
        for coin in coins:
            df = data[coin]
            split[coin] = df[(df['opentime'] >=x) & (df['opentime']< x+timedelta(days=rotate_days))]
        splits.append(split)
    return splits


def stage1(data, coins, freq_days=7, rotate_days=30):
    print("split data ...")
    batches = split_data_rotate(data, coins, freq=f'{freq_days}D',rotate_days=rotate_days)
    best_basket = OrderedDict()
    print("============ stage1: find best basket for each batch =====")
    for i, item in enumerate(batches):
        start = item[coins[0]].iloc[0].opentime
        end = item[coins[0]].iloc[-1].opentime
        result = find_best(item, coins)
        key = list(result.keys())[0]
        best_basket[end] = key
        print(f"batch {i}: {start}, {end}, best:{key}")
    return best_basket

# best_basket = stage1(data, coins, freq_days=7, rotate_days=7)

split data ...
batch 0: 2020-03-29 00:00:00, 2020-04-04 00:00:00, best:BNB;XLM
batch 1: 2020-04-05 00:00:00, 2020-04-11 00:00:00, best:LINK;THETA
batch 2: 2020-04-12 00:00:00, 2020-04-18 00:00:00, best:MATIC;XLM
batch 3: 2020-04-19 00:00:00, 2020-04-25 00:00:00, best:THETA;ETC
batch 4: 2020-04-26 00:00:00, 2020-05-02 00:00:00, best:LTC;LINK
batch 5: 2020-05-03 00:00:00, 2020-05-09 00:00:00, best:BCH;LTC
batch 6: 2020-05-10 00:00:00, 2020-05-16 00:00:00, best:MATIC;LINK
batch 7: 2020-05-17 00:00:00, 2020-05-23 00:00:00, best:THETA;XLM
batch 8: 2020-05-24 00:00:00, 2020-05-30 00:00:00, best:BCH;XRP
batch 9: 2020-05-31 00:00:00, 2020-06-06 00:00:00, best:BCH;ETC
batch 10: 2020-06-07 00:00:00, 2020-06-13 00:00:00, best:ETH;XRP
batch 11: 2020-06-14 00:00:00, 2020-06-20 00:00:00, best:LINK;BCH
batch 12: 2020-06-21 00:00:00, 2020-06-27 00:00:00, best:LINK;MATIC
batch 13: 2020-06-28 00:00:00, 2020-07-04 00:00:00, best:XLM;BTC
batch 14: 2020-07-05 00:00:00, 2020-07-11 00:00:00, best:BNB;BTC
bat

In [11]:
import pandas as pd
from util import key, MDD, CAGR, percentf, days

def calc_pos(data, coin, init_value, is_long=True, col='close'):
    df = data[coin]
    if is_long:
        df['norm_return'] = df[col]/df.iloc[0][col]
    else:
        df['norm_return'] = 2 - df[col]/df.iloc[0][col]
    df['position'] = df['norm_return'] * init_value
    return df['position']

def backtest(data, long_coins, short_coins, allocate=0.5, 
        alloc_long=None, alloc_short=None, init_value=1, timeframe='1d', col='close'):
    all_pos = {}
    index = data[long_coins[0]].opentime # time
    not_used_money = (1 - sum(alloc_long) - sum(alloc_short)) * init_value
    for coin, alloc in zip(long_coins, alloc_long):
        all_pos[coin] = calc_pos(data, coin, init_value * alloc, True, col)
    for coin, alloc in zip(short_coins, alloc_short):
        all_pos[coin] = calc_pos(data, coin, init_value * alloc, False, col)
    value = pd.DataFrame(all_pos)
    value.index = index
    value['total'] = value.sum(axis=1) + not_used_money
    value['daily_return'] = value['total'].pct_change(1)
    sharp_days_multiple = {'1d':1,'4h':6, '1h':24}.get(timeframe, 1)
    sharp = ((365*sharp_days_multiple)**0.5)*value['daily_return'].mean() / value['daily_return'].std()
    mdd = MDD(value.total)
    periods = days(data[long_coins[0]])/365 
    first = value.iloc[0]['total']
    last = value.iloc[-1]['total']
    cagr = CAGR(first, last, periods)
    calmar = abs(cagr/mdd)
    values = {}
    for coin in long_coins:
        values[coin] = value.iloc[-1][coin]
    for coin in short_coins:
        values[coin] = value.iloc[-1][coin]
    return values

def get_basket(baskets, start):
    # 基于当前运行时间段去找basket， basket的结束时间需要和stage2 的start时间在一定范围内
    for end, item in baskets.items():
        if start >= end and start <= end + timedelta(days=3):
            return item
    return None

def stage2(data, coins, best_basket, freq_days=30):
    print("split data ...")
    batches = split_data(data, coins, freq=f'{freq_days}D')
    print("============ stage2: backtest with best baskets ===========")
    value = 1
    allocs1 = [0.5]
    allocs2 = [0.5]
    for i, item in enumerate(batches):
        start = item[coins[0]].iloc[0].opentime
        end = item[coins[0]].iloc[-1].opentime
        long_queue = ['BTC']
        short_queue = ['BCH']
#         basket = get_basket(best_basket, start)
        basket = None
        print(f"batch {i}:{start},{end}, basket:({basket})")
#         if basket:        
#             mcoins = basket.split(";")
#             long_coin = mcoins[0]
#             short_coin = mcoins[1]
#             if long_coin not in set(long_queue):
#                 long_queue.append(long_coin)
#             if short_coin not in set(short_queue):
#                 short_queue.append(short_coin)
        
        pnls = backtest(item, long_queue, short_queue, alloc_long=allocs1, alloc_short=allocs2, init_value=value)
        total = sum(pnls.values())
        allocs1 = [pnls[c]/total for c in long_queue]
        allocs2 = [pnls[c]/total for c in short_queue]
        value = total
        print(f"{long_queue};{short_queue}, value:{value}")
    return 

stage2(data, coins, best_basket, freq_days=7)

split data ...
batch 0:2020-03-29 00:00:00,2020-04-04 00:00:00, basket:(None)
['BTC'];['BCH'], value:1.0039252191000294
batch 1:2020-04-05 00:00:00,2020-04-11 00:00:00, basket:(None)
['BTC'];['BCH'], value:1.0093269014313153
batch 2:2020-04-12 00:00:00,2020-04-18 00:00:00, basket:(None)
['BTC'];['BCH'], value:1.0199952637294458
batch 3:2020-04-19 00:00:00,2020-04-25 00:00:00, basket:(None)
['BTC'];['BCH'], value:1.0447513745505377
batch 4:2020-04-26 00:00:00,2020-05-02 00:00:00, basket:(None)
['BTC'];['BCH'], value:1.1313938279300126
batch 5:2020-05-03 00:00:00,2020-05-09 00:00:00, basket:(None)
['BTC'];['BCH'], value:1.1673972499851162
batch 6:2020-05-10 00:00:00,2020-05-16 00:00:00, basket:(None)
['BTC'];['BCH'], value:1.223763990071506
batch 7:2020-05-17 00:00:00,2020-05-23 00:00:00, basket:(None)
['BTC'];['BCH'], value:1.187277961716215
batch 8:2020-05-24 00:00:00,2020-05-30 00:00:00, basket:(None)
['BTC'];['BCH'], value:1.2355016707201165
batch 9:2020-05-31 00:00:00,2020-06-06 00:

  calmar = abs(cagr/mdd)



batch 49:2021-03-07 00:00:00,2021-03-13 00:00:00, basket:(None)
['BTC'];['BCH'], value:5.7867352625627975
batch 50:2021-03-14 00:00:00,2021-03-20 00:00:00, basket:(None)
['BTC'];['BCH'], value:5.7045773399984405
batch 51:2021-03-21 00:00:00,2021-03-27 00:00:00, basket:(None)
['BTC'];['BCH'], value:5.556670484480952
batch 52:2021-03-28 00:00:00,2021-04-03 00:00:00, basket:(None)
['BTC'];['BCH'], value:5.67732703652644
batch 53:2021-04-04 00:00:00,2021-04-10 00:00:00, basket:(None)
['BTC'];['BCH'], value:5.815434637842394
batch 54:2021-04-11 00:00:00,2021-04-17 00:00:00, basket:(None)
['BTC'];['BCH'], value:5.793088091212709
batch 55:2021-04-18 00:00:00,2021-04-24 00:00:00, basket:(None)
['BTC'];['BCH'], value:5.1727545812175215
batch 56:2021-04-25 00:00:00,2021-05-01 00:00:00, basket:(None)
['BTC'];['BCH'], value:6.075823654719599
batch 57:2021-05-02 00:00:00,2021-05-08 00:00:00, basket:(None)
['BTC'];['BCH'], value:6.309689761951782
batch 58:2021-05-09 00:00:00,2021-05-15 00:00:00, ba