In [1]:
import argparse
from datetime import timedelta
from backtest import backtest
from data import get_all_price
import pandas as pd
from util import get_valid_coins
from comb import find_best
from collections import deque, OrderedDict


coins = ['BTC','ETH','BNB','XRP','DOT','UNI','BCH','LTC','SOL','LINK','MATIC','XLM','ETC','THETA']
data = get_all_price(coins, '1d')
valid_coins = get_valid_coins(data, coins)
print(f"valid coins:{valid_coins}")


get_all_prices create batch tasks: [['BTC', 'ETH', 'BNB', 'XRP'], ['DOT', 'UNI', 'BCH', 'LTC'], ['SOL', 'LINK', 'MATIC'], ['XLM', 'ETC', 'THETA']]
invalid_coins: {'UNI', 'SOL', 'DOT'}, data size not valid
valid coins:['BTC', 'ETH', 'BNB', 'XRP', 'BCH', 'LTC', 'LINK', 'MATIC', 'XLM', 'ETC', 'THETA']


In [2]:

def split_data(data, coins, freq='30D'):
    df = data[coins[0]]
    dates = pd.date_range(df.iloc[0].opentime, df.iloc[-1].opentime, freq=freq).to_pydatetime()
    splits = []
    for x, y in zip(dates, dates[1:]):
        split = {}
        for coin in coins:
            df = data[coin]
            split[coin] = df[(df['opentime'] >=x) & (df['opentime']<y)]
        splits.append(split)
    return splits

def split_data_rotate(data, coins, freq='30D', rotate_days=180):
    df = data[coins[0]]
    dates = pd.date_range(df.iloc[0].opentime, df.iloc[-1].opentime, freq=freq).to_pydatetime()
    splits = []
    for x, y in zip(dates, dates[1:]):
        split = {}
        for coin in coins:
            df = data[coin]
            split[coin] = df[(df['opentime'] >=x) & (df['opentime']< x+timedelta(days=rotate_days))]
        splits.append(split)
    return splits


def stage1(data, coins, rotate_days=180):
    print("split data ...")
    batches = split_data_rotate(data, coins, rotate_days=rotate_days)
    best_basket = OrderedDict()
    print("============ stage1: find best basket for each batch =====")
    for i, item in enumerate(batches):
        start = item[coins[0]].iloc[0].opentime
        end = item[coins[0]].iloc[-1].opentime
        result = find_best(item, coins)
        key = list(result.keys())[0]
        best_basket[end] = key
        print(f"batch {i}: {start}, {end}, best:{key}")
    return best_basket

best_basket = stage1(data, valid_coins)

split data ...
batch 0: 2020-03-23 00:00:00, 2020-09-18 00:00:00, best:ETH;BCH
batch 1: 2020-04-22 00:00:00, 2020-10-18 00:00:00, best:THETA;BCH
batch 2: 2020-05-22 00:00:00, 2020-11-17 00:00:00, best:BTC;ETC
batch 3: 2020-06-21 00:00:00, 2020-12-17 00:00:00, best:BTC;ETC
batch 4: 2020-07-21 00:00:00, 2021-01-16 00:00:00, best:BTC;ETC
batch 5: 2020-08-20 00:00:00, 2021-02-15 00:00:00, best:THETA;LINK
batch 6: 2020-09-19 00:00:00, 2021-03-17 00:00:00, best:THETA;ETC
batch 7: 2020-10-19 00:00:00, 2021-04-16 00:00:00, best:THETA;BCH
batch 8: 2020-11-18 00:00:00, 2021-05-16 00:00:00, best:MATIC;LINK
batch 9: 2020-12-18 00:00:00, 2021-06-15 00:00:00, best:MATIC;XRP
batch 10: 2021-01-17 00:00:00, 2021-07-15 00:00:00, best:MATIC;LINK
batch 11: 2021-02-16 00:00:00, 2021-08-04 00:00:00, best:MATIC;XLM
batch 12: 2021-03-18 00:00:00, 2021-08-04 00:00:00, best:ETC;BTC
batch 13: 2021-04-17 00:00:00, 2021-08-04 00:00:00, best:ETH;LTC
batch 14: 2021-05-17 00:00:00, 2021-08-04 00:00:00, best:ETH;XLM
b

In [3]:
def get_basket(baskets, start):
    # 基于当前运行时间段去找basket， basket的结束时间需要和stage2 的start时间在一定范围内
    for end, item in baskets.items():
        if start >= end and start <= end + timedelta(days=10):
            return item
    return 'BTC;BCH' 

def stage2(data, coins, best_basket, size=5):
    print("split data ...")
    batches = split_data(data, coins)
    print("============ stage2: backtest with best baskets ===========")
    value = 1
    long_queue = deque(maxlen=size)
    short_queue = deque(maxlen=size)
    for i, item in enumerate(batches):
        start = item[coins[0]].iloc[0].opentime
        end = item[coins[0]].iloc[-1].opentime
        basket = get_basket(best_basket, start)
        print(f"batch {i}:{start},{end}, basket:({basket})")
        mcoins = basket.split(";")
        long_coin = mcoins[0]
        short_coin = mcoins[1]
        if long_coin not in set(long_queue):
            long_queue.appendleft(long_coin)
        if short_coin not in set(short_queue):
            short_queue.appendleft(short_coin)
        k, sharp, calmar, mdd, cagr, last = backtest(item, list(long_queue), list(short_queue), init_value=value)
        value = last
        print(f"{list(long_queue)};{list(short_queue)}, value:{value}")
    return 

stage2(data, valid_coins, best_basket)

split data ...
batch 0:2020-03-23 00:00:00,2020-04-21 00:00:00, basket:(BTC;BCH)
['BTC'];['BCH'], value:1.0330772920845144
batch 1:2020-04-22 00:00:00,2020-05-21 00:00:00, basket:(BTC;BCH)
['BTC'];['BCH'], value:1.1844389742898431
batch 2:2020-05-22 00:00:00,2020-06-20 00:00:00, basket:(BTC;BCH)
['BTC'];['BCH'], value:1.2013609776787122
batch 3:2020-06-21 00:00:00,2020-07-20 00:00:00, basket:(BTC;BCH)
['BTC'];['BCH'], value:1.2123983326343677
batch 4:2020-07-21 00:00:00,2020-08-19 00:00:00, basket:(BTC;BCH)
['BTC'];['BCH'], value:1.2008560302421047
batch 5:2020-08-20 00:00:00,2020-09-18 00:00:00, basket:(BTC;BCH)
['BTC'];['BCH'], value:1.2779016641009235
batch 6:2020-09-19 00:00:00,2020-10-18 00:00:00, basket:(ETH;BCH)
['ETH', 'BTC'];['BCH'], value:1.2398770455590657
batch 7:2020-10-19 00:00:00,2020-11-17 00:00:00, basket:(THETA;BCH)
['THETA', 'ETH', 'BTC'];['BCH'], value:1.3936113984648835
batch 8:2020-11-18 00:00:00,2020-12-17 00:00:00, basket:(BTC;ETC)
['THETA', 'ETH', 'BTC'];['ETC'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._set_item(key, value)
