In [154]:
import pandas as pd
import math
import numpy as np
from tqdm import tqdm
from collections import defaultdict

def triple_barrier(price, ub, lb, max_period):
    """Label each bar of ``price`` with the triple-barrier method.

    For every bar, the following ``max_period`` observations (the bar itself
    included) are expressed relative to the bar's own price.  The position is
    considered closed at the first observation whose ratio is above ``ub`` or
    below ``lb``; when neither barrier is touched it is closed on the last
    observation of the window.

    Parameters
    ----------
    price : pandas.Series
        Price series; its index is reused for the result.
    ub : float
        Upper barrier as a ratio of the entry price (e.g. ``1.05``).
    lb : float
        Lower barrier as a ratio of the entry price (e.g. ``0.95``).
    max_period : int
        Window length in observations, counting the entry bar. Must be >= 1.

    Returns
    -------
    pandas.DataFrame
        Indexed like ``price`` with the columns

        * ``triple_barrier_profit``   -- exit price divided by entry price,
        * ``triple_barrier_sell_time`` -- index label of the exit observation,
        * ``triple_barrier_signal``   -- ``2`` if the profit is above ``ub``,
          ``0`` if it is below ``lb``, ``1`` otherwise.

        Rows whose outcome cannot be determined (the trailing
        ``max_period - 1`` bars, or windows containing a missing price) are
        NaN/NaT in all three columns.

    Raises
    ------
    ValueError
        If ``max_period`` is smaller than 1.
    """
    if max_period < 1:
        raise ValueError("max_period must be a positive integer")

    def first_exit_offset(window):
        # Offset (0-based, inside the window) of the first barrier breach,
        # or the last offset of the window when no barrier is touched.
        ratio = window / window[0]
        breached = np.flatnonzero((ratio > ub) | (ratio < lb))
        return float(breached[0]) if breached.size else float(max_period - 1)

    # One rolling pass is enough: the exit price follows from the exit offset.
    # rolling() aligns results on the window's last bar, so shift them back to
    # the window's first (entry) bar.
    offsets = np.asarray(
        price.rolling(max_period)
        .apply(first_exit_offset, raw=True)
        .shift(-max_period + 1),
        dtype=float,
    )

    values = np.asarray(price, dtype=float)
    index = price.index

    # Positions whose window is complete and free of missing prices.
    resolved = np.flatnonzero(~np.isnan(offsets))
    exit_pos = resolved + offsets[resolved].astype(int)

    profit = np.full(values.shape, np.nan)
    profit[resolved] = values[exit_pos] / values[resolved]
    p = pd.Series(profit, index=index)

    # NaT is only a placeholder for unresolved rows; they are dropped here and
    # reappear as missing values when the DataFrame aligns the columns.
    t = pd.Series(
        [index[pos + int(off)] if not np.isnan(off) else np.datetime64('NaT')
         for pos, off in enumerate(offsets)],
        index=index,
    ).dropna()

    # Float dtype from the start so that NaN can be stored without an upcast.
    signal = pd.Series(1.0, index=index)
    signal[p > ub] = 2.0
    signal[p < lb] = 0.0
    # No label where the outcome is unknown (instead of a spurious neutral 1).
    signal[p.isna()] = np.nan

    ret = pd.DataFrame({'triple_barrier_profit': p,
                        'triple_barrier_sell_time': t,
                        'triple_barrier_signal': signal})

    return ret
    

In [155]:
close_df = pd.read_csv('./data/indicator_data/close.csv')

with open('codes.txt', 'r') as f:
    codes = f.read().split()

# Search grid for the symmetric barrier half-width: the barriers are
# (1 + width, 1 - width), starting at INITIAL_WIDTH and widened by WIDTH_STEP.
HORIZONS = [15, 30, 90, 180]
INITIAL_WIDTH = 0.01
WIDTH_STEP = 0.001
# Widest half-width tried is 0.99, which keeps the lower barrier positive and
# guarantees the search terminates even if the classes never balance.
MAX_STEPS = 980


def count_labels(close, up, down, horizon, label_name):
    """Return the value counts of the triple-barrier signal for one setting.

    Rows where either the close or the label is missing are dropped before
    counting, so only fully labelled bars contribute.
    """
    labels = triple_barrier(close, up, down, horizon + 1)['triple_barrier_signal'].rename(label_name)
    labelled = pd.concat([close, labels], axis=1).dropna()
    return labelled[label_name].value_counts()


code_para_map = defaultdict(lambda: defaultdict(list))

for code in tqdm(codes):
    close_serie = close_df[code].rename('close')

    for d in HORIZONS:
        label_name = str(d) + 'd'

        for step in range(MAX_STEPS + 1):
            # Derive the width from the step count instead of accumulating
            # += 0.001, and round to the step precision, so the thresholds
            # evaluated here are exactly the ones that get stored.
            width = INITIAL_WIDTH + step * WIDTH_STEP
            up_threshold = round(1 + width, 3)
            down_threshold = round(1 - width, 3)

            counts = count_labels(close_serie, up_threshold, down_threshold, d, label_name)

            # Stop at the narrowest barriers for which all three classes are
            # present and class 2 is no more frequent than class 1.
            if len(counts) >= 3 and counts[2] <= counts[1]:
                # Plain assignment: a code listed twice must not end up with
                # two threshold pairs appended to the same list.
                code_para_map[code][label_name] = [up_threshold, down_threshold]
                break
        else:
            # tqdm.write keeps the progress bar intact while reporting.
            tqdm.write('no balanced thresholds found for code %s, horizon %s; skipped'
                       % (code, label_name))
        

100%|██████████| 200/200 [2:03:52<00:00, 37.16s/it]  


In [10]:
# import json
# with open('trend_params.json') as jsonfile:
#     code_para_map = json.load(jsonfile)


# Round both stored thresholds of every (code, horizon) entry to 3 decimals,
# updating the lists in place.
for horizon_params in code_para_map.values():
    for thresholds in horizon_params.values():
        upper, lower = thresholds[0], thresholds[1]
        thresholds[0] = round(upper, 3)
        thresholds[1] = round(lower, 3)


In [11]:
# Display the collected thresholds:
# stock code -> horizon label (e.g. '15d') -> [upper barrier, lower barrier].
code_para_map

{'2330': {'15d': [1.045, 0.955],
  '30d': [1.069, 0.931],
  '90d': [1.138, 0.862],
  '180d': [1.217, 0.783]},
 '2317': {'15d': [1.047, 0.953],
  '30d': [1.072, 0.928],
  '90d': [1.138, 0.862],
  '180d': [1.192, 0.808]},
 '2454': {'15d': [1.065, 0.935],
  '30d': [1.098, 0.902],
  '90d': [1.2, 0.8],
  '180d': [1.278, 0.722]},
 '2412': {'15d': [1.018, 0.982],
  '30d': [1.028, 0.972],
  '90d': [1.051, 0.949],
  '180d': [1.06, 0.94]},
 '2881': {'15d': [1.042, 0.958],
  '30d': [1.064, 0.936],
  '90d': [1.115, 0.885],
  '180d': [1.193, 0.807]},
 '6505': {'15d': [1.043, 0.957],
  '30d': [1.063, 0.937],
  '90d': [1.112, 0.888],
  '180d': [1.175, 0.825]},
 '2882': {'15d': [1.041, 0.959],
  '30d': [1.064, 0.936],
  '90d': [1.126, 0.874],
  '180d': [1.179, 0.821]},
 '1303': {'15d': [1.041, 0.959],
  '30d': [1.064, 0.936],
  '90d': [1.133, 0.867],
  '180d': [1.179, 0.821]},
 '2603': {'15d': [1.061, 0.939],
  '30d': [1.098, 0.902],
  '90d': [1.181, 0.819],
  '180d': [1.285, 0.715]},
 '2308': {'15d':

In [12]:
import json
    
# Write code_para_map to trend_params.json as JSON text.
with open("trend_params.json", "w") as params_file:
    params_file.write(json.dumps(code_para_map))