In [62]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
from talib.abstract import *
from sklearn.preprocessing import MinMaxScaler
import joblib
from finlab.data import Data

In [63]:
# Pull every raw table used by this notebook from the finlab database.
database = Data()

# Daily close / open / high / low prices.
close, open_, high, low = (
    database.get(key) for key in ("收盤價", "開盤價", "最高價", "最低價")
)
# Traded shares and traded amount.
vol, accu = (database.get(key) for key in ("成交股數", "成交金額"))
# Monthly revenue and its change versus the previous month (%).
rev, com_rev = (database.get(key) for key in ("當月營收", "上月比較增減(%)"))
# Dividend yield (%) and price-to-book ratio.
d_yield, pb = (database.get(key) for key in ("殖利率(%)", "股價淨值比"))

In [64]:
# NOTE(review): `date` is not referenced by any other code visible in this notebook.
date = '2022-04-28'
start_date = '2020-01-01'

# Keep only the rows dated strictly after start_date, for every raw table.
(close, open_, high, low, vol,
 accu, rev, com_rev, d_yield, pb) = (
    table[table.index > start_date]
    for table in (close, open_, high, low, vol,
                  accu, rev, com_rev, d_yield, pb)
)

In [65]:
def relative_strength_index(df, n):
    """Directional-movement oscillator in the range 0..100, named ``RSI_<n>``.

    Adapted from
    https://github.com/Crypto-toolbox/pandas-technical-indicators/blob/master/technical_indicators.py

    Despite the name, this is not the classic close-based RSI: it is built
    from bar-to-bar moves of the ``high`` and ``low`` columns.  For each bar
    the up-move is ``high[t] - high[t-1]`` and the down-move is
    ``low[t-1] - low[t]``; only the larger of the two is kept, and only when
    it is positive.  Both series are smoothed with an EWM of span ``n`` and
    the result is ``round(100 * up / (up + down))``.

    Rows are processed by position, so ``df`` may carry any index (the
    original loop required a 0..n-1 integer index) and may be empty.

    :param df: pandas.DataFrame with ``high`` and ``low`` columns
    :param n: EWM span; also the minimum number of rows before a value is
        produced
    :return: pandas.Series on a fresh RangeIndex; NaN for the first ``n - 1``
        rows and wherever both smoothed moves are zero
    """
    high = df['high'].reset_index(drop=True)
    low = df['low'].reset_index(drop=True)

    up_move = high.diff()
    down_move = low.shift(1) - low

    # The first row (and any NaN comparison) evaluates to False and therefore
    # contributes 0, exactly like the seed value of the original loop.
    up = up_move.where((up_move > down_move) & (up_move > 0), 0.0)
    down = down_move.where((down_move > up_move) & (down_move > 0), 0.0)

    pos_di = up.ewm(span=n, min_periods=n).mean()
    neg_di = down.ewm(span=n, min_periods=n).mean()

    rsi = (pos_di * 100. / (pos_di + neg_di)).round()
    rsi.name = 'RSI_' + str(n)
    return rsi

def get_rsi(data, window=14):
    """Run relative_strength_index on ``data`` and re-attach ``data``'s index.

    The frame is copied and its index reset before the indicator is computed;
    the resulting values are then returned as a Series aligned by position to
    ``data.index``.
    """
    positional = data.copy(deep=True).reset_index()
    values = relative_strength_index(positional, window).values
    return pd.Series(data=values, index=data.index)

def bbands(close_prices, window, no_of_stdev):
    """Exponentially weighted Bollinger-style bands.

    The centre line is the EWM mean (span ``window``) of ``close_prices`` and
    the bands sit ``no_of_stdev`` EWM standard deviations above and below it.

    Returns ``(centre, upper_band, lower_band)``.
    """
    weighted = close_prices.ewm(span=window)
    centre = weighted.mean()
    half_width = weighted.std() * no_of_stdev
    return centre, centre + half_width, centre - half_width

def MA(close, n):
    """Simple moving average of ``close`` over ``n`` rows (NaN until the window is full).

    NOTE(review): the notebook also does ``from talib.abstract import *``; if
    that exports a name ``MA``, this definition shadows it — confirm intended.
    """
    window = close.rolling(n)
    return window.mean()

def bias(close, n):
    """Ratio of ``close`` to its own ``n``-row rolling mean.

    The rolling mean uses ``min_periods=1``, so a value is produced from the
    very first row.
    """
    baseline = close.rolling(window=n, min_periods=1).mean()
    return close / baseline

def acc(close, n):
    """Price ``n`` rows ago relative to the average of the prices ``2n`` rows ago and now.

    Computed as ``close[t-n] / (close[t-2n] + close[t]) * 2``; NaN for the
    first ``2n`` rows.
    """
    midpoint = close.shift(n)
    endpoints = close.shift(2 * n) + close
    ratio = midpoint / endpoints
    return ratio * 2

def mom(rev, n):
    """Row-over-row growth ratio of ``rev``, delayed by ``n`` rows."""
    growth = rev / rev.shift(1)
    return growth.shift(n)

# Build the benchmark feature table from the '0050' columns of the price tables.
sid = '0050'
benchmark = pd.DataFrame({
    'close': close[sid],
    'high': high[sid],
    'low': low[sid],
    'volume': vol[sid],
})

# Volume-based indicators (talib.abstract functions).
benchmark['b_OBV'] = OBV(benchmark.close, benchmark.volume)
benchmark['b_AD'] = AD(benchmark.high, benchmark.low, benchmark.close, benchmark.volume)
benchmark['b_ADOSC'] = ADOSC(benchmark.high, benchmark.low, benchmark.close, benchmark.volume, fastperiod=3, slowperiod=10)

# Moving averages, bias ratios and acceleration ratios over several windows.
for _win in (5, 20, 60):
    benchmark[f'b_MA{_win}'] = MA(benchmark['close'], _win)

for _win in (5, 10, 20, 60):
    benchmark[f'b_bias{_win}'] = bias(benchmark['close'], _win)

for _win in (5, 10, 20, 60):
    benchmark[f'b_acc{_win}'] = acc(benchmark['close'], _win)

# Log returns and their rolling standard deviation over window_stdev rows.
window_stdev = 50
benchmark['b_log_ret'] = np.log(benchmark['close']).diff()
_log_ret_window = benchmark['b_log_ret'].rolling(window=window_stdev, min_periods=window_stdev, center=False)
benchmark['b_volatility'] = _log_ret_window.std()

In [66]:
# Show the last benchmark rows to eyeball the derived b_* columns.
benchmark.tail()

Unnamed: 0_level_0,close,high,low,volume,b_OBV,b_AD,b_ADOSC,b_MA5,b_MA20,b_MA60,b_bias5,b_bias10,b_bias20,b_bias60,b_acc5,b_acc10,b_acc20,b_acc60,b_log_ret,b_volatility
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2022-04-25,129.25,130.05,128.65,35058202.0,-175240740.0,215913100.0,-3448450.0,131.65,134.33,138.224167,0.98177,0.976061,0.962183,0.935075,1.00535,0.991207,1.016575,1.143932,-0.018019,0.012741
2022-04-26,129.25,129.8,128.85,10361395.0,-175240740.0,214277100.0,-3611976.0,131.1,133.8825,137.873333,0.985889,0.9785,0.965399,0.937455,1.008403,0.991213,1.031151,1.131564,0.0,0.012595
2022-04-27,126.55,127.0,126.05,31244250.0,-206484990.0,215921500.0,-2827283.0,129.88,133.375,137.5075,0.974361,0.964264,0.948828,0.920313,1.014144,1.026021,1.031503,1.124574,-0.021111,0.01287
2022-04-28,127.1,127.4,125.95,12311754.0,-194173236.0,223138800.0,47146.84,128.75,132.8875,137.140833,0.987184,0.973984,0.956448,0.926785,1.014714,1.019511,1.022986,1.126771,0.004337,0.01277
2022-04-29,128.75,129.5,128.25,12271215.0,-181902021.0,220684500.0,437868.3,128.18,132.3975,136.909167,1.004447,0.989281,0.97245,0.940405,1.008429,0.989525,1.032414,1.07296,0.012898,0.012945


In [67]:
# Stock id whose features and labels are built in this cell.
sid = '2330'
# NOTE: this value is overwritten with 10 further down in this cell, before
# get_3_barriers() is called, so 5 is never actually used.
t_final = 5
def get_Daily_Volatility(close,span0=20):
    """EWM standard deviation (span ``span0``) of simple percentage returns.

    Leading rows for which no standard deviation can be computed are dropped,
    so the result is shorter than ``close``.
    """
    simple_returns = close.pct_change()
    return simple_returns.ewm(span=span0).std().dropna()

def get_atr(stock, win=14):
    """Average true range of ``stock`` over ``win`` rows.

    The previous version read the notebook-global ``data`` instead of its
    ``stock`` argument, wrapped Rolling objects in ``pd.Series`` and wrote its
    result outside the loop, so it could not produce a usable series.  It also
    compared high/low with the same-bar close, which makes the maximum always
    equal to ``high - low``.

    True range per row is the largest of ``high - low``,
    ``|high - previous close|`` and ``|low - previous close|`` (the first row,
    which has no previous close, falls back to ``high - low``).  The ATR is the
    plain rolling mean of the true range, matching the original
    ``sum / win`` intent.

    :param stock: pandas.DataFrame with ``high``, ``low`` and ``close`` columns
    :param win: window length in rows
    :return: pandas.Series aligned to ``stock.index``; NaN for the first
        ``win - 1`` rows
    """
    prev_close = stock['close'].shift(1)
    true_range = pd.concat(
        [
            stock['high'] - stock['low'],
            (stock['high'] - prev_close).abs(),
            (stock['low'] - prev_close).abs(),
        ],
        axis=1,
    ).max(axis=1)  # skipna=True, so a missing previous close is ignored
    return true_range.rolling(win, min_periods=win).mean()

def get_3_barriers(daily_volatility=None, prices=None, t_final=None,
                   upper_lower_multipliers=None):
    """Build the triple-barrier table (one row per entry in ``daily_volatility``).

    Every argument is optional; an argument left as ``None`` is read from the
    notebook-global variable of the same name, so existing ``get_3_barriers()``
    calls keep working.

    :param daily_volatility: pandas.Series of per-row volatility; its index
        defines the rows of the result (expected sorted and unique)
    :param prices: pandas.Series of prices, looked up by the same index labels
    :param t_final: holding horizon in rows; 0 disables the vertical barrier
    :param upper_lower_multipliers: ``[top, bottom]`` volatility multipliers; a
        value <= 0 disables that barrier (stored as NaN)
    :return: object-dtype pandas.DataFrame with columns ``days_passed``,
        ``price``, ``vert_barrier``, ``top_barrier``, ``bottom_barrier``,
        ``long_ret`` and ``short_ret`` (the last two are left empty here)

    Note: ``days_passed`` is the 1-based row count, and the vertical barrier is
    the index label at position ``days_passed + t_final``, i.e. ``t_final + 1``
    rows after the current one.  This matches the original behaviour.
    """
    namespace = globals()
    if daily_volatility is None:
        daily_volatility = namespace['daily_volatility']
    if prices is None:
        prices = namespace['prices']
    if t_final is None:
        t_final = namespace['t_final']
    if upper_lower_multipliers is None:
        upper_lower_multipliers = namespace['upper_lower_multipliers']

    # create a container
    barriers = pd.DataFrame(columns=['days_passed',
            'price', 'vert_barrier',
            'top_barrier', 'bottom_barrier', 'long_ret', 'short_ret'],
            index=daily_volatility.index)
    n_rows = len(daily_volatility.index)
    top_mult, bottom_mult = upper_lower_multipliers[0], upper_lower_multipliers[1]

    # Series.items() replaces iteritems(), which was removed in pandas 2.0;
    # enumerate() replaces re-slicing the series on every row to count rows.
    for days_passed, (day, vol) in enumerate(daily_volatility.items(), start=1):
        # set the vertical barrier
        if days_passed + t_final < n_rows and t_final != 0:
            vert_barrier = daily_volatility.index[days_passed + t_final]
        else:
            vert_barrier = np.nan

        price = prices.loc[day]
        # A disabled barrier is a scalar NaN (previously a whole Series, which
        # cannot be stored in a single cell).
        if top_mult > 0:
            top_barrier = price + price * top_mult * vol
        else:
            top_barrier = np.nan
        if bottom_mult > 0:
            bottom_barrier = price - price * bottom_mult * vol
        else:
            bottom_barrier = np.nan

        barriers.loc[day, ['days_passed', 'price', 'vert_barrier', 'top_barrier', 'bottom_barrier']] = \
            days_passed, price, vert_barrier, top_barrier, bottom_barrier
    return barriers

def get_labels(barriers=None, cost=0.001):
    """Fill ``out``, ``long_ret`` and ``short_ret`` of the barrier table in place.

    For every row that has a vertical barrier, the prices from that row up to
    and including the vertical-barrier row are scanned:

    * any price >= ``top_barrier``    -> ``out = 1``, the move is capped at the top barrier
    * any price <= ``bottom_barrier`` -> ``out = -1``, the move is capped at the bottom barrier
    * otherwise                       -> ``out = 0``, the move is start price to vertical-barrier price

    ``long_ret`` is the move as a fraction of the start price minus ``cost``;
    ``short_ret`` is the negated move minus ``cost``.  Rows without a vertical
    barrier are left untouched.  The top barrier takes precedence when both
    are touched inside the window.

    Cells are written with ``.iat`` instead of the chained
    ``barriers['out'][i] = ...`` form, which does not write through to the
    frame when pandas Copy-on-Write is enabled and relied on deprecated
    positional ``Series[int]`` access.

    :param barriers: table produced by get_3_barriers; defaults to the
        notebook-global ``barriers``
    :param cost: flat amount subtracted from every return (0.001 in the
        original code)
    :return: None (``barriers`` is modified in place)
    """
    if barriers is None:
        barriers = globals()['barriers']

    # Make sure the output columns exist so positional writes below are valid.
    for column in ('out', 'long_ret', 'short_ret'):
        if column not in barriers.columns:
            barriers[column] = None

    out_pos = barriers.columns.get_loc('out')
    long_pos = barriers.columns.get_loc('long_ret')
    short_pos = barriers.columns.get_loc('short_ret')
    price = barriers['price']

    for i in range(len(barriers.index)):
        start = barriers.index[i]
        end = barriers['vert_barrier'].iloc[i]
        if pd.isna(end):
            continue

        # initial and final price of the holding window (label-based lookups)
        price_initial = price.loc[start]
        price_final = price.loc[end]
        top_barrier = barriers['top_barrier'].iloc[i]
        bottom_barrier = barriers['bottom_barrier'].iloc[i]

        window = price.loc[start:end]
        if (window >= top_barrier).any():        # profit taking
            label = 1
            move = (top_barrier - price_initial) / price_initial
        elif (window <= bottom_barrier).any():   # stop loss
            label = -1
            move = -(price_initial - bottom_barrier) / price_initial
        else:                                    # vertical barrier reached
            label = 0
            move = (price_final - price_initial) / price_initial

        barriers.iat[i, out_pos] = label
        barriers.iat[i, long_pos] = move - cost
        barriers.iat[i, short_pos] = -move - cost
    return

def _merge_ffill(frame, table, source_col, target_col):
    """Outer-merge ``table[source_col]`` onto ``frame`` by 'date' and forward-fill it.

    The merged frame is sorted by date before filling so the forward fill
    always runs in chronological order, and the merged column is renamed to
    ``target_col``.  The fill is assigned back to the column instead of using
    ``frame[col].fillna(method='ffill', inplace=True)``, which is deprecated
    and does not modify the frame when pandas Copy-on-Write is enabled.
    """
    merged = pd.merge(frame, table[['date', source_col]], on="date", how='outer')
    merged = merged.sort_values(by=['date']).rename(columns={source_col: target_col})
    merged[target_col] = merged[target_col].ffill()
    return merged


# Daily OHLCV for the selected stock, as a flat frame with a 'date' column;
# rows with any missing value are dropped.
data = pd.DataFrame({'close': close[sid],
                    'open': open_[sid],
                    'high': high[sid],
                    'low': low[sid],
                    'volume': vol[sid]})
data = data.reset_index().dropna(axis=0, how='any')

# Attach each additional table for this stock; its latest known value is
# carried forward onto the following rows.
rev = rev.reset_index()
data = _merge_ffill(data, rev, sid, 'rev')

com_rev = com_rev.reset_index()
data = _merge_ffill(data, com_rev, sid, 'com_rev')

d_yield = d_yield.reset_index()
data = _merge_ffill(data, d_yield, sid, 'd_yield')

pb = pb.reset_index()
data = _merge_ffill(data, pb, sid, 'pb')

# Attach the benchmark features computed earlier and forward-fill them too.
benchmark = benchmark.reset_index()
benchmark_list = ['date', 'b_OBV', 'b_AD', 'b_ADOSC', 'b_MA5', 'b_MA20', 'b_MA60', 'b_bias5', 'b_bias10', 'b_bias20', 'b_bias60'
, 'b_acc5', 'b_acc10', 'b_acc20', 'b_acc60', 'b_volatility']
data = pd.merge(data,benchmark[benchmark_list], on="date", how='outer')
data[benchmark_list] = data[benchmark_list].ffill()

# Back to a date index; drop every row that still has a missing value
# (e.g. dates that exist only in a merged table, or indicator warm-up rows).
data = data.set_index('date').dropna(axis=0, how='any')

# talib
# NOTE(review): index=False leaves the 'date' index out of test.csv — confirm that is intended.
data.to_csv('test.csv', index=False)
data['upperband'], data['middleband'], data['lowerband'] = BBANDS(data.close, 20, 2., 2. ,0)
data['OBV'] = OBV(data.close, data.volume)
data['AD'] = AD(data.high, data.low, data.close, data.volume)
data['ADOSC'] = ADOSC(data.high, data.low, data.close, data.volume, fastperiod=3, slowperiod=10)

data['MA5'] = MA(data['close'], 5)
data['MA60'] = MA(data['close'], 60)

# Bias and acceleration ratios over the same set of windows.
for _win in (5, 10, 20, 60):
    data[f'bias{_win}'] = bias(data['close'], _win)

for _win in (5, 10, 20, 60):
    data[f'acc{_win}'] = acc(data['close'], _win)

rsi_df = get_rsi(data, window=14)
data['rsi'] = pd.Series(data=rsi_df.values, index=data.index)

# Compute sides
data['side'] = np.nan
data['next_close'] = data['close'].shift(-1)
# NOTE(review): 'diff' is the return to the NEXT row's close, i.e. it is not
# known on the row's own date, yet it is listed in feature_list below.
# Confirm whether this look-ahead is intended.
data['diff'] = (data['next_close'] - data['close'])/data['close']

# side = 1 when the low touches the lower band, -1 when the high touches the
# upper band (the short signal wins if both fire), 0 otherwise.
long_signals = data['low'] <= data['lowerband']
short_signals = data['high'] >= data['upperband']

data.loc[long_signals, 'side'] = 1
data.loc[short_signals, 'side'] = -1

# Assign the filled column back: an inplace fillna on a column selection does
# not modify the frame when pandas Copy-on-Write is enabled.
data['side'] = data['side'].fillna(value=0)
# raw_data = data.copy()

# Log Returns
data['log_ret'] = np.log(data['close']).diff()

# Momentum: simple returns over 1..5 rows
for _lag in range(1, 6):
    data[f'mom{_lag}'] = data['close'].pct_change(periods=_lag)

# Volatility: rolling standard deviation of log returns
window_stdev = 50
data['volatility'] = data['log_ret'].rolling(window=window_stdev, min_periods=window_stdev, center=False).std()

# Serial Correlation (Takes about 4 minutes)
window_autocorr = 50

for _lag in range(1, 6):
    # lag is bound as a default argument so each lambda keeps its own value
    data[f'autocorr_{_lag}'] = (
        data['log_ret']
        .rolling(window=window_autocorr, min_periods=window_autocorr, center=False)
        .apply(lambda x, lag=_lag: x.autocorr(lag=lag), raw=False)
    )

# Get the various log -t returns
for _lag in range(1, 6):
    data[f'log_t{_lag}'] = data['log_ret'].shift(_lag)

# Add fast and slow moving averages
fast_window = 7
slow_window = 15

data['fast_mavg'] = data['close'].rolling(window=fast_window, min_periods=fast_window, center=False).mean()
data['slow_mavg'] = data['close'].rolling(window=slow_window, min_periods=slow_window, center=False).mean()

# sma = 1 when the fast average is at or above the slow one, -1 when below,
# and 0 while either average is still NaN.
data['sma'] = np.nan

long_signals = data['fast_mavg'] >= data['slow_mavg']
short_signals = data['fast_mavg'] < data['slow_mavg']
data.loc[long_signals, 'sma'] = 1
data.loc[short_signals, 'sma'] = -1
# Assign the filled column back: an inplace fillna on a column selection does
# not modify the frame when pandas Copy-on-Write is enabled.
data['sma'] = data['sma'].fillna(value=0)

# Triple-barrier labelling.  get_3_barriers() and get_labels() take no
# arguments here and read the module-level names assigned in this section
# (daily_volatility, prices, t_final, upper_lower_multipliers, barriers), so
# the order of these statements matters.
price = data['close']
daily_volatility = get_Daily_Volatility(price)
data['daily_volatility'] = daily_volatility
# how many days we hold the stock which set the vertical barrier
# (this replaces the t_final = 5 assigned at the top of the cell)
t_final = 10 
# the upper and lower barrier multipliers: [top, bottom]
upper_lower_multipliers = [1, 3]
# restrict prices to the rows for which a volatility value exists
prices = price[daily_volatility.index]
barriers = get_3_barriers()
# 'out' must exist before get_labels() writes into it
barriers['out'] = None
get_labels()
# Join the labels back onto the feature table by date (inner join).
data = data.reset_index()
barriers = barriers.reset_index()
data = pd.merge(data,barriers[['date','out','long_ret', 'short_ret','top_barrier', 'bottom_barrier']], on="date")
# Drops every row with any missing value; this includes rows that
# get_labels() skipped because they have no vertical barrier.
data.dropna(axis=0, how='any', inplace=True)
# The previous integer index is kept as a column named 'index'.
data = data.reset_index()

# training_data
# normalize
# Columns fed to the models, in this order.
# NOTE(review): 'diff' is computed from the next row's close, so it is a
# look-ahead value — confirm the models were really meant to receive it.
feature_list = ['com_rev', 'd_yield', 'pb', 'AD', 'OBV', 'ADOSC', 'bias5',
    'bias10', 'bias20', 'bias60', 'acc5', 'acc10', 'acc20', 'acc60', 'rsi',
    'log_ret', 'mom1', 'mom2', 'mom3', 'mom4', 'mom5', 'volatility',
    'autocorr_1', 'autocorr_2', 'autocorr_3', 'autocorr_4', 'autocorr_5',
    'log_t1', 'log_t2', 'log_t3', 'log_t4', 'log_t5', 'b_OBV', 'b_AD', 'b_ADOSC',
    'b_MA5', 'b_MA20', 'b_MA60', 'b_bias5', 'b_bias10', 'b_bias20', 'b_bias60',
    'b_acc5', 'b_acc10', 'b_acc20', 'b_acc60', 'b_volatility','diff']

scale = MinMaxScaler(feature_range = (-1, 1)) # min-max scaler mapping each column to [-1, 1] (not a z-score scaler)
# The scaler is re-fitted on each column, using all rows of `data`, and the
# column is overwritten with its scaled values.
# NOTE(review): scaling is fitted on this data set rather than reusing the
# scaler from model training — presumably that should match; verify.
for item in feature_list:
    data[item] = scale.fit_transform(np.array(data[item].to_list()).reshape(-1, 1))

# data.dropna(axis=0, how='any', inplace=True)

In [68]:
# Show the last rows of the labelled, scaled feature table.
data.tail()

Unnamed: 0,index,date,close,open,high,low,volume,rev,com_rev,d_yield,...,log_t5,fast_mavg,slow_mavg,sma,daily_volatility,out,long_ret,short_ret,top_barrier,bottom_barrier
308,426,2022-04-08,567.0,567.0,570.0,566.0,31406384.0,146933238.0,-0.940286,-0.365269,...,0.379014,583.714286,585.4,-1.0,0.016067,0,-0.0362734,0.0342734,576.11,539.67
309,427,2022-04-11,558.0,563.0,566.0,558.0,41713758.0,171966525.0,0.373834,-0.329341,...,-0.117992,579.285714,583.8,-1.0,0.015751,1,0.0147513,-0.0167513,566.789,531.632
310,428,2022-04-12,557.0,554.0,564.0,552.0,34799056.0,171966525.0,0.373834,-0.329341,...,-0.297181,573.142857,582.2,-1.0,0.015003,1,0.0140033,-0.0160033,565.357,531.929
311,429,2022-04-13,573.0,564.0,576.0,563.0,36968137.0,171966525.0,0.373834,-0.389222,...,-0.410493,569.714286,581.333333,-1.0,0.017372,-1,-0.0531169,0.0511169,582.954,543.137
312,430,2022-04-14,573.0,577.0,578.0,573.0,20224847.0,171966525.0,0.373834,-0.389222,...,-0.455457,567.428571,580.666667,-1.0,0.016527,-1,-0.0505819,0.0485819,582.47,544.59


In [69]:
# Compare close with its volatility-derived top and bottom barriers on the last rows.
data[['close','daily_volatility', 'top_barrier', 'bottom_barrier']].tail()

Unnamed: 0,close,daily_volatility,top_barrier,bottom_barrier
308,567.0,0.016067,576.11,539.67
309,558.0,0.015751,566.789,531.632
310,557.0,0.015003,565.357,531.929
311,573.0,0.017372,582.954,543.137
312,573.0,0.016527,582.47,544.59


In [70]:
# Load the four pre-trained models from disk (file names suggest
# mean-reversion long/short and trend long/short LightGBM models — TODO confirm).
# NOTE: joblib.load unpickles the file, which can execute arbitrary code; only
# load files from a trusted source.
# NOTE(review): the paths are absolute and machine-specific, so this cell only
# runs on the original author's machine.
mrl_m = joblib.load(r'C:\Users\Drey\finlab_ml_course\lazypredict\0501_mrs_long_LGBM.pkl')
mrs_m = joblib.load(r'C:\Users\Drey\finlab_ml_course\lazypredict\0501_mrs_short_LGBM.pkl')
tl_m = joblib.load(r'C:\Users\Drey\finlab_ml_course\lazypredict\0501_trend_long_LGBM.pkl')
ts_m = joblib.load(r'C:\Users\Drey\finlab_ml_course\lazypredict\0501_trend_short_LGBM.pkl')

In [71]:
# Run every loaded model on the scaled feature matrix and store its
# predictions in a column named after the model's key.
X = data[feature_list]
model_dict = dict(mrl_m=mrl_m, mrs_m=mrs_m, tl_m=tl_m, ts_m=ts_m)
for key, fitted_model in model_dict.items():
    y_pred = fitted_model.predict(X)
    data[key] = y_pred

In [72]:
# side_1_result is 1 where the trend-long model is positive and the
# mean-reversion-short model predicts 0; side_m1_result is -1 where the
# trend-short model is positive and the mean-reversion-long model predicts 0.
# Both are 0 everywhere else.
_long_ok = (data['tl_m'] > 0) & (data['mrs_m'] == 0)
_short_ok = (data['ts_m'] > 0) & (data['mrl_m'] == 0)
data['side_1_result'] = _long_ok.astype('int64')
data['side_m1_result'] = -_short_ok.astype('int64')
data = data.set_index('date')

In [73]:
# data[data.side == -1]['side_1_result']
# Realised strategy return per row: long_ret is taken only where side < 0
# (set earlier when the high touched the upper band) and side_1_result > 0;
# every other row gets 0.
data['ret'] = [z if (x < 0) & (y > 0) else 0 for x,y,z in zip(data['side'], data['side_1_result'], data['long_ret'])]
# Short-side variant, currently disabled:
# data['ret'] = [z if (x < 0) & (y < 0) else 0 for x,y,z in zip(data['side'], data['side_m1_result'], data['short_ret'])]
data.tail()

Unnamed: 0_level_0,index,close,open,high,low,volume,rev,com_rev,d_yield,pb,...,short_ret,top_barrier,bottom_barrier,mrl_m,mrs_m,tl_m,ts_m,side_1_result,side_m1_result,ret
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-04-08,426,567.0,567.0,570.0,566.0,31406384.0,146933238.0,-0.940286,-0.365269,-0.054867,...,0.0342734,576.11,539.67,0,0,0,0,0,0,0.0
2022-04-11,427,558.0,563.0,566.0,558.0,41713758.0,171966525.0,0.373834,-0.329341,-0.093805,...,-0.0167513,566.789,531.632,1,0,0,0,0,0,0.0
2022-04-12,428,557.0,554.0,564.0,552.0,34799056.0,171966525.0,0.373834,-0.329341,-0.097345,...,-0.0160033,565.357,531.929,1,0,1,0,1,0,0.0
2022-04-13,429,573.0,564.0,576.0,563.0,36968137.0,171966525.0,0.373834,-0.389222,-0.030088,...,0.0511169,582.954,543.137,0,0,0,0,0,0,0.0
2022-04-14,430,573.0,577.0,578.0,573.0,20224847.0,171966525.0,0.373834,-0.389222,-0.030088,...,0.0485819,582.47,544.59,0,0,0,0,0,0,0.0


In [74]:
# List only the rows on which a trade return was recorded.
data[data['ret'] != 0]

Unnamed: 0_level_0,index,close,open,high,low,volume,rev,com_rev,d_yield,pb,...,short_ret,top_barrier,bottom_barrier,mrl_m,mrs_m,tl_m,ts_m,side_1_result,side_m1_result,ret
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-01-04,123,536.0,530.0,540.0,528.0,39489959.0,124865438.0,-0.139125,-0.568862,0.299115,...,-0.0117655,541.77,518.689,0,0,1,0,1,0,0.009766
2021-01-06,125,549.0,555.0,555.0,541.0,55614434.0,124865438.0,-0.139125,-0.616766,0.362832,...,-0.0111728,554.585,532.245,0,0,1,0,1,0,0.009173
2021-12-30,366,615.0,619.0,620.0,615.0,20522055.0,148267599.0,0.090608,-0.736527,0.263717,...,-0.00929339,620.1,599.699,1,0,1,0,1,0,0.007293
2022-01-03,367,631.0,619.0,632.0,618.0,73703302.0,148267599.0,0.090608,-0.796407,0.334513,...,-0.0117942,637.811,610.567,1,0,1,0,1,0,0.009794


In [75]:
import pyfolio as pf

In [76]:
# Shorthand for pyfolio's performance-statistics function, called further down.
perf_func = pf.timeseries.perf_stats

In [77]:
def get_daily_returns(intraday_returns):
    """
    Convert a return series into business-day returns usable by pyfolio.

    The returns are compounded into a cumulative curve, sampled at the last
    value of each business day, forward-filled across days without data and
    converted back to day-over-day percentage changes.  The first day has no
    previous value and is dropped.  It's not perfect...
    """
    return (
        intraday_returns.add(1)
        .cumprod()
        .resample('B')
        .last()
        .ffill()
        .pct_change()
        .dropna()
    )

In [78]:
# Evaluate the strategy returns over every date present in `data`.
test_dates = data.index

# NOTE(review): 'ret' holds long_ret values, each measured over a multi-row
# barrier window and recorded on its entry date.  Compounding them as if they
# were one-day returns (and annualising) may misstate performance when
# windows overlap — confirm this is acceptable.
base_rets = data.loc[test_dates, 'ret']
# base_rets = labels['ret']
primary_model_rets = get_daily_returns(base_rets)

# Save the statistics in a dataframe
perf_stats_all = perf_func(returns=primary_model_rets, 
                           factor_returns=None, 
                           positions=None,
                           transactions=None,
                           turnover_denom="AGB")
perf_stats_df = pd.DataFrame(data=perf_stats_all, columns=['Primary Model'])

# pf.create_returns_tear_sheet(labels.loc[test_dates, 'ret'], benchmark_rets=None)
pf.show_perf_stats(primary_model_rets)

  np.divide(average_annual_return, annualized_downside_risk, out=out)
  np.abs(np.percentile(returns, 5))
  np.divide(average_annual_return, annualized_downside_risk, out=out)
  np.abs(np.percentile(returns, 5))


Start date,2020-12-28,2020-12-28
End date,2022-04-14,2022-04-14
Total months,16,16
Unnamed: 0_level_3,Backtest,Unnamed: 2_level_3
Annual return,2.702%,
Cumulative returns,3.651%,
Annual volatility,1.556%,
Sharpe ratio,1.72,
Calmar ratio,,
Stability,0.55,
Max drawdown,0.0%,
Omega ratio,,
Sortino ratio,inf,
Skew,9.21,
