# Backtest engine

In [1]:
from typing import List, Tuple, Union

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go

import quantstats as qs

from pathlib import Path
from tqdm import tqdm

In [2]:
data_path = Path('.').resolve() / 'data'
data_path

WindowsPath('E:/VSCodeProjects/hanaquantchallenge/data')

In [39]:
df = pd.read_pickle(data_path / 'data_quanttrading.pickle')

## Simple alpha example

1. 3번 연속 양봉/음봉이 나오고 
2. 캔들의 크기가 점차 줄어들면서
3. `vwap` 이 2번 연속 떨어질 때 

추세의 반대 방향으로 1주 매매 (reversion)

In [7]:
df.columns

Index(['open', 'high', 'low', 'close', 'vwap', 'volume', 'amount', 'ticks',
       'bid', 'bid_size', 'ask', 'ask_size', 'KST', 'tr_date', 'signal', 'mid',
       'microprice', 'bidask_size_ratio', 'LD_bidask_spread'],
      dtype='object')

In [40]:
from itertools import groupby

def all_equal(iterable):
    g = groupby(iterable)
    return next(g, True) and not next(g, False)

## https://stackoverflow.com/questions/3844801/check-if-all-elements-in-a-list-are-identical

In [41]:
def simple_candle_reversion(df, n=3, use_price='vwap'):
    data_used = ['open', 'high', 'low', 'close', 'vwap']
    dm = df[data_used].copy()

    dm['is_candle_positive'] = np.where(dm['close'] >= dm['open'], 2, -3)
    dm['candle_size'] = dm['high'] - dm['low']
    dm['is_size_decreasing'] = np.where(dm['candle_size'] < dm['candle_size'].shift(1), 5, -7)
    dm['is_vwap_increasing'] = np.where(dm['vwap'] >= dm['vwap'].shift(1), 11, -13)
    

    def is_consecutive(vector):
        if all_equal(vector):
            return vector[0]
        else:
            return 0
    
    dm['case_value'] = dm['is_candle_positive'].rolling(n).apply(is_consecutive) * \
        dm['is_size_decreasing'].rolling(n).apply(is_consecutive) * \
            dm['is_vwap_increasing'].rolling(n).apply(is_consecutive)
    
    dm['alpha'] = np.select(
        condlist=[dm['case_value'] == 2*5*11 , dm['case_value'] == -3*5*-13],
        choicelist=[1, -1],
        default=0
    )
    
    return dm['alpha'].shift().values
    # return dm

In [52]:
df['position'] = simple_candle_reversion(df)
df.loc[df.index[0], 'position'] = 0

In [53]:
df['position'] = df['position'] * 10

In [62]:
df['signal_unlimited'] = df['position'].diff()
df.loc[df.index[0], 'signal_unlimited'] = 0

In [60]:
pd.DataFrame(
    [
        {'a': 1, 'b': 2},
        {'a': 5, 'b': 4},
        {'a': 8, 'b': 1},
    ]
).diff()

Unnamed: 0,a,b
0,,
1,4.0,2.0
2,3.0,-3.0


In [43]:
def realize_cashflow_after_trade(
    signal:int, 
    bid_price, 
    bid_size, 
    ask_price, 
    ask_size) -> float:
    """Trade result of one day shifted signal

    Args:
        signal (int): yesterday's buy/sell amount (difference of yesterday & today's position)
        bid_price (float): price when you sell
        bid_size (int): maximum sellable amount
        ask_price (float): price when you buy
        ask_size (int): maximum buyable amount

    Returns:
        int: amount of realized position change from this trade
        float: amount of cash change from this trade
    """    
    if signal == 0:

        return 0, 0.0
    
    elif signal > 0: # 매수시 ask_price로 체결
        traded = min(signal, ask_size)

        return traded, -traded * ask_price
    
    elif signal < 0: # 매도시 bid_price로 체결    
        traded = -min(abs(signal), ask_size)

        return traded, -traded * bid_price


In [63]:
df['cash_chg'] = df.apply(
    lambda row: realize_cashflow_after_trade(
        row['signal_unlimited'], 
        row['bid'],
        row['bid_size'],
        row['ask'],
        row['ask_size'],
        )[1],
    axis=1
    )

In [67]:
INITIAL_CASH = 2e+6

df.loc[df.index[0], 'cash_chg'] = INITIAL_CASH # 최초의 cash amount

In [68]:
df['cash_chg'].sum()

2079164.25

In [66]:
df['position'].iloc[-1] * df['mid'].iloc[-1]

0.0

In [69]:
df['current_actual_position'] = df['signal_unlimited'].cumsum() # TODO: Wrong calculation. signal limited로 해야한다. 

In [70]:
df['current_cash'] = df['cash_chg'].cumsum()

In [71]:
df['current_value'] = df['current_actual_position'] * df['mid'] + df['current_cash']

In [73]:
df['port_return'] = df['current_value'].pct_change()

## Performance analytics using `QuantStats`

`quantstats` 에서 돌리려면 10분 단위로 되어있는 datetime index를 일환산 해줘야 한다. 

In [86]:
daily_port_return = df['port_return'].resample('D').sum()
daily_port_return.index = daily_port_return.index.tz_localize(None)

In [87]:
daily_port_return.index

DatetimeIndex(['2020-01-02', '2020-01-03', '2020-01-04', '2020-01-05',
               '2020-01-06', '2020-01-07', '2020-01-08', '2020-01-09',
               '2020-01-10', '2020-01-11',
               ...
               '2022-09-21', '2022-09-22', '2022-09-23', '2022-09-24',
               '2022-09-25', '2022-09-26', '2022-09-27', '2022-09-28',
               '2022-09-29', '2022-09-30'],
              dtype='datetime64[ns]', name='virtual_dt', length=1003, freq='D')

In [89]:
qs.reports.html(daily_port_return)

  returns = returns.pivot('Year', 'Month', 'Returns').fillna(0)
