# Configuration 

In [1]:
# NOTE(review): machine-specific absolute path. The relative paths used below
# (e.g. DATA = 'data/') are resolved against this working directory, so this
# line must be adapted before the notebook can run on another machine.
cd '/Users/xusikun/Desktop/Research/[research]tsinghua_options/options-study'

/Users/xusikun/Desktop/Research/[research]tsinghua_options/options-study


In [2]:
import os 

import pandas as pd 
import numpy as np

from datetime import datetime

import backtrader as bt

In [3]:
# Folder that holds the input files; relative to the current working directory.
# The sample CSV paths below are built from it with os.path.join.
DATA = 'data/'

# Prepare data

In this step, we need to prepare both the price data and the portfolio data. The portfolio data describes the portfolio at each rebalancing time. 

*Note that the strategy development should be run a priori. The backtest system only makes use of the strategy results; it doesn't accept a strategy function or class as input.* 

## Sample Data

In [4]:
from src.data import prepare_sample_data

In [5]:
# Keyword arguments forwarded to prepare_sample_data: locations of the two
# sample CSV files under the DATA folder.
prepare_sample_data_params = dict(
    price_direc=os.path.join(DATA, 'sample/daily_price.csv'),
    portfolio_direc=os.path.join(DATA, 'sample/trade_info.csv'),
)

In [6]:
# Load the sample inputs, then pull out the two tables used by the backtest.
data = prepare_sample_data(**prepare_sample_data_params)
price = data['price']
portfolio = data['portfolio']

In [7]:
# Preview the first rows of the price table (one row per date and security).
price.head()

Unnamed: 0_level_0,sec_code,open,high,low,close,volume,openinterest
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2019-01-02,600466.SH,33.064891,33.496709,31.954503,32.386321,10629352,0
2019-01-02,603228.SH,50.66023,51.458513,50.394136,51.120778,426147,0
2019-01-02,600315.SH,148.258423,150.480132,148.258423,149.558935,2138556,0
2019-01-02,000750.SZ,49.512579,53.154883,48.715825,51.561375,227557612,0
2019-01-02,002588.SZ,36.608672,36.608672,35.669988,35.763857,2841517,0


In [8]:
# Preview the first rows of the target portfolio (trade_date, sec_code, weight).
portfolio.head()

Unnamed: 0,trade_date,sec_code,weight
0,2019-01-31,000006.SZ,0.007282
1,2019-01-31,000008.SZ,0.009783
2,2019-01-31,000025.SZ,0.006928
3,2019-01-31,000090.SZ,0.007234
4,2019-01-31,000536.SZ,0.003536


## Options Data

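***TODO: 重新跑了一遍后发现期权的价格信息用错了，找不到在哪里，需要协助！ (After re-running the notebook, the option price information turned out to be wrong; the source of the error has not been located yet — help needed!)***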

In [9]:
from src.options_data import read_options_data

In [10]:
%%time
# read and preprocess options data
data = read_options_data('data/DataFile.mat')

CPU times: user 32.5 s, sys: 886 ms, total: 33.4 s
Wall time: 35.5 s


In [11]:
# Preview the option contract table returned by read_options_data.
data['options'].head()

Unnamed: 0,asof,code,name,type,strike,date,exp_date,time_to_exp
0,2015-02-16,10000001.SH,50ETF购3月2200,CALL,2.2,2015-02-09,2015-03-25,37 days
1,2015-02-16,10000002.SH,50ETF购3月2250,CALL,2.25,2015-02-09,2015-03-25,37 days
2,2015-02-16,10000003.SH,50ETF购3月2300,CALL,2.3,2015-02-09,2015-03-25,37 days
3,2015-02-16,10000004.SH,50ETF购3月2350,CALL,2.35,2015-02-09,2015-03-25,37 days
4,2015-02-16,10000005.SH,50ETF购3月2400,CALL,2.4,2015-02-09,2015-03-25,37 days


In [12]:
# Preview the ETF price table returned by read_options_data.
data['ETF'].head()

Unnamed: 0,open,high,low,close,adj_close
2015-02-16,2.39,2.403,2.379,2.394,2.39
2015-02-17,2.401,2.429,2.401,2.411,2.4155
2015-02-25,2.411,2.411,2.359,2.37,2.385
2015-02-26,2.368,2.452,2.348,2.45,2.4051
2015-02-27,2.447,2.465,2.428,2.438,2.4494


### Strangle

In [18]:
from src.strategy import get_daily_strangle

In [20]:
# Split the loaded options bundle into the contract table and the ETF prices.
options, ETF = data['options'], data['ETF']

In [22]:
# get strangle
# For every as-of date, hand that day's slice of the option table to
# get_daily_strangle and collect whatever it selects.
security_selected = []
for date in options['asof'].unique():
    same_day = options['asof'] == date
    daily = options.loc[same_day].reset_index(drop=True)
    picks = get_daily_strangle(date=date, options_book=daily, ETF=ETF)
    security_selected.extend(picks)

In [24]:
# complete strangle portfolio data
# Stack the per-day selections collected above into a single table.
strangle = pd.DataFrame(security_selected)

In [30]:
# Spot check: show the ETF close and the selected strangle legs for one date.
asof = '2021-06-11'
spot = ETF.loc[asof, 'close']
print(f'At date {asof}, spot={spot}, the strangle portfolio is as follows: \n')
strangle.loc[strangle['asof'].eq(asof)]

At date 2021-06-11, spot=3.571, the strangle portfolio is as follows: 



Unnamed: 0,asof,code,name,type,strike,date,exp_date,time_to_exp
12489,2021-06-11,10003009.SH,50ETF购6月3700,CALL,3.7,2020-11-30,2021-6-23,12 days
12490,2021-06-11,10002873.SH,50ETF沽6月3450A,PUT,3.45,2020-10-29,2021-6-23,12 days
12491,2021-06-11,10003420.SH,50ETF购7月3700,CALL,3.7,2021-5-27,2021-7-28,47 days
12492,2021-06-11,10003426.SH,50ETF沽7月3400,PUT,3.4,2021-5-27,2021-7-28,47 days
12493,2021-06-11,10003208.SH,50ETF购9月3700,CALL,3.7,2021-1-28,2021-9-22,103 days
12494,2021-06-11,10003214.SH,50ETF沽9月3400,PUT,3.4,2021-1-28,2021-9-22,103 days
12495,2021-06-11,10003379.SH,50ETF购12月3700,CALL,3.7,2021-4-29,2021-12-22,194 days
12496,2021-06-11,10003385.SH,50ETF沽12月3400,PUT,3.4,2021-4-29,2021-12-22,194 days


# Backtest

## Instantiate Cerebro

In [9]:
# instantiate cerebro
# The data feeds, broker settings, analyzers and strategy in the cells below
# are all attached to this one object.
cerebro = bt.Cerebro()

## Import Data

In [10]:
# Distinct security codes present in the price table.
stocks = price['sec_code'].unique()
# Distinct index values of the price table, used as the common calendar that
# every single-stock series is aligned to.
dates = price.index.unique()

In [11]:
# Helper functions: get data for one stock
def get_single_stock_data(sec_code: str, price: pd.DataFrame, dates: list) -> pd.DataFrame:
    """Return one security's price history aligned to a common calendar.

    Parameters
    ----------
    sec_code : str
        Security code to select from ``price['sec_code']``.
    price : pd.DataFrame
        Long-format price table indexed by date, with the columns ``sec_code``,
        ``open``, ``high``, ``low``, ``close``, ``volume`` and ``openinterest``.
    dates : list
        Index values of the result; every entry gets a row (left join).

    Returns
    -------
    pd.DataFrame
        Frame indexed by ``dates`` with the six price/volume columns. Missing
        ``volume``/``openinterest`` become 0. Missing prices are forward-filled
        from the last known value, and set to 0 where no earlier value exists.
    """
    price_cols = ['open', 'high', 'low', 'close']
    flow_cols = ['volume', 'openinterest']

    # align dates: left-join this security's rows onto the full calendar.
    # A boolean mask is used instead of a string-built query so that unusual
    # characters in sec_code cannot break the expression.
    single = price.loc[price['sec_code'] == sec_code, price_cols + flow_cols]
    calendar = pd.DataFrame(index=dates)
    aligned = pd.merge(calendar, single, left_index=True, right_index=True, how='left')

    # clean missing values
    aligned[flow_cols] = aligned[flow_cols].fillna(0)
    # .ffill() replaces the deprecated fillna(method='pad'); the trailing
    # fillna(0) covers dates before the security's first available price.
    aligned[price_cols] = aligned[price_cols].ffill().fillna(0)

    return aligned

In [12]:
# Every feed is restricted to the same backtest window.
feed_window = dict(fromdate=datetime(2019, 1, 2), todate=datetime(2021, 1, 28))

for stock in stocks:
    # calendar-aligned price history of this security
    data = get_single_stock_data(sec_code=stock, price=price, dates=dates)

    # export data to cerebro: wrap the frame in a datafeed object and register
    # it under the security code so the strategy can look it up by name
    datafeed = bt.feeds.PandasData(dataname=data, **feed_window)
    cerebro.adddata(datafeed, name=stock)

## Set Backtest Conditions

In [13]:
# Broker
# initial capital
cerebro.broker.setcash(100000000.0)

# commission
# NOTE(review): presumably a fraction of traded value (0.0003 = 0.03%) —
# confirm against the backtrader setcommission documentation.
cerebro.broker.setcommission(commission=0.0003)

# slippage
# NOTE(review): presumably a fraction of price (0.0001 = 0.01%) — confirm.
cerebro.broker.set_slippage_perc(perc=0.0001)

In [14]:
# Add analyzers: (analyzer class, name used to look up its result after the run)
analyzer_specs = [
    (bt.analyzers.TimeReturn, 'pnl'),              # return time series
    (bt.analyzers.AnnualReturn, '_AnnualReturn'),  # annual return
    (bt.analyzers.SharpeRatio, '_SharpeRatio'),    # sharpe ratio
    (bt.analyzers.DrawDown, '_DrawDown'),          # drawdown
]
for analyzer_cls, analyzer_name in analyzer_specs:
    cerebro.addanalyzer(analyzer_cls, _name=analyzer_name)

## Strategy

In [15]:
class TestStrategy(bt.Strategy):
    """Rebalance into pre-computed target weights on the scheduled trade dates.

    The target portfolio is read from the notebook-level ``portfolio`` frame;
    the columns used here are ``trade_date``, ``sec_code`` and ``weight``.
    On each bar whose date is one of the trade dates the strategy:

    0. cancels every order it issued at the previous rebalance,
    1. reads the securities and weights scheduled for that date,
    2. closes positions in securities that are no longer in the portfolio,
    3. issues target-percent orders for every scheduled security.

    Every security code in ``portfolio`` must have a data feed registered
    under the same name, because feeds are looked up with ``getdatabyname``.
    """

    def __init__(self):
        # add portfolio info
        self.buy_stock = portfolio

        # read rebalance dates as plain ``datetime.date`` objects. ``next``
        # compares them with ``datetime.date(0)``, which is a ``datetime.date``;
        # pandas >= 2.0 never treats a Timestamp and a date as equal, so a list
        # of Timestamps would silently disable every rebalance.
        self.trade_dates = list(
            pd.to_datetime(self.buy_stock['trade_date'].unique()).date
        )

        # record past orders, so that we can handle them at rebalance time
        self.order_list = []

        # record the last-period holding
        self.buy_stocks_pre = []

    def next(self):
        """Run the rebalance steps when the current bar falls on a trade date."""
        # get the current datetime
        now = self.datas[0].datetime.date(0)

        # nothing to do between rebalance dates
        if now not in self.trade_dates:
            return

        print(f" ---------- {now} is rebalance date ---------- ")
        # 0. before rebalance, we cancel all the unfinished and un-expired orders
        for order in self.order_list:
            self.cancel(order)
        self.order_list = []

        # 1. extract the holdings
        buy_stocks_data = self.buy_stock.query(f"trade_date=='{now}'")
        long_list = buy_stocks_data['sec_code'].tolist()
        print('long_list', long_list)

        # one weight per security (first occurrence wins), built once instead
        # of re-querying the frame for every security
        weights = (
            buy_stocks_data
            .drop_duplicates('sec_code')
            .set_index('sec_code')['weight']
        )

        # 2. In current holdings, sell all stocks that we won't hold anymore
        sell_stock = [i for i in self.buy_stocks_pre if i not in long_list]
        print('sell_stock', sell_stock)
        if len(sell_stock) > 0:
            print(" ---------- Sell all stocks that we won't hold ---------- ")
            for stock in sell_stock:
                data = self.getdatabyname(stock)
                if self.getposition(data).size > 0:
                    order = self.close(data=data)
                    # only keep real orders so the next rebalance never tries
                    # to cancel a ``None`` placeholder
                    if order is not None:
                        self.order_list.append(order)

        # 3. Buy all stocks that we'll hold
        print(" ---------- Buy all stocks we'll hold ---------- ")
        for stock in long_list:
            w = weights.loc[stock]
            data = self.getdatabyname(stock)
            print(data._name, w, w*0.95)
            # NOTE(review): targets are scaled by 0.95 — presumably to leave a
            # cash buffer for commission and slippage; confirm the intent.
            order = self.order_target_percent(data=data, target=w*0.95)
            if order is not None:
                self.order_list.append(order)
        self.buy_stocks_pre = long_list
            
#     def notify_order(self, order):
#         # un-processed orders
#         if order.status in [order.Submitted, order.Accepted]:
#             return
        
#         # processed orders
#         if order.status in [order.Completed, order.Canceled, order.margin]:
#             if order.isbuy():
#                 self.log(f"BUY EXECUTED, ref: {order.ref}, Price: {order.executed.price}, \
#                          Cost: {order.executed.value}, Comm: {order.executed.comm}, \
#                          Size: {order.executed.size}, Stock: {order.data._name}")
#             else:
#                 self.log(f"SELL EXECUTED, ref: {order.ref}, Price: {order.executed.price}, \
#                          Cost: {order.executed.value}, Comm: {order.executed.comm}, \
#                          Size: {order.executed.size}, Stock: {order.data._name}")

In [16]:
# add strategy to cerebro
# The class itself is passed, not an instance; the call's return value is what
# the cell displays.
cerebro.addstrategy(TestStrategy)

0

## Run

In [17]:
# start backtest
# The strategy prints its rebalance log while this runs.
result = cerebro.run()



 ---------- 2019-01-31 is rebalance date ---------- 
long_list ['000006.SZ', '000008.SZ', '000025.SZ', '000090.SZ', '000536.SZ', '000587.SZ', '000598.SZ', '000612.SZ', '000636.SZ', '000656.SZ', '000690.SZ', '000712.SZ', '000766.SZ', '000807.SZ', '000829.SZ', '000877.SZ', '000980.SZ', '000999.SZ', '002002.SZ', '002048.SZ', '002051.SZ', '002074.SZ', '002110.SZ', '002127.SZ', '002128.SZ', '002131.SZ', '002152.SZ', '002195.SZ', '002308.SZ', '002358.SZ', '002359.SZ', '002375.SZ', '002400.SZ', '002408.SZ', '002437.SZ', '002463.SZ', '002465.SZ', '002642.SZ', '002707.SZ', '002745.SZ', '002818.SZ', '300001.SZ', '300010.SZ', '300058.SZ', '300113.SZ', '300146.SZ', '300166.SZ', '300266.SZ', '300376.SZ', '300450.SZ', '600006.SH', '600039.SH', '600053.SH', '600056.SH', '600062.SH', '600141.SH', '600151.SH', '600158.SH', '600169.SH', '600259.SH', '600260.SH', '600280.SH', '600366.SH', '600373.SH', '600392.SH', '600393.SH', '600428.SH', '600478.SH', '600500.SH', '600525.SH', '600528.SH', '600582.SH', 

## Analyse

In [18]:
# extract backtest results
# Element 0 is used — presumably the single TestStrategy added above; confirm
# if more strategies are ever registered.
strat = result[0]

In [19]:
# return daily return sequence
# Taken from the TimeReturn analyzer that was registered under the name 'pnl'.
daily_return = pd.Series(strat.analyzers.pnl.get_analysis())

In [20]:
# print each analyzer's result under its own banner line
report_sections = (
    ("--------------- AnnualReturn -----------------", strat.analyzers._AnnualReturn),
    ("--------------- SharpeRatio -----------------", strat.analyzers._SharpeRatio),
    ("--------------- DrawDown -----------------", strat.analyzers._DrawDown),
)
for banner, analyzer in report_sections:
    print(banner)
    print(analyzer.get_analysis())

--------------- AnnualReturn -----------------
OrderedDict([(2019, 0.2421668400755459), (2020, 0.2154227563253983), (2021, 0.017567210073598405)])
--------------- SharpeRatio -----------------
OrderedDict([('sharperatio', 1.4813312115232609)])
--------------- DrawDown -----------------
AutoOrderedDict([('len', 136), ('drawdown', 6.655064560819013), ('moneydown', 10952970.349310696), ('max', AutoOrderedDict([('len', 206), ('drawdown', 20.374812759676267), ('moneydown', 27705182.493407518)]))])
