In [1]:
# Backtest engine imported from the `Backtest` module. One shared instance is
# created here and reused by the backtest cells further down, which call
# `BT.backtest(...)` and then read `BT.performance`.
from Backtest import Backtest
BT = Backtest()  

## Backtest framework

* The backtest framework assumes that we are running a long/short strategy, i.e. the sum of the long weights equals the absolute value of the sum of the short weights.
* Trades are executed at the open price: the signal at T is used to trade at T+1's open, and the position realizes the return from T+1's open to T+2's open.
* The daily return is computed based on the adjusted open price.
* There are 5 performance indicators: **annual_return, annual_vol, sharpe, maxdd, annual_turnover**. The annual return is a compounded return, and the Sharpe ratio is computed as $\frac{E(daily\_pnl)}{STD(daily\_pnl)} \times \sqrt{365}$, where 365 is the number of trading days per year in this dataset. The turnover ratio counts both the long and the short side, so its maximum possible value is 200% per day, which is equivalent to 200% × 365 = 730 (i.e. 73,000%) per year.


In [7]:
import os
from pathlib import Path

import numpy as np
import pandas as pd

# Location of the factor table (one row per date/symbol pair, one column per
# factor, as shown in the preview below). The default is the path this notebook
# was originally run with; set the FACTORS_CSV environment variable to point
# at the file on another machine instead of editing the cell.
FACTORS_CSV = Path(os.environ.get('FACTORS_CSV', '/home/ckuang/MLP/factors.csv'))

factors = pd.read_csv(FACTORS_CSV)
factors

Unnamed: 0,date,symbol,factor0,factor1,factor2,factor3,factor4,factor5,factor6,factor7,...,factor559,factor560,factor561,factor562,factor563,factor564,factor565,factor566,factor567,factor568
0,2010-01-01,10,0.263158,,,,,,,,...,,,,,,,,,,
1,2010-01-02,10,0.263158,,-0.763240,,,,,,...,,,,,,,,,,
2,2010-01-03,10,-0.931034,,-0.519096,,,,,,...,,,,,,,,,,
3,2010-01-04,10,-0.931034,0.542380,-0.367935,,,,,,...,,,,,,,,,,
4,2010-01-05,10,-0.931034,0.843820,-0.288258,,-0.421049,0.816497,,0.111600,...,,,,,,7.681146,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
38995,2010-05-01,993,-0.920000,-0.274686,-0.110802,-9.684927e-11,0.442279,0.346168,0.300282,-1.014262,...,,-0.076624,-0.250000,-0.000290,-0.000054,5.176905,,0.251902,-0.002746,-0.398060
38996,2010-05-02,993,-0.920000,-0.261433,-0.106344,-9.251319e-11,0.356897,0.816497,0.283359,-0.361110,...,,-0.077427,-0.125000,-0.000255,,4.949936,,0.274118,-0.088429,-0.385274
38997,2010-05-03,993,-0.857143,-0.055582,-0.103987,-1.076482e-10,0.481603,-0.687243,0.317467,0.928652,...,,-0.078495,-0.222222,-0.000033,,4.975249,,0.281456,,-0.277053
38998,2010-05-04,993,-0.857143,0.115718,-0.099986,-1.762209e-10,0.355688,0.000000,0.228547,0.552630,...,,-0.078194,-0.500000,-0.000029,-0.000162,5.209307,,0.360597,-0.003988,-0.265853


Selected 569 factors in total

In [8]:
def _pivot_factor(long_table, factor_name):
    """Reshape one factor column of the long table into a date x symbol matrix."""
    wide = long_table.pivot(index='date', columns='symbol', values=factor_name)
    wide.index = pd.to_datetime(wide.index)    # date labels -> DatetimeIndex
    wide.columns = wide.columns.astype(str)    # symbol ids -> string labels
    return wide


# Skip the first two columns of `factors` (`date` and `symbol` in the preview
# above); every remaining column is treated as one factor and gets its own
# wide matrix, in column order.
factor_list = [_pivot_factor(factors, name) for name in factors.columns[2:]]


Display the performance of a single factor (the first one, as an example)

In [31]:
# T+1: lag the first factor's signal by one row (one day) before backtesting.
# NOTE(review): the framework notes state that the signal at T is traded at
# T+1's open -- confirm Backtest does not already apply that lag internally,
# otherwise this shift(1) delays the signal twice.
first_factor_lagged = factor_list[0].shift(1)
BT.backtest(signal=first_factor_lagged, type='LONG/SHORT')
BT.performance

Unnamed: 0,annual_return,annual_vol,sharpe,maxdd,annual_turnover
2010,0.08101,0.048661,4.699833,0.012411,23.755411
Total,0.255403,0.048661,4.699833,0.012411,69.365801


### Remark: 
The 0.08 annual return in the 2010 row is actually the return realized over the roughly 5 months of valid data in 2010, while the 0.255 in the Total row is the annualized return. The same holds for the turnover ratio: 23.75 is the turnover over the 5-month period and 69.36 is the annualized turnover.

Compute the equal-weighted average of all the factors

In [9]:
from functools import reduce

def standardize(factor):
    """Z-score a factor matrix row by row (cross-sectionally).

    Every row is centred on its own mean and divided by its own sample
    standard deviation, using the pandas defaults (NaNs are skipped, ddof=1).
    There is no guard against a zero standard deviation.

    Parameters
    ----------
    factor : pandas.DataFrame
        Numeric matrix; in this notebook rows are dates and columns are symbols.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same labels as ``factor``; the input is left
        untouched. A row with fewer than two non-NaN values has an undefined
        standard deviation and therefore comes out entirely NaN.
    """
    row_mean = factor.mean(axis=1)
    row_std = factor.std(axis=1)
    # axis=0 aligns the per-row statistics on the row labels.
    return factor.sub(row_mean, axis=0).div(row_std, axis=0)

def equal_weight_signals(signal_list):
    """Combine several signals into one by equal-weight averaging.

    Each signal is z-scored row by row with ``standardize`` and its missing
    scores are replaced by 0. The standardized frames are then summed and
    divided by the number of signals. Because a missing score counts as 0
    while the divisor is always ``len(signal_list)``, a cell covered by only a
    few signals is shrunk towards 0 rather than re-weighted.

    Signals are aligned on the union of their row and column labels. A label
    that is absent from a signal contributes 0 for that signal, exactly like a
    NaN inside it, so the result never contains NaN introduced by alignment.
    For signals that all share the same labels the result is unchanged.

    Parameters
    ----------
    signal_list : iterable of pandas.DataFrame
        Signals to combine; consumed once.

    Returns
    -------
    pandas.DataFrame
        The averaged signal over the union of all input labels.

    Raises
    ------
    TypeError
        If ``signal_list`` is empty (raised by ``functools.reduce``).
    """
    standardized = [standardize(signal).fillna(0) for signal in signal_list]
    # fill_value=0 treats a label missing from one operand as a zero score
    # instead of turning the whole cell into NaN.
    total = reduce(lambda acc, nxt: acc.add(nxt, fill_value=0), standardized)
    # A cell missing from *both* operands of an add still comes out NaN
    # (it only exists in the union of labels), so zero it explicitly.
    total = total.fillna(0)
    return total / len(standardized)

# Blend every per-factor matrix into a single signal and preview the result.
combined_signal = equal_weight_signals(signal_list=factor_list)
combined_signal

symbol,10,11,18,94,112,118,149,171,183,192,...,4695,4751,4769,4774,4852,4928,4949,4964,5102,5264
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2010-01-01,0.001825,0.001942,0.000831,-0.000528,0.000238,0.002319,-0.001954,-0.000045,0.003380,-0.000902,...,0.000000,0.001335,0.002365,0.000235,0.002247,0.000536,-0.000992,0.002399,-0.000170,0.000000
2010-01-02,-0.000159,0.003826,0.001568,-0.002282,0.002814,-0.004015,-0.003769,0.001719,-0.000010,0.001302,...,0.000000,-0.002810,-0.000518,0.000701,-0.009607,0.002593,-0.000109,0.000395,0.002255,0.000000
2010-01-03,-0.004548,0.000694,0.003272,-0.003862,0.000390,-0.003132,-0.002480,0.000234,0.001239,0.000491,...,0.000000,-0.002274,-0.000289,0.002584,-0.008979,0.004605,0.000613,0.000102,0.003786,0.000000
2010-01-04,-0.002978,-0.000668,0.004136,-0.004303,-0.001752,-0.003677,0.000531,0.000816,0.001508,0.000106,...,0.000000,-0.003592,-0.002429,0.000322,-0.011350,0.003224,0.000163,-0.000048,0.001467,0.000000
2010-01-05,0.011088,-0.007880,0.064881,-0.071260,-0.005572,-0.048543,0.043757,0.038587,0.046908,0.053762,...,0.000000,-0.017336,-0.014410,-0.058191,-0.114562,-0.002168,0.025860,0.003724,0.066555,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2010-05-01,-0.048798,0.004122,0.000000,0.022168,-0.037283,0.004811,-0.080433,0.000785,-0.012184,0.006052,...,-0.005016,0.021653,-0.069720,0.099410,0.199827,-0.034609,0.151997,0.094285,-0.213173,-0.037742
2010-05-02,-0.057160,-0.016815,0.000000,0.029873,0.001043,-0.012861,-0.050125,0.005981,-0.034745,0.056201,...,0.011012,-0.011622,-0.088990,0.133093,0.133872,0.035223,0.124307,0.076605,-0.138850,-0.022523
2010-05-03,-0.029187,-0.014767,0.000000,0.041096,-0.015886,-0.000731,-0.057292,0.004095,-0.047397,-0.087350,...,0.013648,-0.001455,-0.034187,0.146506,0.087028,0.047164,0.121572,0.089692,-0.157830,0.004744
2010-05-04,-0.024178,0.050645,0.000000,0.048587,-0.039104,0.045321,-0.002253,-0.009818,-0.038075,-0.060598,...,0.000085,-0.064915,-0.074300,0.130183,0.100178,-0.020647,0.091154,0.101217,-0.176724,0.014088


In [32]:
# T+1: lag the combined signal by one row (one day) before backtesting.
# NOTE(review): the framework notes state that the signal at T is traded at
# T+1's open -- confirm Backtest does not already apply that lag internally,
# otherwise this shift(1) delays the signal twice.
combined_signal_lagged = combined_signal.shift(1)
BT.backtest(signal=combined_signal_lagged, type='LONG/SHORT')
BT.performance

Unnamed: 0,annual_return,annual_vol,sharpe,maxdd,annual_turnover
2010,0.597084,0.069533,19.732184,0.006034,52.407982
Total,2.923891,0.069533,19.732184,0.006034,153.031309


### Comments:
The Sharpe ratio of the individual factors ranges from approximately 4 to 8. Because I selected factors with low pairwise correlations (some are even negatively correlated), combining them diversifies away much of the factor-specific noise, which is why the combined signal's Sharpe ratio is so much higher than that of any single factor; even so, its performance looks unreasonably strong. From my perspective, the result is distorted by the very short backtest horizon. Additionally, it is difficult to split the data into in-sample and out-of-sample sets given the limited number of timestamps. Furthermore, trading fees are not considered in this analysis; incorporating them would likely degrade performance, given the high turnover ratio of this strategy.