In [17]:
import pandas as pd
import numpy as np
import glob
import os
from scipy import stats

# Momentum strategy 1: Selecting top and bottom performing stocks for long and short positions in the portfolio based on month-end observations.

### Read ticker data downloaded from quantquote

In [18]:
# Directory holding the per-ticker daily CSV files; edit this to point at a local copy of the data.
DATA_DIR = "/home/home/vbox_shared/projects/Momentum-Trading/daily"

files = glob.glob(os.path.join(DATA_DIR, "*.csv"))
if not files:
    # Without this guard pd.concat([]) fails with an unhelpful "No objects to concatenate".
    raise FileNotFoundError("No CSV files found in " + DATA_DIR)

# Column names are supplied explicitly, so the first line of every file is read as data.
# The ticker is the second piece of the file name once '_' is turned into '.' and the
# name is split on '.' (a name such as 'table_ko.csv' would give 'ko' -- naming assumed, confirm).
df = pd.concat([
    pd.read_csv(fp, names=['date', '0', 'open', 'high', 'low', 'close', 'volume'])
      .assign(ticker=os.path.basename(fp).replace('_', '.').split('.')[1])
    for fp in files
])
# Dates are stored as YYYYMMDD integers/strings in the files.
df['date'] = pd.to_datetime(df['date'], format='%Y%m%d')
df.head()


        date  0     open     high      low    close        volume ticker
0 1998-01-02  0  23.5573  23.6455  23.2714  23.5573  7.108452e+06     ko
1 1998-01-05  0  23.5784  23.6031  23.0279  23.4479  1.004965e+07     ko
2 1998-01-06  0  23.2502  23.5149  23.2044  23.3808  7.462927e+06     ko
3 1998-01-07  0  23.2255  23.3596  22.9855  23.3385  7.102218e+06     ko
4 1998-01-08  0  23.2255  23.6455  23.2044  23.5361  8.561481e+06     ko


### Pivot to closing prices

In [3]:
# Reshape the long table into a date x ticker matrix of closing prices.
close = df.pivot(
    index='date',
    columns='ticker',
    values='close',
)

In [42]:
# Preview the first five dates for the first ten tickers.
close.head().iloc[:, :10]

ticker,a,aa,aapl,abbv,abc,abt,ace,acn,act,adbe
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1998-01-02,,13.3511,3.95098,,6.50799,10.3555,22.9865,,32.06,4.99041
1998-01-05,,13.5853,3.8902,,6.40419,10.4031,22.8365,,33.63,5.05201
1998-01-06,,13.2817,4.60502,,6.28477,10.2311,23.018,,33.44,5.23685
1998-01-07,,13.3042,4.24032,,6.34839,10.288,23.1389,,32.69,5.15182
1998-01-08,,12.7533,4.39107,,6.38299,10.4507,22.7446,,33.38,5.20604


### Check data for survivorship bias

In [20]:
# Number of missing closes per ticker, restricted to tickers that have at least one gap.
a = close.isna().sum().loc[lambda counts: counts > 0]

In [21]:
# Print every ticker with gaps that is also missing a close within the last 20 rows.
recent_missing = close[a.index].tail(20).isna().sum()
for ticker in recent_missing[recent_missing > 0].index:
    print(ticker)


cvh
hnz
pcs


In [7]:
# Inspect the last 70 closes of one of the flagged tickers.
print(close['cvh'].iloc[-70:])

date
2013-05-02    49.81
2013-05-03    50.19
2013-05-06    50.11
2013-05-07      NaN
2013-05-08      NaN
              ...  
2013-08-05      NaN
2013-08-06      NaN
2013-08-07      NaN
2013-08-08      NaN
2013-08-09      NaN
Name: cvh, Length: 70, dtype: float64


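The dataset includes tickers (cvh, hnz, pcs) whose closing prices are NaN at the tail of the sample, so it is not limited to stocks that survived to the end of the period. All is well.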

### For each month-end observation period, rank the stocks by previous returns, from the highest to the lowest

In [8]:
# Last available close in each calendar month.
monthly = close.resample('M').last()

# Monthly log return: log(P_t) - log(P_{t-1}).
returns = np.log(monthly).diff()

# Row t of prev_returns holds the return of month t-1 (used for ranking);
# row t of lookahead_returns holds the return of month t+1 (used for the payoff).
prev_returns = returns.shift(1)
lookahead_returns = returns.shift(-1)

In [9]:
# rank the stocks by prev returns from highest to lowest
def rank_(prev_returns, n):
    top_prices = pd.DataFrame(0, index=prev_returns.index, columns=prev_returns.columns)
    for index, col in prev_returns.iterrows():
        top_prices.loc[index, col.nlargest(n).index] = 1
    return top_prices


### Select the top performing stocks for the long position, and the bottom performing stocks for the short position

In [10]:
# Long leg: the 10 tickers with the highest previous return in each period.
long = rank_(prev_returns, 10)
# Short leg: the 10 with the lowest previous return (largest after negation).
short = rank_(-prev_returns, 10)


In [45]:
# Per-ticker contribution to the portfolio return: long positions earn the
# look-ahead return, short positions earn its negative, and each of the
# 10 + 10 positions carries equal weight.
long_leg = long * lookahead_returns
short_leg = short * lookahead_returns
port_ret = (long_leg - short_leg) / (10 + 10)
port_ret.head().iloc[:, :10]

ticker,a,aa,aapl,abbv,abc,abt,ace,acn,act,adbe
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1998-01-31,,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0
1998-02-28,,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0
1998-03-31,,0.0,-0.000456,,0.0,0.0,0.0,,0.0,0.0
1998-04-30,,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0
1998-05-31,,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0


In [32]:
# Number of periods in which the summed portfolio return was negative.
port_ret.T.sum().lt(0).sum()

79

## Statistical tests

In [12]:
def t_test(returns):
    """
    One-sided, one-sample t-test of whether the mean of ``returns`` is greater than zero.

    Parameters
    ----------
    returns : array-like
        Sample of returns; passed unchanged to ``scipy.stats.ttest_1samp``.

    Returns
    -------
    t_value : float
        t-statistic for the null hypothesis that the mean equals 0.
    p_value : float
        One-sided p-value for the alternative "mean > 0".
    result : str
        Verdict string chosen by comparing ``p_value`` with 0.05.
    """
    # Run the test once; scipy reports the two-sided p-value.
    t_value, two_sided_p = stats.ttest_1samp(returns, 0.0)

    # Halving the two-sided p-value gives the "mean > 0" tail only when t is
    # positive. For t <= 0 the upper tail is the complement, otherwise a
    # significantly negative mean would be reported as a working strategy.
    if t_value > 0:
        p_value = two_sided_p / 2
    else:
        p_value = 1 - two_sided_p / 2

    if p_value < 0.05:
        result = "Strategy might work. proceed further"
    else:
        result = "rethink strategy. There doesn't seem to be an alpha factor involved"
    return t_value, p_value, result


In [13]:
# Mean of the per-period portfolio returns (summed across tickers).
monthly_portfolio_returns = port_ret.T.sum()
monthly_portfolio_returns.mean()

0.0060848424554046255

In [14]:
# Annualise the mean monthly log return (x12) and convert it to a simple percentage.
mean_monthly_log_return = port_ret.T.sum().mean()
(np.exp(mean_monthly_log_return * 12) - 1) * 100

7.575001799545111

In [15]:
# Test the per-period portfolio returns against a mean of zero.
portfolio_return_series = port_ret.T.sum()
t, p, result = t_test(portfolio_return_series)

In [16]:
# Report the test statistic, p-value and verdict on separate lines.
print(
    't-value: ', t,
    '\np-value: ', p,
    '\nconclusion: ', result,
)

t-value:  1.5072186565119008 
p-value:  0.06672104990276045 
conclusion:  rethink strategy. There doesn't seem to be an alpha factor involved
