In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from collections import OrderedDict

def symbol_to_path(symbol, base_dir="AdjDaily"):
    """Return CSV file path given ticker symbol."""
    return os.path.join(base_dir, "{}.csv".format(str(symbol)))

def get_data(symbols, dates,col):
    """Read stock data (adjusted close) for given symbols from CSV files."""
    df = pd.DataFrame(index=dates)
    if 'TASI' not in symbols:  # add SPY for reference, if absent
        symbols.insert(0, 'TASI')
    dateparse = lambda x: pd.datetime.strptime(x, '%d/%m/%Y')
    for symbol in symbols:
        df_temp = pd.read_csv(symbol_to_path(symbol), index_col='Date',
                parse_dates=['Date'],date_parser=dateparse, usecols=['Date', col ], na_values=['nan'])
        df_temp = df_temp.rename(columns={col: symbol})
        df = df.join(df_temp)

        if symbol == 'TASI':  # drop dates SPY did not trad
            df = df.dropna(subset=["TASI"])

    return df

def load_df():
    dates = pd.date_range('01/01/2002', '01/01/2017')
    N= (dates[-1]-dates[0])/365
    N = str(N).split()[0]
    files = os.listdir("AdjDaily")
    symbols=[]
    for name in files:
        if name[0].isdigit():
            symbols.append(name.split('.')[0])
    df = get_data(symbols, dates, 'Close')
    df.to_pickle('database.pkl')
    return df
def normalize_data(df):
    return df/df.iloc[0,:]

def compute_daily_returns(df):
    daily_returns = (df/df.shift(1))-1
    daily_returns = daily_returns[1:]
    return daily_returns

def stats(df,period):
    if period == 'W':
        p = 52.0
    if period == 'M':
        p = 12.0
    if period == 'D':
        p = 365.0
    cagr = (df.iloc[-1]/df.iloc[0])**(1.0/(len(df)/p)) - 1.0
    dr = compute_daily_returns(df)
    sharpe = np.sqrt(p) * dr.mean() / dr.std()
    print (' CAGR(%) = ' + str(cagr * 100))
    print (' Sharpe ratio = ' + str(sharpe))

def multi_period_return(period_returns):
    return np.prod(period_returns + 1) - 1

def pos_val(df,capital=100000,alloc=[]):
    if alloc == []:
        alloc= 1.0/len(df.columns)
    return (normalize_data(df) * alloc * capital).sum(axis=1)

In [2]:
try:
    df = pd.read_pickle('database.pkl')
except:
    print ('No pkl')
    df = load_df()


In [3]:
df = df.resample('M').mean()
df = df.dropna(subset=["TASI"])
tasi = df['TASI'].copy()
df = df.drop('TASI',axis=1)

returns = compute_daily_returns(df)

In [4]:
lookback = 11
holdPeriod = 3
test = returns.rolling(lookback).apply(multi_period_return)
test = test[lookback:]
mask = test.iloc[0].isnull()
test = test.loc[:,~mask]
print (test)
sorteddf = test.sort_values(by=test.index.values[0], ascending=False, axis=1)
print (sorteddf)

  This is separate from the ipykernel package so we can avoid doing imports until


                1010      1020      1030      1040      1050      1060  \
2003-01-31  0.097989  0.234546  0.317989  0.018193  0.251666  0.099824   
2003-02-28 -0.044398  0.110934  0.093783 -0.081941  0.102588  0.031242   
2003-03-31 -0.140330  0.114771  0.002866 -0.070177 -0.046892 -0.025212   
2003-04-30 -0.099123  0.258905 -0.011536 -0.030308 -0.020535  0.025672   
2003-05-31 -0.036714  0.548368  0.061914 -0.004678  0.020955  0.088172   
2003-06-30  0.033992  0.619755  0.076303  0.021050  0.017076  0.102678   
2003-07-31  0.019476  0.775673  0.066640  0.046064  0.057160  0.144612   
2003-08-31  0.087658  0.891596  0.121399  0.018009  0.160606  0.153059   
2003-09-30  0.129485  0.929544  0.224179  0.045749  0.192510  0.161712   
2003-10-31  0.193546  0.822155  0.235320  0.045058  0.238828  0.191471   
2003-11-30  0.213667  0.796610  0.211902  0.031388  0.223602  0.206842   
2003-12-31  0.218254  0.893076  0.307118  0.109830  0.209466  0.277835   
2004-01-31  0.282288  1.010378  0.5042

In [5]:
test.index.values[0]

numpy.datetime64('2003-01-31T00:00:00.000000000')

In [6]:

# This function creates chunks and returns them
def chunkify(lst,n):
    return [ lst[i::n] for i in np.arange(n) ]

chunks = chunkify(sorteddf.T, 6)


In [7]:
chunks[0]

Unnamed: 0,2003-01-31 00:00:00,2003-02-28 00:00:00,2003-03-31 00:00:00,2003-04-30 00:00:00,2003-05-31 00:00:00,2003-06-30 00:00:00,2003-07-31 00:00:00,2003-08-31 00:00:00,2003-09-30 00:00:00,2003-10-31 00:00:00,...,2016-04-30 00:00:00,2016-05-31 00:00:00,2016-06-30 00:00:00,2016-07-31 00:00:00,2016-08-31 00:00:00,2016-09-30 00:00:00,2016-10-31 00:00:00,2016-11-30 00:00:00,2016-12-31 00:00:00,2017-01-31 00:00:00
6020,1.287352,1.053377,0.217694,0.227573,0.374796,0.275408,0.236631,0.270403,0.721179,0.616496,...,-0.334099,-0.278839,-0.331574,-0.197173,-0.168402,-0.321159,-0.491145,-0.416977,-0.130896,-0.02489
2040,0.727183,0.585605,0.530084,0.514422,1.070688,1.516057,1.447615,1.231118,1.551264,1.633058,...,-0.521699,-0.521739,-0.515319,-0.44393,-0.430722,-0.490403,-0.549958,-0.422745,-0.129238,-0.081443
3030,0.409434,0.298806,0.185894,0.165361,0.224424,0.195208,0.187341,0.226515,0.26464,0.26612,...,-0.352999,-0.342503,-0.293124,-0.237021,-0.202613,-0.203635,-0.233723,-0.152763,0.29129,0.209869
2050,0.323148,0.301644,0.173198,0.163043,0.363897,0.541114,0.529412,0.513228,0.568966,0.665002,...,-0.449749,-0.478204,-0.472866,-0.416459,-0.43219,-0.432107,-0.420648,-0.318558,0.011604,0.069602
3010,0.262354,0.135829,0.026407,0.063369,0.130047,0.226376,0.272933,0.284052,0.329251,0.371788,...,-0.375314,-0.353417,-0.335054,-0.233404,-0.215596,-0.242849,-0.257349,-0.143008,0.118988,-0.018758
6070,0.223221,0.044974,0.046293,0.018886,0.045455,0.033563,0.03883,0.074136,0.191146,0.095317,...,-0.446184,-0.36688,-0.379547,-0.265329,-0.244357,-0.274577,-0.319184,-0.203605,0.394374,0.542705
4020,0.116248,-0.00847,-0.10085,-0.07518,0.150694,0.3086,0.386587,0.415616,0.566863,0.514682,...,-0.526626,-0.507368,-0.400517,-0.229846,-0.11677,-0.131564,-0.274025,-0.130529,0.328486,0.568182
2070,0.061724,0.032241,0.056575,0.057052,0.305792,0.41014,0.377595,0.384549,0.428574,0.354834,...,-0.289436,-0.197971,-0.192625,-0.035231,-0.024573,-0.127623,-0.145647,0.015012,0.377883,0.436481
2110,0.026187,-0.061405,-0.182821,-0.151582,0.034083,0.109764,-0.000853,0.089403,0.13513,0.035461,...,-0.282951,-0.264735,-0.194018,-0.200753,-0.204434,-0.353666,-0.440594,-0.326227,-0.094056,-0.062882
1120,-0.013861,-0.052693,-0.080071,0.024008,0.073643,0.220839,0.268558,0.377858,0.417717,0.481701,...,-0.136107,-0.098341,-0.071709,-0.026782,0.028944,0.012452,0.038869,0.222425,0.286696,0.208147


In [8]:
for i in chunks:
    stats(pos_val(df.loc[:,i.T.columns]),'M')

 CAGR(%) = 10.306109312349164
 Sharpe ratio = 0.4617703819192823
 CAGR(%) = 16.073431932855687
 Sharpe ratio = 0.5280701297765142
 CAGR(%) = 7.88342659563237
 Sharpe ratio = 0.3998237845434888
 CAGR(%) = 8.514472410649109
 Sharpe ratio = 0.41695994761516536
 CAGR(%) = 7.678804942756212
 Sharpe ratio = 0.38985444934923064
 CAGR(%) = 9.963315579440636
 Sharpe ratio = 0.4448976409868695


In [9]:
deciles = pd.qcut(sorteddf.iloc[0], 10).values.codes

In [10]:
sorteddfT = sorteddf.T
sorteddfT['deciles'] =  deciles

In [11]:
sorteddfT

Unnamed: 0,2003-01-31 00:00:00,2003-02-28 00:00:00,2003-03-31 00:00:00,2003-04-30 00:00:00,2003-05-31 00:00:00,2003-06-30 00:00:00,2003-07-31 00:00:00,2003-08-31 00:00:00,2003-09-30 00:00:00,2003-10-31 00:00:00,...,2016-05-31 00:00:00,2016-06-30 00:00:00,2016-07-31 00:00:00,2016-08-31 00:00:00,2016-09-30 00:00:00,2016-10-31 00:00:00,2016-11-30 00:00:00,2016-12-31 00:00:00,2017-01-31 00:00:00,deciles
6020,1.287352,1.053377,0.217694,0.227573,0.374796,0.275408,0.236631,0.270403,0.721179,0.616496,...,-0.278839,-0.331574,-0.197173,-0.168402,-0.321159,-0.491145,-0.416977,-0.130896,-0.02489,9
6060,1.018824,0.586394,-0.00527,-0.026036,0.04295,-0.079178,-0.032639,0.002869,0.059093,-0.004866,...,-0.497855,-0.51338,-0.421742,-0.35124,-0.455877,-0.586842,-0.423532,0.115175,0.166902,9
2210,0.874276,0.531537,0.43028,0.35063,0.532107,0.378262,0.129787,0.135053,0.216115,0.167204,...,-0.423715,-0.412649,-0.287418,-0.287768,-0.378953,-0.418885,-0.269886,0.141359,0.038378,9
4080,0.840525,1.180965,0.984127,0.418269,0.535316,0.68254,0.90478,1.14376,1.731824,1.174173,...,-0.293483,-0.337191,-0.252705,-0.2857,-0.319247,-0.380707,-0.369392,-0.180366,-0.054929,9
4140,0.791042,0.63932,0.518288,0.535661,0.111983,0.088275,-0.033055,0.04435,0.00992,0.00459,...,-0.203353,-0.198402,0.01952,0.005929,-0.19613,-0.360663,-0.202306,0.125221,-0.095141,9
4061,0.73685,0.635472,0.223131,0.13761,0.182862,0.247789,0.291509,0.542075,1.093741,1.034746,...,-0.326132,-0.357627,-0.267377,-0.164794,-0.306183,-0.467012,-0.339584,0.017109,-0.007897,9
2040,0.727183,0.585605,0.530084,0.514422,1.070688,1.516057,1.447615,1.231118,1.551264,1.633058,...,-0.521739,-0.515319,-0.44393,-0.430722,-0.490403,-0.549958,-0.422745,-0.129238,-0.081443,8
4130,0.60184,0.527529,0.298158,0.208036,0.486286,0.294709,0.395934,0.375852,0.637546,0.456615,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8
4040,0.498278,0.307085,0.072391,-0.023454,0.131429,0.371725,0.521786,0.795249,0.950088,0.826682,...,-0.535642,-0.513671,-0.345133,-0.273992,-0.321876,-0.408039,-0.287901,0.189147,0.48813,8
4150,0.489693,0.37412,0.261075,0.152292,0.158325,0.16731,0.842757,0.995937,1.127991,1.115829,...,-0.167648,-0.132243,0.11446,0.17718,0.091924,-0.044024,0.089831,0.339201,0.30764,8


In [12]:
sorteddfT.groupby('deciles')

<pandas.core.groupby.groupby.DataFrameGroupBy object at 0x000002885B710208>