In [64]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from collections import OrderedDict

def symbol_to_path(symbol, base_dir="AdjDaily"):
    """Return CSV file path given ticker symbol."""
    return os.path.join(base_dir, "{}.csv".format(str(symbol)))

def get_data(symbols, dates,col):
    """Read stock data (adjusted close) for given symbols from CSV files."""
    df = pd.DataFrame(index=dates)
    if 'TASI' not in symbols:  # add SPY for reference, if absent
        symbols.insert(0, 'TASI')
    dateparse = lambda x: pd.datetime.strptime(x, '%d/%m/%Y')
    for symbol in symbols:
        df_temp = pd.read_csv(symbol_to_path(symbol), index_col='Date',
                parse_dates=['Date'],date_parser=dateparse, usecols=['Date', col ], na_values=['nan'])
        df_temp = df_temp.rename(columns={col: symbol})
        df = df.join(df_temp)

        if symbol == 'TASI':  # drop dates SPY did not trad
            df = df.dropna(subset=["TASI"])

    return df

def load_df():
    dates = pd.date_range('01/01/2002', '01/01/2017')
    N= (dates[-1]-dates[0])/365
    N = str(N).split()[0]
    files = os.listdir("AdjDaily")
    symbols=[]
    for name in files:
        if name[0].isdigit():
            symbols.append(name.split('.')[0])
    df = get_data(symbols, dates, 'Close')
    df.to_pickle('database.pkl')
    return df
def normalize_data(df):
    return df/df.iloc[0,:]

def compute_daily_returns(df):
    daily_returns = (df/df.shift(1))-1
    daily_returns = daily_returns[1:]
    return daily_returns

def stats(df,period):
    if period == 'W':
        p = 52.0
    if period == 'M':
        p = 12.0
    if period == 'D':
        p = 365.0
    cagr = (df.iloc[-1]/df.iloc[0])**(1.0/(len(df)/p)) - 1.0
    dr = compute_daily_returns(df)
    sharpe = np.sqrt(p) * dr.mean() / dr.std()
    print (' CAGR(%) = ' + str(cagr * 100))
    print (' Sharpe ratio = ' + str(sharpe))

def multi_period_return(period_returns):
    return np.prod(period_returns + 1) - 1

def pos_val(df,capital=100000,alloc=[]):
    if alloc == []:
        alloc= 1.0/len(df.columns)
    return (normalize_data(df) * alloc * capital).sum(axis=1)

In [65]:
try:
    df = pd.read_pickle('AdjustedClose.pkl')
except:
    print ('No pkl')
    df = load_df()


In [67]:
df = df[['AdjClose']].unstack(level=0)
df = df.resample('M').mean()
# df = df.dropna(subset=["TASI"])
# tasi = df['TASI'].copy()
# df = df.drop('TASI',axis=1)

returns = compute_daily_returns(df)

In [68]:
returns.head()

Unnamed: 0_level_0,AdjClose,AdjClose,AdjClose,AdjClose,AdjClose,AdjClose,AdjClose,AdjClose,AdjClose,AdjClose,AdjClose,AdjClose,AdjClose,AdjClose,AdjClose,AdjClose,AdjClose,AdjClose,AdjClose,AdjClose,AdjClose
Symbol,1010,1020,1030,1040,1050,1060,1080,1090,1120,1140,...,8240,8250,8260,8270,8280,8290,8300,8310,8311,8312
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2002-01-31,0.035651,0.044118,0.003951,0.042733,0.016842,0.041615,0.028334,0.00951,0.011382,,...,,,,,,,,,,
2002-02-28,0.083871,-0.027722,-0.038797,0.060936,0.021045,-0.029764,0.050492,0.006784,-0.055028,,...,,,,,,,,,,
2002-03-31,0.118246,0.069901,0.117975,0.065279,0.16047,0.064901,0.071964,0.091449,0.053627,,...,,,,,,,,,,
2002-04-30,0.090908,0.025443,0.068566,0.027462,0.078228,0.051961,0.068121,-0.009202,-0.020431,,...,,,,,,,,,,
2002-05-31,0.0883,0.002031,0.120621,-0.013396,0.034913,0.014763,0.086221,-0.013421,0.040621,,...,,,,,,,,,,


In [69]:
lookback = 11
holdPeriod = 3
test = returns.rolling(lookback).apply(multi_period_return)
test = test[lookback:]
mask = test.iloc[0].isnull()
test = test.loc[:,~mask]
print (test)
sorteddf = test.sort_values(by=test.index.values[0], ascending=False, axis=1)
print (sorteddf)

            AdjClose                                                    \
Symbol          1010      1020      1030      1040      1050      1060   
Date                                                                     
2002-12-31  0.197433  0.115895  0.240910  0.094301  0.227000  0.074082   
2003-01-31  0.183522  0.234546  0.367960  0.065033  0.282572  0.152747   
2003-02-28 -0.001608  0.110934  0.105709 -0.041285  0.102588  0.065096   
2003-03-31 -0.096613  0.137429  0.050312 -0.068336 -0.046892  0.010030   
2003-04-30 -0.049172  0.284493  0.040891 -0.009541 -0.020535  0.072637   
2003-05-31  0.016698  0.579839  0.118237  0.016638  0.020955  0.137999   
2003-06-30  0.081844  0.652677  0.133389  0.042916  0.017076  0.153169   
2003-07-31  0.069578  0.811764  0.123213  0.068466  0.057160  0.177173   
2003-08-31  0.149473  0.930043  0.180876  0.054117  0.183258  0.199822   
2003-09-30  0.193677  0.968762  0.289107  0.086894  0.216666  0.214146   
2003-10-31  0.261379  0.859191  0.3008

  app.launch_new_instance()


In [81]:
sorteddf = returns.sort_values(by=test.index.values[0], ascending=False, axis=1)
chunks = chunkify(sorteddf, 11)
chunks[0]

Unnamed: 0_level_0,AdjClose,AdjClose,AdjClose,AdjClose,AdjClose,AdjClose,AdjClose,AdjClose,AdjClose,AdjClose,AdjClose,AdjClose,AdjClose,AdjClose,AdjClose,AdjClose,AdjClose,AdjClose,AdjClose,AdjClose,AdjClose
Symbol,4160,2040,2050,4030,3020,1120,3040,1020,3010,3060,...,8240,8250,8260,8270,8280,8290,8300,8310,8311,8312
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2002-01-31,,-0.111416,0.088183,-0.018018,0.01001515,0.011382,0.000678,0.044118,-0.000318,-0.009197,...,,,,,,,,,,
2002-12-31,0.078947,0.042723,0.033058,0.027436,0.02282688,0.020636,0.017822,0.017534,0.017014,0.015865,...,,,,,,,,,,
2003-11-30,0.102273,-0.02551,0.014,0.002807,2.220446e-16,0.006744,-0.013181,0.003269,-0.014832,-0.000602,...,,,,,,,,,,
2004-10-31,-0.044832,-0.028407,-0.035498,0.035704,0.04744662,0.136828,0.000954,0.029589,-0.017936,-0.003735,...,,,,,,,,,,
2005-09-30,0.218205,0.112218,0.003605,0.016359,-0.04604677,0.014031,-0.06682,0.039298,-0.01596,-0.021014,...,,,,,,,,,,
2006-08-31,0.319261,0.036193,-0.034798,-0.068028,-0.02504538,-0.010351,0.016629,-0.005081,-0.014565,-0.054005,...,,,,,,,,,,
2007-07-31,-0.050507,0.068613,0.030347,0.015875,-0.02036334,-0.005222,0.075015,-0.06401,-0.015203,0.109128,...,,,,,,,,,,
2008-06-30,-0.046124,0.060735,-0.019795,0.117395,-0.0259003,0.005635,-0.049962,-0.052323,-0.027351,-0.013552,...,,,,,,,,,,
2009-05-31,0.407304,0.113142,0.058167,0.133045,0.09163636,0.13406,0.167758,0.188081,0.169901,0.082903,...,,,,,,,,,,
2010-04-30,-0.201544,0.095579,0.027131,0.059914,0.03267369,0.045329,-0.009541,-0.028937,-0.034185,-0.021986,...,-0.013507,-0.034793,0.017836,-0.006259,-0.022085,,,,,


In [71]:

# This function creates chunks and returns them
def chunkify(lst,n):
    return [ lst[i::n] for i in np.arange(n) ]

chunks = chunkify(sorteddf.T, 6)


In [72]:
chunks[0]

Unnamed: 0_level_0,Date,2002-12-31 00:00:00,2003-01-31 00:00:00,2003-02-28 00:00:00,2003-03-31 00:00:00,2003-04-30 00:00:00,2003-05-31 00:00:00,2003-06-30 00:00:00,2003-07-31 00:00:00,2003-08-31 00:00:00,2003-09-30 00:00:00,...,2019-01-31 00:00:00,2019-02-28 00:00:00,2019-03-31 00:00:00,2019-04-30 00:00:00,2019-05-31 00:00:00,2019-06-30 00:00:00,2019-07-31 00:00:00,2019-08-31 00:00:00,2019-09-30 00:00:00,2019-10-31 00:00:00
Unnamed: 0_level_1,Symbol,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
AdjClose,6020,1.192963,1.287352,1.053377,0.217694,0.227573,0.374796,0.275408,0.236631,0.270403,0.721179,...,-0.142946,-0.094518,-0.066188,-0.053031,-0.134552,-0.025248,0.039886,0.05588,0.061873,-0.000367
AdjClose,4130,0.59678,0.60184,0.527529,0.298158,0.208036,0.486286,0.294709,0.395934,0.375852,0.637546,...,-0.010437,-0.028042,0.026758,0.002673,-0.112971,0.008374,0.187161,0.150434,0.062323,-0.163225
AdjClose,4100,0.365856,0.3109,0.064211,-0.004069,-0.001834,0.047933,0.008407,0.030924,0.073452,0.146842,...,0.016389,0.009995,0.019796,-0.03338,-0.132439,-0.155981,-0.1265,-0.135241,-0.170998,-0.192755
AdjClose,3010,0.257619,0.318583,0.186423,0.04802,0.080215,0.187468,0.288692,0.337615,0.349298,0.396794,...,-0.333005,-0.254374,-0.208902,-0.11416,-0.107212,0.005441,0.29773,0.518544,-0.273066,-0.31985
AdjClose,6030,0.22381,0.238,0.061581,-0.023199,0.109797,0.179979,0.120805,0.064872,0.034527,0.141905,...,,,,,,,,,,
AdjClose,6050,0.172742,0.190167,0.021208,-0.05801,-0.059877,0.002708,0.135777,0.110709,0.152227,0.202878,...,0.184369,0.142632,0.116003,0.124194,0.02382,0.094425,0.156547,0.284886,0.410004,-0.106056
AdjClose,6090,0.095121,0.115967,-0.095833,-0.282039,-0.167728,-0.108844,-0.205762,-0.225484,0.026423,0.082005,...,-0.197525,-0.191922,-0.192806,-0.074631,-0.147332,-0.191441,-0.215496,-0.224084,-0.166296,-0.133385
AdjClose,2080,0.057461,0.091418,0.035271,-0.013613,0.013376,0.183703,0.265151,0.253908,0.315504,0.410229,...,0.002579,0.029482,0.078613,0.117545,-0.018561,0.046243,0.183527,0.180384,-0.351982,-0.35146
AdjClose,2010,-0.028535,0.074632,0.019926,-0.068464,-0.0397,0.054474,0.226481,0.459402,0.939107,1.154143,...,0.196839,0.158321,0.078903,0.081767,-0.07214,-0.049427,-0.074917,-0.130009,-0.221283,-0.699844
AdjClose,1120,-0.10494,0.020313,-0.052822,-0.0481,0.072299,0.124287,0.278868,0.328939,0.443223,0.485028,...,0.397809,0.418333,0.297571,0.430571,0.297361,0.297614,0.300661,0.166406,0.04424,0.087793


In [73]:
for i in chunks:
    stats(pos_val(df.loc[:,i.T.columns]),'M')

 CAGR(%) = 6.826230191027927
 Sharpe ratio = 0.3666143940473547
 CAGR(%) = 7.1317385468402605
 Sharpe ratio = 0.38124284807910735
 CAGR(%) = 9.023210516087854
 Sharpe ratio = 0.4311152831484829
 CAGR(%) = 7.815926085444436
 Sharpe ratio = 0.40504191819920304
 CAGR(%) = 6.668914945452964
 Sharpe ratio = 0.37931704077642037
 CAGR(%) = 4.967757947438534
 Sharpe ratio = 0.31438599659451


In [74]:
deciles = pd.qcut(sorteddf.iloc[0], 10).values.codes

In [75]:
sorteddfT = sorteddf.T
sorteddfT['deciles'] =  deciles

In [76]:
sorteddfT

Unnamed: 0_level_0,Date,2002-12-31 00:00:00,2003-01-31 00:00:00,2003-02-28 00:00:00,2003-03-31 00:00:00,2003-04-30 00:00:00,2003-05-31 00:00:00,2003-06-30 00:00:00,2003-07-31 00:00:00,2003-08-31 00:00:00,2003-09-30 00:00:00,...,2019-02-28 00:00:00,2019-03-31 00:00:00,2019-04-30 00:00:00,2019-05-31 00:00:00,2019-06-30 00:00:00,2019-07-31 00:00:00,2019-08-31 00:00:00,2019-09-30 00:00:00,2019-10-31 00:00:00,deciles
Unnamed: 0_level_1,Symbol,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
AdjClose,6020,1.192963,1.287352,1.053377,0.217694,0.227573,0.374796,0.275408,0.236631,0.270403,0.721179,...,-0.094518,-0.066188,-0.053031,-0.134552,-0.025248,0.039886,0.05588,0.061873,-0.000367,9
AdjClose,6060,0.982554,1.018824,0.586394,-0.00527,-0.026036,0.04295,-0.246126,-0.353983,-0.330271,-0.292723,...,0.144784,0.050602,-0.12915,-0.188962,-0.146639,-0.124167,-0.109824,-0.041103,-0.086631,9
AdjClose,4140,0.877315,0.791042,0.63932,0.518288,0.535661,0.13616,0.154454,0.025746,0.107858,0.071334,...,0.525144,0.446234,0.585443,0.505442,0.459765,0.595948,0.567168,-0.235486,-0.460594,9
AdjClose,2210,0.65931,0.874276,0.531537,0.43028,0.35063,0.532107,0.378262,0.129787,0.135053,0.216115,...,0.586453,0.398452,0.05047,-0.172163,-0.245761,-0.174762,-0.183023,-0.314236,-0.349274,9
AdjClose,4080,0.644311,0.924953,1.28101,1.075142,0.483327,0.605743,0.75972,0.992155,1.227249,1.731824,...,-0.169497,-0.183879,-0.237173,-0.274772,-0.232577,-0.142859,-0.098091,-0.061636,-0.011546,9
AdjClose,4061,0.621324,0.73685,0.635472,0.223131,0.13761,0.182862,0.247789,0.291509,0.542075,1.093741,...,0.004031,-0.004537,-0.043283,-0.11855,-0.138454,-0.203612,-0.172326,-0.119383,-0.253249,9
AdjClose,4130,0.59678,0.60184,0.527529,0.298158,0.208036,0.486286,0.294709,0.395934,0.375852,0.637546,...,-0.028042,0.026758,0.002673,-0.112971,0.008374,0.187161,0.150434,0.062323,-0.163225,8
AdjClose,2040,0.507794,0.727183,0.585605,0.530084,0.514422,1.070688,1.516057,1.447615,1.231118,1.551264,...,0.004748,0.000863,-0.044714,-0.067421,0.043554,0.183692,0.298389,0.231288,0.24564,8
AdjClose,4040,0.471605,0.57101,0.370536,0.124448,0.023951,0.154581,0.405541,0.573025,0.855695,1.015748,...,-0.365123,-0.360416,-0.188556,-0.15877,-0.051629,0.012119,0.021091,0.175177,0.199264,8
AdjClose,3090,0.439192,0.526838,0.274952,-0.017826,0.066163,0.29142,0.424774,0.423503,0.449307,0.549021,...,-0.139962,-0.112508,-0.072241,-0.100558,-0.117914,0.048374,0.156769,0.200827,0.260945,8


In [77]:
sorteddfT.groupby('deciles')

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x7fdb3efbdac8>