In [127]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
def symbol_to_path(symbol, base_dir="AdjDaily"):
    """Return CSV file path given ticker symbol."""
    return os.path.join(base_dir, "{}.csv".format(str(symbol)))

def get_data(symbols, dates,col):
    """Read stock data (adjusted close) for given symbols from CSV files."""
    df = pd.DataFrame(index=dates)
    if 'TASI' not in symbols:  # add SPY for reference, if absent
        symbols.insert(0, 'TASI')
    dateparse = lambda x: pd.datetime.strptime(x, '%d/%m/%Y')
    for symbol in symbols:
        df_temp = pd.read_csv(symbol_to_path(symbol), index_col='Date',
                parse_dates=['Date'],date_parser=dateparse, usecols=['Date', col ], na_values=['nan'])
        df_temp = df_temp.rename(columns={col: symbol})
        df = df.join(df_temp)

        if symbol == 'TASI':  # drop dates SPY did not trad
            df = df.dropna(subset=["TASI"])

    return df

def load_df():
    dates = pd.date_range('01/01/2002', '01/01/2017')
    N= (dates[-1]-dates[0])/365
    N = str(N).split()[0]
    files = os.listdir("AdjDaily")
    symbols=[]
    for name in files:
        if name[0].isdigit():
            symbols.append(name.split('.')[0])
    df = get_data(symbols, dates, 'Close')
    df.to_pickle('database.pkl')
    return df


In [143]:
from collections import OrderedDict
try:
    df = pd.read_pickle('database.pkl')
except:
    print 'No pkl'
    df = load_df()
df = df.resample('W').mean()
tasi = df['TASI']
df = df.drop('TASI',axis=1)
print tasi
print df
# Slice the dataframe to Weeks
data = OrderedDict()
j=52
i=1
k=0
while j < len(df):
    data['Week'+str(i)] = df[k:j]
    k=j
    j+=52
    i+=1
i=1
pctReturn = OrderedDict()
while i <= len(data):
    mask = data['Week'+str(i)].iloc[0].isnull()
    data['Week'+str(i)] = data['Week'+str(i)].loc[:,~mask]
    data['Week'+str(i)] = data['Week'+str(i)].fillna(method='ffill')
    data['Week'+str(i)].to_excel('Week'+str(i)+'.xlsx')
    data['Week'+str(i)].min().to_excel('Min Week'+str(i)+'.xlsx')
    pctReturn['Week'+str(i)] = (data['Week'+str(i)].iloc[-1] / data['Week'+str(i)].min())-1
    pctReturn['Week'+str(i)]= pctReturn['Week'+str(i)].sort_values()
    i+=1
print pctReturn


2002-01-06    2452.046000
2002-01-13    2463.388333
2002-01-20    2449.485000
2002-01-27    2453.158333
2002-02-03    2456.481667
2002-02-10    2442.586667
2002-02-17    2416.956667
2002-02-24    2422.195000
2002-03-03    2441.676000
2002-03-10    3098.227143
2002-03-17    2540.946667
2002-03-24    2565.785000
2002-03-31    2569.696667
2002-04-07    3125.931429
2002-04-14    3426.831429
2002-04-21    4273.857500
2002-04-28    4043.858889
2002-05-05    2876.130000
2002-05-12    2878.645000
2002-05-19    2903.360000
2002-05-26    2907.536667
2002-06-02    2874.883333
2002-06-09    2852.418333
2002-06-16    2788.645000
2002-06-23    2754.385000
2002-06-30    2769.831667
2002-07-07    3246.300000
2002-07-14    2784.898333
2002-07-21    2733.485000
2002-07-28    2687.991667
                 ...     
2016-06-12            NaN
2016-06-19    6573.588000
2016-06-26    6534.784000
2016-07-03    6486.365000
2016-07-10            NaN
2016-07-17    6677.890000
2016-07-24    6616.800000
2016-07-31  

OrderedDict([('Week1', 6010    0.000000
4070    0.000000
4010    0.002732
1090    0.025698
2100    0.030400
2160    0.032819
2140    0.034380
2110    0.038886
2070    0.038945
2080    0.044915
2010    0.045977
3040    0.049469
2020    0.053134
4030    0.055276
1120    0.073394
1060    0.076404
4090    0.085271
3080    0.086909
3020    0.093665
1040    0.097233
6090    0.098039
4020    0.137893
3050    0.141098
1010    0.149719
1080    0.163097
1020    0.168067
2060    0.177778
6030    0.200000
1050    0.209423
6040    0.215926
6050    0.230427
3010    0.236842
2120    0.241214
3060    0.279809
1030    0.285714
6070    0.286747
4050    0.296680
2090    0.299065
2130    0.326316
2050    0.362416
2170    0.375000
3030    0.377792
4100    0.404059
4150    0.424444
3090    0.438272
4040    0.454545
4110    0.458333
4130    0.601476
4061    0.621324
2040    0.692351
2210    0.857741
4140    1.000000
6060    1.078125
6020    1.201923
dtype: float64), ('Week2', 2230    0.080338
4180    0.11568

In [148]:
# Divide to Quartiles and get the last week price
w=1
qNum=1
q=4
j=0
k=1
quartiles = OrderedDict()
capital = 100000
while w <= len(pctReturn):
    numInQuartile = np.round(len(pctReturn['Week'+str(w)])/float(q))
    i=0
    while qNum <= q:
        x = int(numInQuartile*qNum)
        quartiles['Week'+str(w)+'Q'+str(qNum)] = [pctReturn['Week'+str(w)][int(j):x].index]
        qNum+=1
        j+=numInQuartile
#     qNum=1
#     while i < len(pctReturn['Week'+str(w)]):
#         price = data['Week'+str(w)][pctReturn['Week'+str(w)].index[i]].iloc[-1]
#         index = pctReturn['Week'+str(w)].index[i]
#         quartiles['Week'+str(w)+'Q'+str(qNum)].loc[index]=price

#         if k == numInQuartile:
#             qNum+=1
#             k=0
#         if qNum > q:
#             qNum=q
#         i+=1
#         k+=1
    w+=1
    qNum=1
    j=0
    k=1
print(pctReturn)               

OrderedDict([('Week1', 6010    0.000000
4070    0.000000
4010    0.002732
1090    0.025698
2100    0.030400
2160    0.032819
2140    0.034380
2110    0.038886
2070    0.038945
2080    0.044915
2010    0.045977
3040    0.049469
2020    0.053134
4030    0.055276
1120    0.073394
1060    0.076404
4090    0.085271
3080    0.086909
3020    0.093665
1040    0.097233
6090    0.098039
4020    0.137893
3050    0.141098
1010    0.149719
1080    0.163097
1020    0.168067
2060    0.177778
6030    0.200000
1050    0.209423
6040    0.215926
6050    0.230427
3010    0.236842
2120    0.241214
3060    0.279809
1030    0.285714
6070    0.286747
4050    0.296680
2090    0.299065
2130    0.326316
2050    0.362416
2170    0.375000
3030    0.377792
4100    0.404059
4150    0.424444
3090    0.438272
4040    0.454545
4110    0.458333
4130    0.601476
4061    0.621324
2040    0.692351
2210    0.857741
4140    1.000000
6060    1.078125
6020    1.201923
dtype: float64), ('Week2', 2230    0.080338
4180    0.11568

In [150]:
cond = [c for c in data['Week2'].columns if c not in quartiles['Week1Q1'][0]]
print cond
test = data['Week2'].drop(cond,axis=1)
print len(test.columns)
def normalize_data(df):
    return df/df.ix[0,:]

port = normalize_data(test) * (1.0/14.0) * 100
print port
tasi_port = normalize_data(tasi[52:104]) * 100
port_val = port.sum(axis=1)
print port_val
print tasi_port

['1010', '1020', '1030', '1040', '1050', '1060', '1080', '1120', '2030', '2040', '2050', '2060', '2090', '2120', '2130', '2150', '2170', '2190', '2210', '2220', '2230', '2240', '3010', '3020', '3030', '3050', '3060', '3080', '3090', '4020', '4040', '4050', '4061', '4080', '4090', '4100', '4110', '4130', '4140', '4150', '4160', '4170', '4180', '5110', '6020', '6030', '6040', '6050', '6060', '6070', '6090']
14
                1090       2010       2020       2070       2080       2100  \
2003-01-05  7.142857   7.142857   7.142857   7.142857   7.142857   7.142857   
2003-01-12  7.269972   7.338017   7.336343   7.241059   7.243816   7.325073   
2003-01-19  7.186858   7.377049   7.368591   7.275876   7.218576   7.397959   
2003-01-26  7.108634   7.396565   7.110609   7.316050   7.306916   7.033528   
2003-02-02  6.930185   7.142857   6.699452   7.252665   7.092378   6.647230   
2003-02-09  6.825071   6.947697   6.659142   7.206242   7.041898   6.559767   
2003-02-16  6.930185   7.318501   6

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  
