In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sb
import empyrical as ep
import pyfolio as pf
from collections import OrderedDict
from IPython.display import display, HTML
%matplotlib inline

  'Module "zipline.assets" not found; mutltipliers will not be applied' +


In [2]:

def symbol_to_path(symbol, base_dir="AdjDaily"):
    """Return CSV file path given ticker symbol."""
    return os.path.join(base_dir, "{}.csv".format(str(symbol)))

def get_data(symbols, dates,col):
    """Read stock data (adjusted close) for given symbols from CSV files."""
    df = pd.DataFrame(index=dates)
    if 'TASI' not in symbols:  # add SPY for reference, if absent
        symbols.insert(0, 'TASI')
    dateparse = lambda x: pd.datetime.strptime(x, '%d/%m/%Y')
    for symbol in symbols:
        df_temp = pd.read_csv(symbol_to_path(symbol), index_col='Date',
                parse_dates=['Date'],date_parser=dateparse, usecols=['Date', col ], na_values=['nan'])
        df_temp = df_temp.rename(columns={col: symbol})
        df = df.join(df_temp)

        if symbol == 'TASI':  # drop dates SPY did not trad
            df = df.dropna(subset=["TASI"])

    return df

def load_df():
    dates = pd.date_range('01/01/2002', '01/01/2017')
    N= (dates[-1]-dates[0])/365
    N = str(N).split()[0]
    files = os.listdir("AdjDaily")
    symbols=[]
    for name in files:
        if name[0].isdigit():
            symbols.append(name.split('.')[0])
    df = get_data(symbols, dates, 'Close')
    df.to_pickle('database.pkl')
    return df
def normalize_data(df):
    return df/df.iloc[0,:]

def compute_daily_returns(df):
    daily_returns = (df/df.shift(1))-1
    daily_returns = daily_returns[1:]
    return daily_returns

def stats(df,period):
    if period == 'W':
        p = 52.0
    if period == 'M':
        p = 12.0
    if period == 'D':
        p = 365.0
    cagr = (df.iloc[-1]/df.iloc[0])**(1.0/(len(df)/p)) - 1.0
    dr = compute_daily_returns(df)
    sharpe = np.sqrt(p) * dr.mean() / dr.std()
    print (' CAGR(%) = ' + str(cagr * 100))
    print (' Sharpe ratio = ' + str(sharpe))

def multi_period_return(period_returns):
    return np.prod(period_returns + 1) - 1

def pos_val(df,capital=100000,alloc=[]):
    if alloc == []:
        alloc= 1.0/len(df.columns)
    return (normalize_data(df) * alloc * capital).sum(axis=1)

# This function creates chunks and returns them
def chunkify(lst,n):
    return [ lst[i::n] for i in np.arange(n) ]

def portfolio(df,holdPeriod,capital=100000):
    if isinstance(df,pd.DataFrame):
#         capital = df.shape[1] * 1000
        alloc = 1 / df.shape[1]
    else:
#         capital = 1000
        alloc = 1 
    postions = norm(df[:holdPeriod+1]) * alloc * capital * 0.99845 # Trading Commissions
    if isinstance(df,pd.DataFrame):
        portfolio = postions.sum(axis=1)
    else:
        portfolio = postions
    return portfolio

def norm(df):
    if isinstance(df,pd.DataFrame):
        mask = df.iloc[0].isnull()
        df = df.loc[:,~mask]
        mask = df.iloc[0]==0
        df = df.loc[:,~mask]
    df = df / df.iloc[0]
    return df

# https://github.com/quantopian/pyfolio/blob/13af662b5c3851ede1f1910bf3c3acfca3b53c52/pyfolio/utils.py#L170
def print_table(table,
                name=None,
                float_format=None,
                formatters=None,
                header_rows=None):
    """
    Pretty print a pandas DataFrame.
    Uses HTML output if running inside Jupyter Notebook, otherwise
    formatted text output.
    Parameters
    ----------
    table : pandas.Series or pandas.DataFrame
        Table to pretty-print.
    name : str, optional
        Table name to display in upper left corner.
    float_format : function, optional
        Formatter to use for displaying table elements, passed as the
        `float_format` arg to pd.Dataframe.to_html.
        E.g. `'{0:.2%}'.format` for displaying 100 as '100.00%'.
    formatters : list or dict, optional
        Formatters to use by column, passed as the `formatters` arg to
        pd.Dataframe.to_html.
    header_rows : dict, optional
        Extra rows to display at the top of the table.
    """

    if isinstance(table, pd.Series):
        table = pd.DataFrame(table)

    if name is not None:
        table.columns.name = name

    html = table.to_html(float_format=float_format, formatters=formatters)

    if header_rows is not None:
        # Count the number of columns for the text to span
        n_cols = html.split('<thead>')[1].split('</thead>')[0].count('<th>')

        # Generate the HTML for the extra rows
        rows = ''
        for name, value in header_rows.items():
            rows += ('\n    <tr style="text-align: right;"><th>%s</th>' +
                     '<td colspan=%d>%s</td></tr>') % (name, n_cols, value)

        # Inject the new HTML
        html = html.replace('<thead>', '<thead>' + rows)
    display(HTML(html))
    
percentage_formatter = '{:.2%}'.format
float_formatter = '{:.2f}'.format

In [3]:
sb.set()
sb.set_color_codes("deep")
# Change the default figure size
# https://stackabuse.com/python-data-visualization-with-matplotlib/
fig_size = plt.rcParams["figure.figsize"]  
fig_size[0] = 20  
fig_size[1] = 10  
plt.rcParams["figure.figsize"] = fig_size 

In [4]:
data = pd.read_pickle('AdjustedClose.pkl')

In [5]:
df = data.xs(slice('01/01/2010','30/09/2019'),level=1,drop_level=False)
df.head()
bh = df[['AdjClose']].unstack(level=0)
bh = bh.droplevel(0,1)
df = df[['Close']].unstack(level=0)
# returns = df.resample('BM').mean()
df = df.droplevel(0,1)
# returns = returns.droplevel(0,1)
# df = df.dropna(subset=["TASI"])
# tasi = df['TASI'].copy()
# df = df.drop('TASI',axis=1)         

In [61]:
lookback = 90
mom = (df - df.shift(lookback))*0.5

In [62]:
mom = mom[lookback:]
mask = mom.iloc[0].isnull()
mom = mom.loc[:,~mask]

In [8]:
porto = hold.sort_values(by=hold.index.values[0], ascending=False, axis=1)
porto.head()

Symbol,2360,2010,3030,2290,2020,4190,2040,4001,2240,8010,...,8100,8080,4160,4140,6050,2370,8040,8110,8220,2110
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2010-05-08,9.2,8.625,5.7,5.35,3.9,3.845,2.97,2.65,2.35,2.32,...,-5.12,-5.2,-7.4,-7.78,-7.945,-8.335,-9.35,-9.49,-12.75,-22.32
2010-05-09,9.8,9.375,5.975,5.9,4.1,4.125,3.83,2.925,2.7,2.48,...,-4.79,-4.95,-6.95,-6.565,-5.425,-7.845,-8.95,-9.32,-12.2,-20.44
2010-05-10,10.5,9.0,6.375,6.45,4.5,4.685,3.67,3.25,3.8,3.52,...,-3.675,-4.5,-6.8,-5.355,-5.43,-7.32,-8.65,-7.82,-9.575,-18.725
2010-05-11,9.6,8.125,6.275,5.75,4.2,4.875,3.52,3.15,4.0,4.0,...,-3.78,-4.3,-6.7,-5.555,-5.165,-7.695,-7.7,-6.785,-7.1,-20.275
2010-05-12,9.675,8.25,6.15,5.15,3.9,4.785,3.435,2.95,4.2,3.56,...,-4.185,-3.9,-6.8,-5.455,-5.425,-7.62,-7.75,-5.995,-7.75,-18.885


In [19]:
tickers = np.array(porto.columns)
tickers

array([2360, 2010, 3030, 2290, 2020, 4190, 2040, 4001, 2240, 8010, 1120,
       2320, 2280, 6001, 7020, 1050, 2050, 2230, 8030, 1090, 2260, 4100,
       2060, 3020, 1211, 2350, 2070, 4030, 2002, 2200, 1212, 4002, 3050,
       1010, 4280, 1080, 2270, 1040, 1060, 6090, 2300, 4150, 1030, 4250,
       2250, 2140, 7040, 4240, 2310, 4200, 2150, 6080, 4040, 3040, 2190,
       4080, 4090, 1140, 2001, 5110, 2130, 4300, 1210, 4220, 8210, 4020,
       4290, 4050, 2120, 1150, 1020, 4180, 2100, 3090, 6060, 8200, 4260,
       4010, 2330, 6040, 8070, 2170, 4270, 4130, 6020, 4110, 8170, 1310,
       4070, 8060, 8190, 2080, 8180, 2220, 4230, 2210, 2090, 3060, 8280,
       1320, 8090, 6010, 8250, 2380, 7010, 3010, 8140, 4210, 6070, 8150,
       7030, 2160, 8130, 4061, 3080, 4170, 2030, 8240, 8160, 2340, 8120,
       8020, 8050, 2180, 8230, 8100, 8080, 4160, 4140, 6050, 2370, 8040,
       8110, 8220, 2110], dtype=int64)

In [20]:
for array in chunkify(tickers,10):
    print(len(array))

14
14
14
14
14
13
13
13
13
13


In [33]:
holdPeriod = 90
quartiles = df.loc[porto.index.values[0]:]
quartiles = quartiles[tickers[14:28]]
test = portfolio(quartiles,holdPeriod)

In [64]:
buy = mom > 0
buy.apply(pd.value_counts,axis=1)

Unnamed: 0_level_0,False,True
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2010-05-08,84,51
2010-05-09,82,53
2010-05-10,81,54
2010-05-11,82,53
2010-05-12,78,57
2010-05-15,87,48
2010-05-16,87,48
2010-05-17,87,48
2010-05-18,85,50
2010-05-19,90,45


In [65]:
buy = buy*1
pos = buy.diff()
pos = pos.combine_first(buy)

In [99]:
hold = df[mom.columns][lookback:]
mask = hold.iloc[0].isnull()
hold = hold.loc[:,~mask]
cash_val = hold * pos * -1
stk_val = hold * buy

In [100]:
cash_val = cash_val.sum(axis=1) 
cash_val.iloc[0]=10000
cash_val = cash_val.cumsum()
mkt_val = stk_val.sum(axis=1)

In [101]:
cash_val

Date
2010-05-08    10000.00
2010-05-09     9911.95
2010-05-10     9896.45
2010-05-11     9857.63
2010-05-12     9754.29
2010-05-15    10088.01
2010-05-16    10031.03
2010-05-17    10019.84
2010-05-18     9923.09
2010-05-19    10197.34
2010-05-22    10275.75
2010-05-23    10637.16
2010-05-24    10781.29
2010-05-25    11158.72
2010-05-26    11027.76
2010-05-29    10636.00
2010-05-30    10747.17
2010-05-31    10699.43
2010-06-01    10784.58
2010-06-02    10762.25
2010-06-05    10834.62
2010-06-06    10860.50
2010-06-07    10684.85
2010-06-08    10885.40
2010-06-09    10655.38
2010-06-12    10212.71
2010-06-13    10390.03
2010-06-14    10363.38
2010-06-15    10284.35
2010-06-16    10260.81
                ...   
2019-08-19     9903.20
2019-08-20     9958.42
2019-08-21     9982.60
2019-08-22    10352.96
2019-08-25    10510.99
2019-08-26    10572.22
2019-08-27    10528.25
2019-08-28    10477.49
2019-08-29    10553.39
2019-09-01    10602.09
2019-09-02    10650.19
2019-09-03    10761.19
2019-0

In [102]:
mkt_val

Date
2010-05-08    1686.56
2010-05-09    1810.68
2010-05-10    1854.18
2010-05-11    1874.67
2010-05-12    1967.32
2010-05-15    1592.25
2010-05-16    1644.44
2010-05-17    1649.31
2010-05-18    1760.29
2010-05-19    1437.96
2010-05-22    1338.56
2010-05-23     969.92
2010-05-24     812.06
2010-05-25     390.33
2010-05-26     530.48
2010-05-29     940.97
2010-05-30     822.53
2010-05-31     876.72
2010-06-01     776.54
2010-06-02     802.00
2010-06-05     719.61
2010-06-06     693.48
2010-06-07     870.44
2010-06-08     671.57
2010-06-09     906.38
2010-06-12    1369.87
2010-06-13    1183.54
2010-06-14    1222.29
2010-06-15    1301.32
2010-06-16    1326.27
               ...   
2019-08-19    1918.31
2019-08-20    1870.83
2019-08-21    1834.52
2019-08-22    1452.51
2019-08-25    1268.95
2019-08-26    1211.04
2019-08-27    1279.36
2019-08-28    1315.16
2019-08-29    1224.58
2019-09-01    1179.20
2019-09-02    1141.46
2019-09-03    1025.31
2019-09-04    1224.60
2019-09-05    1105.98
2019-

In [None]:
from collections import defaultdict
holdPeriod = 30
returns = x[1000:]
epochs = int((np.floor(returns.shape[0])/(holdPeriod)))
print(epochs)
newCapital = 100000
deciles = defaultdict(dict)
for i in range(0,epochs):
    # First loop 
    if i == 0:
        mask = returns.iloc[0].isnull()
        hold = returns.loc[:,~mask]
        hold = hold[:holdPeriod]
        h = holdPeriod
    else:
        h = (holdPeriod * i)
        hold = returns[h:holdPeriod*(i+1)]
        mask = hold.iloc[0].isnull()
        hold = hold.loc[:,~mask]  
    tickers = hold.max().sort_values(ascending=False)
    tickers = tickers[tickers > 7].index
    print(tickers)
    prices = df[df.index.get_loc(hold.index.values[0]):]
    if i == 0:
    # If first loop use 100k as starting capital, else use the capital from last trade
        newCapital = 100000.0
    else:
        if len(deciles) != 0:
            newCapital = list(deciles.values())[-1]
            
        else:
            pass
    print(newCapital)
    if len(tickers) != 0:
        print('Hold Stocks')
        prices = prices[tickers]
    #     print(np.array(prices.columns))
        port = portfolio(prices,holdPeriod,newCapital)
        for index, row in port.iteritems():
            deciles.update({index:row})                
    else:
        print('Go Cash')
        prices = prices[:holdPeriod]
        port = pd.Series(index=prices.index,data=[newCapital]*len(prices.index))
        for index, row in port.iteritems():
            deciles.update({index:row})     
print(newCapital)
final = pd.Series(deciles)
fdr = compute_daily_returns(final)