In [None]:

import pandas as pd
import cvxpy as cvx
import numpy as np

# Number of most recent price rows each optimization run is restricted to.
# NOTE(review): the original comment described this as "1 year", but the
# value is 20 rows -- confirm the intended span.
frame = 20

# Rebalance frequency (pandas offset alias) and the initial lookback,
# i.e. how many trailing return rows feed each optimization.
frequency = "W-THU"
lb = 20

# Default optimization bounds: total-weight range and per-asset weight range.
min_gross = 0.5
max_gross = 1
min_w = 0
max_w = 0.1

# ETF name to restrict the company list to, or None for the broad market.
active_etf = None

# Consolidated price history, read from a pickle in the working directory.
consol_px = pd.read_pickle("consol_px.pkl")

In [None]:
def last_allocation(alloc, min_weight):
    """Return the final row of ``alloc`` as a one-column 'Allocation' frame.

    The last row is transposed so the original columns become the index, and
    only entries strictly greater than ``min_weight`` are kept.
    """
    latest = alloc[-1:].T
    latest.columns = ['Allocation']
    above_threshold = latest['Allocation'] > min_weight
    return latest[above_threshold]


def port_metrics(px, rec):
    """Compute the scaled return and risk of a weighted portfolio.

    Parameters
    ----------
    px : pandas.DataFrame
        Price history with one column per ticker. It is sorted by index on a
        copy, so the caller's frame is left untouched.
    rec : pandas.DataFrame
        Weights indexed by ticker. Expected to be a single-column frame: the
        indexing of the results below relies on ``rec.values`` being (n, 1).

    Returns
    -------
    tuple
        ``(port_return, port_risk)`` as scalars.
    """
    # Percentage changes are order dependent, so sort first -- on a copy
    # rather than in place, to avoid mutating the caller's data.
    prices = px.sort_index()
    returns = prices[rec.index.tolist()].pct_change()
    mean_daily_returns = returns.mean()
    cov_matrix = returns.cov()
    weights = np.asarray(rec.values)
    # NOTE(review): the scaling factor is the number of tickers, not the
    # number of periods. Kept as in the original -- confirm this is intended.
    mult = len(mean_daily_returns)
    port_return = np.dot(mean_daily_returns.values, weights) * mult
    # The original referenced an undefined name (gb_matrix) here, which raised
    # NameError; the covariance matrix computed above is what is needed.
    port_variance = np.dot(weights.T, np.dot(cov_matrix.values, weights))
    port_risk = np.sqrt(port_variance) * np.sqrt(mult)
    return port_return[0], port_risk[0][0]

In [None]:

def cleanmin(x):
    """Convert ``x`` to float and floor it at 1."""
    return max(float(x), 1)


def clean_nas(df):
    """Return a gap-free copy of ``df`` with every value floored at 1.

    Columns without a single observation are dropped, remaining gaps are
    filled forward and then backward, and the values are converted to float
    and floored at 1 (the same rule ``cleanmin`` applies to one value).
    The input frame is not modified.
    """
    counts = df.count()
    empty_cols = counts[counts < 1].index.tolist()
    cleaned = df.drop(empty_cols, axis=1)
    # ffill()/bfill() replace fillna(method=...), which is deprecated in
    # current pandas releases.
    cleaned = cleaned.ffill().bfill()
    # Vectorized equivalent of applying cleanmin to every cell.
    return cleaned.astype(float).clip(lower=1)

# Names of the price columns that hold no observations at all.
cols = (
    consol_px.count()
    .sort_values()
    .loc[lambda counts: counts < 1]
    .index.tolist()
)

# The cleaned table is only this cell's output: the result is not stored,
# and clean_nas works on a copy, so consol_px itself stays as loaded.
clean_nas(consol_px)

In [None]:


def date_intervals(df, freq):
    """Average ``df`` over left-closed, left-labelled bins of width ``freq``."""
    binned = df.resample(rule=freq, label='left', closed='left')
    return binned.mean()

# Mean variance optimization
def get_mean_variance(rets):
    """Summarize a returns frame for the optimizer.

    Returns a 5-tuple: the number of columns, an equal-weight vector of that
    length, the per-column means (array), the per-column standard deviations
    (Series) and the covariance matrix (array).
    """
    n_assets = rets.shape[1]
    equal_weights = np.ones(n_assets) / n_assets
    mean_returns = rets.mean().values
    volatilities = rets.std()
    covariance = rets.cov().values
    return n_assets, equal_weights, mean_returns, volatilities, covariance


def get_mvo_allocations(n, mu_ret, cov_mtrx, min_sum, max_sum, min_w, max_w, gamma_val):
    """Solve one mean-variance problem and return the weights as a list.

    Maximizes ``expected return - gamma * risk`` where risk is the quadratic
    form of the weights with ``cov_mtrx``, subject to the weight total lying
    in [min_sum, max_sum] and each weight lying between min_w and max_w.
    Returns None when the solver status is anything other than 'optimal'.
    """
    # NOTE(review): sum_entries and Parameter(sign=...) look like the
    # pre-1.0 cvxpy spelling -- confirm the pinned cvxpy version.
    mu_row = mu_ret.T
    weights = cvx.Variable(n)
    risk_aversion = cvx.Parameter(sign='positive')
    expected_return = mu_row.T * weights
    variance = cvx.quad_form(weights, cov_mtrx)
    objective = cvx.Maximize(expected_return - risk_aversion*variance)
    constraints = [
        cvx.sum_entries(weights) >= min_sum,
        cvx.sum_entries(weights) <= max_sum,
        weights > min_w,
        weights < max_w,
    ]
    problem = cvx.Problem(objective, constraints)
    risk_aversion.value = gamma_val
    problem.solve()
    if problem.status != 'optimal':
        return None
    # The solution is read as a column of one-element rows.
    return [row[0] for row in weights.value.tolist()]

def get_weights(px, freq, lb, min_sum, max_sum, min_w, max_w, gamma):
    """Build the allocation history for a price frame.

    Prices are cleaned and turned into returns. On every ``freq`` bin label
    that is also a date in the returns index, the last ``lb`` return rows up
    to that date are optimized and the weights are stored on that row.

    Rows that receive no weights (too little history, or no optimal
    solution) inherit the previous allocation through the forward fill.
    Because zero marks "nothing stored", an exact-zero weight returned by
    the optimizer is overwritten by the previous value in the same way.

    Returns
    -------
    tuple
        ``(returns, hist_alloc)``, two frames sharing index and columns.
    """
    px = clean_nas(px)
    returns = px.sort_index().pct_change()
    returns.iloc[0] = 0  # the first row has no previous price
    intervals = pd.to_datetime(date_intervals(returns, freq).index.tolist())
    valid_dates = [d for d in intervals if d in returns.index]
    hist_alloc = pd.DataFrame(
        np.zeros(returns.shape), index=returns.index, columns=returns.columns)
    for rebalance in valid_dates:
        day = rebalance.date()
        lb_returns = returns.loc[:day].tail(lb).dropna()
        if len(lb_returns) <= 2:
            continue  # too little history: the row stays at zero
        n, _, mu_ret, _, cov_mtrx = get_mean_variance(lb_returns)
        weights = get_mvo_allocations(
            n, mu_ret, cov_mtrx, min_sum, max_sum, min_w, max_w, gamma)
        if weights is None:
            # No optimal solution: skip explicitly instead of assigning None.
            continue
        hist_alloc.loc[day] = weights
    # ffill() replaces the deprecated fillna(method='ffill').
    hist_alloc = (
        hist_alloc.loc[returns.index]
        .replace(0, np.nan)
        .ffill()
        .fillna(0)
    )
    return returns, hist_alloc


def calc_port_performance(arr, weights):
    """Cumulative growth of a portfolio from per-period returns and weights.

    Multiplies ``arr`` and ``weights`` element-wise, sums across axis 1 to get
    one return per row, and compounds ``1 + return`` down the rows.
    """
    period_returns = np.sum(arr * weights, axis=1)
    growth_factors = period_returns + 1
    return np.cumprod(growth_factors)


def recommend_allocs(px, frame, lb, freq, min_sum, max_sum, min_w, max_w, gamma):
    """Optimize over the last ``abs(frame)`` rows of the cleaned prices.

    Returns a 4-tuple: the price slice that was used, its returns, the
    allocation history, and a one-column frame ("M2-cvxopt") holding the
    cumulative performance of that allocation.
    """
    window = clean_nas(px)[-abs(frame):].copy()
    rets, weights = get_weights(
        window, freq, lb, min_sum, max_sum, min_w, max_w, gamma)
    growth = calc_port_performance(rets.values, weights.values)
    perf = pd.DataFrame(growth, index=rets.index, columns=["M2-cvxopt"])
    return window, rets, weights, perf

In [None]:

# Finds the best gamma risk parameter: one optimization per candidate value,
# ranked by the ratio of return to risk.
SAMPLES = 100
gamma_vals = np.logspace(-2, 3, num=SAMPLES)
opt_cols = ["Return", "Risk", "Sharpe"]
opt_df = pd.DataFrame([], index=gamma_vals, columns=opt_cols)

for i in gamma_vals:
    px_portion, _, alloc, pdf = recommend_allocs(
        consol_px, frame, lb, frequency, min_gross, max_gross, min_w, max_w, i)
    rec = last_allocation(alloc, 0.01)
    ret, risk = port_metrics(px_portion, rec)
    # Write with a single .loc call. The chained form
    # opt_df.loc[i][opt_cols] = ... may assign into a temporary object and
    # leave opt_df unchanged.
    opt_df.loc[i, opt_cols] = [ret, risk, ret / risk]

top = 5
best_gamma_ports = opt_df.sort_values(by='Sharpe', ascending=False).head(top)
top_gammas = best_gamma_ports.index.tolist()
gamma_val = top_gammas[0]

In [None]:

def portfolio_metrics(name, pdf):
    """Return ``(ret, std, ret / std)`` for the first column of ``pdf``.

    The mean percentage change is scaled by the number of rows and the
    standard deviation by the square root of that number.

    When a module-level ``log`` flag is truthy the figures are also printed,
    using the module-level ``p_template`` format string if one exists and a
    built-in template otherwise. Both globals are optional, so the function
    also works before they are defined.
    """
    timespan = len(pdf)
    changes = pdf.pct_change()
    ret = (changes.mean() * timespan).values[0]
    # np.sqrt: the bare name sqrt used originally is not imported anywhere.
    std = (changes.std() * np.sqrt(timespan)).values[0]
    if globals().get('log', False):
        template = globals().get(
            'p_template', "{0}: Return {1:.2f}, Risk {2:.2f}, Sharpe {3:.2f}")
        print(template.format(name, ret, std, ret / std))
    return ret, std, ret / std

# portfolio_metrics reads this flag, so it has to exist before the first
# call. It was originally assigned only further down in this cell.
log = True

# NOTE(review): px_spy (benchmark prices) is not defined in the visible
# cells -- confirm where it is loaded.
portfolio_metrics('Benchmark', px_spy.loc[pdf.index]);
best_gamma_ports.head(top)

# Creates matrix of lookbacks and weights to determine the best combination
lbs = list(range(5, 25, 5))
ws = [pct / 100 for pct in range(5, 20, 5)]

mtx_cols = ["Lookback", "Weight"] + opt_cols
mtx_df = pd.DataFrame([], index=range(len(lbs) * len(ws)), columns=mtx_cols)

i = 0
for lookback in lbs:
    for weight_cap in ws:
        px_portion, _, alloc, _ = recommend_allocs(
            consol_px, frame, lookback, frequency,
            min_gross, max_gross, min_w, weight_cap, gamma_val)
        rec = last_allocation(alloc, 0.01)
        tickers = rec.index.tolist()
        ret, risk = port_metrics(px_portion, rec)
        # Single .loc call: the chained mtx_df.loc[i][mtx_cols] = ... may
        # assign into a temporary object and leave mtx_df unchanged.
        mtx_df.loc[i, mtx_cols] = [lookback, weight_cap, ret, risk, ret / risk]
        i += 1

In [None]:

# Rank the grid by return and adopt the lookback / max weight of the top row.
mtx_df = mtx_df.sort_values(by='Return', ascending=False)
lb, max_w = mtx_df[['Lookback', 'Weight']].iloc[0]
print("Gamma: {0:.2f}, Lookback: {1}, Max Weight: {2}".format(gamma_val, lb, max_w))


# In[11]:

# what is the difference between port with gammas 1..5?
def rr_portfolios(g_list):
    """Tabulate the final allocation for each gamma in ``g_list``.

    Every gamma is optimized with the current module-level settings; its last
    allocation (weights above 0.01) becomes the row labelled with that gamma,
    and the columns are the tickers held by any of the portfolios.
    """
    table = pd.DataFrame([], index=g_list)
    for gamma in g_list:
        allocations = recommend_allocs(
            consol_px, frame, lb, frequency,
            min_gross, max_gross, min_w, max_w, gamma)[2]
        latest = last_allocation(allocations, 0.01)
        row = pd.DataFrame(
            latest.T.values, index=[gamma], columns=latest.index.tolist())
        table = table.combine_first(row)
    return table

In [None]:

import os

# Which ETF family supplies the company universe (a key of ticker_map/config).
universe = 'spy-sectors'
# Directory holding the ETF holdings CSV files.
# NOTE(review): absolute, machine-specific path -- consider making it configurable.
component_path = "/Users/Yes/Quantum/quantum/sector_components/"
# Filled by load_components: ETF ticker -> list of its component tickers.
sector_tickers_map = {}
# ETF tickers per universe.
ticker_map = {
    'benchmark': ['SPY'],
    'equity': ['VTI', 'VTV', 'VOE', 'VBR', 'VEA', 'VWO'],
    'fixed_income': ['VTIP', 'SHV', 'MUB', 'LQD', 'BNDX', 'EMB'],
    'spy-sectors': [
        'XLE', 'XLU', 'XLK', 'XLB', 'XLP',
        'XLY', 'XLI', 'XLV', 'XLF', 'XLRE',
    ],
    'ark-etfs': ['ARKG', 'ARKK', 'ARKQ', 'ARKW'],
}

# Per-universe description of the holdings files: the columns to read, the
# file-name prefix, the index column, a name, and how many rows to skip.
config = {
    'spy-sectors': {
        'hold_cols': ['Symbol', 'Company Name', 'Index Weight'],
        'hold_format': r'holdings-spy-',
        'idx_col': 'Symbol',
        'fname': 'spy-sectors',
        'skiprows': 1,
    },
    'ark-etfs': {
        'hold_cols': ['ticker', 'company', 'weight(%)'],
        'hold_format': r'holdings-ark-',
        'idx_col': 'ticker',
        'fname': 'ark-etfs',
        'skiprows': 0,
    },
}

# Unpack the settings of the selected universe into module-level names.
hold_cols, hold_format, idx_col, fname, skiprows = (
    config[universe][field]
    for field in ('hold_cols', 'hold_format', 'idx_col', 'fname', 'skiprows')
)

# Starts empty; load_components appends each ETF's holdings to it.
companies = pd.DataFrame([])


In [None]:

def clean_idx(df, s):
    """Return a copy of ``df`` without missing or pattern-matching labels.

    Rows whose index label is missing are removed first; then every row whose
    label contains the pattern ``s`` is dropped.
    """
    present = df.index.dropna()
    out = df.loc[present].copy()
    matches = out.index.str.contains(s) == True
    if matches.any():
        out = out.drop(out.index[matches], axis=0)
    return out

# Load component from ETF holding CSVs
# Uniform names given to every holdings file: the index, then the data columns.
col_names = ['Symbol','Company', 'Weight']
def load_components(cos, pattern, cols, idxcol, sectors, srows=1):
    """Append the holdings of each ETF in ``sectors`` to ``cos``.

    For every ETF the file ``component_path + pattern + <etf lower-case> + '.csv'``
    is read, its index and columns are renamed to ``col_names``, rows whose
    label is missing or contains a space are removed, and an 'ETF' column
    with the ETF ticker is added. ``sector_tickers_map`` is updated with the
    component tickers of each ETF.

    Parameters
    ----------
    cos : pandas.DataFrame
        Frame the holdings are appended to; it is not modified.
    pattern : str
        File-name prefix of the holdings files.
    cols : list
        Columns to read from each CSV.
    idxcol : str
        Column used as the index.
    sectors : list
        ETF tickers to load.
    srows : int, optional
        Number of leading rows to skip in each CSV.

    Returns
    -------
    pandas.DataFrame
        ``cos`` followed by the holdings of every ETF, in order.
    """
    frames = []
    for s in sectors:
        csv_path = component_path + pattern + s.lower() + '.csv'
        print(csv_path)
        df = pd.read_csv(csv_path, skiprows=srows, index_col=idxcol, usecols=cols)
        df.index.name = col_names[0]
        df.columns = col_names[1:]
        df = clean_idx(df, ' ')
        df['ETF'] = s
        sector_tickers_map[s] = df.index.tolist()
        frames.append(df)
    # One concat instead of DataFrame.append in the loop: append was removed
    # in pandas 2.0 and copied the growing frame on every iteration.
    return pd.concat([cos] + frames)

# Load the holdings of every ETF in the selected universe into one table.
companies = load_components(
    cos=companies,
    pattern=hold_format,
    cols=hold_cols,
    idxcol=idx_col,
    sectors=ticker_map[universe],
    srows=skiprows,
)

In [None]:
if active_etf is not None:
    companies = companies[companies['ETF'] == active_etf]  # filter by selected ETF

# run optimization with best gamma, returns initial weights
px_portion, returns, alloc, pdf = recommend_allocs(
    consol_px, frame, lb, frequency, min_gross, max_gross, min_w, max_w, gamma_val)
rec = last_allocation(alloc, 0.01)
tickers = rec.index.tolist()

# Attach sector and company name to every recommended ticker.
recommend = rec.copy()
cos_no_dup = companies.drop_duplicates(subset='Company')
recommend['Sector'] = cos_no_dup.loc[tickers]['ETF']
recommend['Company'] = cos_no_dup.loc[tickers][['Company']]
# Builtin float instead of np.float: the alias was removed in NumPy 1.24
# and referred to the same type.
recommend = recommend.astype({"Allocation": float})
# NOTE(review): the next expression has no effect -- its result is neither
# assigned nor the last expression of the cell. Confirm whether the
# reordered view was meant to be displayed or stored.
recommend[['Company', 'Sector', 'Allocation']]
recommend


In [None]:

# plots all tickers to verify if there are any M&A targets
# plt is used below but not imported in any visible cell, so import it here.
import matplotlib.pyplot as plt

for t in recommend.sort_index().index:
    consol_px[t][-frame:].plot()
    plt.title(companies.loc[t]['Company'])
    plt.show()


In [None]:

def sect_group_stats(col, df=None):
    """Summarize allocations per group of ``col``.

    Prints the total allocation in percent and returns a frame indexed by the
    distinct values of ``col`` (in order of first appearance) with two
    columns: 'Sector Weight' (sum of 'Allocation' in the group) and
    'Avg Position' (mean of 'Allocation' in the group).

    Parameters
    ----------
    col : str
        Column to group by.
    df : pandas.DataFrame, optional
        Frame holding ``col`` and an 'Allocation' column. Defaults to the
        module-level ``recommend`` table.
    """
    if df is None:
        df = recommend
    # Aggregate only 'Allocation'. Summing or averaging the whole group would
    # also touch text columns such as 'Company', which either fails or yields
    # a multi-column result that cannot be assigned to one column.
    grouped_alloc = df.groupby(by=col)['Allocation']
    print("Total % Allocation {0:.2f}".format(df.Allocation.sum() * 100));
    sector_cols = ['Sector Weight', 'Avg Position']
    sector_df = pd.DataFrame([], index=pd.unique(df[col]), columns=sector_cols)
    sector_df[sector_cols[0]] = grouped_alloc.sum()
    sector_df[sector_cols[1]] = grouped_alloc.mean()
    return sector_df

# Print the total allocation and build the per-sector summary.
# NOTE(review): the returned table is not the last expression here, so it is
# presumably not displayed -- confirm whether it should be shown or stored.
sect_group_stats('Sector')


# Persist the recommendation table, the price slice used by the final
# optimization and the raw last allocation as pickles in the working directory.
recommend.to_pickle("recommend.pkl")
px_portion.to_pickle("px_portion.pkl")
rec.to_pickle("rec.pkl")
