In [1]:
import numpy as np
import pandas as pd
import json
import yfinance as yf
from datetime import datetime
from cvxopt import matrix, solvers
import warnings
warnings.filterwarnings('ignore')

In [2]:
import numpy as np

def simplex_projection_selfnorm2(v,b):
    print(type(v))
    while (max(abs(v)) > 1e6):
        v = v / 10
        break
    u = np.sort(v)[::-1]
    sv = np.cumsum(u)
    c = np.array(range(1, len(u)+1, 1))
    sample = u - (sv - b) / c
    sample = sample[sample>0]
    rho = np.argmin(sample)
    theta = (sv[rho] - b) / (rho+1)
    w = np.maximum(v - theta, 0)
    return w

In [126]:
def simplex_projection_selfnorm2(v,b=1):
    ticker = v.index
    nstk = v.shape[0]
    Q = (0 * matrix(np.identity(nstk)))
    p = matrix(v)
    G = matrix(-1 * np.identity(nstk))
    h = matrix(np.zeros(shape=(nstk, 1)))
    A = matrix(np.ones(shape=(1, nstk)))
    b = matrix(np.ones(shape=(1, 1)))
    sol = solvers.qp(Q, p, G, h, A, b)
    x = np.array(sol['x']).reshape(nstk)
    w = pd.Series(x,index=ticker)
    return w

In [4]:
with open("sp500-historical-components.json",'r') as load_f:
    load_dict = json.load(load_f)

In [5]:
load_dict

[{'Date': '2017/05/03',
  'Symbols': ['AAPL',
   'MSFT',
   'AMZN',
   'FB',
   'XOM',
   'JNJ',
   'BRKB',
   'JPM',
   'GOOGL',
   'GOOG',
   'GE',
   'WFC',
   'BAC',
   'T',
   'PG',
   'CVX',
   'PFE',
   'HD',
   'VZ',
   'CMCSA',
   'INTC',
   'MRK',
   'V',
   'PM',
   'CSCO',
   'KO',
   'C',
   'UNH',
   'DIS',
   'PEP',
   'MO',
   'IBM',
   'ORCL',
   'AMGN',
   'MMM',
   'MCD',
   'MDT',
   'WMT',
   'MA',
   'ABBV',
   'BA',
   'HON',
   'SLB',
   'CELG',
   'PCLN',
   'BMY',
   'UNP',
   'AVGO',
   'UTX',
   'SBUX',
   'GILD',
   'GS',
   'USB',
   'CVS',
   'AGN',
   'QCOM',
   'TXN',
   'COST',
   'LLY',
   'TWX',
   'ABT',
   'ACN',
   'LOW',
   'UPS',
   'WBA',
   'NKE',
   'CHTR',
   'DOW',
   'MDLZ',
   'DD',
   'LMT',
   'NFLX',
   'ADBE',
   'TMO',
   'CB',
   'CL',
   'MS',
   'NEE',
   'NVDA',
   'AXP',
   'PNC',
   'CAT',
   'BIIB',
   'COP',
   'DUK',
   'AIG',
   'MET',
   'CRM',
   'GD',
   'PYPL',
   'AMT',
   'RAI',
   'KHC',
   'SPG',
   'EOG',
   'TJX',

In [4]:
d = {}
for i in load_dict:
    if i['Date'] == '2017/05/03':
        continue
    j = i['Date'].replace('/','-')
    d[j] = i['Symbols']
dd = set()
for i,v in d.items():
    dd = dd.union(set(v))
date_list_comp = list(d.keys())
date_list_comp.sort()

In [5]:
price_data = None
for name in dd:
    tmp = pd.read_csv('./data1/'+name+'.csv', index_col=0)[['Adj Close']]
    if price_data is None:
        price_data = tmp.copy()
        price_data.columns = [name]
    else:
        price_data[name] = tmp['Adj Close'].tolist()
ret = price_data/price_data.shift(1)
sma = price_data.rolling(5).mean()/price_data
pp = price_data.rolling(5).max()/price_data

In [6]:
def get_prediction_weights(date='2010-10-07', k=5, sigma=0.05):
    # select stock from composite data
    for i,v in enumerate(date_list_comp):
        if v>date:
            break
    selected_stock = set(d[date_list_comp[i-1]]+d[date_list_comp[i-2]])

    date_list = price_data.index.tolist()
    
    # fetch used data
    for i,v in enumerate(date_list):
        if v>=date:
            break
    used_price = price_data.loc[date_list[i-k-5]:date_list[i-1], selected_stock]
    used_ret = ret.loc[date_list[i-k-5]:date_list[i-1], selected_stock]
    used_price.dropna(axis=1, inplace=True)

    notnan_stocks = used_ret.columns.tolist()
    selected_stock = selected_stock.intersection(set(notnan_stocks))
    used_ret = ret.loc[date_list[i-k-5]:date_list[i-1], selected_stock]
    used_ret[used_ret>2] = np.nan
    used_ret[used_ret<0.5] = np.nan
    used_ret.dropna(axis=1, inplace=True)
    notnan_stocks = used_ret.columns.tolist()
    selected_stock = selected_stock.intersection(set(notnan_stocks))

    used_sma = sma.loc[date_list[i-k-5]:date_list[i-1], selected_stock]
    used_sma.dropna(axis=1, inplace=True)
    notnan_stocks = used_sma.columns.tolist()
    selected_stock = selected_stock.intersection(set(notnan_stocks))

    used_pp = pp.loc[date_list[i-k-5]:date_list[i-1], selected_stock]
    used_pp.dropna(axis=1, inplace=True)
    notnan_stocks = used_pp.columns.tolist()
    selected_stock = selected_stock.intersection(set(notnan_stocks))
    
    used_price = price_data.loc[date_list[i-k-5]:date_list[i-1], selected_stock]
    used_sma = sma.loc[date_list[i-k-5]:date_list[i-1], selected_stock]
    used_ret = ret.loc[date_list[i-k-5]:date_list[i-1], selected_stock]
    used_pp = pp.loc[date_list[i-k-5]:date_list[i-1], selected_stock]

    # projection
    sma_weights = used_sma.apply(lambda x: simplex_projection_selfnorm2(x, 1), axis=1)
    pp_weights = used_pp.apply(lambda x: simplex_projection_selfnorm2(x, 1), axis=1)
    # calcualte representation historical return
    sma_return = (sma_weights.shift(1)*used_ret).sum(axis=1)
    pp_return = (pp_weights.shift(1)*used_ret).sum(axis=1)
    
    # minmax
    sma_min = sma_return.iloc[-k:].min()
    pp_min = pp_return.iloc[-k:].min()
    distance = np.exp(-((sma_return.iloc[-k:] - pp_return.iloc[-k:])**2).sum()/(2*sigma**2))
    if sma_min > pp_min:
        fi = [1, distance]
    else:
        fi = [distance, 1]
        
    return (sma_weights.iloc[-1,:] * fi[0]  +  pp_weights.iloc[-1,:] * fi[1]).to_frame().apply(lambda x: simplex_projection_selfnorm2(x,1),axis=0)

In [7]:
date_list = price_data.index.tolist()

In [8]:
yield_ = []
bmk = []
start = 100
end = 1000
ttt = 0
for date in date_list[start:end]:
    weights = get_prediction_weights(date)
    invest_stock = weights.index.tolist()
    ret1 = ret.loc[date, invest_stock]
    yield_.append((weights.iloc[:, 0] * ret1).sum())
    bmk.append(ret1.mean())
    
result = pd.DataFrame(yield_, index = date_list[start:end])
result.columns = ['s']
result['bmk']=bmk

<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.S

<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.S

KeyboardInterrupt: 

In [None]:
result[result>1.2] = 1
result[result<0.8] = 1

In [1]:
(result).cumprod().plot()
plt.show()

NameError: name 'result' is not defined

In [297]:
(result-1).mean() / (result-1).std() * np.sqrt(252)

s      1.301297
bmk    0.968364
dtype: float64