In [None]:
%matplotlib inline
from matplotlib import pyplot as plt
from IPython import display
import fix_yahoo_finance as yf
import pandas 
import numpy as np
from datetime import datetime
from datetime import timedelta
import os.path
import tensorflow as tf
import time
import json


def downloadAndMerge(names):
    """Load daily close prices for every ticker and merge them into one table.

    For each ticker the cached file ``csv/<ticker>.csv`` is read when it
    exists; otherwise the history is fetched with ``yf.download`` and written
    to that path. Only the ``Close`` column is kept, renamed to the ticker.

    The per-ticker frames are outer-joined on their index. Every row up to and
    including the last row that contains a NaN is discarded, so the remaining
    rows hold a value for every ticker. When such a row exists, the first
    ticker missing in it and the row label are printed.

    Parameters
    ----------
    names : iterable of str
        Ticker symbols.

    Returns
    -------
    downloaded : list of pandas.DataFrame
        One single-column frame per ticker, indexed by datetimes.
    merged : pandas.DataFrame
        Transposed table (rows are tickers, columns are dates) restricted to
        the trailing NaN-free rows.
    """
    downloaded = []
    for n in names:
        csv = 'csv/' + n + '.csv'
        if os.path.isfile(csv):
            temp = pandas.read_csv(csv, index_col=0)
        else:
            temp = yf.download(n)
            # to_csv does not create missing directories.
            os.makedirs('csv', exist_ok=True)
            temp.to_csv(csv)
        nv = temp[['Close']].copy()
        nv.columns = [n]
        # An index read back from CSV holds strings; parse it so that all
        # frames join on real timestamps.
        if not isinstance(nv.index, pandas.DatetimeIndex):
            nv.index = pandas.to_datetime(nv.index)
        downloaded.append(nv)

    merged = pandas.concat(downloaded, axis=1, join='outer')

    # Positions of the rows in which at least one ticker is missing.
    nan_rows = np.flatnonzero(merged.isna().any(axis=1).values)
    if nan_rows.size:
        last = nan_rows[-1]
        print(merged.columns[merged.iloc[last].isna()][0])
        print(merged.iloc[last].name)
        merged = merged.iloc[last + 1:]
    # With no incomplete row at all, every row is kept.
    return downloaded, merged.T


def getD(df):
    """Turn each row of prices into a cumulative-return curve starting at 10.

    For every row the period-over-period relative change is computed, scaled
    by the base value 10, summed cumulatively and offset by 10. A leading
    column equal to 10 is prepended, so the output has the same number of
    columns as ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per series, one column per period.

    Returns
    -------
    numpy.ndarray
        Array of shape ``df.shape``.
    """
    base = 10
    prices = df.values
    previous = prices[:, :-1]
    current = prices[:, 1:]
    returns = (current - previous) / previous
    curve = np.cumsum(returns * base, axis=1) + base
    origin = np.full((returns.shape[0], 1), float(base))
    return np.concatenate((origin, curve), axis=1)


def toWeek(df, start):
    """Sample a date-columned table once every seven days.

    Starting at ``start`` and stepping seven days at a time up to the last
    column label of ``df``, each sample takes the column for that exact date
    or, failing that, the closest of the six preceding days that is present.
    When none of those seven dates is a column, the sample is filled with NaN.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose column labels are dates comparable with ``datetime``.
    start : str
        First sample date in ``'%b %d %Y'`` format, e.g. ``'Jan 01 1999'``.

    Returns
    -------
    pandas.DataFrame
        Same index as ``df``; one column per sample date.
    """
    idx = datetime.strptime(start, '%b %d %Y')
    last = df.columns[-1]
    week = []
    names = []
    while idx <= last:
        names.append(idx)
        # back == 0 is the sample date itself, 1..6 are the fallback days.
        for back in range(7):
            candidate = idx - timedelta(days=back)
            if candidate in df.columns:
                week.append(df[candidate].values)
                break
        else:
            # No quote within the week: mark it as missing. (Looking up
            # df[idx] here would raise, since idx is known to be absent.)
            week.append(np.full(df.shape[0], np.nan))
        idx = idx + timedelta(days=7)

    # The explicit reshape keeps the (rows, 0) shape when no sample was taken.
    data = np.array(week).reshape(len(week), df.shape[0]).T
    return pandas.DataFrame(data, index=df.index, columns=names)


def findDrop(s, debug=True):
    """Measure the largest peak-to-trough decline of a series.

    The drawdown at each position is ``1 - s / running_max(s)``.

    Parameters
    ----------
    s : numpy.ndarray
        One-dimensional series of values.
    debug : bool
        When true, print the maximum drawdown, how many steps after the
        trough the series first reaches the preceding peak again (or the
        number of remaining steps if it never does), the peak-to-trough
        length and the mean drawdown.

    Returns
    -------
    tuple
        ``(max drawdown in percent, mean drawdown as a fraction)``, or
        ``(0, 0)`` when the largest drawdown sits at position 0.
    """
    running_peak = np.maximum.accumulate(s)
    drop = 1 - s / running_peak
    trough = np.argmax(drop)  # end of the period
    if trough == 0:
        return 0, 0
    peak = np.argmax(s[:trough])  # start of period

    if debug:
        remaining = len(s) - trough
        # First offset after the trough at which the old peak is reached.
        jump = next(
            (k for k in range(1, remaining) if s[trough + k] >= s[peak]),
            remaining,
        )
        print('max drawback {:0.2f}%, from {:0.2f} to {:0.2f}'.format(drop[trough] * 100, s[peak], s[trough]))
        print('wait {} after max drop'.format(jump))
        print('drop lasting {}'.format(trough - peak))
        print('average drop', drop.mean())
    return drop[trough] * 100, drop.mean()

def getCO(arr):
    """Absolute cosine similarity between the return series of every row pair.

    Returns are computed per row as ``arr[:, 1:] / arr[:, :-1] - 1``.

    Parameters
    ----------
    arr : numpy.ndarray
        Two-dimensional array, one series per row.

    Returns
    -------
    numpy.ndarray
        Square matrix of size ``len(arr)``. Entry ``[a][b]`` with ``b < a``
        holds ``|cos(r[a], r[b])|``; the diagonal and the upper triangle are
        zero.
    """
    r = arr[:, 1:] / arr[:, :-1] - 1
    # Each norm is computed once instead of once per pair, and all dot
    # products come from a single matrix product.
    norms = np.linalg.norm(r, axis=1)
    cosine = np.dot(r, r.T) / norms[:, None] / norms[None, :]
    # k=-1 keeps only the strict lower triangle.
    return np.tril(np.absolute(cosine), k=-1)

def anaTop(top, d, w, debug):
    """Analyse the combination of the rows of ``d`` with the largest weights.

    Rows are selected when their absolute weight is at least the ``top``-th
    largest absolute weight. The selected rows are combined using their
    weights, rescaled so the first value is 10, and then evaluated with
    ``findDrop`` and a degree-1 ``np.polyfit``.

    Parameters
    ----------
    top : int
        Rank (from the largest) of the absolute weight used as the cut-off.
    d : numpy.ndarray
        Two-dimensional array, one series per row.
    w : numpy.ndarray
        One weight per row of ``d``.
    debug : bool
        Forwarded to ``findDrop``; also prints the slope, the mean squared
        residual of the linear fit, the similarity matrix and the weights.

    Returns
    -------
    tuple
        ``(drop, meandrop, selected, slope, co)``, or ``(1, 1, 1, 0, [])``
        when nothing is selected.
    """
    magnitude = np.abs(w)
    cutoff = np.sort(magnitude)[-1 * top]
    selected = np.where(magnitude >= cutoff)[0]
    if len(selected) == 0:
        return 1, 1, 1, 0, []

    rows = d[selected]
    co = getCO(rows)
    combined = (rows * np.expand_dims(w[selected], -1)).sum(axis=0)
    combined = combined / combined[0] * 10
    drop, meandrop = findDrop(combined, debug)

    positions = np.arange(combined.shape[0])
    slope, intercept = np.polyfit(positions, combined, 1)
    fitted = intercept + slope * positions
    if debug:
        #pandas.DataFrame(s).plot()
        print('slope {}, fit loss {}'.format(slope, (np.power(combined - fitted, 2)).sum() / fitted.shape[0]))
        print('co ', co)
        print(w[selected].T)
    return drop, meandrop, selected, slope, co
    
def stat(s):
    """Summarise a series by its drawdown and its straight-line fit.

    Parameters
    ----------
    s : numpy.ndarray
        One-dimensional series.

    Returns
    -------
    tuple
        ``(drop, meandrop, slope, fitloss)``: the two values returned by
        ``findDrop(s, False)``, the slope of the degree-1 ``np.polyfit``
        against positions ``0..len(s)-1``, and the mean squared residual of
        that fit.
    """
    drop, meandrop = findDrop(s, False)
    positions = np.arange(s.shape[0])
    slope, intercept = np.polyfit(positions, s, 1)
    fitted = intercept + slope * positions
    residual = s - fitted
    fitloss = np.power(residual, 2).sum() / fitted.shape[0]
    return drop, meandrop, slope, fitloss
        
        
# regression loss
def reg_loss(s, v):
    """Squared deviation of ``s`` from a straight line with a trainable slope.

    The line is ``a * x + 10`` for ``x = 0 .. v.shape[1] - 1``. ``a`` is a
    TensorFlow variable named ``"a"`` of shape ``[1]``, initialised to 0.

    Parameters
    ----------
    s : tensor
        Series compared against the line.
    v : tensor
        Only its second dimension is read; it sets the length of the line.

    Returns
    -------
    tuple
        ``(loss, a)``: the sum of squared differences divided by the length
        of the line, and the slope variable.
    """
    slope = tf.get_variable("a", [1], tf.float32, initializer=tf.constant_initializer(0))
    positions = np.arange(int(v.shape[1]))
    line = slope * positions + 10
    squared_error = tf.reduce_sum(tf.pow(s - line, 2))
    return squared_error / int(line.shape[0]), slope

def drop_loss(s, v):
    """Maximum drawdown of ``s`` expressed as a TensorFlow tensor.

    ``s`` is tiled into a square matrix whose row ``r`` keeps only entries
    ``0..r`` (the rest are replaced by -inf). The row-wise maximum is
    therefore the running maximum of ``s``, and the result is
    ``max(1 - s / running_max)``.

    Parameters
    ----------
    s : tensor
        One-dimensional series of length ``v.shape[1]``.
    v : tensor
        Only its second dimension is read.

    Returns
    -------
    tensor
        Scalar maximum drawdown.
    """
    length = int(v.shape[1])
    # Lower triangle including the diagonal: mask[r][c] is True for c <= r.
    mask = np.tril(np.ones([length, length], dtype=bool))

    b = tf.constant(mask)
    y = tf.tile(s, [length])
    y = tf.reshape(y, [length, length])
    # -np.inf rather than np.NINF, which no longer exists in NumPy 2.
    y = tf.where(b, y, np.full([length, length], -np.inf))
    y = tf.reduce_max(y, axis=1)
    return tf.reduce_max(1 - s / y)

def co_loss(w, co):
    """Sum of ``co`` weighted by the absolute pairwise products of ``w``.

    Parameters
    ----------
    w : tensor
        One-dimensional weights.
    co : tensor
        Square matrix with one row and column per weight.

    Returns
    -------
    tensor
        Scalar ``sum(co[a][b] * |w[a] * w[b]|)``.
    """
    pairwise = tf.expand_dims(w, 0) * tf.expand_dims(w, -1)
    weighted = co * tf.abs(pairwise)
    return tf.reduce_sum(weighted)

def getW(V, debug):
    """Search by gradient descent for weights over six rows of ``V``.

    A TensorFlow variable ``"weight"`` with one entry per row of ``V`` is
    trained with Adam (learning rate 0.01). Only the six entries with the
    largest absolute value take part in the losses: the drawdown loss, the
    line-fit loss and the largest pairwise similarity among those rows, plus
    penalties on the sum of absolute weights and on their variance.

    Every 4000 steps the current weights are evaluated in NumPy with ``stat``.
    The search stops as soon as the drawdown is below 3, the fit loss below
    0.01 and the largest similarity below 0.6; the resulting series is then
    plotted.

    Parameters
    ----------
    V : numpy.ndarray
        Two-dimensional array, one series per row. ``tf.math.top_k`` is
        called with k=6 -- presumably this requires at least six rows;
        TODO confirm.
    debug : bool
        When true, print the weights and all loss values at every check.

    Returns
    -------
    tuple
        ``(output_select, output_w)``: the selected row indices and their
        weights, or ``([], [])`` when the time limit or the step limit is
        reached first.
    """
    w = tf.get_variable("weight", [V.shape[0]], tf.float32, initializer=tf.ones_initializer())
    v = tf.constant(V, tf.float32)
    co = tf.constant(getCO(V), tf.float32)
    
    #coloss = co_loss(w, co)
    # From here on w, v and co refer only to the six rows with the largest
    # absolute weight.
    _, selected = tf.math.top_k(tf.abs(w), 6)
    w = tf.gather(w, selected)
    v = tf.gather(v , selected)
    co = tf.gather(tf.gather(co, selected), selected, axis=1)
    coloss = tf.reduce_max(co)
    
    # Weighted sum of the selected series.
    s = tf.reduce_sum(tf.expand_dims(w, -1) * v, axis=0)

    rl, a = reg_loss(s, v)
    losses = [100 * drop_loss(s, v), 200 * rl, 3 * coloss]
 
    key_loss = sum(losses)
    # Penalises the sum of absolute weights for differing from 1.
    w_loss = (tf.reduce_sum(tf.abs(w)) - 1) ** 2
    # tf.nn.moments returns (mean, variance); index 1 is the variance, so a
    # larger spread of |w| lowers this term.
    std_loss = 0.1 - 3 * tf.nn.moments(tf.abs(w), axes=[0])[1]

    total_loss = key_loss + w_loss + std_loss
    
    
    optimizer = tf.train.AdamOptimizer(0.01)
    train_op = optimizer.minimize(total_loss)

    with tf.Session() as sess:
        init_op = tf.global_variables_initializer()
        sess.run(init_op)
        start_time = datetime.now()
        
        lastSelect = None
        found = False
        for step in range(10000000):
            # Evaluate progress only every 4000 training steps.
            if step % 4000 == 0:
                # NOTE(review): .seconds is the seconds component of the
                # timedelta, not total_seconds(). drop and meandrop are only
                # assigned further down, so this print relies on a previous
                # check having run.
                if (datetime.now() - start_time).seconds > 180:
                    print('took too much time, abort, drop {}, meandrop {}'.format(drop, meandrop))
                    return [], []
                
                output_w = sess.run(w)                
                output_select = sess.run(selected)
            
                # Recompute the combined series in NumPy from the raw input,
                # rescaled so that it starts at 10, and measure it with stat.
                s = (V[output_select] *  np.expand_dims(output_w, -1)).sum(axis=0)
                s = s / s[0] * 10
                drop, meandrop, slope, fitloss = stat(s)
                max_co = sess.run(coloss)
                
                # Acceptance thresholds for ending the search.
                found = drop < 3 and fitloss < 0.01 and max_co < 0.6
                if not found and step % 12000 == 0:
                    print('seeking, current drop {}, fitloss {}, max_co {}'.format(drop, fitloss, max_co))
                
                if debug or found:
                    print(step)
                    print(output_w)
                    print(output_select)
                    output_losses = sess.run(losses)
                    print('drop loss, tensor {}, real {}'.format(output_losses[0], drop))
                    print('rl loss, tensor {}, real {}'.format(output_losses[1], fitloss))
                    print('max co ', max_co)
                    print('close to 1 loss ', sess.run(w_loss))
                    print('std loss ', sess.run(std_loss))
                    print('total loss', sess.run(total_loss))

                if found:
                    # Show the accepted series before returning it.
                    plt.figure()
                    plt.plot(list(range(len(s))), s)
                    display.display(plt.gcf())
                    time.sleep(1)
                    return output_select, output_w

            sess.run(train_op)
        # Step limit reached without meeting the thresholds.
        return [],[]

In [None]:
# Ticker symbols to load. Physical line breaks fall between list items only;
# a break inside a quoted symbol is an unterminated string literal.
search = ['SPY','AAPL','AMZN','QQQ','NFLX','MSFT','KEYS','TSLA','IWM','GOOG','EEM','GOOGL','FB','BABA','AMD','NVDA','BRK-B','XLF','IVV','SBUX','EFA','XOM','BKNG','XLI','SQ','GE','BAC','IYR','XLU','INTC','JPM','PG','XLP','IBM','EWZ','ATVI','CVX','HYG','WFC','C','VXX','XLK','FXI','DIS','XLE','MU','MRK','PFE','V','UNH','EMB','MCD','ABBV','QCOM','T','LLY','BA','CSCO','KO','JNJ','LGND','IEMG','WMT','ADBE','UNP','GLD','VZ','DIA','CMCSA','LIN','HD','XLV','UTX','XOP','TQQQ','ORCL','CME','CRM','BIDU','HON','TWTR','MA','RHT','CAT','SMH','ABT','AVGO','PEP','SYY','TXN','COST','GS','MDY','GDX','LQD','MS','MMM','OXY','IEFA','NKE','VWO','KHC','TLT','VNQ','EOG','DWDP','AMGN','XLY','LMT','WBA','VOO','CVS','CL','MO','SLB','VTI','RTN','VEA','GD','BDX','XLB','GILD','SQQQ','TEVA','PM','GM','EL','EWJ','AAL','ISRG','MCHI','DGAZ','F','PYPL','EA','TVIX','IJH','CSX','IJR','COF','MDT','KRE','NOC','ECA','AMT','AGG','NEE','BMY','ACHC','NXPI','STX','MET','NWL','CELG','COP','PBR','MPC','CMG','SCHW','TSM','SPLK','JNK','TJX','LRCX','ULTA','BIIB','APC','LOW','STZ','HAL','LUV','EBAY','MTUM','EMR','WCG','AIG','BLK','CGC','TGT','PNC','MDLZ','ROKU','SHV','VLO','CCI','BAX','ALGN','XBI','BKLN','HRS','MAR','REGN','CI','DAL','INTU','NSC','ZTS','ANTM','WTW','USB','WDC','PLCE','BP','W','EW','AXP','SHW','COG','EXC','EQIX','EWT','WMB','FDX','NVS','AGN','HCA','ABMD','SPOT','CZR','PH','LLL','ACN','ETN','LVS','AMAT','SYMC','TDOC','IWD','USO','TMO','M','UPS','ILMN','GPC','VRSN','DRI','DUK','X','CHTR','USFD','UNG','JD','WDAY','FOXA','ADP','GIS','UGAZ','BBT','NOW','FDC','CAG','BIL','IEF','UVXY','TWLO','UAL','PSA','WYNN','SO','MYL','ANET','INDA','D','K','AKAM','APD','CIEN','ORLY','HES','GDXJ','AABA','KLAC','BK','ESRX','EWY','DE','IQV','PSX','EWW','KSS','NUE','FEZ','AET','WM','EXPE','XRT','FLOT','RSX','IWO','DXC','BBY','UAA','IP','TDG','ICE','CLX','YUM','FMC','CTSH','LULU','FTNT','PCG','DLR','KR','SAGE','MSI','IWB','RL','RACE','TNA','KEY','DHR','SWKS','PRU','VALE','SPGI','GRUB','CXO','CB','AAP','PXD','MCHP','SRE','DHI'
,'CMI','NFX','IGV','BHC','DVN','TTWO','CHD','SDS','ROST','XLNX','IWF','SHY','MLM','AME','VTV','TYL','KMI','CNC','NTES','URI','STT','HLT','TMUS','VCSH','BX','MRO','DG','RDS-A','APTV','ADI','SPG','ABX','OMC','SWK','AMRN','VIG','WPC','VMC','BRK-A','APH','DXCM','NOV','ITUB','ADM','GOVT','QRVO','LYB','QID','WEC','TEL','PE','BSX','HPQ','SHOP','OLED','PBYI','VO','HUM','VIAB','ESS','UPRO','VST','FISV','KMB','CHK','AMP','XRAY','ET','EZU','HCP','MFC','LEN','CCL','PANW','FE','PAYX','XEL','PNW','JCI','SYK','MCK','FLR','NRZ','OHI','IWN','AZN','AEP','PEG','ED','EXAS','FSLR','ROP','SWN','TTD','CTL','ALL','GWW','COL','RRC','MCO','PPG','CMA','SYF','WP','OKE','HBAN','GDDY','STI','WRK','VFC','XPO','NBL','OIH','A','PGR','APA','VEEV','ENB','NRG','SPXU','ADSK','FANG','FAST','NCLH','CF','MHK','EQT','ALXN','CBS','MOH','IBB','SPLV','HRL','FLT','SBAC','BLL','MMI','SSO','AMLP','FOX','SCZ','MGM','CE','HOG','PLD','SRPT','SIRI','SJM','MAS','DLTR','LB','DPZ','ACWI','XEC','TPR','BND','RCL','WBT','IAC','DISCA','KORS','ITW','TLRY','MKC','EZA','EXPD','DTE','BBD','NTR','MTCH','IFF','CMS','RF','VRTX','GPN','AVB','TRV','NEM','AZO','MNST','DVA','MOS','ZBH','WLTW','MELI','HST','TRIP','ARNC','FITB','AES','SIVB','FL','DFS','MTB','SHPG','AFL','JBGS','TAP','TIP','SOXX','XLRE','LEA','BHGE','BURL','TRGP','BUD','NTAP','KDP','EIX','NUGT','LNC','HSIC','IR','REZI','WU','HSY','ECL','HYLB','MUB','LABU','CTXS','PBR-A','EPD','CRI','CLR','RPM','IDXX','IPG','SNA','SPTM','IAU','BG','ABEV','SPXL','JBHT','TAL','RH','ETR','LH','TIF','SH','HDV','MXIM','FCX','TSN','EXR','NVR','JWN','ON','NBIX','SPXS','VGK','BHP','TFI','GLW','CTAS','IVW','O','PACB','USMV','HBI','WELL','PHM','PPL','HAS','GG','CPT','GDI','QLD','GSK','EXEL','Z','VXF','FCE-A','MSG','WPX','HIG','WY','JNPR','ANSS','XLC','ZION','BHF','WLL','BF-B','WB','YUMC','VOD','VCIT','VTR','HIIQ','KWEB','BLUE','AR','CHRW','MTD','ROK','TOL','WEN','FTV','WAT','GGB','PTC','WEX','WSM','MRVL','DKS','SCHO','VUG','ULTI','IVZ','SEAS','MBB','EFX','EMN','ALK','CNP','BSV','ABC','CP','CFG'
,'FIS','PAYC','MSCI','SCHF','IVE','GLNG','CDW','SEE','BR','RIG','CAH','VER','RJF','TZA','PII','CPB','TD','AOS','UA','SNAP','RIO','EWH','IQ','VRSK','HRC','HPE','LNG','OSK','CLF','EQR','DATA','VT','CTRP','MPW','RDS-B','DVMT','TRU','ETFC','ASML','KBE','FFIV','CWB','TSCO','PCAR','AMED','WHR','VAR','BXP','SU','OLLI','JAZZ','HFC','L','S']
downloaded, merged = downloadAndMerge(search)

In [None]:
# Make sure every per-ticker frame is indexed by datetimes before joining.
# pandas.to_datetime is used because a unit-less 'datetime64' cast is not
# accepted by every pandas version.
for frame in downloaded:
    if not isinstance(frame.index, pandas.DatetimeIndex):
        frame.index = pandas.to_datetime(frame.index)

# Rebuild the outer-joined table over all dates (this replaces the trimmed
# `merged` from the download cell) and sample it every seven days starting
# at Jan 01 1999. After the final transpose, rows are sample dates and
# columns are tickers.
merged = pandas.concat(downloaded, axis=1, join='outer')
merged = toWeek(merged.T, 'Jan 01 1999').T
merged.shape

In [None]:
# Number of consecutive rows of `merged` handed to getW on each iteration.
step = 30 
# Currently held position as [names, weights, prices at entry]; None when
# nothing is held.
pool = None
# Running account value; changed whenever a held position is closed.
height = 1000000
# Number of positions opened through update().
totalBet = 0
# Number of closed positions whose summed change was positive.
winBet = 0
def _settle(i):
    """Close the held position, if any, at row ``i`` of ``merged``.

    The relative price change of each held name since entry is multiplied by
    its weight and by the current ``height``; the sum is added to ``height``
    and counted in ``winBet`` when positive. ``pool`` itself is left for the
    caller to replace or reset.
    """
    global height
    global winBet
    if pool is None:
        return
    held_names, held_w, entry_prices = pool
    now = merged[held_names].iloc[i].values
    c = now / entry_prices - 1
    change = c * held_w * height
    if change.sum() > 0:
        winBet += 1
    height = change.sum() + height
    print('out\t{}\tw\t{}\tchange\t{}\theight\t{}'.format(held_names, held_w, c, height))


def update(names, w, i):
    """Replace the held position with ``names`` weighted by ``w`` at row ``i``.

    Prints the win ratio so far (once at least one position has been opened),
    settles the previous position, then records the new one together with
    its prices at row ``i`` and increments ``totalBet``.

    Parameters
    ----------
    names : list
        Column labels of ``merged`` to hold.
    w : numpy.ndarray
        One weight per name.
    i : int
        Row position in ``merged`` at which the switch happens.
    """
    global pool
    global totalBet
    if totalBet > 0: print(winBet * 1.0 / totalBet)
    # Looked up once and reused as the entry prices of the new position.
    entry = merged[names].iloc[i].values
    _settle(i)
    print('inn\t{}\tw\t{}\tchange\t{}\theight\t{}'.format(names, w, 0, height))
    pool = [names, w, entry]
    totalBet += 1


def clear(i):
    """Settle the held position at row ``i`` of ``merged`` and hold nothing."""
    global pool
    _settle(i)
    pool = None

# Walk forward through `merged`: at every row i, fit weights on the `step`
# rows before it and switch the held position at row i.
# The loop starts at `step` (not a literal 30) so that the first window
# merged.iloc[0:step] stays valid if `step` is changed.
for i in range(step, merged.shape[0]):
    # getW creates named variables, so each fit needs a fresh default graph.
    tf.reset_default_graph()
    # Keep only the columns with no missing value inside the window, then put
    # one series per row as getD expects.
    temp = merged.iloc[i-step:i, :].dropna(axis=1).T
    print('i is {}, feed data from {} to {}'.format(i, temp.columns[0], temp.columns[-1]))
    d = getD(temp)
    selected, w = getW(d, True)    
    if len(selected) > 0:
        # Normalise so the absolute weights sum to 1.
        w = w / np.absolute(w).sum()
        names = temp.index[selected].tolist()
        update(names, w, i)
    else:
        # No acceptable weights were found: close whatever is held.
        clear(i)
