In [1]:
from pandas_datareader import data as pdr
import requests
import pandas as pd
from bs4 import BeautifulSoup
import numpy as np
import cvxpy as cvx
from scipy.stats import norm
from numpy.linalg import inv
import math
import random
import benchmarks

# for plotting
import plotly
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import cufflinks as cf
import matplotlib.pyplot as plt

from benchmarks import *

# Initialize plotly offline
plotly.offline.init_notebook_mode(connected=True)

# Seed Python's RNG so the randomized bandit strategies below (which sample
# through the `random` module) give the same result on Restart & Run All.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)

## Load Data

### Hundred stocks

In [2]:
# Read the raw price table for the hundred-stock universe. The first CSV
# column becomes the index; it is converted to datetimes in the next cell.
hundred_stock_prices = pd.read_csv('./00_Data/hundred_stocks.csv', header=0, index_col = 0)

In [3]:
# Turn the index read from the CSV into a DatetimeIndex.
hundred_stock_prices.index = pd.to_datetime(hundred_stock_prices.index)

# Process stock data (disabled: the results are cached on disk, see below).
# hundred_stock_relative_prices, hundred_stock_returns = process_stock_data(hundred_stock_prices)

# Read the pre-processed tables instead of recomputing them, to save time.
# NOTE(review): unlike hundred_stock_prices, the index of these two frames is
# left exactly as read from the CSV (no pd.to_datetime call).
hundred_stock_relative_prices = pd.read_csv('./00_Data/hundred_stock_relative_prices.csv', header=0, index_col = 0)
hundred_stock_returns = pd.read_csv('./00_Data/hundred_stock_returns.csv', header=0, index_col = 0)

#### SPY

In [4]:
# download_stock_data('2001-01-02', '2019-07-17', ['SPY'], 'SPY_same_dates_as_hundred_stocks')

In [5]:
# Read in the raw SPY prices; the first CSV column becomes the index.
SPY_prices = pd.read_csv('./00_Data/SPY_same_dates_as_hundred_stocks.csv', header=0, index_col = 0)
# Parse SPY's *own* index. Assigning the hundred-stock index here would hide
# any misalignment between the two files (or fail on a length mismatch).
SPY_prices.index = pd.to_datetime(SPY_prices.index)

# Process stock data (disabled: the results are cached on disk, see below).
# SPY_relative_prices, SPY_returns = process_stock_data(SPY_prices)

# Read the pre-processed tables instead of recomputing them, to save time.
SPY_relative_prices = pd.read_csv('./00_Data/SPY_relative_prices_2001to2019.csv', header=0, index_col = 0)
SPY_returns = pd.read_csv('./00_Data/SPY_returns_2001to2019.csv', header=0, index_col = 0)

## Risk Aware Portfolio Selection Algorithm
#### Bug: cannot initialize gamma as cp.Parameter

In [6]:
def risk_aware_portfolio(stock_returns, gamma, delta):
    """
    Pick a long-only, fully invested portfolio for every time step by
    minimising a CVaR-style objective (threshold `a` plus a scaled sum of
    positive parts of the portfolio loss beyond `a`).

    input:
    stock_returns: DataFrame of returns, one row per date, one column per asset
    gamma: the confidence level (plain float; must be < 1, the objective
           divides by 1 - gamma)
    delta: number of dates (leading rows) used to estimate the density of returns

    output:
    portfolio: DataFrame with one row per solved step (t = 2 .. len - 1) and
               one column per asset; empty (0 rows) if there are fewer than
               three rows of returns

    raises:
    ValueError: if the solver returns no solution for some step
    """
    n_assets = stock_returns.shape[1]
    returns = stock_returns.to_numpy()

    u = cvx.Variable((n_assets, 1))  # portfolio weights
    a = cvx.Variable()               # loss threshold
    # gamma stays a plain float: it only enters the scalar coefficient below.
    # (cvx.Parameter takes the *shape* as its first positional argument, so a
    # parameter version would need cvx.Parameter(value=gamma).)

    constraints = [cvx.sum(u) == 1, u >= 0]

    # The first hinge sum only depends on the fixed leading `delta` rows, so
    # build it once. `@` is cvxpy's matrix product; `*` is deprecated for that.
    first_sum = cvx.sum(cvx.pos(returns[:delta] @ (-u) - a))

    portfolio = []

    for t in range(2, len(returns)):
        coefficient = 1 / ((delta + t - 1) * (1 - gamma))

        # Second hinge sum: all rows observed strictly before step t.
        second_sum = cvx.sum(cvx.pos(returns[:t - 1] @ (-u) - a))

        obj = cvx.Minimize(a + coefficient * (first_sum + second_sum))

        prob = cvx.Problem(obj, constraints)
        prob.solve(solver='SCS')
        if u.value is None:
            raise ValueError(
                "risk_aware_portfolio: solver returned no solution at t=%d "
                "(status: %s)" % (t, prob.status))
        portfolio.append(u.value)

    if not portfolio:
        # Fewer than three rows: nothing was solved.
        return pd.DataFrame(np.empty((0, n_assets)))

    return pd.DataFrame(np.array(portfolio).reshape(len(portfolio), n_assets))

### Benchmarking
> **WARNING**: Takes a long time to run! Only the last 70 days (`iloc[-70:]`) are used here.

In [7]:
# Replace exact zeros with 0.001 so that np.log (applied in the next cell)
# stays finite.
# NOTE(review): the table shown further down has 0.0 for some tickers in 2001
# (e.g. NIO, BX) - presumably not yet listed; confirm that turning those into
# log(0.001) ~= -6.9 "returns" is intended.
hundred_stock_returns_wo_0 = hundred_stock_returns.replace(0, 0.001)

In [8]:
# Run the risk-aware selection on the log of the last 70 rows,
# with gamma = 0.95 (confidence level) and delta = 30 (estimation window).
risk_aware_selection = risk_aware_portfolio(np.log(hundred_stock_returns_wo_0.iloc[-70:]), 0.95, 30)

In [9]:
# Benchmark the risk-aware weights against SPY over the last 68 rows.
# benchmark_portfolio is not defined in this notebook; presumably it comes
# from `from benchmarks import *`.
# NOTE(review): risk_aware_portfolio returns one row per day, so `.T` passes
# one row per asset, while the bandit benchmark cells pass one row per day -
# confirm which orientation benchmark_portfolio expects.
benchmark_portfolio(risk_aware_selection.T,
                    'Risk-aware',
                    SPY_relative_prices.iloc[-68:], # two fewer days: the selection loop starts at t = 2
                    stock_returns=hundred_stock_returns.iloc[-68:],
                    stock_prices_norm=hundred_stock_relative_prices.iloc[-68:])

In [10]:
# Inspect the FIRST 68 rows of the returns table.
# (The benchmark cells slice the LAST 68 rows with iloc[-68:].)
hundred_stock_returns.iloc[:68]

Unnamed: 0_level_0,BAC,CHK,GE,AMD,NOK,CSX,TEVA,ERIC,NIO,F,...,BX,GLW,SCHW,NUS,TXT,CX,OXY,CY,NEM,FLEX
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2001-01-03,1.068181,0.986111,1.092857,1.130435,1.093272,1.041363,1.061907,1.120000,0.0,1.033419,...,0.0,1.170185,1.261391,1.146067,1.054522,1.038328,0.976744,1.149206,0.972125,1.256477
2001-01-04,1.031290,0.964789,1.005229,1.026923,0.988811,1.039719,0.902390,0.938775,0.0,1.072139,...,0.0,0.996618,0.912547,0.990196,1.044136,1.010067,0.994709,1.069061,0.982079,1.002062
2001-01-05,0.932038,1.051095,0.984395,0.958801,0.957567,1.006741,0.984158,0.983696,0.0,0.974478,...,0.0,0.907240,0.987500,0.960396,0.934782,0.996678,1.023937,0.922481,0.985402,0.890947
2001-01-08,1.005209,1.083333,0.963012,1.011719,1.019202,1.040179,1.006036,1.005525,0.0,0.976190,...,0.0,1.053616,0.976793,0.989691,0.998708,1.013333,0.994805,1.019607,1.003704,0.986143
2001-01-09,0.974093,0.935897,0.979424,1.034749,0.911594,1.004292,1.008000,0.950549,0.0,1.031707,...,0.0,0.998816,0.952484,0.968750,1.009056,1.019737,0.971279,1.002747,0.981550,1.049180
2001-01-10,1.033245,1.020548,1.001400,1.022388,1.006359,0.976496,0.915675,0.982659,0.0,0.992908,...,0.0,1.080569,0.990929,1.021506,1.000000,1.016129,1.008064,1.082192,1.011278,1.002232
2001-01-11,1.018018,0.966443,1.041958,1.062044,1.061611,0.997812,1.039003,1.152941,0.0,1.026191,...,0.0,1.035088,1.073227,1.042105,1.014102,1.028572,0.997334,1.005063,0.985130,1.167038
2001-01-12,0.992415,1.083333,0.981208,0.979381,0.985119,0.964913,1.005213,1.005102,0.0,0.979118,...,0.0,1.031779,0.972282,0.989899,0.983565,1.000000,0.986631,0.992443,1.018868,0.982824
2001-01-16,1.038217,1.025641,1.036936,0.961404,0.951662,1.040909,0.995851,0.949239,0.0,1.028436,...,0.0,0.984600,0.982456,0.857143,1.007712,0.990741,0.994580,0.954315,1.000000,0.970874
2001-01-17,0.973007,0.906250,0.985488,1.080292,1.020635,1.006551,0.991666,1.032086,0.0,0.990783,...,0.0,1.066736,1.008929,1.047619,0.978316,1.009346,0.983652,1.082447,0.977778,1.046000


## Bandit Algorithm

## Portfolio selection with original UCB

In [11]:
def ucb_bandit_portfolio(stock_returns):
    """
    Single-asset portfolio selection with the UCB rule.

    During the first `n_assets` steps asset `t` is held at step `t` (one pull
    per arm). Afterwards the asset maximising
    `mean_reward + sqrt(2 * ln(t) / pull_count)` is held, and only that
    asset's running mean is updated with the return observed at step `t`.

    input:
    stock_returns: DataFrame (or 2-D array) of returns, shape (n_time, n_assets)

    output:
    portfolio: ndarray of shape (n_assets, n_time); portfolio[i, t] is 1 if
               asset i is held at step t, else 0
    """
    # One conversion up front: scalar `.iloc` lookups inside the loop are slow.
    returns = np.asarray(stock_returns, dtype=float)
    n_time, n_assets = returns.shape  # n_time >> n_assets in our case
    portfolio = np.zeros((n_assets, n_time))  # the output portfolio
    mean_reward = np.zeros(n_assets)  # empirical mean of return per asset
    pull_count = np.zeros(n_assets)   # how often each asset was selected

    for t in range(n_time):
        if t < n_assets:
            # Initialisation round: try every asset once, in order.
            chosen = t
        else:
            upper_bound = mean_reward + np.sqrt(2 * np.log(t) / pull_count)
            # NaN bounds must never win (a strict `>` comparison would skip
            # them too). Taking the true argmax instead of comparing against
            # an initial 0 keeps the choice right when every bound is <= 0
            # (e.g. with log returns).
            upper_bound = np.where(np.isnan(upper_bound), -np.inf, upper_bound)
            chosen = int(np.argmax(upper_bound))  # first maximiser on ties

        portfolio[chosen, t] = 1
        # Pull: fold the observed return into the running mean.
        mean_reward[chosen] = (
            mean_reward[chosen] * pull_count[chosen] + returns[t, chosen]
        ) / (pull_count[chosen] + 1)
        pull_count[chosen] += 1

    return portfolio

In [12]:
# Run the original UCB strategy over the full returns history.
# Result shape: (n_assets, n_time).
og_ucb_selection = ucb_bandit_portfolio(hundred_stock_returns)

### Benchmarking
> **WARNING**: Takes a long time to run! Only the last 70 days are benchmarked here.

In [13]:
# Benchmark the last 70 days of the UCB selection against SPY.
# `.T` turns (n_assets, n_time) into one row per day before slicing.
benchmark_portfolio(og_ucb_selection.T[-70:],
                    'Original UCB',
                    SPY_relative_prices.iloc[-70:],
                    stock_returns=hundred_stock_returns.iloc[-70:],
                    stock_prices_norm=hundred_stock_relative_prices.iloc[-70:])

## EXP3

In [14]:
def distr(weights, gamma=0.0):
    """
    Turn raw weights into a probability tuple: each entry is the normalised
    weight scaled by (1 - gamma), plus a uniform share gamma / len(weights).
    """
    total = float(sum(weights))
    mixed = []
    for weight in weights:
        exploit_part = (1.0 - gamma) * (weight / total)
        explore_part = gamma / len(weights)
        mixed.append(exploit_part + explore_part)
    return tuple(mixed)

In [15]:
def draw(weights):
    """
    Sample an index with probability proportional to `weights`.

    A point is drawn uniformly in [0, sum(weights)] and the weights are
    subtracted one by one until the remainder is no longer positive.

    input:
    weights: non-empty sequence of non-negative numbers

    output:
    index (int) of the sampled entry

    raises:
    ValueError: if `weights` is empty or its sum is NaN
    """
    if len(weights) == 0:
        raise ValueError("draw: weights must not be empty")

    choice = random.uniform(0, sum(weights))
    if math.isnan(choice):
        raise ValueError("draw: weights contain NaN")

    for choiceIndex, weight in enumerate(weights):
        choice -= weight
        if choice <= 0:
            return choiceIndex

    # Floating-point rounding can leave a tiny positive remainder after the
    # last subtraction. That point belongs to the last entry, so return it
    # instead of falling off the loop and returning None.
    return len(weights) - 1

In [16]:
def exp3(stock_returns, gamma = 0.0):
    """
    Single-asset portfolio selection with EXP3.

    At every step an asset is sampled from the gamma-mixed weight
    distribution, and only the sampled asset's weight is updated with its
    importance-weighted return. With gamma = 0.0 the update factor is
    exp(0) = 1, so the weights never change and sampling stays uniform.

    input:
    stock_returns: DataFrame (or 2-D array) of returns, shape (n_time, n_assets)
    gamma: exploration rate used both for mixing and for the update step

    output:
    portfolio: ndarray of shape (n_assets, n_time); portfolio[i, t] is 1 if
               asset i is held at step t, else 0
    """
    # One conversion up front: scalar `.iloc` lookups inside the loop are slow.
    returns = np.asarray(stock_returns, dtype=float)
    n_time, n_assets = returns.shape  # n_time >> n_assets in our case
    portfolio = np.zeros((n_assets, n_time))
    weights = [1.0] * n_assets

    # Only weight ratios matter to the sampling distribution, so the weights
    # can be rescaled whenever they drift towards overflow or underflow.
    rescale_above, rescale_below = 1e100, 1e-100

    for t in range(n_time):

        probabilityDistribution = distr(weights, gamma)
        choice = draw(probabilityDistribution)

        theReward = returns[t, choice]

        # Importance-weighted estimate: divide by the sampling probability.
        estimatedReward = 1.0 * theReward / probabilityDistribution[choice]
        weights[choice] *= math.exp(
            estimatedReward * gamma /
            n_assets)  # important that we use estimated reward here!

        largest = max(weights)
        if largest > rescale_above or 0.0 < largest < rescale_below:
            weights = [w / largest for w in weights]

        portfolio[choice, t] = 1

    return portfolio

In [17]:
# Run EXP3 over the full returns history with exploration rate gamma = 0.8.
# The result is random: draw() samples through Python's `random` module.
exp3_selection = exp3(hundred_stock_returns, 0.8)

### Benchmarking
> **WARNING**: Takes a long time to run! Only the last 70 days are benchmarked here.

In [18]:
# Benchmark the last 70 days of the EXP3 selection against SPY.
# `.T` turns (n_assets, n_time) into one row per day before slicing.
benchmark_portfolio(exp3_selection.T[-70:],
                    'EXP3',
                    SPY_relative_prices.iloc[-70:],
                    stock_returns=hundred_stock_returns.iloc[-70:],
                    stock_prices_norm=hundred_stock_relative_prices.iloc[-70:])

## Improved EXP3

In [19]:
def improved_exp3(stock_returns, gamma = 0.0):
    """
    EXP3 variant that updates the weight of *every* asset at each step.

    An asset is sampled from the gamma-mixed weight distribution and held for
    the step. Then each asset's return at that step is rescaled from
    [-1, 1] to [0, 1], divided by that asset's sampling probability, and
    used to update its weight. With gamma = 0.0 the update factor is
    exp(0) = 1, so the weights never change and sampling stays uniform.

    input:
    stock_returns: DataFrame (or 2-D array) of returns, shape (n_time, n_assets)
    gamma: exploration rate used both for mixing and for the update step

    output:
    portfolio: ndarray of shape (n_assets, n_time); portfolio[i, t] is 1 if
               asset i is held at step t, else 0
    """
    # One conversion up front: scalar `.iloc` lookups inside the loop are slow.
    returns = np.asarray(stock_returns, dtype=float)
    n_time, n_assets = returns.shape  # n_time >> n_assets in our case
    portfolio = np.zeros((n_assets, n_time))
    weights = [1.0] * n_assets

    # Range assumed for the returns when scaling them into [0, 1].
    rewardMin, rewardMax = -1, 1

    # Only weight ratios matter to the sampling distribution. All weights grow
    # at every step here, so they must be rescaled before they overflow.
    rescale_above, rescale_below = 1e100, 1e-100

    for t in range(n_time):

        probabilityDistribution = distr(weights, gamma)
        choice = draw(probabilityDistribution)

        # Use a separate loop variable: reusing `choice` here would overwrite
        # the sampled asset and always record the last asset below.
        for arm in range(n_assets):
            scaledReward = (returns[t, arm] - rewardMin) / (rewardMax - rewardMin)
            estimatedReward = 1.0 * scaledReward / probabilityDistribution[arm]
            weights[arm] *= math.exp(estimatedReward * gamma / n_assets)

        largest = max(weights)
        if largest > rescale_above or 0.0 < largest < rescale_below:
            weights = [w / largest for w in weights]

        portfolio[choice, t] = 1

    return portfolio
    

In [20]:
# Run the improved EXP3 over the full returns history.
# NOTE(review): gamma is left at its default 0.0 here, which makes every
# weight update a multiplication by exp(0) = 1 (the plain EXP3 cell uses
# 0.8) - confirm this is intended.
improved_exp3_selection = improved_exp3(hundred_stock_returns)

### Benchmarking
> **WARNING**: Takes a long time to run! Only the last 70 days are benchmarked here.

In [21]:
# Benchmark the last 70 days of the improved EXP3 selection against SPY.
# `.T` turns (n_assets, n_time) into one row per day before slicing.
benchmark_portfolio(improved_exp3_selection.T[-70:],
                    'Improved EXP3',
                    SPY_relative_prices.iloc[-70:],
                    stock_returns=hundred_stock_returns.iloc[-70:],
                    stock_prices_norm=hundred_stock_relative_prices.iloc[-70:])

## Improved UCB

In [22]:
def improved_ucb(stock_returns):
    """
    UCB variant that refreshes the running mean of *every* asset each step.

    During the first `n_assets` steps asset `t` is held at step `t` and only
    its mean is initialised. Afterwards the asset maximising
    `mean_reward + sqrt(2 * ln(t) / pull_count)` is held, every asset's mean
    is updated with its return at step `t`, and only the held asset's pull
    count is incremented.

    input:
    stock_returns: DataFrame (or 2-D array) of returns, shape (n_time, n_assets)

    output:
    portfolio: ndarray of shape (n_assets, n_time); portfolio[i, t] is 1 if
               asset i is held at step t, else 0
    """
    # One conversion up front: scalar `.iloc` lookups inside the loop are slow.
    returns = np.asarray(stock_returns, dtype=float)
    n_time, n_assets = returns.shape  # n_time >> n_assets in our case
    portfolio = np.zeros((n_assets, n_time))  # the output portfolio
    mean_reward = np.zeros(n_assets)  # running mean of return per asset
    pull_count = np.zeros(n_assets)   # how often each asset was selected

    for t in range(n_time):

        if t < n_assets:
            # Initialisation round: try every asset once, in order.
            portfolio[t, t] = 1
            mean_reward[t] = returns[t, t]
            pull_count[t] = 1
            continue

        # Bounds are computed from the means as they stood before this step.
        upper_bound = mean_reward + np.sqrt(2 * np.log(t) / pull_count)
        # NaN bounds must never win (a strict `>` comparison would skip them
        # too). Taking the true argmax instead of comparing against an initial
        # 0 keeps the choice right when every bound is <= 0 (e.g. log returns).
        upper_bound = np.where(np.isnan(upper_bound), -np.inf, upper_bound)
        chosen = int(np.argmax(upper_bound))  # first maximiser on ties

        # Update all means, which includes the chosen asset.
        # NOTE(review): the mean is weighted by pull_count (times selected),
        # not by the number of returns folded in, so for rarely selected
        # assets this behaves like a fixed-rate moving average - confirm.
        mean_reward = (mean_reward * pull_count + returns[t]) / (pull_count + 1)

        portfolio[chosen, t] = 1
        pull_count[chosen] += 1

    return portfolio

In [23]:
# Run the improved UCB strategy over the full returns history.
# Result shape: (n_assets, n_time).
improved_ucb_selection = improved_ucb(hundred_stock_returns)

### Benchmarking
> **WARNING**: Takes a long time to run! Only the last 70 days are benchmarked here.

In [24]:
# Benchmark the last 70 days of the improved UCB selection against SPY.
# `.T` turns (n_assets, n_time) into one row per day before slicing.
benchmark_portfolio(improved_ucb_selection.T[-70:],
                    'Improved UCB',
                    SPY_relative_prices.iloc[-70:],
                    stock_returns=hundred_stock_returns.iloc[-70:],
                    stock_prices_norm=hundred_stock_relative_prices.iloc[-70:])

## UCB + Risk aware

Note that default gamma is set to 0.5

In [25]:
def ucb_plus_riskaware(stock_returns, lambd, gamma=0.5, delta=30):
    """
    Blend the UCB selection with the risk-aware portfolio.

    lambd weights the UCB part and (1 - lambd) the risk-aware part; gamma and
    delta are forwarded to risk_aware_portfolio. The result has one row per
    day starting from the third day of `stock_returns`.
    """
    risk_weights = risk_aware_portfolio(stock_returns, gamma, delta).values
    # The risk-aware output starts at t = 2, so skip the bandit's first two days.
    bandit_weights = ucb_bandit_portfolio(stock_returns).T[2:]
    blended = (1 - lambd) * risk_weights + lambd * bandit_weights
    return blended

In [26]:
# Use the LAST 70 rows, like the other "+ risk aware" cells, so that the
# 68 resulting portfolios line up with the iloc[-68:] benchmark window.
ucb_plus_riskaware_selection = ucb_plus_riskaware(
    hundred_stock_returns.iloc[-70:], 0.5)

### Benchmarking
> **WARNING**: Takes a long time to run! Only a 70-day window is used here.

In [27]:
# Benchmark the blended UCB + risk-aware weights against SPY.
# The blend has one row per day starting at t = 2, hence the 68-row window.
benchmark_portfolio(ucb_plus_riskaware_selection,
                    'UCB + risk aware',
                    SPY_relative_prices.iloc[-68:],
                    stock_returns=hundred_stock_returns.iloc[-68:],
                    stock_prices_norm=hundred_stock_relative_prices.iloc[-68:])

## EXP3 + Risk aware


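Note that `gamma` defaults to 0.5 and is passed to the risk-aware step as its confidence level; it is not EXP3's exploration rate, which keeps EXP3's own default of 0.0.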

In [28]:
def exp3_plus_riskaware(stock_returns, lambd, gamma=0.5, delta=30,
                        bandit_gamma=0.0):
    """
    Blend the EXP3 selection with the risk-aware portfolio.

    input:
    stock_returns: DataFrame of returns, one row per date
    lambd: weight of the EXP3 part; (1 - lambd) goes to the risk-aware part
    gamma: confidence level forwarded to risk_aware_portfolio
    delta: estimation window forwarded to risk_aware_portfolio
    bandit_gamma: exploration rate forwarded to exp3. The default 0.0 matches
                  exp3's own default, for which its weights never update.

    output:
    ndarray with one row per day, starting from the third day of
    `stock_returns`
    """
    risk_aware_selection = risk_aware_portfolio(stock_returns, gamma,
                                                delta).values
    exp3_selection = exp3(stock_returns, bandit_gamma).T[
        2:]  # drop first two to match risk aware's dates
    return lambd * exp3_selection + (1 - lambd) * risk_aware_selection

In [29]:
# Blend EXP3 and risk-aware weights 50/50 (lambd = 0.5) on the last 70 rows.
# The result is random: EXP3 samples through Python's `random` module.
exp3_plus_riskaware_selection = exp3_plus_riskaware(
    hundred_stock_returns.iloc[-70:], 0.5)

### Benchmarking
> **WARNING**: Takes a long time to run! Only the last 70 days are used here.

In [30]:
# Benchmark the blended EXP3 + risk-aware weights against SPY.
# The blend has one row per day starting at t = 2, hence the 68-row window.
benchmark_portfolio(exp3_plus_riskaware_selection,
                    'exp3 + risk aware',
                    SPY_relative_prices.iloc[-68:],
                    stock_returns=hundred_stock_returns.iloc[-68:],
                    stock_prices_norm=hundred_stock_relative_prices.iloc[-68:])

## Improved UCB + Risk aware

Note that default gamma is set to 0.5

In [31]:
def improved_ucb_plus_riskaware(stock_returns, lambd, gamma=0.5, delta=30):
    """
    Blend the improved-UCB selection with the risk-aware portfolio.

    lambd weights the improved-UCB part and (1 - lambd) the risk-aware part;
    gamma and delta are forwarded to risk_aware_portfolio. The result has one
    row per day starting from the third day of `stock_returns`.
    """
    risk_weights = risk_aware_portfolio(stock_returns, gamma, delta).values
    # The risk-aware output starts at t = 2, so skip the bandit's first two days.
    bandit_weights = improved_ucb(stock_returns).T[2:]
    blended = (1 - lambd) * risk_weights + lambd * bandit_weights
    return blended

In [32]:
# Blend improved-UCB and risk-aware weights 50/50 (lambd = 0.5) on the last 70 rows.
improved_ucb_plus_riskaware_selection = improved_ucb_plus_riskaware(
    hundred_stock_returns.iloc[-70:], 0.5)

### Benchmarking
> **WARNING**: Takes a long time to run! Only the last 70 days are used here.

In [33]:
# Benchmark the blended improved-UCB + risk-aware weights against SPY.
# The blend has one row per day starting at t = 2, hence the 68-row window.
benchmark_portfolio(improved_ucb_plus_riskaware_selection,
                    'improved ucb + risk aware',
                    SPY_relative_prices.iloc[-68:],
                    stock_returns=hundred_stock_returns.iloc[-68:],
                    stock_prices_norm=hundred_stock_relative_prices.iloc[-68:])

## Improved EXP3 + Risk aware

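Note that `gamma` defaults to 0.5 and is passed to the risk-aware step as its confidence level; it is not the improved EXP3's exploration rate, which keeps its own default of 0.0.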

In [34]:
def improved_exp3_plus_riskaware(stock_returns, lambd, gamma=0.5, delta=30,
                                 bandit_gamma=0.0):
    """
    Blend the improved-EXP3 selection with the risk-aware portfolio.

    input:
    stock_returns: DataFrame of returns, one row per date
    lambd: weight of the improved-EXP3 part; (1 - lambd) goes to the
           risk-aware part
    gamma: confidence level forwarded to risk_aware_portfolio
    delta: estimation window forwarded to risk_aware_portfolio
    bandit_gamma: exploration rate forwarded to improved_exp3. The default
                  0.0 matches improved_exp3's own default, for which its
                  weights never update.

    output:
    ndarray with one row per day, starting from the third day of
    `stock_returns`
    """
    risk_aware_selection = risk_aware_portfolio(stock_returns, gamma,
                                                delta).values
    improved_exp3_selection = improved_exp3(stock_returns, bandit_gamma).T[
        2:]  # drop first two to match risk aware's dates
    return lambd * improved_exp3_selection + (1 - lambd) * risk_aware_selection

In [35]:
# Blend improved-EXP3 and risk-aware weights 50/50 (lambd = 0.5) on the last 70 rows.
# The result is random: improved_exp3 samples through Python's `random` module.
improved_exp3_plus_riskaware_selection = improved_exp3_plus_riskaware(
    hundred_stock_returns.iloc[-70:], 0.5)

### Benchmarking
> **WARNING**: Takes a long time to run! Only the last 70 days are used here.

In [36]:
# Benchmark the blended improved-EXP3 + risk-aware weights against SPY.
# The blend has one row per day starting at t = 2, hence the 68-row window.
benchmark_portfolio(improved_exp3_plus_riskaware_selection,
                    'improved exp3 + risk aware',
                    SPY_relative_prices.iloc[-68:],
                    stock_returns=hundred_stock_returns.iloc[-68:],
                    stock_prices_norm=hundred_stock_relative_prices.iloc[-68:])

# All graphs without risk aware algorithm

In [39]:
# Compare the four pure bandit strategies over the last 70 days.
# Each selection is transposed to one row per day before slicing; the label
# list is in the same order as the portfolio list.
benchmark_portfolios(
    [
        og_ucb_selection.T[-70:], exp3_selection.T[-70:], improved_exp3_selection.T[-70:],
        improved_ucb_selection.T[-70:]
    ], ['ucb', 'exp3', 'improved exp3', 'improved ucb'],
    SPY_relative_prices.iloc[-70:],
    stock_returns=hundred_stock_returns.iloc[-70:],
    stock_prices_norm=hundred_stock_relative_prices.iloc[-70:])

# All graphs with risk aware algorithm

In [None]:
# Compare every portfolio that contains the risk-aware component.
# There is one label per portfolio, in the same order. All of these start at
# t = 2 of a 70-day window, so they cover 68 days and are benchmarked against
# the last 68 rows (as in the individual benchmark cells).
# NOTE(review): risk_aware_selection is passed transposed, as in its own
# benchmark cell, while the blends are passed with one row per day - confirm
# the orientation benchmark_portfolios expects.
benchmark_portfolios(
    [
        risk_aware_selection.T,
        ucb_plus_riskaware_selection,
        exp3_plus_riskaware_selection,
        improved_ucb_plus_riskaware_selection,
        improved_exp3_plus_riskaware_selection,
    ],
    [
        'risk aware', 'ucb + RA', 'exp3 + RA', 'improved ucb + RA',
        'improved exp3 + RA'
    ],
    SPY_relative_prices.iloc[-68:],
    stock_returns=hundred_stock_returns.iloc[-68:],
    stock_prices_norm=hundred_stock_relative_prices.iloc[-68:])