In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from datetime import datetime
import itertools
import argparse
import re
import os
import pickle

from sklearn.preprocessing import StandardScaler
import yfinance as yf

In [49]:
# Download closing prices from the yfinance API and cache them as a CSV file
def get_data(prd, stock_list=('AAPL', 'MSI', 'SBUX'), csv_path='stock_data.csv'):
    """Fetch closing prices for several tickers and write them to a CSV file.

    Parameters
    ----------
    prd : str
        History period forwarded to ``yf.Ticker(...).history(period=prd)``,
        e.g. ``'5y'``.
    stock_list : iterable of str, optional
        Ticker symbols to download. They also define the column order of the
        written file. Defaults to the three tickers the notebook used so far.
    csv_path : str, optional
        Destination of the CSV file. Defaults to ``'stock_data.csv'``.

    Returns
    -------
    None
        The result is only written to ``csv_path`` (one column per ticker,
        one row per date, no index column).

    Raises
    ------
    ValueError
        If ``stock_list`` is empty.
    """
    tickers = list(stock_list)
    if not tickers:
        raise ValueError('stock_list must contain at least one ticker symbol')

    # Collect one long-format frame per ticker and concatenate once at the end;
    # calling pd.concat inside the loop re-copies all rows gathered so far.
    frames = []
    for symbol in tickers:
        # assumes history() returns a frame indexed by 'Date' that has a
        # 'Close' column -- TODO confirm for non-daily periods
        history = yf.Ticker(symbol).history(period=prd).reset_index()
        history['stock'] = symbol
        frames.append(history.loc[:, ['Date', 'Close', 'stock']])
    long_prices = pd.concat(frames)

    # Long -> wide: one row per date, one column per ticker.
    wide_prices = long_prices.pivot(index='Date', columns='stock', values='Close')

    # Fix the column order to the requested ticker order and drop the dates,
    # so the file contains the price columns only.
    wide_prices = wide_prices[tickers].reset_index(drop=True)

    wide_prices.to_csv(csv_path, index=False)

# Fetch five years of price history and write it to disk
get_data(prd='5y')

In [50]:
# Load the price table back from the CSV file and preview the first rows
stock_data = pd.read_csv('stock_data.csv')
stock_data.head()

Unnamed: 0,AAPL,MSI,SBUX
0,33.253315,77.284012,51.92453
1,33.942547,77.284012,52.033409
2,34.018085,77.40361,52.205799
3,33.973232,78.08445,52.768318
4,33.9095,79.326515,52.976994


In [40]:
def get_scaler(env):
    """Fit a StandardScaler on states visited while acting at random.

    Takes at most ``env.n_step`` uniformly random actions in ``env`` (stopping
    early once the environment reports ``done``), records every state returned
    by ``env.step`` and fits the scaler on those states.

    Note: the environment is used as passed in; it is not reset here.

    Returns the fitted scaler.
    """
    visited = []
    remaining = env.n_step
    while remaining > 0:
        remaining -= 1
        chosen = np.random.choice(env.action_space)
        observation, _reward, finished, _info = env.step(chosen)
        visited.append(observation)
        if finished:
            break

    fitted = StandardScaler()
    fitted.fit(visited)
    return fitted

In [51]:
def maybe_make_dir(directory):
    """Create ``directory`` (including missing parents) if the path does not exist.

    Nothing happens when the path already exists, whether it is a directory or
    any other kind of file.
    """
    if not os.path.exists(directory):
        # exist_ok=True closes the window between the existence check and the
        # creation: if something else creates the directory in between, the
        # call no longer fails with FileExistsError.
        os.makedirs(directory, exist_ok=True)

In [52]:
class LinearModel:
    """Linear model ``X.dot(W) + b`` trained by gradient descent with momentum."""

    def __init__(self,input_dim,n_action):
        """Create a model mapping ``input_dim`` features to ``n_action`` outputs."""
        # Standard-normal weights scaled by 1/sqrt(input_dim).
        # (np.random.randn -- the original spelling `randh` does not exist in
        # numpy and raised AttributeError on construction.)
        self.W = np.random.randn(input_dim,n_action)/ np.sqrt(input_dim)
        self.b = np.zeros(n_action)

        #momentum terms (velocities for W and b); start at scalar 0 and take
        #the shape of the gradients after the first sgd() call
        self.vW = 0
        self.vb = 0

        # one mean-squared-error value per sgd() call
        self.losses = []

    def predict(self,X):
        """Return ``X.dot(W) + b`` for a 2-D input ``X`` of shape (N, D)."""
        # X should be N *D
        assert(len(X.shape)==2)
        return X.dot(self.W)+ self.b

    def sgd(self,X,Y,learning_rate = 0.01,momentum = 0.9):
        """Perform one momentum gradient step on the mean squared error.

        ``X`` must be 2-D; ``Y`` must be broadcast-compatible with
        ``predict(X)``. The loss measured *before* the parameter update is
        appended to ``self.losses``.
        """
        assert(len(X.shape)==2)

        # loss values are 2-D, so normalise by the total number of entries
        num_values = np.prod(Y.shape)

        Yhat = self.predict(X)
        residual = Yhat - Y
        gW = 2 * X.T.dot(residual) / num_values
        gb = 2 * residual.sum(axis=0) / num_values

        #Update momentum
        self.vW = momentum * self.vW - learning_rate * gW
        self.vb = momentum * self.vb - learning_rate * gb

        #update params (in place)
        self.W += self.vW
        self.b += self.vb

        mse = np.mean(residual**2)
        self.losses.append(mse)

    def load_weights(self,filepath):
        """Replace ``W`` and ``b`` with the arrays stored in an ``.npz`` file.

        The momentum terms and the loss history are left untouched.
        """
        # The context manager closes the underlying file handle; the arrays
        # are read into memory when indexed.
        with np.load(filepath) as npz:
            self.W = npz['W']
            self.b = npz['b']

    def save_weights(self,filepath):
        """Write ``W`` and ``b`` to ``filepath`` with ``np.savez``."""
        np.savez(filepath, W = self.W, b = self.b)




In [None]:
class MultiStockEnv:
    """Trading environment over a table of stock prices with a reset/step interface.

    State (length ``2 * n_stock + 1``):
        ``[shares owned per stock..., current price per stock..., cash in hand]``
    Action:
        an integer in ``action_space``; it indexes ``action_list``, whose
        entries hold one code per stock: 0 = sell, 1 = hold, 2 = buy.
    """

    def __init__(self,data,initial_investment = 20000):
        """Build the environment and reset it to the first row of ``data``.

        ``data`` is any 2-D array-like of prices with one row per time step
        and one column per stock (a numpy array, a pandas DataFrame, nested
        lists, ...). Raises ValueError when it is not 2-D.
        """
        # Rows are looked up by position (history[step]), which only works on
        # an ndarray; converting here lets callers pass a DataFrame directly.
        self.stock_price_history = np.asarray(data)
        if self.stock_price_history.ndim != 2:
            raise ValueError(
                'data must be 2-D (n_step, n_stock), got shape %s'
                % (self.stock_price_history.shape,)
            )
        self.n_step, self.n_stock = self.stock_price_history.shape

        #instance attributes (populated by reset())
        self.initial_investment = initial_investment
        self.cur_step = None
        self.stock_owned = None
        self.stock_price = None
        self.cash_in_hand = None

        # one action id per combination of sell/hold/buy over all stocks
        self.action_space = np.arange(3**self.n_stock)

        #actions: [0,0,1] or [0,2,1]
        # 0 = sell, 1 = hold, 2 = buy
        self.action_list = list(map(list,itertools.product([0,1,2], repeat=self.n_stock)))

        #calculate size of state
        self.state_dim = self.n_stock * 2 + 1

        self.reset()

    def reset(self):
        """Go back to step 0 with no shares and the initial cash; return the state."""
        self.cur_step = 0
        self.stock_owned = np.zeros(self.n_stock)
        self.stock_price = self.stock_price_history[self.cur_step]
        self.cash_in_hand = self.initial_investment
        return self._get_obs()

    def step(self, action):
        """Advance one time step and trade at the new prices.

        Returns ``(state, reward, done, info)`` where ``reward`` is the change
        in portfolio value, ``done`` is True once the last row of the price
        history has been reached, and ``info`` is ``{'cur_val': value}``.

        Calling ``step`` again after ``done`` was returned indexes past the
        end of the price history and raises IndexError; call ``reset`` first.
        """
        assert action in self.action_space

        #get current value before performing the action
        prev_val = self._get_val()

        #update price, go to the next day
        self.cur_step += 1
        self.stock_price = self.stock_price_history[self.cur_step]

        #perform the trade
        self._trade(action)

        #get the new value after taking the action
        cur_val = self._get_val()

        #reward is the increase in the value
        reward = cur_val - prev_val

        #done if we run out of data
        done = self.cur_step == self.n_step - 1

        #store the current value  of portfolio
        info = {'cur_val':cur_val}

        return self._get_obs(),reward,done,info

    #return the state
    def _get_obs(self):
        """Return the state vector: shares owned, current prices, cash in hand."""
        obs = np.empty(self.state_dim)
        obs[:self.n_stock] = self.stock_owned
        obs[self.n_stock:2 * self.n_stock] = self.stock_price
        obs[-1] = self.cash_in_hand
        return obs

    def _get_val(self):
        """Return the portfolio value: shares valued at current prices plus cash."""
        return self.stock_owned.dot(self.stock_price) + self.cash_in_hand

    def _trade(self, action):
        """Apply the sell/hold/buy codes of ``action`` at the current prices.

        All shares of every stock marked "sell" are sold first. Then one share
        of each stock marked "buy" is bought per round, round after round;
        buying ends after the first round in which any of those stocks could
        not be bought.
        """
        action_vec = self.action_list[action]

        #determine which stock to buy or sell
        sell_index = [i for i, a in enumerate(action_vec) if a == 0]
        buy_index = [i for i, a in enumerate(action_vec) if a == 2]

        #sell the stock we want to sell, then buy the stock we want to buy
        for i in sell_index:
            self.cash_in_hand += self.stock_price[i] * self.stock_owned[i]
            self.stock_owned[i] = 0

        if buy_index:
            can_buy = True
            while can_buy:
                for i in buy_index:
                    price = self.stock_price[i]
                    # A price <= 0 can never use up the cash, so the loop
                    # would spin forever; treat it like an unaffordable stock.
                    if price > 0 and self.cash_in_hand > price:
                        self.stock_owned[i] += 1
                        self.cash_in_hand -= price
                    else:
                        can_buy = False

