# Group 34 Project: Stock Portfolio Recommender with Reinforcement Learning

In [3]:
#define environment
# from: https://www.tensorflow.org/agents/tutorials/2_environments_tutorial
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import abc
import tensorflow as tf
import numpy as np
import pandas as pd

from tf_agents.environments import py_environment
from tf_agents.environments import tf_environment
from tf_agents.environments import tf_py_environment
from tf_agents.environments import utils
from tf_agents.specs import array_spec
from tf_agents.environments import wrappers
from tf_agents.environments import suite_gym
from tf_agents.trajectories import time_step as ts


#stock_basket_size = 5 #number of stocks to choose by end


# from: https://www.tensorflow.org/agents/tutorials/2_environments_tutorial
class StockMarket(object):
  
  def __init__(self):
    self.db = pd.read_csv("FINAL_FROM_DF.csv")   #read raw data
    self.first_day = "2016-01-01"                #first date. Given in dataset listing 
    self.curr_day = self.first_day

    #dates in dataset are out of order. Initialize array of dates 
    #based on start and end dates from dataset listing
    self.dates = pd.date_range(start="2016-01-01", end="2017-12-29")
    self.date_index = 0
    self.stock_companies = np.unique(self.db["SYMBOL"]) #list of company names
    #self._state = np.random.choice(self.stock_companies, stock_basket_size)  #randomly choose (5) stocks to begin
    #self._state =np.array([],0,0) #will hold [[names of companies, curr day price - prev day price]], amount spent (negative) + earned, and current value of held stocks
    self._state = [] #format is [company_name, current_stock_value]
    self._amt_earn_spend = 0  #amount earned from selling - amount spent by buying
    self._curr_portfolio_val = 0 #value of stocks in current portfolio
    self._episode_ended = False
    
    self._action_spec = array_spec.BoundedArraySpec(shape =(), dtype = np.int32, minimum = -1, maximum=1, name="action")
    self._observation_spec = array_spec.BoundedArraySpec(shape=(13,), name="observation", dtype = np.int32)
    self.batch_size = None
    self._reward_spec = 0 #initialize here, use later.
  
  
  # reset and step return a TimeStep named tuple. TimeStep is defined as:
  # TimeStep(step_type, reward, discount, observation)
  # step_type = FIRST, MID, LAST
  #@abc.abstractmethod
  def _reset(self):
    """Return initial TimeStep."""
    self.curr_day = self.first_day
    self._episode_ended = False
    #step_type = ts.StepType.FIRST;
    return ts.restart(observation=self.db.loc[self.db["TIMESTAMP"]==self.first_day]) #https://www.tensorflow.org/agents/api_docs/python/tf_agents/trajectories/restart
  

  #@abc.abstractmethod
  def _step(self, action):
    #daily_stocks = self.db["TIMESTAMP"==self.curr_day]
    #looked at: https://stackoverflow.com/questions/17071871/how-do-i-select-rows-from-a-dataframe-based-on-column-values
    daily_stocks = self.db.loc[self.db["TIMESTAMP"]==self.curr_day]

    """Apply action and return new time_step."""
    #ignore action for now
    if (self._episode_ended):
      return self.reset()
    
    if (self.curr_day == "2017-12-29"): #reach last date in dataset
      self._episode_ended = True

    if action == -1: #sell
    
      #min_change_stock = self._state[0][0]
      #min_change = self._state[0][0][1]
      if len(self._state) > 0:  #can't sell if not holding any stocks
        #choose the stock with max decrease in price
        min_change = self._state[0][1] #stock price of first stock in array
        min_index = 0
        #for i, stock in enumerate(self._state[0]):
        for i, stock in enumerate(self._state):
          if stock[1] < min_change:
            #min_change_stock = stock
            min_change = stock[1]
            min_index = i
      
      #remove stock from list
      self._state.pop(min_index)
      self._amt_earn_spend += min_change
      #self._state[0].pop(min_index)

    
    #elif action == 0: #do nothing

    elif action == 1: #buy
      #choose stock with max increase in price
      max_change_stock = daily_stocks[0] #entire row
      max_change = 0
      spending = 0
      for stock in daily_stocks:
        temp = stock["CLOSE"] - stock["PREVCLOSE"]
        if temp > max_change:
          max_change = temp
          max_change_stock = stock
          spending = stock["PREVCLOSE"] #amount to buy the stock = prev day's closing price
      
      self._state.append([max_change_stock["SYMBOL"], max_change])
      #self._state[0][max_change_stock["SYMBOL"]] = max_change #add stock to array of held stocks
      #self._state[1] -= spending
      self._amt_earn_spend -= spending


      
    else:
      raise ValueError("action should be -1, 0, or 1, not ", action)
    
    #update value of currently held stocks, regardless of action taken
    
    prev_state = self._state
    for i, stock in enumerate(prev_state): #get index and value
      stock_name = stock[0]
      #Update value of ind stocks
      self._state[i][1] = daily_stocks[np.where(stock_name)]["CLOSE"] #- daily_stocks[np.where(stock_name)]["PREVCLOSE"]
    
    #update total value of stocks
    old_val = self._curr_portfolio_val
    self._curr_portfolio_val = 0
    for stock in self._state:
      self._curr_portfolio_val += stock[1]


    self.date_index+=1
    self.curr_day = self.dates[self.date_index]
    self._reward_spec = self._curr_portfolio_val - old_val + self._amt_earn_spend #total amt of money earned is the increase in portfolio val 
    # - the amount spent by buying stocks + the amount gotten by selling stocks (no longer have those stocks in portfolio)
    return ts.transition(observation = self.db["TIMESTAMP"==self.curr_day], reward=self._reward_spec)




  # def reset(self):
  #   """Return initial_time_step."""
  #   self._current_time_step = self._reset()
  #   return self._current_time_step

  # def step(self, action):
  #   """Apply action and return new time_step."""
  #   if self._current_time_step is None:
  #       return self.reset()
  #   self._current_time_step = self._step(action)
  #   return self._current_time_step

  # def current_time_step(self):
  #   return self._current_time_step

  def time_step_spec(self):
    """Return time_step_spec."""
    return ts.TimeStep(step_type=np.array(0, dtype=np.int32), reward=np.array(0., dtype=np.float32), discount=np.array(1., dtype=np.float32), observation=self._observation_spec)
  # def time_step_spec(self):
  #   return None


  @abc.abstractmethod
  def observation_spec(self):
    """Return observation_spec."""
    return self._observation_spec


  @abc.abstractmethod
  def action_spec(self):
    """Return action_spec."""
    return self._action_spec
    

    




In [4]:
environment = StockMarket()
utils.validate_py_environment(environment, episodes=5)

: 

: 

In [3]:
#testing the environment: taken from environment tutorial: https://www.tensorflow.org/agents/tutorials/2_environments_tutorial

sell_action = np.array(-1)
nothing_action = np.array(0)
buy_action = np.array(1)

env = StockMarket()
time_step = env.reset()
current_holdings= time_step.reward

for i in range(3):
    time_step = env.step(buy_action)
    print(time_step)
    current_holdings = time_step.reward

print("Final Reward: ",current_holdings)

AttributeError: 'StockMarket' object has no attribute 'reset'

In [1]:
# Building the model based on the env

## Ref Template:
# https://github.com/huseinzol05/Stock-Prediction-Models/blob/master/agent/5.q-learning-agent.ipynb
class Learner:
    LEARNING_RATE = 0.1
    NUM_OF_STEPS = 1000   ## Updating the target net every #

    ## window_size -> Number of days to consider as input to make a dec.
    ## skip -> number of steps skipped for making trading sec.
    def __init__(self, env, gamma, batch_size, window_size, trend, skip):
        self.gamma = gamma
        self.batch_size = batch_size
        
        self.env = env
        self.state_size = env.observation_spec().shape[0]
        self.action_size = env.action_spec().shape[0] ## 3?
        self.trend = trend
        self.skip = skip

    def get_state(self, t):
        pass

    def act(self, state):
        pass

    def buy(self, action, data):
        pass

    def sell(self, action, data):
        pass