# Group 34 Project: Stock Portfolio Recommender with Reinforcement Learning

In [16]:
#define environment
# from: https://www.tensorflow.org/agents/tutorials/2_environments_tutorial
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import abc
import tensorflow as tf
import numpy as np
import pandas as pd

from tf_agents.environments import py_environment
from tf_agents.environments import tf_environment
from tf_agents.environments import tf_py_environment
from tf_agents.environments import utils
from tf_agents.specs import array_spec
from tf_agents.environments import wrappers
from tf_agents.environments import suite_gym
from tf_agents.trajectories import time_step as ts


#stock_basket_size = 5 #number of stocks to choose by end


# from: https://www.tensorflow.org/agents/tutorials/2_environments_tutorial
class StockMarket(object):
  
  def __init__(self):
    self.db = pd.read_csv("FINAL_FROM_DF.csv")   #read raw data
    self.first_day = "2016-01-01"                #first date. Given in dataset listing 
    self.curr_day = self.first_day

    #dates in dataset are out of order. Initialize array of dates 
    #based on start and end dates from dataset listing
    self.dates = pd.date_range(start="2016-01-01", end="2017-12-29")
    self.date_index = 0
    self.stock_companies = np.unique(self.db["SYMBOL"]) #list of company names
    #self._state = np.random.choice(self.stock_companies, stock_basket_size)  #randomly choose (5) stocks to begin
    self._state =[[],0,0] #will hold [[names of companies, curr day price - prev day price]], amount spent (negative) + earned, and current value of held stocks
    self._episode_ended = False
    
    self._action_spec = array_spec.BoundedArraySpec(shape =(), dtype = np.int32, minimum = -1, maximum=1, name="action")
    self._observation_spec = array_spec.BoundedArraySpec(shape=(13,), name="observation", dtype = np.int32)
    self.batch_size = None
  
  
  # reset and step return a TimeStep named tuple. TimeStep is defined as:
  # TimeStep(step_type, reward, discount, observation)
  # step_type = FIRST, MID, LAST
  #@abc.abstractmethod
  def _reset(self):
    """Return initial TimeStep."""
    self.curr_day = self.first_day
    #step_type = ts.StepType.FIRST;
    return ts.restart(observation=self.db.loc[self.db["TIMESTAMP"]==self.first_day]) #https://www.tensorflow.org/agents/api_docs/python/tf_agents/trajectories/restart
  

  #@abc.abstractmethod
  def _step(self, action):
    #daily_stocks = self.db["TIMESTAMP"==self.curr_day]
    #looked at: https://stackoverflow.com/questions/17071871/how-do-i-select-rows-from-a-dataframe-based-on-column-values
    daily_stocks = self.db.loc[self.db["TIMESTAMP"]==self.curr_day]

    """Apply action and return new time_step."""
    #ignore action for now
    if (self._episode_ended):
      return self.reset()
    
    if (self.curr_day == "2017-12-29"): #reach last date in dataset
      self._episode_ended = True

    if action == -1: #sell
      #choose the stock with max decrease in price
      #min_change_stock = self._state[0][0]
      min_change = self._state[0][0][1]
      min_index = 0
      for i, stock in enumerate(self._state[0]):
        if stock[1] < min_change:
          #min_change_stock = stock
          min_change = stock[1]
          min_index = i
      
      #remove stock from list
      self._state[0].pop(min_index)

    
    #elif action == 0: #do nothing

    elif action == 1: #buy
      #choose stock with max increase in price
      max_change_stock = daily_stocks[0]
      max_change = 0
      spending = 0
      for stock in daily_stocks:
        temp = stock["CLOSE"] - stock["PREVCLOSE"]
        if temp > max_change:
          max_change = temp
          max_change_stock = stock
          spending = stock["PREVCLOSE"] #amount to buy the stock = prev day's closing price
      
      self._state[0][max_change_stock["SYMBOL"]] = max_change #add stock to array of held stocks
      self._state[1] -= spending


      

    else:
      raise ValueError("action should be -1, 0, or 1, not ", action)
    
    #update value of currently held stocks
    
    prev_state = self._state
    for i, stock in enumerate(prev_state[0]): #get index and value
      stock_name = stock[0]
      #Update value of ind stocks
      self._state[0][i][1] = daily_stocks[np.where(stock_name)]["CLOSE"] - daily_stocks[np.where(stock_name)]["PREVCLOSE"]
    
    #update total value of stocks
    old_val = self._state[2]
    self._state[2] = 0
    for stock in self._state[0]:
      self._state[2] += stock[1] 


    self.date_index+=1
    self.curr_day = self.date_index[self.date_index]
    return ts.transition(observation = self.db["TIMESTAMP"==self.curr_day], reward=self._state[2] - old_val)




  def reset(self):
    """Return initial_time_step."""
    self._current_time_step = self._reset()
    return self._current_time_step

  def step(self, action):
    """Apply action and return new time_step."""
    if self._current_time_step is None:
        return self.reset()
    self._current_time_step = self._step(action)
    return self._current_time_step

  def current_time_step(self):
    return self._current_time_step

  def time_step_spec(self):
    """Return time_step_spec."""
    return ts.TimeStep(step_type=self._action_spec, reward=, discount=, observation=self._observation_spec)


  @abc.abstractmethod
  def observation_spec(self):
    """Return observation_spec."""
    return self._observation_spec


  @abc.abstractmethod
  def action_spec(self):
    """Return action_spec."""
    return self._action_spec
    

    




In [17]:
env = StockMarket()
utils.validate_py_environment(env, episodes=5)

TypeError: TimeStep.__new__() missing 4 required positional arguments: 'step_type', 'reward', 'discount', and 'observation'

In [None]:
#testing the environment: taken from environment tutorial: https://www.tensorflow.org/agents/tutorials/2_environments_tutorial

sell_action = np.array(-1)
nothing_action = np.array(0)
buy_action = np.array(1)

env = StockMarket()
time_step = env.reset()
current_holdings= time_step.reward

for i in range(3):
    time_step = env.step(buy_action)
    print(time_step)
    current_holdings = time_step.reward

print("Final Reward: ",current_holdings)