<a href="https://colab.research.google.com/github/duoduodad/AndroidViewAnimations/blob/master/StockTradingEnv.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Position**

In [None]:
from __future__ import (absolute_import, division, print_function,
                        unicode_literals)


from copy import copy


class Position(object):
    '''
    Keeps and updates the size and price of a position. The object has no
    relationship to any asset. It only keeps size and price.

    Member Attributes:
      - size (int): current size of the position
      - price (float): current price of the position
      - price_orig (float): price held before the last set/update call
      - upopened (int): units opened by the last set/update call
      - upclosed (int): units closed by the last set/update call
      - datetime: value of ``dt`` passed to the last ``update`` call
        (``None`` until ``update`` has been called)

    The Position instances can be tested using len(position) or
    bool(position) to see if size is non-zero
    '''

    def __str__(self):
        # Multi-line, human readable dump of the position state
        items = [
            '--- Position Begin',
            '- Size: {}'.format(self.size),
            '- Price: {}'.format(self.price),
            '- Price orig: {}'.format(self.price_orig),
            '- Closed: {}'.format(self.upclosed),
            '- Opened: {}'.format(self.upopened),
            '- Adjbase: {}'.format(self.adjbase),
            '--- Position End',
        ]
        return '\n'.join(items)

    def __init__(self, size=0, price=0.0):
        '''
        Creates a position with the given size and price. The price is only
        kept if size is non-zero; a flat position always has price 0.0
        '''
        self.size = size
        if size:
            self.price = self.price_orig = price
        else:
            self.price = 0.0

        self.adjbase = None

        self.upopened = size
        self.upclosed = 0
        # set() runs against the already assigned size, so it records no
        # opened/closed units and synchronises price_orig with price
        self.set(size, price)

        self.updt = None
        # update() stores its "dt" argument here; define it up front so the
        # attribute can be read before the first update
        self.datetime = None

    def fix(self, size, price):
        '''
        Overwrites size and price without touching the opened/closed
        bookkeeping. Returns True if the size did not change
        '''
        oldsize = self.size
        self.size = size
        self.price = price
        return self.size == oldsize

    def set(self, size, price):
        '''
        Sets the position to an absolute size (as opposed to ``update``,
        which applies a delta) and records in upopened/upclosed how many
        units were opened/closed with regards to the previous size

        Returns:
            A tuple (size, price, upopened, upclosed)
        '''
        if self.size > 0:
            if size > self.size:
                self.upopened = size - self.size  # new 10 - old 5 -> 5
                self.upclosed = 0
            else:
                # same side min(0, 3) -> 0 / reversal min(0, -3) -> -3
                self.upopened = min(0, size)
                # same side min(10, 10 - 5) -> 5
                # reversal min(10, 10 - -5) -> min(10, 15) -> 10
                self.upclosed = min(self.size, self.size - size)

        elif self.size < 0:
            if size < self.size:
                self.upopened = size - self.size  # ex: -5 - -3 -> -2
                self.upclosed = 0
            else:
                # same side max(0, -5) -> 0 / reversal max(0, 5) -> 5
                self.upopened = max(0, size)
                # same side max(-10, -10 - -5) -> max(-10, -5) -> -5
                # reversal max(-10, -10 - 5) -> max(-10, -15) -> -10
                self.upclosed = max(self.size, self.size - size)

        else:  # self.size == 0
            # Everything requested is newly opened. Using the old size here
            # would always report 0 opened units
            self.upopened = size
            self.upclosed = 0

        self.size = size
        self.price_orig = self.price
        if size:
            self.price = price
        else:
            self.price = 0.0

        return self.size, self.price, self.upopened, self.upclosed

    def __len__(self):
        return abs(self.size)

    def __bool__(self):
        return bool(self.size != 0)

    __nonzero__ = __bool__  # Python 2 name of __bool__

    def clone(self):
        '''Returns a new Position with the same size and price'''
        return Position(size=self.size, price=self.price)

    def pseudoupdate(self, size, price):
        '''
        Returns what ``update(size, price)`` would return, computed on a
        copy so that this instance is left untouched
        '''
        return Position(self.size, self.price).update(size, price)

    def update(self, size, price, dt=None):
        '''
        Updates the current position and returns the updated size, price and
        units used to open/close a position

        Args:
            size (int): amount to update the position size
                size < 0: A sell operation has taken place
                size > 0: A buy operation has taken place

            price (float):
                Must always be positive to ensure consistency

            dt: optional timestamp of the update, stored in ``datetime``

        Returns:
            A tuple (non-named) containing
               size - new position size
                   Simply the sum of the existing size plus the "size" argument
               price - new position price
                   If a position is increased the new average price will be
                   returned
                   If a position is reduced the price of the remaining size
                   does not change
                   If a position is closed the price is nullified
                   If a position is reversed the price is the price given as
                   argument
               opened - amount of contracts from argument "size" that were used
                   to open/increase a position.
                   A position can be opened from 0 or can be a reversal.
                   If a reversal is performed then opened is less than "size",
                   because part of "size" will have been used to close the
                   existing position
               closed - amount of units from arguments "size" that were used to
                   close/reduce a position

            Both opened and closed carry the same sign as the "size" argument
            because they refer to a part of the "size" argument
        '''
        self.datetime = dt  # record datetime update (datetime.datetime)

        self.price_orig = self.price
        oldsize = self.size
        self.size += size

        if not self.size:
            # Update closed existing position
            opened, closed = 0, size
            self.price = 0.0
        elif not oldsize:
            # Update opened a position from 0
            opened, closed = size, 0
            self.price = price
        elif oldsize > 0:  # existing "long" position updated

            if size > 0:  # increased position
                opened, closed = size, 0
                # size-weighted average of the old and the new price
                self.price = (self.price * oldsize + size * price) / self.size

            elif self.size > 0:  # reduced position
                opened, closed = 0, size
                # self.price = self.price

            else:  # self.size < 0 # reversed position from plus to minus
                opened, closed = self.size, -oldsize
                self.price = price

        else:  # oldsize < 0 - existing short position updated

            if size < 0:  # increased position
                opened, closed = size, 0
                # size-weighted average of the old and the new price
                self.price = (self.price * oldsize + size * price) / self.size

            elif self.size < 0:  # reduced position
                opened, closed = 0, size
                # self.price = self.price

            else:  # self.size > 0 - reversed position from minus to plus
                opened, closed = self.size, -oldsize
                self.price = price

        self.upopened = opened
        self.upclosed = closed

        return self.size, self.price, opened, closed

# **Environment Definition**

In [None]:
import random
import json
import gym
from gym import spaces
import pandas as pd
import numpy as np
from enum import Enum

# Scaling/limit constants for the trading environment.
# 2147483647 == 2**31 - 1; MAX_ACCOUNT_BALANCE is the upper bound of the
# environment's reward_range.
MAX_ACCOUNT_BALANCE = 2147483647
# The next three are not read by any live code visible in this file.
MAX_NUM_SHARES = 2147483647
MAX_SHARE_PRICE = 5000
MAX_OPEN_POSITIONS = 5
# Divisor of the step counter in the reward's delay modifier.
MAX_STEPS = 20000

# Cash every episode starts with; also the baseline for the rendered profit.
INITIAL_ACCOUNT_BALANCE = 10000

class Actions(Enum):
    """Discrete actions of the trading environment.

    The member values are the integer ids the environment compares the
    incoming action against; the member count sizes the action space.
    """
    Hold = 0
    Buy = 1
    Sell = 2


class StockTradingEnv(gym.Env):
    """A single-asset stock trading environment for OpenAI gym.

    The agent observes the last ``window_size`` rows of OHLCV data and picks
    one of the ``Actions`` (Hold / Buy / Sell). Holdings are tracked by a
    ``Position`` instance which is (re)created by ``reset``, so ``reset``
    must be called before the first ``step``.

    Args:
        df: price data with 'Open', 'High', 'Low', 'Close' and 'Volume'
            columns. Rows are addressed with ``df.loc`` using the integer
            step counter, so a default 0..n-1 integer index is assumed.
        window_size (int): number of rows in each observation.
    """
    metadata = {'render.modes': ['human']}

    # Columns that make up one observation row
    _OBS_COLUMNS = ['Open', 'High', 'Low', 'Close', 'Volume']

    def __init__(self, df, window_size):
        super(StockTradingEnv, self).__init__()

        self.df = df
        self.window_size = window_size
        # Last valid row label; reaching it ends the episode
        self.end_step = len(df) - 1
        self.shape = (window_size, 5)
        self.reward_range = (0, MAX_ACCOUNT_BALANCE)

        # One discrete action per member of Actions
        self.action_space = spaces.Discrete(len(Actions))

        # Unscaled OHLCV values for the last `window_size` rows
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=self.shape, dtype=np.float64)

    def _next_observation(self):
        """Return the OHLCV window ending at the current step.

        Returns:
            numpy.ndarray: float64 array of shape ``(window_size, 5)``,
            matching ``observation_space``.
        """
        # .loc slices by label and includes both ends, hence the "+ 1"
        first_row = self.current_step - self.window_size + 1
        frame = self.df.loc[first_row:self.current_step, self._OBS_COLUMNS]
        # Hand back a plain float array (Volume is integer typed in the
        # frame) so the observation matches the declared Box space
        return frame.to_numpy(dtype=np.float64)

    def _take_action(self, action):
        """Apply ``action`` at the current step's closing price.

        Buy spends roughly 10% of the affordable share count, Sell disposes
        of 10% of the shares held (both truncated to whole shares), Hold
        trades nothing. Balance, holdings and net worth are updated.
        """
        current_price = self.df.loc[self.current_step, "Close"]

        # Fraction of the possible trade size used per action
        amount = 0.1

        if action == Actions.Buy.value:
            # Buy amount % of the shares the balance could pay for
            total_possible = int(self.balance / current_price)
            shares_bought = int(total_possible * amount)

            size, price, _opened, _closed = self.position.update(
                shares_bought, current_price)

            self.balance -= shares_bought * current_price
            # The position keeps the average entry price
            self.cost_basis = price
            self.shares_held = size

        elif action == Actions.Sell.value:
            # Sell amount % of shares held
            shares_sold = int(self.shares_held * amount)

            # Same position object as the Buy branch; a sale is a negative
            # size update
            size, price, _opened, _closed = self.position.update(
                -shares_sold, current_price)

            self.balance += shares_sold * current_price
            self.cost_basis = price
            self.shares_held = size

            self.total_shares_sold += shares_sold
            self.total_sales_value += shares_sold * current_price

        self.net_worth = self.balance + self.shares_held * current_price

        if self.net_worth > self.max_net_worth:
            self.max_net_worth = self.net_worth

        if self.shares_held == 0:
            self.cost_basis = 0

    def step(self, action):
        """Execute one time step within the environment.

        Returns:
            tuple: ``(obs, reward, done, info)`` where ``reward`` is the cash
            balance scaled by ``current_step / MAX_STEPS`` and ``info``
            carries balance, shares_held and net_worth.
        """
        self._take_action(action)

        self.current_step += 1

        # ">=" keeps reporting done if a caller steps past the last row;
        # the episode also ends when the account is wiped out
        if self.current_step >= self.end_step:
            done = True
        else:
            done = bool(self.net_worth <= 0)

        # Weighs the reward more heavily the later the step
        delay_modifier = (self.current_step / MAX_STEPS)

        reward = self.balance * delay_modifier

        obs = self._next_observation()

        info = dict(
            balance=self.balance,
            shares_held=self.shares_held,
            net_worth=self.net_worth,
        )

        return obs, reward, done, info

    def reset(self):
        """Reset the account and position and return the first observation."""
        self.balance = INITIAL_ACCOUNT_BALANCE
        self.net_worth = INITIAL_ACCOUNT_BALANCE
        self.max_net_worth = INITIAL_ACCOUNT_BALANCE
        self.shares_held = 0
        self.cost_basis = 0
        self.total_shares_sold = 0
        self.total_sales_value = 0

        # Fresh, flat position for the new episode
        self.position = Position()

        # Start at the first step for which a full window is available
        self.current_step = self.window_size - 1

        return self._next_observation()

    def render(self, mode='human', close=False):
        """Print the current account state to stdout."""
        profit = self.net_worth - INITIAL_ACCOUNT_BALANCE

        print(f'Step: {self.current_step}')
        print(f'Balance: {self.balance}')
        print(
            f'Shares held: {self.shares_held} (Total sold: {self.total_shares_sold})')
        print(
            f'Avg cost for held shares: {self.cost_basis} (Total sales value: {self.total_sales_value})')
        print(
            f'Net worth: {self.net_worth} (Max net worth: {self.max_net_worth})')
        print(f'Profit: {profit}')

In [None]:
!pip install gym-anytrading stable_baselines3

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting gym-anytrading
  Downloading gym_anytrading-1.3.2-py3-none-any.whl (171 kB)
[K     |████████████████████████████████| 171 kB 4.9 MB/s 
[?25hCollecting stable_baselines3
  Downloading stable_baselines3-1.6.2-py3-none-any.whl (170 kB)
[K     |████████████████████████████████| 170 kB 69.6 MB/s 
Collecting gym>=0.12.5
  Downloading gym-0.21.0.tar.gz (1.5 MB)
[K     |████████████████████████████████| 1.5 MB 52.3 MB/s 
Building wheels for collected packages: gym
  Building wheel for gym (setup.py) ... [?25l[?25hdone
  Created wheel for gym: filename=gym-0.21.0-py3-none-any.whl size=1616822 sha256=bc9c16883f2f018c3d2d88f19fd1b7d017f18d2b7f4a143f13c0f38e61235e1e
  Stored in directory: /root/.cache/pip/wheels/27/6d/b3/a3a6e10704795c9b9000f1ab2dc480dfe7bed42f5972806e73
Successfully built gym
Installing collected packages: gym, stable-baselines3, gym-anytrading
  Attempting uninstal

In [None]:
import numpy as np
import pandas as pd
import os
import random
import copy
import matplotlib.pyplot as plt
from tqdm import tqdm

import gym
import gym_anytrading

from stable_baselines3 import A2C,DQN
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.vec_env import DummyVecEnv

In [None]:
# Work on a private copy of the bundled GOOGL sample, with the index moved
# into a regular column so that rows are labelled by position.
df = gym_anytrading.datasets.STOCKS_GOOGL.copy().reset_index()

print(df.head(5))


def _make_env():
    """Build one trading environment with a 20-row observation window."""
    return StockTradingEnv(df, 20)


# The algorithms require a vectorized environment to run
env = DummyVecEnv([_make_env])

        Date        Open        High         Low       Close   Adj Close  \
0 2009-05-22  198.528534  199.524521  196.196198  196.946945  196.946945   
1 2009-05-26  196.171173  202.702698  195.195190  202.382385  202.382385   
2 2009-05-27  203.023026  206.136139  202.607605  202.982986  202.982986   
3 2009-05-28  204.544540  206.016022  202.507507  205.405411  205.405411   
4 2009-05-29  206.261261  208.823822  205.555557  208.823822  208.823822   

    Volume  
0  3433700  
1  6202700  
2  6062500  
3  5332200  
4  5291100  


In [None]:
# Sanity check of the observation window: .loc slices by label and includes
# both ends, so 0:19 selects 20 rows of the five OHLCV columns.
frame = df.loc[0:19, ['Open','High','Low','Close','Volume']]
print(frame.shape)

(20, 5)


In [None]:
# Network layout handed to the policy: two layers of 64 units.
policy_kwargs = dict(net_arch=[64, 64])
model = DQN('MlpPolicy', env,
            #learning_rate=5e-4,
            #batch_size=128,
            #buffer_size=2000,
            #learning_starts=200,
            exploration_fraction=0.9,
            target_update_interval=2000,
            verbose=1, policy_kwargs=policy_kwargs)
# learn() is documented to take an integer step count, so pass an int
# rather than the float literal 1e4.
model.learn(total_timesteps=10_000)
model.save("dqn_stock")

Using cpu device


AttributeError: ignored

In [None]:
def model_test(env, model, count, render=False):
    """Run ``model`` on ``env`` for ``count`` episodes.

    Args:
        env: environment exposing ``reset()``, ``step(action)`` and
            ``render()``; ``step`` must return
            ``(observation, reward, done, info)``.
        model: object whose ``predict(observation)`` returns
            ``(action, state)``.
        count (int): number of episodes to play.
        render (bool): when true, render the environment after every step.

    Returns:
        list: the summed reward of each episode, in playing order.
    """
    rewards_all_episodes = []

    for _ in range(count):
        observation = env.reset()
        episode_reward = 0.0
        while True:
            action, _states = model.predict(observation)
            observation, reward, done, info = env.step(action)
            # Rebind instead of "+=" so an array-valued reward handed out
            # by the environment is never modified in place
            episode_reward = episode_reward + reward

            # One render per step; the final step is not rendered twice
            if render:
                env.render()
            # NOTE(review): with a vectorized env `done` is an array; this
            # truth test only works for a single wrapped environment
            if done:
                break
        rewards_all_episodes.append(episode_reward)

    return rewards_all_episodes

In [None]:
# Play a single episode with the trained model, rendering every step.
model_test(env, model, count=1, render=True)

[1;30;43m流式输出内容被截断，只能显示最后 5000 行内容。[0m
Net worth: 10000.0 (Max net worth: 10000)
Profit: 0.0
Step: 2277
Balance: 10000
Shares held: 0 (Total sold: 0)
Avg cost for held shares: 0 (Total sales value: 0)
Net worth: 10000.0 (Max net worth: 10000)
Profit: 0.0
Step: 2278
Balance: 10000
Shares held: 0 (Total sold: 0)
Avg cost for held shares: 0 (Total sales value: 0)
Net worth: 10000.0 (Max net worth: 10000)
Profit: 0.0
Step: 2279
Balance: 10000
Shares held: 0 (Total sold: 0)
Avg cost for held shares: 0 (Total sales value: 0)
Net worth: 10000.0 (Max net worth: 10000)
Profit: 0.0
Step: 2280
Balance: 10000
Shares held: 0 (Total sold: 0)
Avg cost for held shares: 0 (Total sales value: 0)
Net worth: 10000.0 (Max net worth: 10000)
Profit: 0.0
Step: 2281
Balance: 10000
Shares held: 0 (Total sold: 0)
Avg cost for held shares: 0 (Total sales value: 0)
Net worth: 10000.0 (Max net worth: 10000)
Profit: 0.0
Step: 2282
Balance: 10000
Shares held: 0 (Total sold: 0)
Avg cost for held shares: 0 (Total sal

KeyboardInterrupt: ignored