In [26]:
import random
import json
import gym
from gym import spaces
import pandas as pd
import numpy as np

MAX_STEPS = 20000
class TradingEnv(gym.Env):
    """A stock trading environment for OpenAI gym"""
    metadata = {'render.modes': ['human']}

    def __init__(self, df):
        super(TradingEnv, self).__init__()
        self.last_sig = 0
        self.usdt_holdings = 1000
        self.btc_holdings = 0
        self.current_step = 0
        #self.init_btc = self.btc_holdings
        self.max_btc = 0
        self.max_usdt = 0
        self.btc_returns = 0
        self.current_act = 0
        

        self.df = df
        # Actions of the format Buy x%, Sell x%, Hold, etc.
        self.action_space = spaces.Box(
            low=np.array([0, 0]), high=np.array([3, 1]), dtype=np.float16)

        # Prices contains the OHCL values for the last five prices
        self.observation_space = spaces.Box(
            low=0, high=1, shape=(6, 6), dtype=np.float16)
        

    def _next_observation(self):
        # Get the stock data points for the last 5 days and scale to between 0-1
        frame = np.array([
            self.df["Open"].iloc[self.current_step],
            self.df["High"].iloc[self.current_step],
            self.df["Low"].iloc[self.current_step],
            self.df["Close"].iloc[self.current_step],
            self.df["Volume"].iloc[self.current_step],
        ])

        # Append additional data and scale each value to between 0-1
        #print(self.total_sales_value / (MAX_NUM_SHARES * MAX_SHARE_PRICE))
        """obs = np.append(frame, [[
            self.balance / 1,
            self.max_net_worth / MAX_ACCOUNT_BALANCE,
            self.shares_held / MAX_NUM_SHARES,
            self.cost_basis / MAX_SHARE_PRICE,
            self.total_shares_sold / MAX_NUM_SHARES,
            self.total_sales_value / (MAX_NUM_SHARES * MAX_SHARE_PRICE),
        ]], axis=0)"""
        return frame

    def _take_action(self, action):
        # Set the current price to a random price within the time step
        current_price = random.uniform(self.df["Open"].iloc[self.current_step], self.df["Close"].iloc[self.current_step])
        #print(current_price,self.df["Open"].iloc[self.current_step])
        action_type = action[0]
        self.current_act = action_type
        amount = action[1]
        #BUY condition
        if (action_type > 0 and self.usdt_holdings !=0 and self.last_sig!=action_type ):
            
            # Buy amount % of balance in shares
            total_possible = float(self.usdt_holdings / current_price)
            self.usdt_holdings = self.usdt_holdings -(total_possible*current_price)
            total_possible = round(total_possible,5)
            #update balances
            self.btc_holdings = total_possible
            if total_possible > self.max_btc:
                self.max_btc = self.btc_holdings
            print("buying {}btc at {}".format(total_possible,str(current_price)))
            print("{} remaining usdt".format(self.usdt_holdings))
            print("\n====\n")
            

        elif (action_type < 0 and self.btc_holdings !=0 and self.last_sig!=action_type):
            # Buy amount % of balance in shares
            total_possible = float(self.btc_holdings)#/float(last_price["bidPrice"])
            
            #update balances
            self.btc_holdings = self.btc_holdings -self.btc_holdings
            self.usdt_holdings = float(total_possible*current_price)
            if self.usdt_holdings > self.max_usdt:
                self.max_usdt = self.usdt_holdings
            print("Selling {}btc at {}".format(total_possible,str(current_price)))
            print("{} remaining usdt".format(self.usdt_holdings))
            print("\n====\n")
            
            
        self.last_sig  = action[0]
        if(self.btc_holdings!=0):
            self.btc_net = (self.btc_holdings/self.init_btc)*100

        if self.btc_holdings > self.max_btc:
            self.max_btc = self.btc_holdings
        if self.btc_holdings>0:
            curr_profit = (self.btc_holdings/ self.init_btc)
            print(self.btc_holdings,self.init_btc)
            print("Current profit: ",curr_profit)


    def step(self, action):
        # Execute one time step within the environment
        self._take_action(action)

        self.current_step += 1

        if self.current_step > len(self.df['Open'].values) - 6:
            self.current_step = 0

        delay_modifier = (self.current_step / MAX_STEPS)

        #reward = self.btc_net
        done = (self.btc_holdings/self.init_btc == 0 and self.usdt_holdings==0)

        obs = self._next_observation()

        return obs,  done, {}

    def reset(self):
        # Reset the state of the environment to an initial state
        self.usdt_holdings = 1000
        self.btc_holdings = 0
        self.max_usdt = self.usdt_holdings
        self.max_btc = 0
        self.max_usdt = 0
        self.btc_returns = 0
        self.current_act = 0
        self.init_btc = (self.usdt_holdings/df['Open'].iloc[self.current_step])

        # Set the current step to a random point within the data frame
        #print(self.df.columns)
        self.current_step = random.randint(
            0, len(self.df['Open'].values) - 6)

        return self._next_observation()

    def render(self, mode='human', close=False):
        # Render the environment to the screen
        profit = (self.btc_holdings/ self.init_btc)
        if(profit>0):
            profit*=100
        else:
            profit = (1-profit)
        max_curr = (self.max_btc*self.df['Open'].iloc[-1])
        

        print(f'Step: {self.current_step}')
        print(f'BTC Balance: {self.btc_holdings}')
        print('Max usdt: {}'.format(self.max_usdt))
        print(f'Max btc: {self.max_btc}')
        print('Max btc at current price: {}'.format(max_curr))
        print(f'Profit: {profit}')

In [2]:
import gym
import json
import datetime as dt

#from env.TradingEnv import TradingEnv

import pandas as pd
import portmaker

end = int(portmaker.current_milli_time())
d_intv = 9
st = end-(86400000*d_intv)
coin = "BTCUSDT"
candle_k='5m'
data = portmaker.get_data(coin,st,end,candle_k)
t1 = portmaker.current_milli_time()
all_prices,df = portmaker.get_intv(data,coin,d_intv,candle_k)
print(df.columns)
all_sigs = df.set_index("Close time") 
# The algorithms require a vectorized environment to run


Creating: 3 day intervals over 9 days data 
Using 5m granualarity
2019-08-06 05:20:00 2019-08-09 05:20:00


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  prices['-1'] = [prices["Close"][:i].mean() for i in range(len(prices))]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  prices['2'] = [k+v for k,v in zip(prices['-1'], vol)]#addition
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  prices['1'] = ch
A value is trying to be set on a copy of a slice from a 

2019-08-09 05:20:00 2019-08-12 05:20:00
2019-08-12 05:20:00 2019-08-14 22:17:42.511000
Index(['Open', 'High', 'Low', 'Close', 'Close time', 'Volume', '-1', '2', '1',
       '-2', 'Short', 'Long', 'Position', 'pos_diff'],
      dtype='object')


In [27]:
env = TradingEnv(df)
#model = PPO2(MlpPolicy, env, verbose=1)
#model.learn(total_timesteps=20000)
max_iter = 100
obs = env.reset()
rc = 1
for i in range(max_iter):
    rand_agent = np.random.randint(-1,2)
    #print(rc==(max_iter),rc)
    if(rc%50==0 or rc==max_iter):
        print("\n========================\n")
        env.render()
    #if(rand_agent ==0):
    #    continue
    #action, _states = model.predict(obs)
    obs, done, info = env.step([rand_agent,1])
    rc+=1
    

buying 0.08475btc at 11799.108074344049
0.0 remaining usdt

====

0.08475 0.08511109125185648
Current profit:  0.9957574125000002
Selling 0.08475btc at 11810.741773055153
1000.9603652664243 remaining usdt

====

buying 0.08475btc at 11811.213507287488
0.0 remaining usdt

====

0.08475 0.08511109125185648
Current profit:  0.9957574125000002
Selling 0.08475btc at 11810.278877255656
1000.921134847417 remaining usdt

====

buying 0.08481btc at 11802.049157025567
-1.1368683772161603e-13 remaining usdt

====

0.08481 0.08511109125185648
Current profit:  0.9964623735
Selling 0.08481btc at 11802.494656622739
1000.9695718281745 remaining usdt

====

buying 0.08489btc at 11791.415116424516
0.0 remaining usdt

====

0.08489 0.08511109125185648
Current profit:  0.9974023215
Selling 0.08489btc at 11792.578191886272
1001.0719627092255 remaining usdt

====

buying 0.08503btc at 11772.595985533568
-1.1368683772161603e-13 remaining usdt

====

0.08503 0.08511109125185648
Current profit:  0.9990472305
S