In [53]:
import random
import json
import gym
from gym import spaces
import pandas as pd
import numpy as np

MAX_STEPS = 20000
class TradingEnv(gym.Env):
    """A stock trading environment for OpenAI gym"""
    metadata = {'render.modes': ['human']}

    def __init__(self, df):
        super(TradingEnv, self).__init__()
        self.last_sig = 0
        self.usdt_holdings = 1000
        self.btc_holdings = 0
        self.current_step = 0
        self.prev_usdt,self.prev_btc = 0,0
        #self.init_btc = self.btc_holdings
        self.max_btc = 0
        self.max_usdt = 0
        self.btc_returns = 0
        self.current_act = 0
        

        self.df = df
        # Actions of the format Buy x%, Sell x%, Hold, etc.
        self.action_space = spaces.Box(
            low=np.array([0, 0]), high=np.array([3, 1]), dtype=np.float16)

        # Prices contains the OHCL values for the last five prices
        self.observation_space = spaces.Box(
            low=0, high=1, shape=(6, 6), dtype=np.float16)
        

    def _next_observation(self):
        # Get the stock data points for the last 5 days and scale to between 0-1
        frame = np.array([
            self.df["Open"].iloc[self.current_step],
            self.df["High"].iloc[self.current_step],
            self.df["Low"].iloc[self.current_step],
            self.df["Close"].iloc[self.current_step],
            self.df["Volume"].iloc[self.current_step],
        ])

        # Append additional data and scale each value to between 0-1
        #print(self.total_sales_value / (MAX_NUM_SHARES * MAX_SHARE_PRICE))
        """obs = np.append(frame, [[
            self.balance / 1,
            self.max_net_worth / MAX_ACCOUNT_BALANCE,
            self.shares_held / MAX_NUM_SHARES,
            self.cost_basis / MAX_SHARE_PRICE,
            self.total_shares_sold / MAX_NUM_SHARES,
            self.total_sales_value / (MAX_NUM_SHARES * MAX_SHARE_PRICE),
        ]], axis=0)"""
        return frame

    def _take_action(self, action):
        # Set the current price to a random price within the time step
        current_price = random.uniform(self.df["Open"].iloc[self.current_step], self.df["Close"].iloc[self.current_step])
        #print(current_price,self.df["Open"].iloc[self.current_step])
        action_type = action[0]
        self.current_act = action_type
        amount = action[1]
        #BUY condition
        if (action_type > 0 and self.usdt_holdings !=0 and self.last_sig!=action_type ):
            
            # Buy amount % of balance in shares
            total_possible = float(self.usdt_holdings / current_price)
            self.usdt_holdings = self.usdt_holdings -(total_possible*current_price)
            total_possible = round(total_possible,5)
            #update balances
            self.btc_holdings = total_possible
            if total_possible > self.max_btc:
                self.max_btc = self.btc_holdings
            print("buying {}btc at {}".format(total_possible,str(current_price)))
            print("{} remaining usdt".format(self.usdt_holdings))
            print("\n====\n")
            

        elif (action_type < 0 and self.btc_holdings !=0 and self.last_sig!=action_type):
            # Buy amount % of balance in shares
            total_possible = float(self.btc_holdings)#/float(last_price["bidPrice"])
            
            #update balances
            self.btc_holdings = self.btc_holdings -self.btc_holdings
            self.usdt_holdings = float(total_possible*current_price)
            if self.usdt_holdings > self.max_usdt:
                self.max_usdt = self.usdt_holdings
            print("Selling {}btc at {}".format(total_possible,str(current_price)))
            print("{} remaining usdt".format(self.usdt_holdings))
            print("\n====\n")
            
            
        self.last_sig  = action[0]
        if(self.btc_holdings!=0):
            self.btc_net = (self.btc_holdings/self.init_btc)*100

        if self.btc_holdings > self.max_btc:
            self.max_btc = self.btc_holdings
        if self.btc_holdings>0:
            curr_profit = (self.btc_holdings/ self.init_btc)
            #print(self.btc_holdings,self.init_btc)
        else:
            #print("USDT: {}  Init btc: {}".format(self.usdt_holdings,self.init_btc))
            if(self.usdt_holdings != self.prev_usdt):
                print("USDT: {}  Init btc: {}".format(self.usdt_holdings,self.init_btc))
            self.prev_usdt,self.prev_btc = self.usdt_holdings,self.btc_holdings
            #print("Current profit: ",curr_profit)


    def step(self, action):
        # Execute one time step within the environment
        self._take_action(action)

        self.current_step += 1

        if self.current_step > len(self.df['Open'].values) - 6:
            self.current_step = 0

        delay_modifier = (self.current_step / MAX_STEPS)

        #reward = self.btc_net
        done = (self.btc_holdings/self.init_btc == 0 and self.usdt_holdings==0)

        obs = self._next_observation()

        return obs,  done, {}

    def reset(self):
        # Reset the state of the environment to an initial state
        self.usdt_holdings = 1000
        self.btc_holdings = 0
        self.max_usdt = self.usdt_holdings
        self.max_btc = 0
        self.max_usdt = 0
        self.btc_returns = 0
        self.current_act = 0
        self.init_btc = (self.usdt_holdings/df['Open'].iloc[self.current_step])

        # Set the current step to a random point within the data frame
        #print(self.df.columns)
        self.current_step = random.randint(
            0, len(self.df['Open'].values) - 6)

        return self._next_observation()

    def render(self, mode='human', close=False):
        # Render the environment to the screen
        profit = (self.btc_holdings/ self.init_btc)
        if(profit>0):
            profit*=100
        else:
            profit = (1-profit)
        max_curr = (self.max_btc*self.df['Open'].iloc[-1])
        
        
        print(f'Step: {self.current_step}')
        print(f'BTC Balance: {self.btc_holdings}')
        print("USDT Balance: {}".format(self.usdt_holdings))
        print('Max usdt: {}'.format(self.max_usdt))
        print(f'Max btc: {self.max_btc}')
        print('Max btc at current price: {}'.format(max_curr))
        print(f'Profit: {profit}')
        #print(df.iloc[self.current_step].index)
        return {"USDT":self.usdt_holdings,"BTC":self.btc_holdings}

In [49]:
import gym
import json
import datetime as dt

#from env.TradingEnv import TradingEnv

import pandas as pd
import portmaker

end = int(portmaker.current_milli_time())
d_intv = 90
st = end-(86400000*d_intv)
coin = "BTCUSDT"
candle_k='5m'
data = portmaker.get_data(coin,st,end,candle_k)
t1 = portmaker.current_milli_time()
all_prices,df = portmaker.get_intv(data,coin,d_intv,candle_k)
print(df.columns)
all_sigs = df.set_index("Close time") 
# The algorithms require a vectorized environment to run


Creating: 3 day intervals over 90 days data 
Using 5m granualarity
2019-05-17 07:05:00 2019-05-20 07:05:00
2019-05-20 07:05:00 2019-05-23 07:05:00
2019-05-23 07:05:00 2019-05-26 07:05:00
2019-05-26 07:05:00 2019-05-29 07:05:00
2019-05-29 07:05:00 2019-06-01 07:05:00
2019-06-01 07:05:00 2019-06-04 07:05:00
2019-06-04 07:05:00 2019-06-07 07:05:00
2019-06-07 07:05:00 2019-06-10 08:05:00
2019-06-10 08:05:00 2019-06-13 08:05:00
2019-06-13 08:05:00 2019-06-16 08:05:00
2019-06-16 08:05:00 2019-06-19 08:05:00
2019-06-19 08:05:00 2019-06-22 08:05:00
2019-06-22 08:05:00 2019-06-25 08:05:00
2019-06-25 08:05:00 2019-06-28 08:05:00
2019-06-28 08:05:00 2019-07-01 08:05:00
2019-07-01 08:05:00 2019-07-04 08:05:00
2019-07-04 08:05:00 2019-07-07 08:05:00
2019-07-07 08:05:00 2019-07-10 08:05:00
2019-07-10 08:05:00 2019-07-13 08:05:00
2019-07-13 08:05:00 2019-07-16 08:05:00
2019-07-16 08:05:00 2019-07-19 08:05:00
2019-07-19 08:05:00 2019-07-22 08:05:00
2019-07-22 08:05:00 2019-07-25 08:05:00
2019-07-25 08

In [54]:
env = TradingEnv(df)
#model = PPO2(MlpPolicy, env, verbose=1)
#model.learn(total_timesteps=20000)
max_iter = 10000
obs = env.reset()
rc = 1
for i in range(max_iter):
    rand_agent = np.random.randint(-1,2)
    BWA_sig = env.df.Position.iloc[env.current_step]
    #print(rc==(max_iter),rc)
    if(rc%500==0 or rc==max_iter):
        print("\n========================\n")
        env.render()
    
    #random agent for testing
    #obs, done, info = env.step([rand_agent,1])
    #portmaker signal for benchmark
    obs, done, info = env.step([BWA_sig,1])
    rc+=1
    

USDT: 1000  Init btc: 0.13719717154311148
buying 0.12958btc at 7717.523881141874
0.0 remaining usdt

====

Selling 0.12958btc at 7718.001355869524
1000.0986156935729 remaining usdt

====

USDT: 1000.0986156935729  Init btc: 0.13719717154311148
buying 0.13004btc at 7690.924566552964
0.0 remaining usdt

====

Selling 0.13004btc at 7697.259497359661
1000.9516250366502 remaining usdt

====

USDT: 1000.9516250366502  Init btc: 0.13719717154311148
buying 0.13031btc at 7681.222508083003
0.0 remaining usdt

====

Selling 0.13031btc at 7688.442126131922
1001.8808934562509 remaining usdt

====

USDT: 1001.8808934562509  Init btc: 0.13719717154311148
buying 0.13093btc at 7651.901306044829
0.0 remaining usdt

====

Selling 0.13093btc at 7752.9728773092265
1015.096738826097 remaining usdt

====

USDT: 1015.096738826097  Init btc: 0.13719717154311148
buying 0.12978btc at 7821.926657043463
1.1368683772161603e-13 remaining usdt

====

Selling 0.12978btc at 7916.21031371552
1027.3657745140001 remaining

Selling 0.1315btc at 9260.577249765714
1217.7659083441915 remaining usdt

====

USDT: 1217.7659083441915  Init btc: 0.13719717154311148
buying 0.1345btc at 9054.194884414192
0.0 remaining usdt

====

Selling 0.1345btc at 9048.476098714336
1217.0200352770783 remaining usdt

====

USDT: 1217.0200352770783  Init btc: 0.13719717154311148
buying 0.1342btc at 9069.006279837613
0.0 remaining usdt

====

Selling 0.1342btc at 9068.785096005648
1217.030959883958 remaining usdt

====

USDT: 1217.030959883958  Init btc: 0.13719717154311148
buying 0.13446btc at 9051.343685266302
0.0 remaining usdt

====

Selling 0.13446btc at 9079.276990665594
1220.7995841648958 remaining usdt

====

USDT: 1220.7995841648958  Init btc: 0.13719717154311148
buying 0.13481btc at 9055.65290791069
0.0 remaining usdt

====

Selling 0.13481btc at 9056.345986492712
1220.8860024390826 remaining usdt

====

USDT: 1220.8860024390826  Init btc: 0.13719717154311148
buying 0.135btc at 9043.88552057925
0.0 remaining usdt

====

S

buying 0.13737btc at 11039.43752828016
0.0 remaining usdt

====

Selling 0.13737btc at 11015.987699811987
1513.2662303231725 remaining usdt

====

USDT: 1513.2662303231725  Init btc: 0.13719717154311148
buying 0.13786btc at 10977.046769671892
0.0 remaining usdt

====

Selling 0.13786btc at 10980.574273395403
1513.7819693302904 remaining usdt

====

USDT: 1513.7819693302904  Init btc: 0.13719717154311148
buying 0.13775btc at 10989.413058615839
0.0 remaining usdt

====

Selling 0.13775btc at 11378.679408495835
1567.4130885203012 remaining usdt

====

USDT: 1567.4130885203012  Init btc: 0.13719717154311148
buying 0.13787btc at 11368.90128628897
0.0 remaining usdt

====

Selling 0.13787btc at 11361.141537860978
1566.360583824893 remaining usdt

====

USDT: 1566.360583824893  Init btc: 0.13719717154311148
buying 0.13809btc at 11342.714480695764
-2.2737367544323206e-13 remaining usdt

====

Selling 0.13809btc at 11345.521776256393
1566.7031020832453 remaining usdt

====

USDT: 1566.703102083



Step: 12914
BTC Balance: 0.0
USDT Balance: 1718.7515261982633
Max usdt: 1806.8647423722357
Max btc: 0.1468
Max btc at current price: 1476.097488
Profit: 1.0
buying 0.15653btc at 10980.068654736997
2.2737367544323206e-13 remaining usdt

====

Selling 0.15653btc at 11006.890346375887
1722.9085459182177 remaining usdt

====

USDT: 1722.9085459182177  Init btc: 0.13719717154311148
buying 0.15576btc at 11061.307284215443
0.0 remaining usdt

====

Selling 0.15576btc at 11115.90941301804
1731.41405017169 remaining usdt

====

USDT: 1731.41405017169  Init btc: 0.13719717154311148
buying 0.15747btc at 10995.07645570592
0.0 remaining usdt

====

Selling 0.15747btc at 10993.260569293245
1731.1087418466072 remaining usdt

====

USDT: 1731.1087418466072  Init btc: 0.13719717154311148
buying 0.15754btc at 10988.190151755136
0.0 remaining usdt

====

Selling 0.15754btc at 10967.860196030348
1727.876695282621 remaining usdt

====

USDT: 1727.876695282621  Init btc: 0.13719717154311148
buying 0.15755

buying 0.17687btc at 11130.613179398177
0.0 remaining usdt

====

Selling 0.17687btc at 11116.787680980831
1966.2262371350796 remaining usdt

====

USDT: 1966.2262371350796  Init btc: 0.13719717154311148
buying 0.17668btc at 11128.566073770007
0.0 remaining usdt

====

Selling 0.17668btc at 11126.966688012437
1965.9124744380374 remaining usdt

====

USDT: 1965.9124744380374  Init btc: 0.13719717154311148
buying 0.17623btc at 11155.148513341965
0.0 remaining usdt

====

Selling 0.17623btc at 11128.17775341842
1961.1187654849282 remaining usdt

====

USDT: 1961.1187654849282  Init btc: 0.13719717154311148
buying 0.17678btc at 11093.819816846788
0.0 remaining usdt

====

Selling 0.17678btc at 11079.762059952067
1958.6803369583263 remaining usdt

====

USDT: 1958.6803369583263  Init btc: 0.13719717154311148
buying 0.17728btc at 11048.611574988716
0.0 remaining usdt

====

Selling 0.17728btc at 11043.254211618263
1957.7481066356856 remaining usdt

====

USDT: 1957.7481066356856  Init btc: 0

Selling 0.20467btc at 11320.739057388231
2317.015662875649 remaining usdt

====

USDT: 2317.015662875649  Init btc: 0.13719717154311148
buying 0.20493btc at 11306.433663199492
0.0 remaining usdt

====

Selling 0.20493btc at 11285.543199150034
2312.746367801816 remaining usdt

====

USDT: 2312.746367801816  Init btc: 0.13719717154311148
buying 0.20732btc at 11155.369600698898
0.0 remaining usdt

====

Selling 0.20732btc at 11161.477433687898
2313.997501552175 remaining usdt

====

USDT: 2313.997501552175  Init btc: 0.13719717154311148
buying 0.20723btc at 11166.260189351655
0.0 remaining usdt

====

Selling 0.20723btc at 11155.032458473805
2311.6573763695264 remaining usdt

====

USDT: 2311.6573763695264  Init btc: 0.13719717154311148
buying 0.20727btc at 11153.083808095975
0.0 remaining usdt

====

Selling 0.20727btc at 11142.727089650582
2309.553043871876 remaining usdt

====

USDT: 2309.553043871876  Init btc: 0.13719717154311148
buying 0.20722btc at 11145.525710572238
0.0 remaining 