# [Introduction to trading Crypto with Reinforcement Learning](https://github.com/pythonlessons/RL-Bitcoin-trading-bot)

* [Website](https://pylessons.com/)
* [YouTube](https://www.youtube.com/watch?v=QQp1KDlQ3v8)


In [1]:
import random
import numpy as np
import pandas as pd

from collections import deque


## Custom Bitcoin trading environment construction

In [2]:
class CryptoEnv:
    """Trading environment that replays the rows of a price data frame.

    The account starts with ``initial_balance`` in cash. On every step the
    agent either holds, converts its whole cash balance into crypto, or sells
    all the crypto it holds. An observation stacks, for the last
    ``lookback_window_size`` steps, the market row (Open, High, Low, Close,
    Volume) next to the account row (balance, net worth, crypto bought,
    crypto sold, crypto held).
    """

    # Data-frame columns copied, in this order, into every market-history row.
    _MARKET_COLUMNS = ('Open', 'High', 'Low', 'Close', 'Volume')

    def __init__(self, df, initial_balance=1000, lookback_window_size=50):
        """Store the data and allocate the rolling history buffers.

        Args:
            df: pandas DataFrame with 'Open', 'High', 'Low', 'Close' and
                'Volume' columns. Rows containing NaN are dropped and the
                index is reset so that rows can be addressed by step number.
            initial_balance: cash available at the start of every episode.
            lookback_window_size: number of past steps kept in an observation.
        """
        self.df = df.dropna().reset_index()
        # Index of the last usable row.
        self.df_total_steps = len(self.df) - 1
        self.initial_balance = initial_balance
        self.lookback_window_size = lookback_window_size

        # Actions: 0 = hold, 1 = buy, 2 = sell
        self.action_space = np.array([0, 1, 2])

        # Account rows (balance, net_worth, crypto_bought, crypto_sold,
        # crypto_held) for the last lookback_window_size steps
        self.orders_history = deque(maxlen=self.lookback_window_size)

        # Market rows (Open, High, Low, Close, Volume) for the last
        # lookback_window_size steps
        self.market_history = deque(maxlen=self.lookback_window_size)

        # One observation: lookback_window_size rows of 5 market + 5 account values
        self.state_size = (self.lookback_window_size, 10)

    def _market_row(self, step):
        """Return the market columns of data-frame row ``step`` as a list."""
        return [self.df.loc[step, column] for column in self._MARKET_COLUMNS]

    def _orders_row(self):
        """Return the current account values as one orders-history row."""
        return [
            self.balance,
            self.net_worth,
            self.crypto_bought,
            self.crypto_sold,
            self.crypto_held,
        ]

    def _state(self):
        """Join market and account histories column-wise into one array."""
        return np.concatenate((self.market_history, self.orders_history), axis=1)

    # Reset the state of the environment to an initial state
    def reset(self, env_steps_size=0):
        """Start a new episode and return the first observation.

        Args:
            env_steps_size: when positive, the episode covers this many steps
                starting at a random row (training use). Otherwise the
                episode runs from ``lookback_window_size`` to the last row
                (testing use).

        Returns:
            Array of shape ``state_size``.

        Raises:
            ValueError: if ``env_steps_size`` steps do not fit between the
                lookback window and the end of the data.
        """
        self.balance = self.initial_balance
        self.net_worth = self.initial_balance
        self.prev_net_worth = self.initial_balance
        self.crypto_held = 0
        self.crypto_sold = 0
        self.crypto_bought = 0

        if env_steps_size > 0:  # For TRAIN dataset
            last_start = self.df_total_steps - env_steps_size
            if last_start < self.lookback_window_size:
                # random.randint would fail here anyway, but with an
                # "empty range" message that hides the actual cause.
                raise ValueError(
                    f'env_steps_size={env_steps_size} does not fit: the data '
                    f'has {self.df_total_steps + 1} rows and the first '
                    f'{self.lookback_window_size} are needed for the '
                    f'lookback window'
                )
            self.start_step = random.randint(self.lookback_window_size, last_start)
            self.end_step = self.start_step + env_steps_size
        else:  # For TEST dataset
            self.start_step = self.lookback_window_size
            self.end_step = self.df_total_steps

        self.current_step = self.start_step

        # Refill both buffers, oldest row first. Exactly maxlen rows are
        # appended, so anything left over from a previous episode is evicted.
        for offset in reversed(range(self.lookback_window_size)):
            self.orders_history.append(self._orders_row())
            self.market_history.append(self._market_row(self.current_step - offset))

        return self._state()

    # Get the data points for the given current_step
    def _next_observation(self):
        """Append the market row of ``current_step`` and return the observation.

        This mutates ``market_history``, so it must be called once per step.
        """
        self.market_history.append(self._market_row(self.current_step))
        return self._state()

    # Execute one time step within the environment
    def step(self, action):
        """Advance one row, apply ``action`` and return the outcome.

        Args:
            action: 0 to hold, 1 to buy with the whole balance, 2 to sell all
                crypto held. A buy needs more than 1% of the initial balance
                in cash and a sell needs a positive holding; otherwise, and
                for any other value, nothing is traded.

        Returns:
            Tuple ``(obs, reward, done)``. ``reward`` is the change in net
            worth over this step. ``done`` is True only when net worth has
            dropped to half the initial balance or less; reaching
            ``end_step`` is not signalled here, callers compare
            ``current_step`` with ``end_step`` themselves.
        """
        self.crypto_bought = 0
        self.crypto_sold = 0
        self.current_step += 1

        # Set the current price to a random price between open and close
        current_price = random.uniform(
            self.df.loc[self.current_step, 'Open'],
            self.df.loc[self.current_step, 'Close']
        )

        if action == 0:  # Hold
            pass

        elif action == 1 and self.balance > self.initial_balance / 100:
            # Buy with 100% of current balance
            self.crypto_bought = self.balance / current_price
            self.balance -= self.crypto_bought * current_price
            self.crypto_held += self.crypto_bought

        elif action == 2 and self.crypto_held > 0:
            # Sell 100% of current crypto held
            self.crypto_sold = self.crypto_held
            self.balance += self.crypto_sold * current_price
            self.crypto_held -= self.crypto_sold

        self.prev_net_worth = self.net_worth
        self.net_worth = self.balance + self.crypto_held * current_price

        self.orders_history.append(self._orders_row())

        # Calculate reward
        reward = self.net_worth - self.prev_net_worth

        # bool() keeps the flag a plain Python bool even when net_worth is a
        # NumPy scalar.
        done = bool(self.net_worth <= self.initial_balance / 2)

        obs = self._next_observation()

        return obs, reward, done

    # render environment
    def render(self):
        """Print the current step number and net worth."""
        print(f'Step {self.current_step}\n\t\tNET WORTH = {self.net_worth}')


## RANDOM Agent definition

In [3]:
def random_games(env, train_episodes=50, training_batch_size=500):
    """Play episodes with uniformly random actions and print the results.

    Every episode is started with ``env.reset(env_steps_size=...)`` and runs
    until ``env.current_step`` reaches ``env.end_step``; the ``done`` flag
    returned by ``env.step`` is not consulted. The final net worth of each
    episode and the average over all episodes are printed.

    Args:
        env: environment exposing ``reset``, ``render``, ``step`` and the
            attributes ``current_step``, ``end_step`` and ``net_worth``.
        train_episodes: number of episodes to play. When it is zero or
            negative nothing is played and no average is printed.
        training_batch_size: forwarded to ``env.reset`` as ``env_steps_size``.
    """
    total_net_worth = 0
    for episode in range(train_episodes):
        env.reset(env_steps_size=training_batch_size)

        while True:
            env.render()

            action = np.random.randint(3, size=1)[0]
            env.step(action)

            # >= rather than == so the loop still ends if a step overshoots.
            if env.current_step >= env.end_step:
                break

        total_net_worth += env.net_worth
        # Built-in round() works for plain floats as well as NumPy scalars,
        # whereas the .round() method exists only on the latter.
        print("=" * 80)
        print("Episode", episode, "\tNET WORTH =", round(env.net_worth, 2))
        print("=" * 80)

    # No episodes means nothing to average (and avoids dividing by zero).
    if train_episodes > 0:
        print("\nAverage NET WORTH =", round(total_net_worth / train_episodes, 2))


## Input data

In [4]:
# Read the BTCUSD_BitfinexHourly.csv file and order its rows by the 'Date' column.
df = pd.read_csv('../data/input/BTCUSD_BitfinexHourly.csv').sort_values('Date')
df.describe()


Unnamed: 0,Open,High,Low,Close,Volume
count,9995.0,9995.0,9995.0,9995.0,9995.0
mean,45724.813504,45961.944924,45472.808454,45722.487116,292.236423
std,9105.466516,9120.799954,9085.68225,9106.31162,459.148559
min,26925.0,27938.0,26591.0,26925.0,1.4e-05
25%,38791.514011,39016.0,38576.242984,38794.0,87.829247
50%,44065.0,44290.0,43851.0,44062.0,162.937913
75%,51584.0,51784.0,51323.0,51565.531955,316.169747
max,68601.0,68958.0,68450.0,68601.0,11111.599499


In [5]:
# Number of past rows every observation looks back over.
lookback_window_size = 50

# Training split: every row except the final 720 (30 days) plus one lookback window.
train_df = df.iloc[:-(720 + lookback_window_size)]
train_df.describe()


Unnamed: 0,Open,High,Low,Close,Volume
count,9225.0,9225.0,9225.0,9225.0,9225.0
mean,46473.543171,46715.23215,46216.005301,46472.111013,293.063091
std,9005.153996,9019.521442,8986.862932,9004.965542,454.638966
min,29446.0,29718.0,29247.0,29447.0,1.4e-05
25%,39177.0,39417.0,38968.478237,39178.0,90.329773
50%,45482.0,45761.526584,45224.0,45476.0,165.106459
75%,54380.0,54671.0,54068.0,54376.0,317.974903
max,68601.0,68958.0,68450.0,68601.0,11111.599499


In [6]:
# Held-out split: the final 720 rows (30 days) plus one lookback window in front of them.
test_df = df.iloc[-(720 + lookback_window_size):]
test_df.describe()


Unnamed: 0,Open,High,Low,Close,Volume
count,770.0,770.0,770.0,770.0,770.0
mean,36754.643145,36937.172629,36568.924157,36741.603418,282.332506
std,4185.091644,4120.048217,4250.939313,4188.924091,510.307184
min,26925.0,27938.0,26591.0,26925.0,0.006568
25%,32928.25,33167.785564,32633.25,32850.0,63.13446
50%,38584.5,38714.5,38471.467884,38582.0,128.874807
75%,39804.001012,39879.75,39697.25,39794.75,290.307817
max,42714.0,42989.0,42622.541465,42714.0,5814.788133


## Random agent baseline run

In [7]:
# Build one environment per split, then play 10 random episodes of 500 steps on the training one.
train_env = CryptoEnv(df=train_df, lookback_window_size=lookback_window_size)
test_env = CryptoEnv(df=test_df, lookback_window_size=lookback_window_size)
random_games(env=train_env, train_episodes=10, training_batch_size=500)


Step 820
		NET WORTH = 1000
Step 821
		NET WORTH = 1000.0
Step 822
		NET WORTH = 1001.0982542844348
Step 823
		NET WORTH = 1001.0982542844348
Step 824
		NET WORTH = 1003.1085342531746
Step 825
		NET WORTH = 1003.5287076489682
Step 826
		NET WORTH = 1003.5287076489682
Step 827
		NET WORTH = 1003.5287076489682
Step 828
		NET WORTH = 999.3004152274293
Step 829
		NET WORTH = 996.881413406088
Step 830
		NET WORTH = 996.881413406088
Step 831
		NET WORTH = 996.881413406088
Step 832
		NET WORTH = 996.881413406088
Step 833
		NET WORTH = 996.881413406088
Step 834
		NET WORTH = 996.881413406088
Step 835
		NET WORTH = 996.881413406088
Step 836
		NET WORTH = 996.881413406088
Step 837
		NET WORTH = 996.881413406088
Step 838
		NET WORTH = 996.881413406088
Step 839
		NET WORTH = 996.881413406088
Step 840
		NET WORTH = 996.881413406088
Step 841
		NET WORTH = 996.881413406088
Step 842
		NET WORTH = 996.881413406088
Step 843
		NET WORTH = 996.881413406088
Step 844
		NET WORTH = 996.881413406088
Step 845


Step 1072
		NET WORTH = 1030.8051794718024
Step 1073
		NET WORTH = 1030.8051794718024
Step 1074
		NET WORTH = 1030.8051794718024
Step 1075
		NET WORTH = 1030.8051794718024
Step 1076
		NET WORTH = 1030.8051794718024
Step 1077
		NET WORTH = 1030.8051794718024
Step 1078
		NET WORTH = 1007.5587456368992
Step 1079
		NET WORTH = 1007.5587456368992
Step 1080
		NET WORTH = 1007.5587456368992
Step 1081
		NET WORTH = 1004.8623084658416
Step 1082
		NET WORTH = 996.9697863399681
Step 1083
		NET WORTH = 996.8517428866948
Step 1084
		NET WORTH = 997.1953285796573
Step 1085
		NET WORTH = 983.9451259596282
Step 1086
		NET WORTH = 991.2085627338437
Step 1087
		NET WORTH = 991.2085627338437
Step 1088
		NET WORTH = 991.2085627338437
Step 1089
		NET WORTH = 1004.7303332347428
Step 1090
		NET WORTH = 1003.5146896012069
Step 1091
		NET WORTH = 1003.5146896012069
Step 1092
		NET WORTH = 1006.1672618711336
Step 1093
		NET WORTH = 998.5553732312707
Step 1094
		NET WORTH = 994.5577652952119
Step 1095
		NET WORT

Step 7848
		NET WORTH = 1036.5243967636686
Step 7849
		NET WORTH = 1036.5243967636686
Step 7850
		NET WORTH = 1036.4920260252766
Step 7851
		NET WORTH = 1036.4920260252766
Step 7852
		NET WORTH = 1036.4920260252766
Step 7853
		NET WORTH = 1036.4920260252766
Step 7854
		NET WORTH = 1036.4920260252766
Step 7855
		NET WORTH = 1031.3030698748987
Step 7856
		NET WORTH = 1030.3610149404533
Step 7857
		NET WORTH = 1032.6785327477555
Step 7858
		NET WORTH = 1021.5429922626139
Step 7859
		NET WORTH = 1021.5429922626139
Step 7860
		NET WORTH = 1018.678427218422
Step 7861
		NET WORTH = 1018.678427218422
Step 7862
		NET WORTH = 1018.2915704046547
Step 7863
		NET WORTH = 1018.2915704046547
Step 7864
		NET WORTH = 1015.1073274773283
Step 7865
		NET WORTH = 1011.0531637974093
Step 7866
		NET WORTH = 1004.9757672601067
Step 7867
		NET WORTH = 1004.9757672601067
Step 7868
		NET WORTH = 1004.9757672601067
Step 7869
		NET WORTH = 1003.8028782515933
Step 7870
		NET WORTH = 1003.8028782515933
Step 7871
		N

Step 4831
		NET WORTH = 966.7384968454036
Step 4832
		NET WORTH = 966.7384968454036
Step 4833
		NET WORTH = 966.7384968454036
Step 4834
		NET WORTH = 966.7384968454036
Step 4835
		NET WORTH = 966.7384968454036
Step 4836
		NET WORTH = 966.7384968454036
Step 4837
		NET WORTH = 966.7384968454036
Step 4838
		NET WORTH = 966.7384968454036
Step 4839
		NET WORTH = 966.7384968454036
Step 4840
		NET WORTH = 966.7384968454036
Step 4841
		NET WORTH = 964.0515302025268
Step 4842
		NET WORTH = 961.0612424881178
Step 4843
		NET WORTH = 961.1672604602563
Step 4844
		NET WORTH = 956.2891784536054
Step 4845
		NET WORTH = 953.841769189496
Step 4846
		NET WORTH = 951.6428363694054
Step 4847
		NET WORTH = 962.0066592354215
Step 4848
		NET WORTH = 985.0982816571035
Step 4849
		NET WORTH = 992.0456177469232
Step 4850
		NET WORTH = 992.0456177469232
Step 4851
		NET WORTH = 992.0456177469232
Step 4852
		NET WORTH = 986.7636320602699
Step 4853
		NET WORTH = 986.7636320602699
Step 4854
		NET WORTH = 986.7636320

Step 6279
		NET WORTH = 931.0373171543682
Step 6280
		NET WORTH = 931.0373171543682
Step 6281
		NET WORTH = 931.0373171543682
Step 6282
		NET WORTH = 931.0373171543682
Step 6283
		NET WORTH = 931.0373171543682
Step 6284
		NET WORTH = 928.872109998816
Step 6285
		NET WORTH = 933.7670154083694
Step 6286
		NET WORTH = 934.4113695389947
Step 6287
		NET WORTH = 934.2583594422601
Step 6288
		NET WORTH = 935.4152577705585
Step 6289
		NET WORTH = 934.2651035469893
Step 6290
		NET WORTH = 933.305394565064
Step 6291
		NET WORTH = 928.4075047906422
Step 6292
		NET WORTH = 932.7280858606175
Step 6293
		NET WORTH = 939.9247780160968
Step 6294
		NET WORTH = 939.9247780160968
Step 6295
		NET WORTH = 939.9247780160968
Step 6296
		NET WORTH = 939.9247780160968
Step 6297
		NET WORTH = 939.9247780160968
Step 6298
		NET WORTH = 939.9247780160968
Step 6299
		NET WORTH = 939.9247780160968
Step 6300
		NET WORTH = 939.9247780160968
Step 6301
		NET WORTH = 939.9247780160968
Step 6302
		NET WORTH = 939.92477801

Step 5460
		NET WORTH = 1074.3300356902428
Step 5461
		NET WORTH = 1069.5235075411354
Step 5462
		NET WORTH = 1069.6560683924913
Step 5463
		NET WORTH = 1069.9833988665903
Step 5464
		NET WORTH = 1070.226181158332
Step 5465
		NET WORTH = 1074.4394493771072
Step 5466
		NET WORTH = 1082.9696622886834
Step 5467
		NET WORTH = 1082.9696622886834
Step 5468
		NET WORTH = 1082.9696622886834
Step 5469
		NET WORTH = 1080.0079013453328
Step 5470
		NET WORTH = 1080.4971821911533
Step 5471
		NET WORTH = 1078.6056367061065
Step 5472
		NET WORTH = 1074.761398284277
Step 5473
		NET WORTH = 1073.818755192685
Step 5474
		NET WORTH = 1070.4748464581523
Step 5475
		NET WORTH = 1070.4748464581523
Step 5476
		NET WORTH = 1070.4748464581523
Step 5477
		NET WORTH = 1070.5647593600618
Step 5478
		NET WORTH = 1073.3114587748182
Step 5479
		NET WORTH = 1073.8166431450397
Step 5480
		NET WORTH = 1071.5717340482383
Step 5481
		NET WORTH = 1069.5443098275614
Step 5482
		NET WORTH = 1089.5837376187415
Step 5483
		NE

Step 8403
		NET WORTH = 1098.0956889065526
Step 8404
		NET WORTH = 1097.174544575904
Step 8405
		NET WORTH = 1097.4640641482176
Step 8406
		NET WORTH = 1098.144514403021
Step 8407
		NET WORTH = 1100.4622679917431
Step 8408
		NET WORTH = 1100.1832309459212
Step 8409
		NET WORTH = 1099.944777621863
Step 8410
		NET WORTH = 1098.0436482335751
Step 8411
		NET WORTH = 1098.0436482335751
Step 8412
		NET WORTH = 1099.9322894058673
Step 8413
		NET WORTH = 1099.9322894058673
Step 8414
		NET WORTH = 1099.9322894058673
Step 8415
		NET WORTH = 1098.4238972429698
Step 8416
		NET WORTH = 1097.1195729030467
Step 8417
		NET WORTH = 1098.5786838159497
Step 8418
		NET WORTH = 1098.319158266623
Step 8419
		NET WORTH = 1098.3246152691734
Step 8420
		NET WORTH = 1098.3246152691734
Step 8421
		NET WORTH = 1098.3246152691734
Step 8422
		NET WORTH = 1100.4660058222714
Step 8423
		NET WORTH = 1104.69170975273
Step 8424
		NET WORTH = 1104.69170975273
Step 8425
		NET WORTH = 1103.9059519764628
Step 8426
		NET WOR

Step 1883
		NET WORTH = 878.6213626178757
Step 1884
		NET WORTH = 878.6213626178757
Step 1885
		NET WORTH = 878.6213626178757
Step 1886
		NET WORTH = 878.0235681504006
Step 1887
		NET WORTH = 878.0235681504006
Step 1888
		NET WORTH = 878.0235681504006
Step 1889
		NET WORTH = 878.0235681504006
Step 1890
		NET WORTH = 878.0235681504006
Step 1891
		NET WORTH = 878.0235681504006
Step 1892
		NET WORTH = 882.5154048216931
Step 1893
		NET WORTH = 882.5154048216931
Step 1894
		NET WORTH = 922.0864777894765
Step 1895
		NET WORTH = 922.0864777894765
Step 1896
		NET WORTH = 919.8700251359996
Step 1897
		NET WORTH = 919.8700251359996
Step 1898
		NET WORTH = 919.8700251359996
Step 1899
		NET WORTH = 922.3931147919895
Step 1900
		NET WORTH = 918.7017440450443
Step 1901
		NET WORTH = 918.3969250870151
Step 1902
		NET WORTH = 918.3969250870151
Step 1903
		NET WORTH = 918.3969250870151
Step 1904
		NET WORTH = 918.3969250870151
Step 1905
		NET WORTH = 918.3969250870151
Step 1906
		NET WORTH = 918.396925

Step 5884
		NET WORTH = 1008.2097182146562
Step 5885
		NET WORTH = 1008.2097182146562
Step 5886
		NET WORTH = 1008.2097182146562
Step 5887
		NET WORTH = 1009.4275044035588
Step 5888
		NET WORTH = 1010.3808597623431
Step 5889
		NET WORTH = 1008.5548119664419
Step 5890
		NET WORTH = 1008.5548119664419
Step 5891
		NET WORTH = 1008.5548119664419
Step 5892
		NET WORTH = 1008.5548119664419
Step 5893
		NET WORTH = 1008.5548119664419
Step 5894
		NET WORTH = 1008.5548119664419
Step 5895
		NET WORTH = 1008.5548119664419
Step 5896
		NET WORTH = 1008.5548119664419
Step 5897
		NET WORTH = 1008.5548119664419
Step 5898
		NET WORTH = 1008.5548119664419
Step 5899
		NET WORTH = 1008.5548119664419
Step 5900
		NET WORTH = 1012.5025593938586
Step 5901
		NET WORTH = 1010.7263214762028
Step 5902
		NET WORTH = 1007.9257856246729
Step 5903
		NET WORTH = 1002.5543901734818
Step 5904
		NET WORTH = 1001.022020994021
Step 5905
		NET WORTH = 999.8279701839167
Step 5906
		NET WORTH = 1003.6218203397767
Step 5907
		N

Step 6419
		NET WORTH = 1018.7677574693356
Step 6420
		NET WORTH = 1012.4569947719443
Step 6421
		NET WORTH = 1011.2167170732527
Step 6422
		NET WORTH = 1010.4877147322978
Step 6423
		NET WORTH = 1009.9677366198792
Step 6424
		NET WORTH = 1009.9677366198792
Step 6425
		NET WORTH = 1009.9677366198792
Step 6426
		NET WORTH = 1009.9677366198792
Step 6427
		NET WORTH = 1009.9677366198792
Step 6428
		NET WORTH = 1009.9677366198792
Step 6429
		NET WORTH = 1009.9677366198792
Step 6430
		NET WORTH = 1009.9677366198792
Step 6431
		NET WORTH = 1009.9677366198792
Step 6432
		NET WORTH = 1009.9677366198792
Step 6433
		NET WORTH = 1009.9677366198792
Step 6434
		NET WORTH = 1009.9677366198792
Step 6435
		NET WORTH = 1009.9677366198792
Step 6436
		NET WORTH = 1009.9677366198792
Step 6437
		NET WORTH = 1009.9677366198792
Step 6438
		NET WORTH = 1009.9677366198792
Step 6439
		NET WORTH = 1009.2525707685757
Step 6440
		NET WORTH = 1012.485704407278
Step 6441
		NET WORTH = 1008.6182781419451
Step 6442
		