In [2]:
import gym
import numpy as np
import pandas as pd
from gym import spaces
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras import layers

# Define Bitcoin trading environment
class BitcoinTradingEnv(gym.Env):
    def __init__(self, df):
        super(BitcoinTradingEnv, self).__init__()
        self.df = df
        self.action_space = spaces.Discrete(3)  # Buy, Sell, Hold
        self.observation_space = spaces.Box(low=0, high=1, shape=(len(df.columns),), dtype=np.float32)
        self.current_step = 0
        self.scaler = MinMaxScaler()
        self.current_price = self.df.iloc[self.current_step]['Close']
        self.initial_worth = 10000
        self.balance = self.initial_worth
        self.btc_held = 0
        self.net_worth = self.balance

    def reset(self):
        self.current_step = 0
        self.balance = self.initial_worth
        self.btc_held = 0
        self.net_worth = self.balance
        self.state = self._next_observation()
        return self.state

    def _next_observation(self):
        obs = self.df.iloc[self.current_step].values
        return obs

    def step(self, action):
        self.current_step += 1
        if self.current_step > len(self.df) - 1:
            self.current_step = 0  # reset

        self._take_action(action)

        reward = self._calculate_reward()

        self.state = self._next_observation()

        return self.state, reward, False, {}

    def _take_action(self, action):
        current_price = self.df.iloc[self.current_step]['Close']
        if action == 0:  # Buy
            btc_to_buy = self.balance / current_price
            self.balance -= btc_to_buy * current_price
            self.btc_held += btc_to_buy
        elif action == 1:  # Sell
            self.balance += self.btc_held * current_price
            self.btc_held = 0

        self.net_worth = self.balance + self.btc_held * current_price
        self.current_price = current_price

    def _calculate_reward(self):
        return self.net_worth - self.initial_worth

    def render(self, mode='human'):
        print(f'Step: {self.current_step}')
        print(f'Balance: {self.balance}')
        print(f'BTC held: {self.btc_held}')
        print(f'Net worth: {self.net_worth}')


# Load Bitcoin price data
df = pd.read_csv('bitcoin_price.csv')
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)

# Normalize data
scaler = MinMaxScaler()
df[df.columns] = scaler.fit_transform(df[df.columns])

# Create Bitcoin trading environment
env = BitcoinTradingEnv(df)
env.render()

# Define deep Q-learning agent
num_actions = env.action_space.n
num_features = len(df.columns)
model = tf.keras.Sequential([
    layers.Dense(128, activation='relu', input_shape=(num_features,)),
    layers.Dense(128, activation='relu'),
    layers.Dense(num_actions)
])

model.compile(optimizer='adam', loss='mse')

# Train the agent
NUM_EPISODES = 100
for episode in range(NUM_EPISODES):
    state = env.reset()
    done = False
    while not done:
        # Choose action
        action = np.argmax(model.predict(state.reshape(1, -1)))
        next_state, reward, done, _ = env.step(action)
        # Update model
        target = reward + 0.95 * np.max(model.predict(next_state.reshape(1, -1)))
        target_vec = model.predict(state.reshape(1, -1))[0]
        target_vec[action] = target
        model.fit(state.reshape(1, -1), target_vec.reshape(-1, num_actions), epochs=1, verbose=0)
        state = next_state


Step: 0
Balance: 10000
BTC held: 0
Net worth: 10000


  btc_to_buy = self.balance / current_price


