In [None]:
import random
from collections import deque

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

In [None]:
# Load the price history, parse the timestamp column and use it as the index.
df = (
    pd.read_csv('BTC.csv')
    .assign(datetime=lambda frame: pd.to_datetime(frame['datetime']))
    .set_index('datetime')
)

In [None]:
# Summarise column dtypes and non-null counts to spot missing values early.
df.info()

In [None]:
# Descriptive statistics of the numeric columns (count, mean, std, quartiles, extremes).
df.describe()

In [None]:
# Preview the first rows to sanity-check the parsed datetime index and the columns.
df.head()

In [None]:
# Closing prices as a 2-D array of shape (n_rows, 1): the double brackets select
# a one-column DataFrame, which is the layout the scaler below expects.
data = df[['Close']].to_numpy()
# Preview only the first rows instead of echoing the whole array.
data[:5]

In [None]:
# Average closing price over the whole series, in the original (unscaled) units.
data.mean()

In [None]:
# Rescale the closing prices with MinMaxScaler (maps to [0, 1] by default) so the
# network sees inputs of a bounded magnitude. `MinMaxScaler` is imported with the
# other dependencies in the first cell.
# NOTE(review): the scaler is fitted on the full series, so every window is scaled
# using the global min/max, including prices that come later in time.
scaler = MinMaxScaler()
data_scaled = scaler.fit_transform(data)

In [None]:
# --- Reproducibility -------------------------------------------------------
# Exploration uses `random` and `np.random`, and the network weights are
# initialised by TensorFlow, so all three generators are seeded here.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# --- Hyperparameters -------------------------------------------------------
SEQ_LENGTH = 60          # number of consecutive prices in one state window
EPISODES = 1000          # full passes over the price history
GAMMA = 0.95             # discount factor applied to the next state's best Q-value
EPSILON = 1.0            # current exploration probability (decayed by the training loop)
EPSILON_DECAY = 0.995    # multiplicative decay applied to EPSILON after each episode
EPSILON_MIN = 0.01       # lower bound for EPSILON
LEARNING_RATE = 0.001    # Adam learning rate
MEMORY_SIZE = 2000       # maximum number of transitions kept in the replay buffer
BATCH_SIZE = 32          # transitions sampled per training call

In [None]:
def create_state_sequences(data, seq_length):
    """Cut ``data`` into overlapping windows of ``seq_length`` consecutive rows.

    Window ``k`` is ``data[k:k + seq_length]``. Exactly
    ``len(data) - seq_length`` windows are produced, so the final possible
    window (the one ending on the last row) is not included.

    Args:
        data: Sliceable sequence or array of observations.
        seq_length: Number of rows per window.

    Returns:
        ``np.ndarray`` stacking the windows along a new first axis; an empty
        array when ``data`` has no more than ``seq_length`` rows.
    """
    window_count = len(data) - seq_length
    windows = [data[start:start + seq_length] for start in range(window_count)]
    return np.array(windows)

In [None]:
# Every window of SEQ_LENGTH consecutive scaled closes is one observation.
states = create_state_sequences(data_scaled, SEQ_LENGTH)
# An action's position in this list is its numeric id: 0 = BUY, 1 = SELL, 2 = HOLD.
actions = ["BUY", "SELL", "HOLD"]

In [None]:
def build_model():
    """Build and compile the Q-network.

    The network takes a window of shape ``(SEQ_LENGTH, 1)`` and returns one
    Q-value per entry of ``actions``, i.e. an output of shape
    ``(batch, len(actions))``.

    The window is flattened before the dense stack. A ``Dense`` layer only
    acts on the last axis of its input, so without flattening the output
    would be ``(batch, SEQ_LENGTH, len(actions))`` and an ``argmax`` over it
    would no longer be a valid action index.

    Returns:
        A compiled ``Sequential`` model (MSE loss, Adam optimizer with
        ``LEARNING_RATE``).
    """
    model = Sequential([
        # Collapse (SEQ_LENGTH, 1) into a flat vector of SEQ_LENGTH features.
        tf.keras.layers.Flatten(input_shape=(SEQ_LENGTH, 1)),
        Dense(64, activation='relu'),
        Dense(64, activation='relu'),
        # Linear head: unbounded Q-value estimates, one per action.
        Dense(len(actions), activation='linear'),
    ])
    model.compile(loss='mse', optimizer=Adam(learning_rate=LEARNING_RATE))
    return model

In [None]:
class DQNAgent:
    """Deep Q-learning agent with epsilon-greedy exploration and experience replay.

    The Q-network comes from ``build_model()``. Transitions are kept in a
    deque bounded to ``MEMORY_SIZE`` entries, so the oldest ones are dropped
    once it is full. The exploration rate is read from the module-level
    ``EPSILON`` on every call to ``act``.
    """

    def __init__(self):
        self.model = build_model()
        self.memory = deque(maxlen=MEMORY_SIZE)

    def remember(self, state, action, reward, next_state, done):
        """Store one ``(state, action, reward, next_state, done)`` transition."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Choose an action index for ``state``.

        With probability ``EPSILON`` a uniformly random index into ``actions``
        is returned; otherwise the index of the largest predicted Q-value.

        Args:
            state: Model input including the leading batch axis of size 1.

        Returns:
            The chosen action index as a Python ``int``.
        """
        if np.random.rand() <= EPSILON:
            return random.randint(0, len(actions) - 1)
        q_values = self.model.predict(state, verbose=0)
        return int(np.argmax(q_values[0]))

    def train(self):
        """Run one Q-learning update on a random minibatch from the replay buffer.

        Does nothing until at least ``BATCH_SIZE`` transitions are stored.
        The target for the taken action is ``reward`` for terminal
        transitions and ``reward + GAMMA * max(Q(next_state))`` otherwise.

        The whole minibatch goes through two ``predict`` calls and a single
        ``fit`` call (one gradient step) instead of one predict/fit round
        trip per sample.
        """
        if len(self.memory) < BATCH_SIZE:
            return

        batch = random.sample(self.memory, BATCH_SIZE)

        # Stored states already carry a leading batch axis of size 1, so
        # concatenating along axis 0 yields a proper (batch, ...) input.
        batch_states = np.concatenate([t[0] for t in batch], axis=0)
        batch_next_states = np.concatenate([t[3] for t in batch], axis=0)

        # Copy the rewards into a fresh float array. A stored reward may be a
        # size-1 numpy array; accumulating the target on it in place would
        # silently rewrite the replay buffer.
        rewards = np.array(
            [np.asarray(t[2], dtype=float).item() for t in batch]
        )
        dones = np.array([bool(t[4]) for t in batch])

        next_q = self.model.predict(batch_next_states, verbose=0)
        # Best next-state value per sample, taken over all output entries.
        best_next_q = next_q.reshape(len(batch), -1).max(axis=1)
        targets = np.where(dones, rewards, rewards + GAMMA * best_next_q)

        # Only the entry of the action actually taken is moved towards its
        # target; the other outputs keep their current prediction.
        q_values = self.model.predict(batch_states, verbose=0)
        for row, (_, action, _, _, _), target in zip(q_values, batch, targets):
            row[action] = target

        self.model.fit(
            batch_states, q_values,
            batch_size=BATCH_SIZE, epochs=1, verbose=0,
        )

In [None]:
# Instantiate the agent; this builds a fresh Q-network and an empty replay buffer.
agent = DQNAgent()

In [None]:
# Training loop: every episode walks the whole price history once, stores each
# transition in the replay buffer, then runs one training call and decays EPSILON.
#
# Reward convention (indices follow `actions`): BUY (0) earns the next price
# change, SELL (1) earns its negative, HOLD (anything else) earns 0.
# NOTE(review): rewards are computed from the raw prices in `data`, while the
# states are the scaled prices; confirm this difference in scale is intended.
# NOTE(review): EPSILON is module-level state, so re-running this cell resumes
# from the decayed value; re-run the hyperparameter cell to reset it.
for episode in range(EPISODES):
    state_idx = 0
    total_reward = 0.0
    state = states[state_idx].reshape(1, SEQ_LENGTH, 1)

    while state_idx < len(states) - 1:
        action = agent.act(state)
        next_state = states[state_idx + 1].reshape(1, SEQ_LENGTH, 1)

        # Price change from the last price inside the current window to the
        # first price after it. `.item()` turns the size-1 array into a plain
        # Python scalar so the running total stays a number that `:.2f` can
        # format, and no array object is shared with the replay buffer.
        next_price_idx = state_idx + SEQ_LENGTH
        price_diff = float((data[next_price_idx] - data[next_price_idx - 1]).item())

        if action == 0:
            reward = price_diff
        elif action == 1:
            reward = -price_diff
        else:
            reward = 0.0

        total_reward += reward
        # The transition into the last available state ends the episode.
        done = state_idx == len(states) - 2
        agent.remember(state, action, reward, next_state, done)

        state_idx += 1
        state = next_state

    agent.train()

    # Decay exploration, never going below the configured floor. This code runs
    # at module scope, so no `global` declaration is needed to rebind EPSILON.
    EPSILON = max(EPSILON_MIN, float(EPSILON * EPSILON_DECAY))

    print(f"Episode {episode+1}/{EPISODES}, Reward: {total_reward:.2f}, Epsilon: {EPSILON:.3f}")

In [None]:
# Save the trained Q-network to disk so it can be reloaded without retraining.
agent.model.save("bitcoin_rl_model.h5")