In [17]:
import numpy as np
import pandas as pd
import tensorflow as tf
import ta
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from keras import utils
from collections import deque
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.preprocessing import normalize
import random

## Data Exploration and Feature Generation


In [3]:
def addSMA(data, sma_start: int, sma_end: int, sma_step: int):
    """Append close-relative SMA columns and their pairwise spreads.

    For every window in ``range(sma_start, sma_end, sma_step)`` a column
    ``SMA_<window>`` holding SMA / Close is written into ``data``. Then, for
    every pair of windows (earlier window first), a column
    ``SMA_DELTA_<a>_<b>`` holding ``SMA_<a> - SMA_<b>`` is written.

    Columns are assigned on ``data`` itself, which is also returned.
    """
    windows = list(range(sma_start, sma_end, sma_step))
    close = data.Close

    # Ratio of each simple moving average to the closing price.
    for w in windows:
        sma = ta.trend.SMAIndicator(close, window=w).sma_indicator()
        data.loc[:, f'SMA_{w}'] = sma / close

    # Spread between every pair of ratios. Both operands are already divided
    # by Close, so each spread equals (SMA_a - SMA_b) / Close.
    for pos, first_w in enumerate(windows):
        for second_w in windows[pos + 1:]:
            spread = data.loc[:, f'SMA_{first_w}'] - data.loc[:, f'SMA_{second_w}']
            data.loc[:, f'SMA_DELTA_{first_w}_{second_w}'] = spread

    return data

In [4]:
def addEMA(data, ema_start: int, ema_end: int, ema_step: int):
    """Append close-relative EMA columns and their pairwise spreads.

    For every window in ``range(ema_start, ema_end, ema_step)`` a column
    ``EMA_<window>`` holding EMA / Close is written into ``data``. Then, for
    every pair of windows (earlier window first), a column
    ``EMA_DELTA_<a>_<b>`` holding ``EMA_<a> - EMA_<b>`` is written.

    Columns are assigned on ``data`` itself, which is also returned.
    """
    windows = list(range(ema_start, ema_end, ema_step))
    close = data.Close

    # Ratio of each exponential moving average to the closing price.
    for w in windows:
        ema = ta.trend.EMAIndicator(close, window=w).ema_indicator()
        data.loc[:, f'EMA_{w}'] = ema / close

    # Spread between every pair of ratios. Both operands are already divided
    # by Close, so each spread equals (EMA_a - EMA_b) / Close.
    for pos, first_w in enumerate(windows):
        for second_w in windows[pos + 1:]:
            spread = data.loc[:, f'EMA_{first_w}'] - data.loc[:, f'EMA_{second_w}']
            data.loc[:, f'EMA_DELTA_{first_w}_{second_w}'] = spread

    return data

In [5]:
def feature_generation(
    data,
    sma_start: int = 20,
    sma_end  : int = 100,
    sma_step : int = 20,
    ema_start: int = 20,
    ema_end  : int = 100,
    ema_step : int = 20
    ):
    """
    Build the technical-indicator feature frame from OHLC price data.

    Input
    --------
    data: pd.DataFrame
        Must expose ``Open``, ``High``, ``Low`` and ``Close`` columns.
    sma_start, sma_end, sma_step: int
        ``range`` arguments selecting the SMA windows.
    ema_start, ema_end, ema_step: int
        ``range`` arguments selecting the EMA windows.


    Output
    --------
    pd.DataFrame
        A new frame holding the input columns plus the SMA/EMA ratios and
        spreads, RSI, MACD, ATR, Bollinger-band features and the
        Garman-Klass term, with every row containing a NaN removed.
        The input frame is not modified.
    """
    # Work on a copy: the indicator columns are assigned in place below, and
    # writing them into the caller's frame would leak state between notebook
    # cells (and can trigger SettingWithCopyWarning when `data` is a slice).
    data = data.copy()
    close = data.Close

    # SMA
    data = addSMA(data=data, sma_start=sma_start, sma_end=sma_end, sma_step=sma_step)

    # EMA
    data = addEMA(data=data, ema_start=ema_start, ema_end=ema_end, ema_step=ema_step)

    # RSI
    data['RSI'] = ta.momentum.RSIIndicator(close).rsi()

    # MACD
    data['MACD'] = ta.trend.MACD(close).macd()

    # ATR
    data['ATR'] = ta.volatility.AverageTrueRange(data.High, data.Low, close).average_true_range()

    # BollingerBands: one indicator instance serves both bands.
    bands = ta.volatility.BollingerBands(close)
    upper = bands.bollinger_hband()
    lower = bands.bollinger_lband()

    # Band distances and width, all expressed relative to the close.
    data['BB_upper'] = (upper - close) / close
    data['BB_lower'] = (lower - close) / close
    data['BB_width'] = (upper - lower) / close

    # Garman Klass Volatility: per-bar term
    # 0.5 * ln(H/L)^2 - (2 ln 2 - 1) * ln(C/O)^2 (no averaging or square root).
    log_high_low = np.log(data.High) - np.log(data.Low)
    log_close_open = np.log(close) - np.log(data.Open)
    data['garman_klass_vol'] = (log_high_low**2)/2 - (2*np.log(2)-1)*(log_close_open**2)

    # Cleansing: drop every row with a NaN in any column (this includes the
    # warm-up rows of the windowed indicators).
    return data.dropna()

In [6]:
def preprocess(data,
               PCA_components = 8):
  """Turn a raw OHLC frame into a PCA-reduced feature matrix and a price array.

  Parameters
  ----------
  data : pd.DataFrame
      Raw data with an ``index`` column plus ``Open``, ``High``, ``Low`` and
      ``Close``. The caller's frame is not re-indexed.
  PCA_components : int
      Number of principal components kept.

  Returns
  -------
  feature : np.ndarray
      PCA projection of the normalized indicator features, one row per
      retained sample.
  price : np.ndarray
      Closing prices aligned row-for-row with ``feature``.
  """
  # Set index on a new frame. The previous in-place set_index consumed the
  # 'index' column of the caller's frame, so a second call on the same
  # object raised KeyError.
  data = data.set_index('index')

  # Feature generation and remove na
  data_feature = feature_generation(data)

  # Price
  price = data_feature.Close.values

  # Feature: everything except the raw OHLC columns
  remove_columns = ['Open',
                    'High',
                    'Low',
                    'Close']
  feature = data_feature.drop(remove_columns, axis=1)

  # normalize
  # NOTE(review): sklearn's normalize defaults to axis=1, i.e. each ROW is
  # scaled to unit L2 norm, not each feature column. Confirm this is the
  # intended scaling before PCA.
  feature = normalize(feature)

  # PCA (fitted on the full data set passed in)
  pca = PCA(n_components=PCA_components)
  feature = pca.fit_transform(feature)

  return feature, price

## Trading Environment

In [7]:
class StockTradingEnv:
    """Minimal sliding-window trading environment over a price series.

    An observation is a window of ``sequence_length`` consecutive prices;
    ``sequence_length`` is read from module scope each time a method runs.
    ``features`` is stored but not used by any method of this class.

    Actions: 1 = buy, 2 = sell; any other value earns zero reward.
    """

    def __init__(self,features,prices):
        self.features = features
        self.prices = prices
        self.reset()

    def reset(self):
        """Rewind to the first window and return it."""
        self.current_step = 0
        self.done = False
        self.state = self.prices[self.current_step:self.current_step+sequence_length]
        return self.state

    def step(self, action):
        """Advance one bar and return ``(next_state, reward, done)``.

        The reward is the price change between the last bar of the previous
        window and the newly revealed bar: positive for a buy when the price
        rose, positive for a sell when it fell.

        Once the episode is finished, further calls return the terminal
        window with zero reward.
        """
        if self.done:
            return self.state, 0, self.done

        reward = 0
        self.current_step += 1

        # The last valid window starts at len(prices) - sequence_length.
        if self.current_step >= len(self.prices) - sequence_length:
            self.done = True

        next_state = self.prices[self.current_step:self.current_step+sequence_length]
        # Keep the attribute in sync with the observation handed out;
        # previously it stayed frozen at the window produced by reset().
        self.state = next_state

        newest = self.prices[self.current_step + sequence_length - 1]
        previous = self.prices[self.current_step + sequence_length - 2]

        # Reward calculation (simplified)
        if action == 1:  # Buy
            reward = newest - previous
        elif action == 2:  # Sell
            reward = previous - newest

        return next_state, reward, self.done

## Q Learning

In [8]:
# Q-Network
def create_model(input_shape, action_space):
    """Build and compile the LSTM Q-network.

    Two stacked 50-unit LSTM layers, each followed by 20% dropout, feed a
    linear Dense head with ``action_space`` outputs. The model is compiled
    with Adam (learning rate taken from the module-level ``learning_rate``)
    and mean-squared-error loss.
    """
    layers = [
        # First LSTM returns the full sequence so the second one can consume it.
        LSTM(units=50, return_sequences=True, input_shape=input_shape),
        Dropout(0.2),
        LSTM(units=50),
        Dropout(0.2),
        # One linear output per action (the Q-value estimates).
        Dense(units=action_space, activation='linear'),
    ]
    q_network = Sequential(layers)

    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    q_network.compile(optimizer=optimizer, loss='mse')
    return q_network

In [9]:
# Main training loop
def train(env, model):
    """Train ``model`` with epsilon-greedy Q-learning and experience replay.

    Runs ``num_episodes`` episodes on ``env``. Each step picks a random
    action with probability ``epsilon`` (otherwise the arg-max of the
    predicted Q-values), stores the transition in the module-level
    ``memory`` and, once ``memory`` holds more than ``batch_size``
    transitions, replays a random batch, fitting the model one sample at a
    time towards ``reward + gamma * max Q(next_state)``.

    Reads the module-level hyperparameters ``num_episodes``,
    ``sequence_length``, ``batch_size``, ``gamma``, ``epsilon_min`` and
    ``epsilon_decay``; decays the module-level ``epsilon`` after every
    episode while it is above ``epsilon_min``.

    Parameters
    ----------
    env : object exposing ``reset()`` and ``step(action)`` returning
        ``(next_state, reward, done)``.
    model : object exposing Keras-style ``predict`` and ``fit``.
    """
    global epsilon
    for episode in range(num_episodes):
        state = env.reset()
        state = np.reshape(state, (1, sequence_length, 1))

        total_reward = 0
        while True:
            if np.random.rand() <= epsilon:
                action = random.randrange(3)  # Long, Short, Hold
            else:
                q_values = model.predict(state, verbose=0)
                action = np.argmax(q_values[0])

            next_state, reward, done = env.step(action)
            next_state = np.reshape(next_state, (1, sequence_length, 1))
            total_reward += reward

            memory.append((state, action, reward, next_state, done))
            state = next_state

            if done:
                print(f"Episode: {episode+1}/{num_episodes}, Reward: {total_reward}")
                break

            if len(memory) > batch_size:
                batch = random.sample(memory, batch_size)
                # The replay loop uses its own variable names. Unpacking into
                # `state` / `action` / ... here would overwrite the live
                # episode state, so the next action would be chosen from (and
                # the next transition stored with) a randomly sampled past
                # state instead of the environment's current one.
                for b_state, b_action, b_reward, b_next_state, b_done in batch:
                    target = b_reward
                    if not b_done:
                        # Bootstrapped estimate of the discounted future value.
                        target += gamma * np.amax(model.predict(b_next_state, verbose=0)[0])
                    # Only the taken action's Q-value is moved towards the target.
                    target_f = model.predict(b_state, verbose=0)
                    target_f[0][b_action] = target
                    model.fit(b_state, target_f, epochs=1, verbose=0)

        if epsilon > epsilon_min:
            epsilon *= epsilon_decay

## Hyperparameters

In [None]:
# Reproducibility: seed every random source the notebook draws from
# (Python's `random` for action/replay sampling, NumPy for the epsilon test,
# TensorFlow for weight initialisation and dropout).
seed = 42
random.seed(seed)
np.random.seed(seed)
tf.random.set_seed(seed)

# Training schedule
num_episodes = 200

# Q-learning
gamma = 0.99  # Discount factor
epsilon = 1.0  # Exploration rate; train() decays it in place, re-run this cell to reset
epsilon_min = 0.1  # Decay stops once epsilon is no longer above this value
epsilon_decay = 0.995  # Multiplicative decay applied after each episode

# Optimisation and replay
learning_rate = 0.001  # Adam learning rate used by create_model()
batch_size = 32  # Transitions sampled per replay step
memory_size = 2000  # Capacity of the replay buffer

# Model input
sequence_length = 10  # LSTM input sequence length

In [None]:
# Experience-replay buffer shared with train(): holds (state, action, reward,
# next_state, done) tuples and, being a bounded deque, discards the oldest
# entry once `memory_size` is reached.
memory = deque(maxlen=memory_size)

# Main

In [11]:
# Load the BTCUSDT candles from a path relative to the notebook's working
# directory. preprocess() expects an 'index' column plus Open/High/Low/Close.
data = pd.read_csv("../Binance Data/1h/BTCUSDT.csv")

In [12]:
# Build the PCA-reduced feature matrix and the aligned array of closing prices.
feature, price = preprocess(data)