# DQN LSTM

In [12]:
# imports
# Stable baselines - rl stuff
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.utils import set_random_seed

# Processing libraries
import pandas as pd
import random
import numpy as np

# One shared seed for Python's `random`, NumPy's global generator and the
# Stable-Baselines3 seeding helper.
seed = 42

for _seed_fn in (random.seed, np.random.seed, set_random_seed):
    _seed_fn(seed)

# gym-anytrading environment, scikit-learn scaling and os utilities
from gym_anytrading.envs import StocksEnv
from sklearn.preprocessing import MinMaxScaler
import os

## 1. Data import and data scaling

In [13]:
def _load_indexed_frame(csv_path):
    """Read a CSV whose first column is the index and normalise that index.

    The index is converted to datetimes and, when an index value occurs more
    than once, only its first row is kept.
    """
    frame = pd.read_csv(csv_path, index_col=0)
    frame.index = pd.to_datetime(frame.index)
    return frame[~frame.index.duplicated(keep='first')]


def _rescaled(frame, transform):
    """Apply ``transform`` to ``frame`` and restore its column and index labels."""
    return pd.DataFrame(transform(frame), columns=frame.columns, index=frame.index)


df_train = _load_indexed_frame('data/df_train.csv')
df_test = _load_indexed_frame('data/df_test.csv')

# The scaler is fitted on the training split only; the test split is mapped
# with the parameters learned there.
# NOTE(review): every column is rescaled, including 'Price', which the
# environment later reads as its price series — confirm that is intended.
scaler = MinMaxScaler(feature_range=(0, 1))
df_train = _rescaled(df_train, scaler.fit_transform)
df_test = _rescaled(df_test, scaler.transform)

In [None]:
# Columns that add_signals() extracts as the environment's signal features.
# Alternative kept for reference (every column except the price):
#   features = df_train.drop('Price', axis=1).columns.tolist()
features = ['rsi', 'ema', 'roc']

## 2. Create the environment

In [None]:
def add_signals(env):
    """Extract the price series and the indicator matrix for an environment.

    Rows are taken from ``window_size`` positions before ``frame_bound[0]``
    up to (but not including) ``frame_bound[1]``.

    Args:
        env: Object exposing ``df``, ``frame_bound`` and ``window_size``.

    Returns:
        A ``(prices, signal_features)`` tuple of NumPy arrays: the ``'Price'``
        column and the columns named in the module-level ``features`` list,
        both restricted to the same row range.
    """
    first_row, last_row = env.frame_bound[0] - env.window_size, env.frame_bound[1]
    rows = slice(first_row, last_row)

    price_series = env.df.loc[:, 'Price'].to_numpy()
    feature_matrix = env.df.loc[:, features].to_numpy()

    return price_series[rows], feature_matrix[rows]

In [None]:
class MyCustomEnv(StocksEnv):
    """StocksEnv variant whose data preparation is delegated to ``add_signals``."""

    def _process_data(self):
        """Return the ``(prices, signal_features)`` pair built by ``add_signals``."""
        return add_signals(self)


# NOTE(review): the upper frame bound is hard-coded; presumably it matches the
# number of rows in df_train — confirm.
env_train = MyCustomEnv(
    df=df_train,
    window_size=12,
    frame_bound=(12, 440473),
)

## 3. Build Environment and Train

In [None]:
# Presumably the directory where trained models are meant to be stored.
# NOTE(review): nothing in the visible cells reads save_path or saves the model.
save_path = os.path.join('Training', 'Models')

In [None]:
# DummyVecEnv takes a list of zero-argument factories; the single factory
# here hands back the already-constructed training environment.
env_wrapped = DummyVecEnv([lambda: env_train])

In [None]:
from stable_baselines3 import DQN

# DQN agent with the multi-layer-perceptron policy.
# NOTE(review): the notebook title mentions LSTM, but "MlpPolicy" is used here.
# `seed` is passed explicitly so the model re-seeds its random generators (and
# the wrapped environment / action space) when it is built. Re-running this
# cell on its own then reproduces the same run instead of depending on how
# much randomness earlier cells have already consumed.
model = DQN("MlpPolicy", env_wrapped, verbose=2, seed=seed)

In [None]:
# Training budget handed to learn() as total_timesteps.
train_timesteps = 10000
# Train the DQN model on the wrapped training environment.
model.learn(total_timesteps=train_timesteps)

## 4. Evaluation

In [None]:
# Run the trained policy once over the test data, using deterministic actions.
# NOTE(review): the upper frame bound is hard-coded; presumably it matches the
# number of rows in df_test — confirm.
env_test = MyCustomEnv(df=df_test, window_size=12, frame_bound=(12, 224933))
obs, info = env_test.reset()

# step() returns (obs, reward, terminated, truncated, info). The episode is
# over as soon as either flag is set, so both are checked rather than only
# the fourth element of the tuple.
done = False
while not done:
    action, _states = model.predict(obs, deterministic=True)
    obs, rewards, terminated, truncated, info = env_test.step(action)
    done = terminated or truncated

print("info", info)

# NOTE(review): this unpacking requires render_all() to return the short and
# long tick lists — presumably a locally modified gym-anytrading; confirm.
short_ticks, long_ticks = env_test.render_all()

## 5. Store the results, Create plots

In [None]:
from helper_code.metrics_calculation import *
# Label for this run; passed to calculate_metrics_create_plots below.
model_name = 'test'

In [None]:
# NOTE(review): `model.losses` is not defined anywhere in the visible cells —
# presumably provided by a customised DQN build; confirm it exists.
final_results = calculate_metrics_create_plots(
    env_train,
    env_test,
    df_test,
    model.losses,
    short_ticks,
    long_ticks,
    model_name,
)

In [None]:
# Bare expression as the cell's last statement so the notebook displays it.
final_results