# Install Dependencies

In [None]:
#!pip install stable-baselines3 gym-anytrading gym
#!pip install finta
#!pip install quantstats

In [None]:
# Gym stuff
import gym
import gym_anytrading
from gym_anytrading.envs import StocksEnv

# Stable baselines - rl stuff
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.callbacks import EvalCallback, StopTrainingOnRewardThreshold
from stable_baselines3 import A2C

#Quant Finance
from finta import TA
import quantstats as qs

# Processing libraries
import os
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

# Source Trading Data

In [None]:
#Get Key for Alphavantage API
# readline() keeps the trailing newline, so surrounding whitespace is
# stripped before the key is embedded in any request URL.
with open('key.txt', 'r') as f:
    key = f.readline().strip()

In [None]:
#Download 3 months of data (Alphavantage only provides 1 month per call)
# The URLs have to be formatted with the real key: a plain string literal
# would send the text "{key}" to the API instead of the key itself.
# The key is stripped here as well, so this cell stays correct even when the
# value read from key.txt still carries a trailing newline.
api_key = key.strip()
url_template = (
    'https://www.alphavantage.co/query?function=TIME_SERIES_INTRADAY_EXTENDED'
    '&symbol=AAPL&interval=5min&slice=year1month{month}'
    '&apikey={key}&datatype=csv'
)
month_1 = url_template.format(month=1, key=api_key)
month_2 = url_template.format(month=2, key=api_key)
month_3 = url_template.format(month=3, key=api_key)

#Grab the Data as CSV (read_csv already returns a DataFrame, no re-wrapping needed)
data_1 = pd.read_csv(month_1)
data_2 = pd.read_csv(month_2)
data_3 = pd.read_csv(month_3)

#Combine the three months into one Dataframe
# pd.concat is used because DataFrame.append was removed in pandas 2.0;
# like append, it keeps each month's original row labels.
data = pd.concat([data_1, data_2, data_3])

data.head(3)

In [None]:
#Parse the 'time' column into datetimes, then promote it to the index
timestamps = pd.to_datetime(data['time'])
data['time'] = timestamps
data = data.set_index('time')
data.head()

In [None]:
#Order the rows chronologically: earliest time first, latest time last
data = data.sort_values(by='time', ascending=True)
data.head()

In [None]:
#Title-case the price/volume column names for gym formatting reasons
column_titles = {
    name: name.capitalize()
    for name in ('open', 'high', 'low', 'close', 'volume')
}
data = data.rename(columns=column_titles)

In [None]:
# Show the (rows, columns) dimensions of the combined dataset
data.shape

In [None]:
#Data types need to be numbers, not objects
data.dtypes

# Add Custom Indicators

### SMA RSI OBV

In [None]:
#Compute each technical indicator in turn and store it as a new column
indicator_builders = {
    'RSI': lambda frame: TA.RSI(frame, 16),
    'SMA': TA.SMA,
    'OBV': TA.OBV,
}
for column_name, build in indicator_builders.items():
    data[column_name] = build(data)

#Replace every missing value in the frame with 0
data = data.fillna(0)

# Build Gym

In [None]:
#Create a function to properly format data frame to be passed through environment
def signals(env):
    """Slice the environment's data frame into prices and feature rows.

    Reads ``env.df``, ``env.frame_bound`` and ``env.window_size``. The rows
    taken run from ``frame_bound[0] - window_size`` up to, but not
    including, ``frame_bound[1]``.

    Returns a tuple ``(prices, signal_features)`` of NumPy arrays:
    ``prices`` is the 'Low' column, and ``signal_features`` holds the
    Low, Open, High, Volume, RSI, SMA and OBV columns in that order.
    """
    first_row = env.frame_bound[0] - env.window_size
    last_row = env.frame_bound[1]
    window = slice(first_row, last_row)

    feature_columns = ['Low', 'Open', 'High', 'Volume', 'RSI', 'SMA', 'OBV']
    frame = env.df

    prices = frame['Low'].to_numpy()[window]
    signal_features = frame[feature_columns].to_numpy()[window]
    return prices, signal_features

In [None]:
# Preview the first rows of the data frame that is passed to the environment
data.head()

In [None]:
#Replace default data process with custom function from above
class MyCustomEnv(StocksEnv):
    """StocksEnv variant whose data processing is delegated to ``signals``."""

    def _process_data(self):
        # Hand the environment itself to signals(), which reads its
        # df, frame_bound and window_size attributes.
        return signals(self)
    
#Initialize the training environment: window size plus the row range used for training
training_settings = {
    'df': data,
    'window_size': 10,
    'frame_bound': (10, 9000),
}
env2 = MyCustomEnv(**training_settings)

In [None]:
#Wrap our environment in a Dummy Vector environment
def env_maker():
    """Return the already-built training environment."""
    return env2


env = DummyVecEnv([env_maker])

# Train Agent

In [None]:
# Locations used to store the callback output and the saved model
base_dir, sub_dir = 'reinforcement_learning', 'tmp'
save_path = os.path.join(base_dir, sub_dir)
log_dir = "reinforcement_learning/tmp/"

In [None]:
#Stop training once the reward threshold has been reached
stop_callback = StopTrainingOnRewardThreshold(verbose=1, reward_threshold=200)

#Check training & evaluate performance until the threshold has been met
eval_settings = {
    'callback_on_new_best': stop_callback,
    'eval_freq': 10000,
    'best_model_save_path': save_path,
    'verbose': 1,
}
eval_callback = EvalCallback(env, **eval_settings)

In [None]:
#Build the A2C agent on the vectorised environment, then train it
training_steps = 1000000
model = A2C(policy='MlpPolicy', env=env, verbose=1)
model.learn(total_timesteps=training_steps, callback=eval_callback)

# Test

In [None]:
#Load our trained model from the saved best-model archive
best_model_file = "reinforcement_learning/tmp/best_model.zip"
model = A2C.load(best_model_file)

In [None]:
#Create a new environment with validation data
validation_settings = {
    'df': data,
    'window_size': 10,
    'frame_bound': (8900, 11243),
}
env = MyCustomEnv(**validation_settings)
obs = env.reset()

In [None]:
# Step the trained model through the environment until the episode ends.
# The observation is passed to predict() as-is (no batch axis is added).
done = False
while not done:
    action, _states = model.predict(obs)
    obs, rewards, done, info = env.step(action)

# Report the info returned by the final step
print("info", info)

In [None]:
#Plot the results on a wide figure with a white background
figure_size = (15, 6)
plt.figure(figsize=figure_size, facecolor='w')
plt.cla()  # start from clean axes before the environment draws on them
env.render_all()
plt.show()

# Quant Reports

In [None]:
qs.extend_pandas()

# Align the recorded profit history with the timestamps it belongs to.
# The first timestamp is the row right after the environment's lower frame
# bound; the index slice is capped to the history length so the Series can
# still be built when `data` extends past the environment's frame.
profit_history = env.history['total_profit']
first_tick = env.frame_bound[0] + 1
net_worth = pd.Series(
    profit_history,
    index=data.index[first_tick:first_tick + len(profit_history)],
)

# Step-to-step percentage change; the first entry is dropped because it has
# no previous value to compare against.
returns = net_worth.pct_change().iloc[1:]

qs.reports.full(returns)

# Future Improvements

* ~~Create custom indicators for actions~~
* ~~Create a callback function to stop and save best training weights~~
* ~~Import a larger dataset for a longer period of time~~
* Try different RL Models like DQN or PPO
* ~~Bring in some quantstats to evaluate the performance better~~