# FrostAura Plutus
### Mark 10
This iteration of the decision engine uses a neural-network-based architecture: a deep Q-learning (DQN) agent.

## Import Data 

In [51]:
# Load features from pickle generated by the ./parse_market_data notebook.
import pickle

# Path of the pickle written by the ./parse_market_data notebook.
model_file_path = './data/featurized_market_data.p'

# NOTE: unpickling can execute arbitrary code, so only load files that this
# project generated itself.
with open(model_file_path, mode='rb') as fp:
    featurized_market_data = pickle.load(fp)

In [52]:
# Configure the pair we will work with for testing.
# `pair_name` is the key used to look up this pair's entry in the loaded market data;
# it is also passed to the trading environment and used in the saved-weights file name
# further down the notebook.
pair_name = 'AAVE_BTC'
price_movement_df = featurized_market_data[pair_name]

## Environment Setup

In [53]:
%run ./fa.intelligence.notebooks/utilities/reinforcement_learning/environments/crypto_pair_trading_environment.ipynb

In [54]:
# Size of the memory window handed to the environment.
hours_of_memory = 24

# Single-pair trading environment built from the selected price-movement frame.
env = CryptoPairTradingEnv(
    price_movement_df,
    pair_name,
    memory_window_size=hours_of_memory,
    max_stake_count=1,
)

# Initial loop state shared with the TensorForce cells further down.
done = False
states = env.reset()

  self.memory_shape = (self.memory_window_size, self.data.copy().drop(PriceMovementColumns.Time.value, 1).shape[1])
  return self.current_window.drop(PriceMovementColumns.Time.value, 1).to_numpy()


## Setup a Deep Reinforcement Learning Agent

In [55]:
# Environmental dependencies.
import os

In [56]:
# Tensorforce dependencies.
from tensorforce import Agent, Environment

In [57]:
# Keras dependencies.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Flatten
from tensorflow.keras.optimizers import Adam

from rl.agents.dqn import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

In [58]:
# Function to create a TensorForce agent and environment wrapper.
def create_tensorforce_agent(env_name, gym_environment, model_path='./data/models/tf.{}.{}'):
    """Wrap a gym environment for TensorForce and load or create an agent for it.

    Args:
        env_name: Name used to fill both placeholders of ``model_path``
            (lower-cased; spaces become underscores in the second one only).
        gym_environment: Environment object; it is reset before being wrapped.
        model_path: Format string with two positional placeholders that yields
            the checkpoint directory.

    Returns:
        Tuple ``(agent, environment)`` where ``environment`` is the result of
        ``Environment.create`` and ``agent`` is either loaded from the
        checkpoint directory (when it exists) or newly created.
    """
    gym_environment.reset()

    lowered_name = env_name.lower()
    normalized_path = model_path.format(lowered_name, lowered_name.replace(' ', '_'))
    environment = Environment.create(environment=gym_environment)

    # Existing checkpoint directory: restore instead of building a new agent.
    if os.path.exists(normalized_path):
        print('Loading existing model.')
        restored_agent = Agent.load(
            directory=normalized_path,
            format='checkpoint',
            environment=environment
        )
        return restored_agent, environment

    print(f'No directory "{normalized_path}" exists. Creating a new model.')

    # The new agent is configured to write its checkpoints to the same directory
    # that is probed above on later runs.
    checkpoint_settings = dict(directory=normalized_path, frequency=50, max_checkpoints=5)
    optimizer_settings = dict(optimizer='adam', learning_rate=1e-3)

    new_agent = Agent.create(
        agent='tensorforce',
        environment=environment,
        saver=checkpoint_settings,
        update=64,
        optimizer=optimizer_settings,
        objective='policy_gradient',
        memory=15000,
        reward_estimation=dict(horizon=20)
    )
    return new_agent, environment

In [59]:
# Function to create a Keras agent.
def create_keras_agent(env):
    """Build and compile a keras-rl DQN agent for the given environment.

    The network flattens the observation window, applies three ReLU hidden
    layers of 28 units each and ends in a linear layer with one output per
    action.

    Args:
        env: Environment exposing ``action_space.n`` and
            ``observation_space.shape``.

    Returns:
        The compiled ``DQNAgent``.
    """
    # The same window length feeds both the network input shape and the replay
    # memory; keeping it in one place stops the two from drifting apart.
    window_length = 1
    hidden_layer_count = 3
    hidden_units = 28

    nb_actions = env.action_space.n
    obs_shape = (window_length,) + env.observation_space.shape

    print(f'Input shape for the network is {obs_shape}')

    # Next, we build a very simple model. This is the network structure.
    model = Sequential()
    model.add(Flatten(input_shape=obs_shape))
    for _ in range(hidden_layer_count):
        model.add(Dense(hidden_units))
        model.add(Activation('relu'))
    model.add(Dense(nb_actions))
    model.add(Activation('linear'))
    # summary() prints the table itself and returns None, so wrapping it in
    # print() would emit a stray "None" line.
    model.summary()

    memory = SequentialMemory(limit=50000, window_length=window_length)
    policy = BoltzmannQPolicy()
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10, target_model_update=1e-2, policy=policy)
    dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])

    return dqn

In [60]:
# Create a TensorForce agent and environment wrapper. 
#tf_agent, tf_environment = create_tensorforce_agent(pair_name, env)

In [61]:
# Create a Keras agent.
# Builds and compiles the DQN agent for the trading environment `env`; the training
# and evaluation cells below operate on `keras_agent`.
keras_agent = create_keras_agent(env)

Input shape for the network is (1, 24, 36)
Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_6 (Flatten)          (None, 864)               0         
_________________________________________________________________
dense_24 (Dense)             (None, 28)                24220     
_________________________________________________________________
activation_24 (Activation)   (None, 28)                0         
_________________________________________________________________
dense_25 (Dense)             (None, 28)                812       
_________________________________________________________________
activation_25 (Activation)   (None, 28)                0         
_________________________________________________________________
dense_26 (Dense)             (None, 28)                812       
_________________________________________________________________
activation_

## Train the Model

In [62]:
# Toggle for the keras-rl training and evaluation cells.
keras = True

if keras:
    episode_count = 100
    # Every round overwrites the same weights file for the configured pair.
    weights_path = f'./data/models/keras.{pair_name}.h5f'

    for ei in range(episode_count):
        # One training round of 50000 environment steps.
        keras_agent.fit(env, nb_steps=50000, verbose=2, visualize=False)
        # Render the environment once the round has finished.
        env.render()
        # Persist the model state.
        keras_agent.save_weights(weights_path, overwrite=True)

Training for 50000 steps ...


  return self.current_window.drop(PriceMovementColumns.Time.value, 1).to_numpy()
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.loc[index, col] = value
  return self.current_window.drop(PriceMovementColumns.Time.value, 1).to_numpy()


ValueError: Error when checking input: expected flatten_6_input to have shape (1, 24, 36) but got array with shape (1, 25, 36)

In [None]:
# Toggle for the TensorForce training and evaluation cells. Enabling it requires
# `tf_agent` and `tf_environment`, which are only defined when the (currently
# commented-out) create_tensorforce_agent call earlier in the notebook is run.
tensorforce = False

if tensorforce:
    episode_count = 100

    for ei in range(episode_count):
        states = tf_environment.reset()
        # Reset the terminal flag for every episode. Without this, `done` stays
        # truthy after the first episode ends and all later episodes would skip
        # the interaction loop entirely.
        done = False

        while not done:
            actions = tf_agent.act(states=states)
            # execute() returns (next states, terminal flag, reward) in that order.
            states, done, reward = tf_environment.execute(actions=actions)
            tf_agent.observe(terminal=done, reward=reward)

        print(f'Episode {ei + 1} Reward: {env.total_reward}. Balance: {env.balance}.')

## Evaluate the Model

In [None]:
# Run the keras-rl agent for 100 test episodes without visualisation.
if keras:
    keras_agent.test(env, visualize=False, nb_episodes=100)

In [None]:
if tensorforce:
    states = tf_environment.reset()
    # Start from a non-terminal flag. `done` is a kernel-wide variable that is left
    # truthy by any previously finished episode, which would otherwise make the
    # evaluation loop below never execute.
    done = False

    while not done:
        # independent=True: act outside the training interaction, so no observe()
        # call follows.
        actions = tf_agent.act(states=states, independent=True)
        states, done, reward = tf_environment.execute(actions=actions)

    # NOTE(review): `ei` is the loop variable left behind by the TensorForce training
    # cell, so this line assumes that cell ran earlier in the same kernel session.
    print(f'Evaluation {ei + 1} Reward: {env.total_reward}. Balance: {env.balance}.')

# Next
### Mark 11
Add another dimension to the observable space so that all pairs are available, allowing the agent to learn causal relationships between pairs. For example, if ETH dips, BTC has a certain probability of responding to that in a certain way.