# FrostAura Plutus
### Mark 10
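This iteration of the decision engine will consist of a neural-network-based architecture: a deep Q-learning agent. Note that the agent configured below currently uses Tensorforce's `policy_gradient` objective rather than a Q-learning objective.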

## Import Data 

In [None]:
# Load features from pickle generated by the ./parse_market_data notebook.
# NOTE(security): pickle.load can execute arbitrary code while deserializing, so only load
# a file that was produced by a trusted run of that notebook.
import pickle

# Path to the pickled feature data (despite the name, this file is read as data, not as a model).
model_file_path = './data/featurized_market_data.p'

with open(model_file_path, 'rb') as fp:
    # The loaded object is indexed by pair name in the next cell.
    featurized_market_data = pickle.load(fp)

In [None]:
# Configure the pair we will work with for testing.
# The name is used as the lookup key into the loaded feature data and, further down,
# is passed to the environment and used to derive the agent's model directory.
pair_name = 'AAVE_BTC'
# Feature data for the selected pair (presumably a DataFrame, going by the name - TODO confirm).
price_movement_df = featurized_market_data[pair_name]

## Environment Setup

In [None]:
# Execute the trading-environment notebook inside this kernel so the names it defines become
# available here (presumably including CryptoPairTradingEnv, which is used below - TODO confirm).
%run ./fa.intelligence.notebooks/utilities/reinforcement_learning/environments/crypto_pair_trading_environment.ipynb

In [None]:
# Passed to the environment as memory_window_size (presumably the number of past hourly
# observations visible to the agent at each step - TODO confirm against the environment notebook).
hours_of_memory = 24
env = CryptoPairTradingEnv(price_movement_df, pair_name, max_stake_count=1, memory_window_size=hours_of_memory)
# Initial reset of the raw environment. `done` is the module-level loop flag that the
# training and evaluation cells below read in their `while not done` loops.
states = env.reset()
done = False

## Setup a Deep Reinforcement Learning Agent

In [None]:
# Environmental dependencies.
import os

In [None]:
# Tensorforce dependencies.
from tensorforce import Agent, Environment

In [None]:
# Function to create a TensorForce agent and environment wrapper.
def create_tensorforce_agent(env_name, gym_environment, model_path='./data/models/tf.{}.{}'):
    """Build a Tensorforce environment wrapper and a matching agent.

    The model directory is derived from ``model_path`` by formatting in the
    lower-cased ``env_name`` twice (the second copy with spaces replaced by
    underscores). If that directory already exists, the agent is loaded from
    its checkpoint; otherwise a new agent is created whose saver writes to it.

    Args:
        env_name: Name used to derive the model directory.
        gym_environment: Environment instance; it is reset and then wrapped
            via ``Environment.create``.
        model_path: Format string with two positional placeholders.

    Returns:
        A tuple ``(agent, environment)`` holding the loaded or newly created
        agent and the Tensorforce environment wrapper.
    """
    gym_environment.reset()

    lowered_name = env_name.lower()
    normalized_path = model_path.format(lowered_name, lowered_name.replace(' ', '_'))
    environment = Environment.create(environment=gym_environment)

    # Guard clause: reuse the stored agent when its directory is already on disk.
    if os.path.exists(normalized_path):
        print('Loading existing model.')
        loaded_agent = Agent.load(
            directory=normalized_path,
            format='checkpoint',
            environment=environment
        )
        return loaded_agent, environment

    print(f'No directory "{normalized_path}" exists. Creating a new model.')

    # Checkpointing settings for the new agent, written to the derived directory.
    saver_config = dict(
        directory=normalized_path,
        frequency=50,
        max_checkpoints=5
    )
    optimizer_config = dict(optimizer='adam', learning_rate=1e-3)

    new_agent = Agent.create(
        saver=saver_config,
        agent='tensorforce',
        environment=environment,
        update=64,
        optimizer=optimizer_config,
        objective='policy_gradient',
        memory=15000,
        reward_estimation=dict(horizon=20)
    )

    return new_agent, environment

In [None]:
# Create a TensorForce agent and environment wrapper.
# If a model directory for this pair already exists the agent is loaded from it;
# otherwise a new agent is created.
tf_agent, tf_environment = create_tensorforce_agent(pair_name, env)

## Train the Model

In [None]:
# Train the agent for a fixed number of episodes using the act/execute/observe cycle.
episode_count = 100

for ei in range(episode_count):
    states = tf_environment.reset()
    # Reset the terminal flag for every episode. Without this, `done` stays truthy once the
    # first episode finishes and every later episode would skip its interaction loop entirely.
    done = False

    while not done:
        actions = tf_agent.act(states=states)
        # execute() returns (states, terminal, reward), in that order.
        states, done, reward = tf_environment.execute(actions=actions)
        tf_agent.observe(terminal=done, reward=reward)

    print(f'Episode {ei + 1} Reward: {env.total_reward}. Balance: {env.balance}.')

## Evaluate the Model

In [None]:
# Run a single evaluation episode without feeding experience back to the agent.
states = tf_environment.reset()
# Start from a non-terminal flag. `done` is left truthy by the training cell, so without
# this reset the loop below would never execute and the printed figures would be stale.
done = False

while not done:
    # independent=True: this act() call is not paired with a following observe().
    actions = tf_agent.act(states=states, independent=True)
    states, done, reward = tf_environment.execute(actions=actions)

# NOTE: `ei` is the loop index left over from the training cell, so the label printed here
# is the number of the last training episode.
print(f'Evaluation {ei + 1} Reward: {env.total_reward}. Balance: {env.balance}.')

# Next
### Mark 11
Add another dimension to the observation space so that all pairs are available, allowing the agent to learn causal relationships between pairs. For example, if ETH dips, BTC has a certain probability of responding to that in a particular way.