# **Influence Calculation Outline**

In [None]:
# Re-import edited project modules automatically so changes to the local
# .py files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# NOTE(review): `%matplotlib` with no argument presumably selects the default
# GUI backend (figures open in separate windows rather than inline) — confirm
# this is intended for the environment rendering below.
%matplotlib

Necessary imports.

In [None]:
import gin
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import models
import utils

from dqn import DQN
from circle import CircleEnv

# Load configuration for DQN and model.
# Every DQN() in this notebook is constructed without arguments, so its
# settings presumably come from this gin file — run this cell before any
# cell that builds an agent.
gin.parse_config_file('configs/influence/influence.gin')

First we train our __oracle network__.

In [None]:
%%time
# Training budget: number of episodes and the cap on steps inside each one.
episodes = 80
steps = 25

# Snapshot the untrained online and target networks before any learning
# happens, so the same starting weights can be reloaded later.
oracle = DQN()
oracle.model.save('oracle_init_model.h5')
oracle.target_model.save('oracle_init_target_model.h5')

episode_summary = "Episode: {:2} | Start: ({:5.2f}, {:5.2f}) | Return: {:5.2f} | e: {:.4f}"

env = CircleEnv()
for episode in range(episodes):
    start = env.reset()
    state = start
    total_reward = 0
    for step in range(steps):
        # Call env.render() here to watch the agent while it trains.
        action = oracle.act(state)
        next_state, reward, done = env.step(action)

        # Store the transition together with its (episode, step) position,
        # then run one replay update.
        transition = (state, action, reward, next_state, episode, step, done)
        oracle.remember(transition)
        oracle.replay()

        state = next_state
        total_reward += reward

        # Report and stop once the environment signals done or the step
        # budget for this episode is used up.
        out_of_steps = step == steps - 1
        if done or out_of_steps:
            print(episode_summary.format(episode, start[0], start[1], total_reward, oracle.epsilon))
            break

        # On every 10th step (excluding step 0): decay exploration while it
        # is still at or above the floor, then refresh the target network.
        if step > 0 and step % 10 == 0:
            if oracle.epsilon >= oracle.epsilon_min:
                oracle.epsilon *= oracle.epsilon_decay
            oracle.update_target_model()

# Persist the recorded transitions and the trained network.
oracle.save_training_data('oracle_training_data.h5')
oracle.model.save('oracle_model.h5')

Let's demo our oracle.

In [None]:
%matplotlib
# Run the trained oracle in a fresh environment, separate from the training one.
demo_env = CircleEnv()
utils.demo_agent(
    oracle,
    demo_env,
    10,  # presumably the number of demo episodes — TODO confirm against utils.demo_agent
    25,  # presumably the steps per episode (matches the training step cap) — TODO confirm
)

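We record the file names of the oracle artifacts saved during training (initial weights, trained model, and training data) so that later cells can load them.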

In [None]:
# File names of the artifacts written by the training cell, listed in the
# order they are produced.

# Weights of the online and target networks before any training.
oracle_init_model = 'oracle_init_model.h5'
oracle_init_target_model = 'oracle_init_target_model.h5'

# Transitions recorded while the oracle trained.
oracle_training_data = 'oracle_training_data.h5'

# The trained oracle network.
oracle_model = 'oracle_model.h5'

Loading the training data and setting up the test data.

In [None]:
training_data = pd.read_hdf(oracle_training_data, key='training')
%time test_data = utils.generate_agent_actions(oracle, n=10000)

print(training_data.info())
print('='*40)
print(test_data.info())

Inspect our training and test data.

Create a DataFrame with every unique state from the training data.

In [None]:
# One row per distinct (state_x, state_y) pair seen during training.
unique_states = training_data[['state_x', 'state_y']].drop_duplicates()

# DataFrame.info() prints its own summary and returns None; calling it directly
# avoids the extra "None" line that print(unique_states.info()) produced.
unique_states.info()

Now for the meat of the calculations: the __loop__. (We might want to consider creating an *influence function* and calling `apply` on the rows of `unique_states`, which may be faster than looping.)

In [None]:
# Computes the influence of every occurrence of every unique training state.
#
# For each occurrence (episode, step) two agents are retrained from the
# oracle's initial weights on modified copies of the training data, and the
# difference in their accuracy on `test_data` is recorded as the influence.
#
# Results:
#   influences                - one row per state occurrence
#                               (state_x, state_y, episode, step, influence)
#   max_influence_per_state   - max influence for each unique (state_x, state_y)
#
# NOTE: a possible alternative is an influence function applied to each row of
# `unique_states` (DataFrame.apply with axis=1, passing training_data and
# test_data as keyword arguments) that returns the influence for that state.


def _agent_from_oracle_init():
    """Return a new DQN whose model and target model are loaded from the oracle's saved initial weights."""
    agent = DQN()
    agent.model.load_weights(oracle_init_model)
    agent.target_model.load_weights(oracle_init_target_model)
    return agent


# The test inputs and reference actions are the same for every retraining, so
# slice them out of `test_data` once instead of on every iteration.
test_states = test_data[['state_x', 'state_y']]
test_actions = test_data['action'].to_numpy()

influence_records = []
for _, state in unique_states.iterrows():
    # Use label-based access: `state` is indexed by 'state_x'/'state_y', and
    # integer keys on a label-indexed Series are a deprecated positional
    # fallback in pandas.
    # NOTE(review): drop_duplicates() compares whole rows; it is kept from the
    # original but is likely a no-op if (episode, step) pairs are unique —
    # TODO confirm.
    state_occurences = training_data[(training_data['state_x'] == state['state_x']) &
                                     (training_data['state_y'] == state['state_y'])].drop_duplicates()

    for _, state_occurence in state_occurences.iterrows():
        episode, step = state_occurence['episode'], state_occurence['step']

        # Every state except those that occur on or after the above step during the above episode.
        full_trace = training_data[(training_data['episode'] != episode) |
                                   (training_data['step'] < step)]
        # Every state except those that occur after the above step during the above episode.
        partial_trace = training_data[(training_data['episode'] != episode) |
                                      (training_data['step'] <= step)]

        # Set up our two agents to train on each of the modified training sets above.
        ft_agent = _agent_from_oracle_init()
        pt_agent = _agent_from_oracle_init()

        # Train our agents and get their actions on the testing data.
        utils.train_agent_offline(ft_agent, full_trace.to_numpy())
        utils.train_agent_offline(pt_agent, partial_trace.to_numpy())

        ft_agent_actions = utils.get_agent_actions(ft_agent, test_states)
        pt_agent_actions = utils.get_agent_actions(pt_agent, test_states)

        # Get accuracies against the oracle's actions.
        ft_agent_acc = utils.agent_accuracy(ft_agent_actions, test_actions)
        pt_agent_acc = utils.agent_accuracy(pt_agent_actions, test_actions)

        # Relative influence.
        delta_acc = ft_agent_acc - pt_agent_acc
        influence_records.append({
            'state_x': state_occurence['state_x'],
            'state_y': state_occurence['state_y'],
            'episode': episode,
            'step': step,
            'influence': delta_acc,
        })
        # The {:.4f} format already rounds to four decimals for display.
        print("Influence of ({:5.2f}, {:5.2f}) at episode {:2}, step {:2}: {:.4f}".format(
            state_occurence['state_x'],
            state_occurence['state_y'],
            state_occurence['episode'],
            state_occurence['step'],
            delta_acc))

# Build the result frame once from the collected records; explicit columns keep
# the frame well-formed even when no occurrences were processed.
influences = pd.DataFrame(influence_records,
                          columns=['state_x', 'state_y', 'episode', 'step', 'influence'])

# Aggregate by taking the max influence per unique state.
max_influence_per_state = influences.groupby(['state_x', 'state_y'], sort=False)['influence'].max()

Testing to make sure that our retraining step is training properly.

In [None]:
%%time
# Sanity check: an agent restarted from the oracle's initial weights and
# retrained offline on the unmodified training data is expected to reproduce
# the oracle's actions on the test set.
test_agent = DQN()
test_agent.model.load_weights(oracle_init_model)
test_agent.target_model.load_weights(oracle_init_target_model)

utils.train_agent_offline(test_agent, training_data.to_numpy())
test_agent_actions = utils.get_agent_actions(test_agent, test_data[['state_x', 'state_y']])

# Pass the reference actions as a plain NumPy array, exactly as the influence
# loop does, so both call sites hand utils.agent_accuracy the same type.
acc = utils.agent_accuracy(test_agent_actions, test_data['action'].to_numpy())
print(acc)
assert acc == 1.0, "retrained agent does not reproduce the oracle's actions (accuracy {})".format(acc)