In [None]:
from agent import KalmanSR
from environment import SimpleMDP
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import networkx as nx
from dynamic_programming import value_iteration
import seaborn as sns
%matplotlib notebook

# Set up the problem 

In [None]:
# Build the MDP environment used throughout the notebook.
# NOTE(review): the argument 5 is presumably the number of states (the plotting
# layout later in the notebook places nodes 0-4) — confirm against SimpleMDP.
env = SimpleMDP(5)

In [None]:
# Build the environment's graph; presumably required before env.show_graph()
# can draw it — TODO confirm against SimpleMDP.
env.create_graph()

In [None]:
# Lay the five nodes (0-4) out on a horizontal line: node k sits at (k, 0).
positions = {node: (node, 0) for node in range(5)}

# Open a fresh figure so the graph does not draw into an earlier one.
plt.figure()
env.show_graph(layout=positions)

In [None]:
# Run value iteration on the environment; the result is unpacked as
# (policy, values) and reused by the cells that follow.
optimal_policy, optimal_values = value_iteration(env)

In [None]:
# Display the policy returned by value iteration.
optimal_policy

In [None]:
# Display the values returned by value iteration.
optimal_values

In [None]:
# Display the environment's transition matrix under the value-iteration policy.
env.get_transition_matrix(optimal_policy)

In [None]:
# Successor representation computed by the environment itself for the
# value-iteration policy, with discount gamma = 0.9. It serves as the reference
# ("true") SR that the learned estimate is compared against later.
M = env.get_successor_representation(optimal_policy, gamma=.9)
M

In [None]:
# Heatmap of the reference successor representation.
# Draw on an explicit Axes instead of pyplot's implicit "current axes", so the
# title is attached to the heatmap regardless of what was plotted before.
fig, ax = plt.subplots()
sns.heatmap(M, ax=ax)
# The trailing semicolon keeps the Text(...) repr out of the cell output.
ax.set_title('True successor representation');

# Learning the successor representation using Kalman TD 

In [None]:
# Create the Kalman-based successor-representation learner and attach it to
# the environment built above.
agent = KalmanSR(environment=env)

In [None]:
# Number of training episodes to run.
N_EPISODES = 100

# Map episode index -> the per-step record returned by that episode.
# The record is stored directly rather than through a temporary called
# `results`: that name is used for the results DataFrame built in the next
# cell, and rebinding it here would break the later `results.iloc[...]` cells
# whenever this cell is re-run on its own.
all_results = {}
for trial in range(N_EPISODES):
    all_results[trial] = agent.train_one_episode()

In [None]:
# Flatten the nested {trial: {step: record}} mapping into one table with a
# row per (trial, step) pair; the tuple keys become a two-level row index.
results = pd.DataFrame.from_dict(
    {
        (trial, step): record
        for trial, episode in all_results.items()
        for step, record in episode.items()
    },
    orient='index',
)

# Name the two index levels: episode number and step within the episode.
results.index.names = ['trial', 't']

In [None]:
# Preview the first rows of the per-step results table.
results.head()

In [None]:
# Take the 'SR' entry of the very last recorded step and show it rounded to
# three decimals for easier reading.
final_sr = results.iloc[-1]['SR']
np.around(final_sr, decimals=3)

In [None]:
# Heatmap of the 'SR' entry recorded at the very last training step.
# Draw on an explicit Axes instead of pyplot's implicit "current axes", so the
# title is attached to the heatmap regardless of what was plotted before.
fig, ax = plt.subplots()
sns.heatmap(results.iloc[-1]['SR'], ax=ax)
# The trailing semicolon keeps the Text(...) repr out of the cell output.
ax.set_title('Kalman estimate of successor representation');

In [None]:
# Display the 'cov' entry of the last recorded step.
# NOTE(review): presumably the Kalman filter's covariance estimate — confirm
# against KalmanSR.
results.iloc[-1]['cov']