# EnKF Experiments - GCS

## Imports

In [1]:
# Imports
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pickle
from scipy.stats import shapiro, kruskal, mode
import seaborn as sns
import sys

In [2]:
sys.path.append('../../../../stationsim/')
from ensemble_kalman_filter import (
    EnsembleKalmanFilter,
    AgentIncluder,
    Inflation,
    ExitRandomisation,
    GateEstimator
)
from stationsim_gcs_model import Model

In [3]:
sys.path.append('../')
from experiment_utils import Modeller, Processor, Visualiser

In [4]:
# IPython magic: draw matplotlib figures inline in the notebook output
%matplotlib inline
# Seed NumPy's global random number generator with a fixed value
np.random.seed(42)

## Data paths

In [5]:
# Relative locations of the experiment 1 artefacts used in this notebook.
# Each value ends with '/' because file names are appended by plain string
# concatenation / f-string interpolation further down.

# Root of the per-configuration metrics CSV files read by master_func
data_dir = '../results/data/exp1/'
# Directory tree holding the pickled filter objects (p<pop_size>/model.pkl)
model_dir = '../results/models/exp1/'
# Destination for every PDF figure saved by the plotting functions
fig_dir = '../results/figures/exp1/'

## Functions

In [6]:
def tidy_dataframe(df, independent_col: str, dependent_cols: list):
    """Reshape a wide dataframe into long ("tidy") form.

    For each row of ``df`` and each column named in ``dependent_cols`` one
    record is emitted containing:

    * ``independent_col`` - the row's value in that column,
    * ``'variable'``      - the name of the dependent column,
    * ``'value'``         - the row's value in the dependent column.

    Records are ordered row by row and, within a row, in the order given by
    ``dependent_cols``.

    Returns
    -------
    pandas.DataFrame built from the list of records.
    """
    records = [
        {independent_col: entry[independent_col],
         'variable': name,
         'value': entry[name]}
        for _, entry in df.iterrows()
        for name in dependent_cols
    ]
    return pd.DataFrame(records)

In [7]:
def plot_path(output, aid, show_obs=True, show_od=True, show_fig=True,
              save_fig=False, ge=None, er=None):
    """Plot the truth, prior and posterior trajectories of one agent.

    Parameters
    ----------
    output : table indexable by column name; must provide
        ``{truth,prior,posterior}_{x,y}_{aid}`` and, depending on the flags
        below, ``obs_*``, ``origin_*`` and ``dest_*`` columns for ``aid``.
    aid : identifier of the agent, used to build the column names.
    show_obs : also scatter the observations.
    show_od : also scatter the origin and destination.
    show_fig : show the figure when true, otherwise close it.
    save_fig : write the figure to ``fig_dir`` as a PDF.
    ge, er : labels interpolated into the saved file name; both are required
        when ``save_fig`` is true.

    Raises
    ------
    ValueError if ``save_fig`` is set while ``ge`` or ``er`` is ``None``.
    """
    if save_fig and not (ge is not None and er is not None):
        raise ValueError('Provide values for ge and er when saving figures')

    print(f'Agent {aid}')
    plt.figure(figsize=(10, 10))

    # One line per series, drawn in a fixed order: truth, prior, posterior
    line_specs = (('truth', 'green'), ('prior', 'blue'), ('posterior', 'orange'))
    for series, colour in line_specs:
        plt.plot(output[f'{series}_x_{aid}'], output[f'{series}_y_{aid}'],
                 label=f'{series}_{aid}', color=colour)

    if show_obs:
        obs_x, obs_y = output[f'obs_x_{aid}'], output[f'obs_y_{aid}']
        plt.scatter(obs_x, obs_y, label=f'obs_{aid}', s=1, color='green')

    if show_od:
        origin_x, origin_y = output[f'origin_x_{aid}'], output[f'origin_y_{aid}']
        plt.scatter(origin_x, origin_y, label=f'origin_{aid}', s=25, color='blue')
        dest_x, dest_y = output[f'dest_x_{aid}'], output[f'dest_y_{aid}']
        plt.scatter(dest_x, dest_y, label=f'destination_{aid}', s=25, color='orange')

    # Axis labels and fixed plotting window
    plt.xlabel('$x$-location')
    plt.ylabel('$y$-location')
    plt.xlim((0, 740))
    plt.ylim((0, 700))
    plt.legend()

    if save_fig:
        target = f'{fig_dir}path_agent_{aid}_{ge}_{er}.pdf'
        plt.savefig(target)

    if not show_fig:
        plt.close()
    else:
        plt.show()

In [8]:
def present_gate_estimates(enkf, agent_number, show_initial=True, show_true=True,
                           show_fig=True, save_fig=False, ge=None, er=None):
    """Histogram the ensemble's exit-gate estimate for one agent.

    Reads the agent's entry from ``enkf.state_ensemble`` at row
    ``2 * enkf.population_size + agent_number`` and plots it as a histogram,
    optionally together with ``enkf.initial_gates[agent_number]`` and a marker
    for the base model's ``gate_out``.
    # NOTE(review): the row offset suggests the state holds all x values, then
    # all y values, then the gate entries - confirm against the filter class.

    When ``enkf`` has a ``gate_angles`` attribute the values are treated as
    target angles and the truth is drawn as the two edge angles of the true
    gate; otherwise they are treated as gate numbers and the ensemble mode is
    printed.

    Parameters
    ----------
    enkf : filter object providing the attributes mentioned above.
    agent_number : index of the agent to present.
    show_initial : overlay the histogram of the initial gate values.
    show_true : draw the dashed truth marker(s).
    show_fig : show the figure when true, otherwise close it.
    save_fig : write the figure to ``fig_dir`` as a PDF.
    ge, er : labels interpolated into the saved file name; both are required
        when ``save_fig`` is true.

    Raises
    ------
    ValueError if ``save_fig`` is set while ``ge`` or ``er`` is ``None``.
    """
    labels_missing = ge is None or er is None
    if save_fig and labels_missing:
        raise ValueError('Provide values for ge and er when saving figures')

    row = (2 * enkf.population_size) + agent_number
    target_gate = enkf.base_model.agents[agent_number].gate_out
    print(f'Base model gate value: {target_gate}')

    ensemble_gates = enkf.state_ensemble[row]

    # Shared appearance of the truth marker(s)
    truth_style = dict(linestyle='dashed', c='black')

    if not hasattr(enkf, 'gate_angles'):
        plt.xlabel('Target gate number')
        print(f'Ensemble modal gate value: {mode(ensemble_gates)}')
        if show_true:
            plt.axvline(target_gate, label='truth', **truth_style)
    else:
        edges = enkf.gate_angles[target_gate]
        plt.xlabel('Target angle')
        print(f'Gate edge angles: {edges}')
        if show_true:
            # Label only the second edge so the legend gets a single entry
            plt.axvline(edges[0], **truth_style)
            plt.axvline(edges[1], label='truth', **truth_style)

    print(f'Gate value across ensemble: {ensemble_gates}')

    plt.hist(ensemble_gates, label='final', alpha=0.5, color='orange')
    if show_initial:
        starting_gates = enkf.initial_gates[agent_number]
        plt.hist(starting_gates, label='initial', alpha=0.5, color='blue')

    plt.ylabel('Frequency')
    plt.legend()

    if save_fig:
        target = f'{fig_dir}gates_agent_{agent_number}_{ge}_{er}.pdf'
        plt.savefig(target)

    if not show_fig:
        plt.close()
    else:
        plt.show()

In [9]:
def _finish_figure(show):
    """Show the current matplotlib figure if ``show`` is true, else close it."""
    if show:
        plt.show()
    else:
        plt.close()


def _collect_agent_paths(enkf):
    """Flatten ``enkf.results`` into a dataframe with one row per record.

    Each row holds ``time`` plus, for every agent ``i`` in
    ``range(enkf.population_size)``, the x/y columns for truth, prior,
    posterior, observation, origin and destination.
    """
    pop_size = enkf.population_size
    rows = list()
    for r in enkf.results:
        d = {'time': r['time']}

        # Look every vector up once per record rather than once per agent
        truth = r['ground_truth']
        prior = r['prior']
        posterior = r['posterior']
        obs = r['observation']
        origin = r['origin']
        destination = r['destination']

        for i in range(pop_size):
            # Truth, observation, origin and destination are indexed as
            # interleaved pairs (2*i, 2*i+1); prior and posterior are indexed
            # as two halves (i, i + pop_size).
            d[f'truth_x_{i}'] = truth[2*i]
            d[f'truth_y_{i}'] = truth[(2*i)+1]

            d[f'prior_x_{i}'] = prior[i]
            d[f'prior_y_{i}'] = prior[i + pop_size]

            d[f'posterior_x_{i}'] = posterior[i]
            d[f'posterior_y_{i}'] = posterior[i + pop_size]

            d[f'obs_x_{i}'] = obs[2*i]
            d[f'obs_y_{i}'] = obs[(2*i)+1]

            d[f'origin_x_{i}'] = origin[2*i]
            d[f'origin_y_{i}'] = origin[(2*i)+1]
            d[f'dest_x_{i}'] = destination[2*i]
            d[f'dest_y_{i}'] = destination[(2*i)+1]

        rows.append(d)

    return pd.DataFrame(rows)


def master_func(p, e, a, s, ge, er, run_models=False, show_figs=False):
    """Run (optionally), process and plot one experiment 1 configuration.

    Steps, in order:

    1. If ``run_models`` is true, call ``Modeller.run_experiment_1``.
    2. Call ``Processor.process_experiment_1`` (always).
    3. Read ``metrics.csv`` for this configuration and save the exit-accuracy
       and error figures.
    4. Load the pickled filter, save a heatmap of its gain matrix.
    5. For every agent, save the path figure and the gate-estimate figure.

    All figures are written to ``fig_dir`` as PDFs regardless of
    ``show_figs``.

    Parameters
    ----------
    p : population size (passed as ``pop_size``; also selects the ``p<p>``
        sub-directory of the data and model directories).
    e : ensemble size (only used when ``run_models`` is true).
    a : assimilation period (only used when ``run_models`` is true).
    s : observation noise standard deviation (only used when ``run_models``
        is true).
    ge : gate estimator; its ``.name`` is used in file names.
    er : exit randomisation; its ``.name`` is used in file names.
    run_models : re-run the models before processing.
    show_figs : show each figure when true, otherwise close it after saving.
    """
    if run_models:
        # Run modeller
        Modeller.run_experiment_1(pop_size=p, ensemble_size=e, assimilation_period=a, obs_noise_std=s,
                                  standardise_state=True, inclusion=AgentIncluder.MODE_EN,
                                  gate_estimator=ge, exit_randomisation=er)

    # Process results
    Processor.process_experiment_1(pop_size=p, gate_estimator=ge, exit_randomisation=er)

    # Read results
    results = pd.read_csv(data_dir + f'p{p}/{er.name}_{ge.name}/metrics.csv')

    # Suffix shared by the figures saved directly in this function
    tag = f'{ge.name}_{er.name}'

    # Plot exit gate accuracy
    plt.figure()
    plt.plot(results['time'], results['exit_accuracy'])
    plt.xlabel('Time')
    plt.ylabel('Proportion of agents with correct gates')
    plt.savefig(f'{fig_dir}accuracy_{tag}.pdf')
    _finish_figure(show_figs)

    # Plot error
    plt.figure()
    plt.plot(results['time'], results['analysis'], label='posterior')
    plt.plot(results['time'], results['forecast'], label='prior')
    plt.legend()
    plt.xlabel('Time')
    plt.ylabel('Error in ensemble mean position')
    plt.ylim((0, 60))
    plt.savefig(f'{fig_dir}error_{tag}.pdf')
    _finish_figure(show_figs)

    # Read in enkf from the shared model directory.
    # NOTE(review): unlike the metrics path above, this path has no
    # gate-estimator / exit-randomisation component, so every configuration
    # with the same p reads the same model.pkl - confirm this is intended.
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    with open(f'{model_dir}p{p}/model.pkl', 'rb') as f:
        enkf = pickle.load(f)

    # Plot gain matrix
    plt.figure()
    sns.heatmap(enkf.gain_matrix, vmin=0, vmax=1)
    plt.savefig(f'{fig_dir}gain_matrix_{tag}.pdf')
    _finish_figure(show_figs)

    # Per-agent trajectories and gate estimates
    output = _collect_agent_paths(enkf)

    for i in range(enkf.population_size):
        plot_path(output, i, save_fig=True, ge=ge.name, er=er.name, show_fig=show_figs)
        present_gate_estimates(enkf, i, save_fig=True, ge=ge.name, er=er.name, show_fig=show_figs)
        print('\n')

## Experiment 1 - Setting Up EnKF

### Base model inclusion in error calculations

In [10]:
# # Set filter params
# p = 5
# e = 100
# a = 100
# s = 5
# ge = GateEstimator.ROUNDING
# er = ExitRandomisation.ALL_RANDOM

In [11]:
# param_list = [{'pop_size': 5, 'ens_size': 100, 'assimilation_period': 100, 'noise_std': 5,
#                'gate_estimator': GateEstimator.ROUNDING, 'exit_randomisation': ExitRandomisation.BY_AGENT},
#               {'pop_size': 5, 'ens_size': 100, 'assimilation_period': 100, 'noise_std': 5,
#                'gate_estimator': GateEstimator.ROUNDING, 'exit_randomisation': ExitRandomisation.ALL_RANDOM},
#               {'pop_size': 5, 'ens_size': 100, 'assimilation_period': 100, 'noise_std': 5,
#                'gate_estimator': GateEstimator.ROUNDING, 'exit_randomisation': ExitRandomisation.ADJACENT},
#               {'pop_size': 5, 'ens_size': 100, 'assimilation_period': 100, 'noise_std': 5,
#                'gate_estimator': GateEstimator.ANGLE, 'exit_randomisation': ExitRandomisation.BY_AGENT},
#               {'pop_size': 5, 'ens_size': 100, 'assimilation_period': 100, 'noise_std': 5,
#                'gate_estimator': GateEstimator.ANGLE, 'exit_randomisation': ExitRandomisation.ALL_RANDOM},
#               {'pop_size': 5, 'ens_size': 100, 'assimilation_period': 100, 'noise_std': 5,
#                'gate_estimator': GateEstimator.ANGLE, 'exit_randomisation': ExitRandomisation.ADJACENT}]              

In [12]:
# One configuration per pairing of the two gate estimators with the two exit
# randomisation schemes listed here; all other filter settings are held fixed.
param_list = [
    {'pop_size': 5, 'ens_size': 100, 'assimilation_period': 100, 'noise_std': 5,
     'gate_estimator': estimator, 'exit_randomisation': randomisation}
    for estimator in (GateEstimator.ROUNDING, GateEstimator.ANGLE)
    for randomisation in (ExitRandomisation.ALL_RANDOM, ExitRandomisation.ADJACENT)
]

In [13]:
# Process and plot every configuration without re-running the models;
# figures are saved to disk but not displayed.
for pl in param_list:
    master_func(
        p=pl['pop_size'],
        e=pl['ens_size'],
        a=pl['assimilation_period'],
        s=pl['noise_std'],
        ge=pl['gate_estimator'],
        er=pl['exit_randomisation'],
        run_models=False,
        show_figs=False,
    )

Agent 0
Base model gate value: 9
Gate edge angles: (-1.5707963267948966, -2.0570389598824015)
Gate value across ensemble: [-1.98453665 -1.11430868  0.61253756 -2.33082361 -2.00524525 -0.02702045
 -2.64604098  2.01926819 -1.28562642  1.99046114 -1.35208279  0.34702034
 -1.99719931 -2.00170783  1.90109368 -2.82772322 -2.01875491 -3.09611786
 -2.27643209  0.13432144 -1.18084337 -2.27432548 -2.7753623  -0.56343155
  0.41556591  1.26691648  1.29766726  0.61253756 -2.11231544 -1.99358228
 -0.76606237  0.46097539 -2.24187207 -2.81663072  0.32033796 -0.36236763
  1.90109368 -1.24351175 -0.95208079 -0.24218302 -0.60528739 -0.02702045
 -0.02702045  0.64917513  1.12757324 -2.23875779 -2.38402843 -1.66001322
  0.83012553  0.16711394  1.1179153   2.38396513 -2.38396513  0.76009684
  3.05734272 -2.0557317  -1.81231055 -1.07935079  0.83451178 -1.34984875
 -2.38396513 -2.28219951 -2.64604098  0.95214719 -0.06873903 -3.01025953
 -2.82650838  1.33255157  0.81480145 -1.11308811  0.2669745  -0.27167757
 -

In [14]:
# param_list = [{'pop_size': 5, 'ens_size': 100, 'assimilation_period': 100, 'noise_std': 5,
#                'gate_estimator': GateEstimator.ANGLE, 'exit_randomisation': ExitRandomisation.ADJACENT}]              

In [15]:
# for pl in param_list:
#     master_func(p=pl['pop_size'], e=pl['ens_size'], a=pl['assimilation_period'],
#                 s=pl['noise_std'], ge=pl['gate_estimator'], er=pl['exit_randomisation'],
#                 run_models=True, show_figs=False)

In [16]:
# Modeller.run_experiment_1(pop_size=p, ensemble_size=e, assimilation_period=a, obs_noise_std=s,
#                           standardise_state=True, inclusion=AgentIncluder.MODE_EN,
#                           gate_estimator=ge, exit_randomisation=er)

In [17]:
# Processor.process_experiment_1(pop_size=p)

#### Read data

In [18]:
# results = pd.read_csv(data_dir + f'p{p}/metrics.csv')
# results.head()

In [19]:
# plt.figure()
# plt.plot(results['time'], results['exit_accuracy'])
# plt.xlabel('Time')
# plt.ylabel('Proportion of agents with correct gates')
# plt.savefig(f'{fig_dir}accuracy_{ge.name}_{er.name}.pdf')
# plt.show()

In [20]:
# plt.figure()
# plt.plot(results['time'], results['analysis'], label='posterior')
# plt.plot(results['time'], results['forecast'], label='prior')
# plt.legend()
# plt.xlabel('Time')
# plt.ylabel('Error in ensemble mean position')
# plt.savefig(f'{fig_dir}error_{ge.name}_{er.name}.pdf')
# plt.show()

In [21]:
# with open(f'../results/models/exp1/p{p}/model.pkl', 'rb') as f:
#     enkf = pickle.load(f)

In [22]:
# plt.figure()
# sns.heatmap(enkf.gain_matrix)
# plt.savefig(f'{fig_dir}gain_matrix_{ge.name}_{er.name}.pdf')
# plt.show()

In [23]:
# print(enkf.gain_matrix)

In [24]:
# output = list()
# for r in enkf.results:
#     d = dict()
#     # get time
#     d['time'] = r['time']
    
#     for i in range(enkf.population_size):        
#         # get truth
#         truth = r['ground_truth']
#         d[f'truth_x_{i}'] = truth[2*i]
#         d[f'truth_y_{i}'] = truth[(2*i)+1]

#         # get prior
#         prior = r['prior']
#         d[f'prior_x_{i}'] = prior[i]
#         d[f'prior_y_{i}'] = prior[i + enkf.population_size]

#         # get posterior
#         posterior = r['posterior']
#         d[f'posterior_x_{i}'] = posterior[i]
#         d[f'posterior_y_{i}'] = posterior[i + enkf.population_size]

#         # get obs
#         obs = r['observation']
#         d[f'obs_x_{i}'] = obs[2*i]
#         d[f'obs_y_{i}'] = obs[(2*i)+1]

#         # get origin and destination
#         origin = r['origin']
#         d[f'origin_x_{i}'] = origin[2*i]
#         d[f'origin_y_{i}'] = origin[(2*i)+1]
#         destination = r['destination']
#         d[f'dest_x_{i}'] = destination[2*i]
#         d[f'dest_y_{i}'] = destination[(2*i)+1]
    
#     output.append(d)
    
# output = pd.DataFrame(output)


In [25]:
# output.head()

In [26]:
# for i in range(enkf.population_size):
#     plot_path(output, i, save_fig=True, ge=ge.name, er=er.name)
#     present_gate_estimates(i, save_fig=True, ge=ge.name, er=er.name)
#     print('\n')

* Look at standardisation
* Look at scaling variables

In [27]:
# t = -1

# plt.figure(figsize=(10, 10))

# truth = enkf.results[t]['ground_truth']
# truth_x = truth[::2]
# truth_y = truth[1::2]
# plt.scatter(truth_x, truth_y, color='green', s=25)



# for i in range(e):
#     prior_label = f'prior_{i}'
#     posterior_label = f'posterior_{i}'
    
#     prior = enkf.results[t][prior_label]
#     posterior = enkf.results[t][posterior_label]
    
    
#     prior_x = prior[:2]
#     prior_y = prior[2:4]
#     posterior_x = posterior[:2]
#     posterior_y = posterior[2:4]
    
#     plt.scatter(prior_x, prior_y, color='blue', s=1)
#     plt.scatter(posterior_x, posterior_y, color='red', s=1)

# plt.xlim((0, 740))
# plt.ylim((0, 700))
# plt.show()