# Notebook
In this notebook, we present the major experimental findings.

## PE-UCRL in a polarising recommender engine

### Initialisations

In [1]:
# import packages
from agents import PeUcrlAgent
from json import load
import gymnasium as gym
!cd ..; pip3 install -e gym-cellular -q
import gym_cellular
import numpy as np
import pandas as pd

# import configurations
# Parse the experiment configuration inside a context manager so the file
# handle is closed as soon as the JSON has been read (the bare open() used
# previously left the handle open for the lifetime of the kernel).
with open("config_files/peucrl_polarisation_0.json", 'r', encoding="utf-8") as config_file:
    config = load(config_file)

# instantiate environment
# The environment id and every constructor argument come from the
# configuration; the keyword arguments are gathered first and then
# forwarded to gym.make in one go.
environment_id = config["environment_version"]
env_kwargs = {
    "n_users": config["n_users"],
    "n_user_states": config["n_user_states"],
    "n_recommendations": config["n_recommendations"],
    "n_moderators": config["n_moderators"],
    "seed": config["environment_seed"],
}
env = gym.make(environment_id, **env_kwargs)

def reward_function(x, y):
    """Return a constant reward of zero.

    Both arguments are accepted so the function matches the signature the
    agent is given, but neither is inspected.
    """
    return 0

# instantiate agent
# Constructor arguments are collected in a mapping (same names, same order
# as before) and unpacked into the agent in a single call.
agent_kwargs = {
    "confidence_level": config["confidence_level"],
    "accuracy": config["accuracy"],
    # One cell per user.
    "n_cells": config["n_users"],
    # NOTE(review): the factor of two is not explained in this notebook --
    # confirm against the environment's state encoding.
    "n_intracellular_states": config["n_user_states"] * 2,
    "cellular_encoding": env.cellular_encoding,
    "n_intracellular_actions": config["n_recommendations"],
    "cellular_decoding": env.cellular_decoding,
    "reward_function": reward_function,
    "cell_classes": config["cell_classes"],
    "cell_labelling_function": config["cell_labelling_function"],
    "regulatory_constraints": config["regulatory_constraints"],
    "initial_policy": env.get_initial_policy(),
}
agt = PeUcrlAgent(**agent_kwargs)


### Explore

In [2]:
# Reset the environment; the returned observation is the first state handed
# to the agent.
previous_state, info = env.reset(seed=0)

# TODO: side-effect incidence is not tracked yet
# (candidate source: env.get_side_effects_incidence()).

# Timing samples reported by the agent, one slot per time step. The arrays are
# pre-allocated for the longest possible run and trimmed after the loop so
# that slots of steps that never ran do not enter the statistics as zeros.
ns_between_time_steps = np.zeros(config["max_time_steps"])
ns_between_episodes = np.zeros(config["max_time_steps"])
n_completed_steps = 0

for time_step in range(config["max_time_steps"]):

    action = agt.sample_action(previous_state)
    current_state, reward, terminated, truncated, info = env.step(action)
    agt.update(current_state, reward, info["side_effects"])
    ns_between_time_steps[time_step] = agt.get_ns_between_time_steps()
    ns_between_episodes[time_step] = agt.get_ns_between_episodes()
    n_completed_steps = time_step + 1

    # Advance the state so the next action is sampled from the latest
    # observation instead of the one returned by env.reset().
    previous_state = current_state

    if terminated or truncated:
        break

# Keep only the samples of the steps that were actually executed.
ns_between_time_steps = ns_between_time_steps[:n_completed_steps]
ns_between_episodes = ns_between_episodes[:n_completed_steps]

### Results

In [3]:
def summarise_ns(samples):
    """Return ``[mean, standard deviation, sample count]`` for timing samples.

    NaN entries are dropped before the statistics are computed, and the mean
    and standard deviation are rounded to whole numbers. When no valid
    sample remains the result is ``[nan, nan, 0]`` instead of an exception
    from ``round`` on NaN.
    """
    valid = np.asarray(samples, dtype=float)
    valid = valid[~np.isnan(valid)]
    if valid.size == 0:
        return [np.nan, np.nan, 0]
    return [round(np.mean(valid)), round(np.std(valid)), valid.size]


# The sample count is taken from the data itself rather than from the
# configured maximum, so it stays correct whatever the length of the arrays.
table = [
    summarise_ns(ns_between_time_steps),
    summarise_ns(ns_between_episodes),
]
df = pd.DataFrame(
    table,
    columns=["Mean (ns)", "Standard deviation (ns)", "Number of samples"],
    index=["Between time steps", "Between episodes"],
)
print(df)

                    Mean (ns)  Standard devation (ns)  Number of samples
Between time steps   18503276                 1393269                  3
Between episodes     10473702                  969496                  2
