# Mouse Simulator


This agent represents a very simple mouse that maintains a generative Bayesian model of its world, consisting of two normal distributions and a non-linear transformation g(x) = x^2. The mouse's sensations of its world are very simplistic: it has only a single sensor, which generates a scalar value representing light intensity.


In [69]:
import numpy as np

# CookieSensorEnv represents a world that has a cookie and a sensor in it.
# In this world, only one cookie exists at a time, and that cookie
# reflects light as a function of its size.
# An agent senses the light bouncing off the cookie by calling step(),
# which returns a scalar representing light intensity.
# Cookie size is random, determined by a normal distribution with
# parameters specified via __init__() or left to their default values.
# Sensor output is also random, specified by a 2nd normal distribution,
# with mean set to a function g(x) of the output of the cookie_size
# distribution, and std specified via __init__(). To summarize the flow:
# 0) an agent calls step() on this env, receives a sample of light_intensity
#    generated per the following:
# 1) a cookie is generated with a random size per normal dist. with user
#    provided mean and std.
# 2) size transformed and used as light_mean via: light_mean = g(size) = size^2
# 3) a sensor reading is generated and returned with random intensity per
#    normal distribution using light_mean as mean and user provided std.
class CookieSensorEnv:
    """Toy world containing a single cookie and a single noisy light sensor.

    Each call to step() draws a new cookie size from a normal distribution,
    maps that size through area_to_light_fn to obtain the mean light
    intensity, and then draws the sensor reading from a second normal
    distribution centred on that mean.
    """

    def __init__(self,
                 cookie_size_mean=2,
                 cookie_size_std=1,
                 area_to_light_fn=lambda x: x*x,
                 light_intensity_std=1):
        """Store the parameters of the two distributions and of g().

        Args:
            cookie_size_mean: mean of the cookie-size distribution.
            cookie_size_std: standard deviation of the cookie-size distribution.
            area_to_light_fn: g(size), giving the mean light intensity.
            light_intensity_std: standard deviation of the sensor noise.
        """
        # Cookie-size distribution.
        self.cookie_size_mean, self.cookie_size_std = cookie_size_mean, cookie_size_std
        # Size -> light mapping and the sensor noise around it.
        self.area_to_light_fn, self.light_intensity_std = area_to_light_fn, light_intensity_std

    def step(self, action):
        """Generate one sensor reading; this environment accepts no actions.

        Args:
            action: must be falsy (e.g. None); anything truthy trips the assert.

        Returns:
            A 4-tuple (light_intensity, 0, False, {}). Only the first element
            carries information; the remaining three are constants.
        """
        assert not action
        # Draw order matters for reproducibility under a fixed seed:
        # first the cookie size, then the sensor reading.
        size = np.random.normal(loc=self.cookie_size_mean, scale=self.cookie_size_std)
        reading = np.random.normal(loc=self.area_to_light_fn(size),
                                   scale=self.light_intensity_std)
        return reading, 0, False, {}

    
# The mouse brain has variables to track its beliefs about the world.
# Beliefs consist of estimates of the parameters in the environment,
# which from the mouse's point of view are latent variables that must
# be learned through repeated experience of the end result of the physical
# processes, namely the output of the mouse's noisy light sensor (its eye).
#
# These variables fall roughly into two categories:
# 1) beliefs that can be updated quickly in response to sensory input
#    such as the belief that a recently viewed cookie was 2cm.
#    These are analogous to the near-realtime changes to the output
#    of a Tensorflow DNN when new inputs are applied to it.
# 2) beliefs that evolve more slowly over time which in the brain
#    are implemented using synaptic plasticity, analogous to the way
#    params are updated via backprop in TensorFlow.

class Mouse:
    """Agent that estimates cookie size from light readings by gradient ascent.

    The mouse holds a fast-changing belief about the current cookie size (phi)
    and fixed beliefs about the parameters of its world model: a prior mean
    and standard deviation for cookie size, a standard deviation for sensor
    noise, and a size-to-light function g() with its derivative g'().
    Constructing a Mouse immediately runs `steps` sense/update iterations
    against `env`.
    """

    def __init__(self, env, steps=1):
        """Initialise the beliefs and run the simulation loop.

        Args:
            env: object whose step(None) returns a 4-tuple with the sensor
                reading as its first element.
            steps: number of sense/update iterations to run.
        """
        self.light_intensity_error_belief = 0  # epsilon_u
        self.cookie_size_error_belief = 0      # epsilon_p
        self.cookie_size_belief = 0            # phi

        self.cookie_size_std_belief = 1                     # sigma_p
        self.light_intensity_std_belief = 1                 # sigma_u
        self.cookie_size_mean_belief = 3                    # v_p
        self.area_to_light_belief_fn = lambda x: x**2       # g()
        self.area_to_light_deriv_belief_fn = lambda x: 2*x  # g'()

        self.step_size = 0.01
        self.env = env
        # num_steps is read by update_world_model() to decide when to print,
        # so it has to be advanced only after each update.
        self.num_steps = 0
        while self.num_steps < steps:
            obs, _reward, _done, _info = self.env.step(None)
            self.update_world_model(obs)
            self.num_steps += 1

    def update_world_model(self, obs):
        """Take one gradient step on the cookie-size belief (phi).

        The two prediction errors are computed first and stored on the
        instance, then combined into the gradient:

            epsilon_p = (phi - v_p) / sigma_p^2
            epsilon_u = (obs - g(phi)) / sigma_u^2
            dF/dphi   = epsilon_u * g'(phi) - epsilon_p

        Args:
            obs: sensor reading from the environment (currently overridden,
                see the comment below).
        """
        # The following line makes the mouse always get the same sensor reading, i.e. 2.
        # This emulates Exercise 2 of the Free Energy Tutorial by Bogacz.
        # Delete this line to instead have the mouse get stochastic sensor readings
        # from the CookieSensorEnv.
        obs = 2

        phi = self.cookie_size_belief
        # The beliefs are stored as standard deviations; the Gaussian
        # precision weighting needs the variance, hence the squares.
        cookie_size_var = self.cookie_size_std_belief ** 2
        light_intensity_var = self.light_intensity_std_belief ** 2

        # Prediction errors, evaluated at the belief held before this update.
        self.cookie_size_error_belief = (
            (phi - self.cookie_size_mean_belief) / cookie_size_var)
        # Use the mouse's own g() so that g() and g'() always stay consistent.
        self.light_intensity_error_belief = (
            (obs - self.area_to_light_belief_fn(phi)) / light_intensity_var)

        deriv_of_f = (
            self.light_intensity_error_belief * self.area_to_light_deriv_belief_fn(phi)
            - self.cookie_size_error_belief)
        self.cookie_size_belief = phi + self.step_size * deriv_of_f
        if self.num_steps % 5 == 0:
            print(f"{self.num_steps}: self.cookie_size_belief is {self.cookie_size_belief}")


# Sanity check: pull a single noisy reading straight from a default environment.
sample_reading = CookieSensorEnv().step(None)
print(f"Random CookieSensorEnv reading: {sample_reading}\n")

# Now let a mouse observe the environment for a while and report its evolving
# best guess of the cookie size (printed from inside Mouse as it runs).
num_steps = 100
print(f"Running mouse simulator for {num_steps} steps...")
print("------------------------------------------------")
# Left as a bare expression so a notebook still displays the resulting object.
Mouse(CookieSensorEnv(), steps=num_steps)
        

Random CookieSensorEnv reading: (8.997893869087033, 0, False, {})

Running mouse simulator for 100 steps...
------------------------------------------------
0: self.cookie_size_belief is 0.03
5: self.cookie_size_belief is 0.1939084146487336
10: self.cookie_size_belief is 0.381857572077597
15: self.cookie_size_belief is 0.5909967376995323
20: self.cookie_size_belief is 0.8107515778311672
25: self.cookie_size_belief is 1.0221566106732736
30: self.cookie_size_belief is 1.2036012686708553
35: self.cookie_size_belief is 1.3411792263052433
40: self.cookie_size_belief is 1.434333423087635
45: self.cookie_size_belief is 1.4920879420631021
50: self.cookie_size_belief is 1.5258025907963861
55: self.cookie_size_belief is 1.544760968035532
60: self.cookie_size_belief is 1.555191255154537
65: self.cookie_size_belief is 1.5608596238069126
70: self.cookie_size_belief is 1.5639193782464216
75: self.cookie_size_belief is 1.565564966733342
80: self.cookie_size_belief is 1.5664482411908764
85: self.cooki

<__main__.Mouse at 0x114253850>