# Mouse Simulator


This agent represents a very simple mouse who maintains a generative bayesian model of its world consisting of 2 normal distributions and a non-linear transformation g(x) = x^2. The mouse's sensations of its world are very simplistic: it only has a single sensor that generates a scalar value representing light intensity.


In [154]:
import numpy as np

# CookieSensorEnv represents a world that has a cookie and a sensor in it.
# In this world, only one cookie exists at a time, and that cookie
# reflects light as a function of its size.
# An agent senses the light bouncing off the cookie by calling step(),
# which returns a scalar representing light intensity.
# Cookie size is random, determined by a normal distribution with
# with parameters specified via __init__() or left to their default values.
# Sensor output is also random, specified by a 2nd normal distribution,
# with mean set to a function g(x) of the output of the cookie_size
# distribution, and variance specified via __init__(). To summarize the flow:
# 0) an agent calls step() on this env, receives a sample of light_intensity
#    generated per the following:
# 1) a cookie is generated with a random size per normal dist. with user
#    provided mean and variance.
# 2) size transformed and used as light_mean via: light_mean = g(size) = size^2
# 3) a sensor reading is generated and returned with random intensity per
#    normal distribution using light_mean as mean and user provided variance.
class CookieSensorEnv:
    def __init__(self,
                 cookie_size_mean=3,
                 cookie_size_var=1,
                 area_to_light_fn=lambda x: x*x,
                 light_intensity_var=1):
        self.cookie_size_mean = cookie_size_mean
        self.cookie_size_var = cookie_size_var
        self.area_to_light_fn = area_to_light_fn
        self.light_intensity_var = light_intensity_var

    # No actions accepted
    # Returns light intensity at this timestep.
    def step(self, action):
        assert not action
        cookie_size_sample = np.random.normal(self.cookie_size_mean, self.cookie_size_var)
        light_intensity_mean = self.area_to_light_fn(cookie_size_sample)
        light_intensity_sample = np.random.normal(light_intensity_mean, self.light_intensity_var)
        return light_intensity_sample, 0, False, {}

    
# The mouse brain has variables to track its beliefs about the world.
# Beliefs consist of estimates of the parameters in the environment,
# which from the mouses point of view are latent variables that must
# be learned through repeated experience of the end result of the physical
# processes, namely the output of the mouses noisy light sensor (its eye).
#
# These variables fall roughly into two categories:
# 1) beliefs that can be updated quickly in response to sensory input
#    such as the belief that a recently viewed cookie was 2cm.
#    These are analogous to the near-realtime changes to the output
#    of a Tensorflow DNN when new inputs are applied to it.
# 2) beliefs that evolve more slowly over time which in the brain
#    are implemented using synaptic plasticity, analogous to the way
#    params are updated via backprop in TensorFlow.

class Mouse:
    def __init__(self, env, steps=1):
        self.light_intensity_error_belief = 0  # epsilon_u
        self.cookie_size_error_belief = 0      # epsilon_p
        self.cookie_size_belief = 0            # phi
        
        self.cookie_size_var_belief = 1                     # sigma_p
        self.light_intensity_var_belief = 1                 # sigma_u
        self.cookie_size_mean_belief = 3                    # v_p
        self.area_to_light_belief_fn = lambda x: x**2       # g()
        self.area_to_light_deriv_belief_fn = lambda x: 2*x  # g'()
      
        self.step_size = 0.05
        self.env = env
        self.num_steps = 0
        while self.num_steps < steps:
            obs, reward, done, _ = self.env.step(None)
            self.update_world_model(obs)
            self.num_steps += 1
        
    def update_world_model(self, obs):
        obs = 2
        # update neural network node vals (belief type 1)
        epsilon_p = (self.cookie_size_belief - self.cookie_size_mean_belief) / self.cookie_size_var_belief 
        epsilon_u = (obs - self.area_to_light_belief_fn(self.cookie_size_belief)) / self.light_intensity_var_belief
        
        dF_dPhi = epsilon_u * self.area_to_light_deriv_belief_fn(self.cookie_size_belief) - epsilon_p     
        self.cookie_size_belief += self.step_size * dF_dPhi
        
        # update neural network synaptic weights (belief type 2)
        dF_dSigma_p = 0.5 * (epsilon_p**2 - 1 / self.cookie_size_var_belief)      # dF/dSigma_p
        dF_dSigma_u = 0.5 * (epsilon_u**2 - 1 / self.light_intensity_var_belief)  # dF/dSigma_u
        dF_dvp = epsilon_p                                                       # dF/dv_p

        self.cookie_size_var_belief += self.step_size * dF_dSigma_p
        self.light_intensity_var_belief += self.step_size * dF_dSigma_u
        self.cookie_size_mean_belief += self.step_size * dF_dvp

        
        # print updated neural network values
        if self.num_steps % 1000 == 0:
            print(f"{self.num_steps:7}: e_p={epsilon_p:4.3}, e_u={epsilon_u:4.3}, " +
                  f"size={self.cookie_size_belief:4.3}, " + 
                  f"size_var={self.cookie_size_var_belief:4.3}, " +
                  f"light_var={self.light_intensity_var_belief:4.3}, " + 
                  f"size_mean={self.cookie_size_mean_belief:4.3}")




# Create a mouse agent and see what it learns as its best guess of the
# size of cookies it is seeing.
num_steps = 100000
print(f"Running mouse simulator for {num_steps} steps...")
print("------------------------------------------------")
Mouse(CookieSensorEnv(), steps=num_steps)
        

Running mouse simulator for 100000 steps...
------------------------------------------------
      0: e_p=-3.0, e_u= 2.0, size=0.15, size_var= 1.2, light_var=1.07, size_mean=2.85
   1000: e_p=-0.0322, e_u=-0.0103, size=1.66, size_var=93.7, light_var=71.7, size_mean=4.68
   2000: e_p=-0.0196, e_u=-0.00667, size=1.57, size_var=93.5, light_var=71.3, size_mean=3.41
   3000: e_p=-0.012, e_u=-0.00425, size=1.52, size_var=93.2, light_var=71.0, size_mean=2.63
   4000: e_p=-0.00732, e_u=-0.00268, size=1.48, size_var=92.9, light_var=70.6, size_mean=2.16
   5000: e_p=-0.00448, e_u=-0.00167, size=1.46, size_var=92.7, light_var=70.2, size_mean=1.87
   6000: e_p=-0.00275, e_u=-0.00104, size=1.44, size_var=92.4, light_var=69.9, size_mean=1.69
   7000: e_p=-0.00168, e_u=-0.000641, size=1.43, size_var=92.1, light_var=69.5, size_mean=1.58
   8000: e_p=-0.00103, e_u=-0.000395, size=1.42, size_var=91.9, light_var=69.2, size_mean=1.52
   9000: e_p=-0.000629, e_u=-0.000242, size=1.42, size_var=91.6, light_v

<__main__.Mouse at 0x1157281c0>