In [237]:
import numpy as np
from collections import defaultdict, Counter
from tabulate import tabulate
import plotly.graph_objs as go
import plotly.figure_factory as ff
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)

In [238]:
# A world of infinite cheese
class World(object):
    """Environment with an unlimited cheese supply: eating always yields cheese."""

    def __init__(self):
        # Nothing has been eaten yet, so no cheese is being tasted.
        self.cheese = False

    def step(self, eat):
        """Advance the world by one tick.

        Args:
            eat: Truthy when the agent chose to eat during this tick.

        Returns:
            True when the agent ate (and therefore tastes cheese), else False.
        """
        self.cheese = True if eat else False
        return self.cheese

In [239]:
class Mouse(object):
    """Agent that learns how likely it should be to eat in each (energy, cheese) state.

    The mouse keeps an action table mapping every ``(energy, cheese)`` state
    (energy 0..10, cheese True/False) to the probability of choosing to eat
    in that state.  On every step the entry of the *previous* state is nudged
    by an innate evaluation of the state the mouse ended up in, but only when
    the previous action was to eat.
    """

    def __init__(self, action_learning_rate=0.01):
        """Create a mouse in its initial state.

        Args:
            action_learning_rate: Factor the innate value is multiplied by
                when an action-table entry is adjusted.
        """
        self._action_learning_rate = action_learning_rate

        self.reset()

    def reset(self):
        """Restore the starting energy, clear remembered state and reset every eat-chance to 0.5."""
        self.energy = self.initial_energy = 5
        self.cheese = self.initial_cheese = None
        self.action = self.initial_action = None
        self.state = None
        self.previous_state = None

        # One entry per (energy, cheese) pair.  True is inserted before False
        # so that _dict_to_table lists the "cheese" chance before the
        # "no cheese" chance for every energy level.
        self._action_table = {
            (energy, cheese): 0.5
            for energy in range(11)
            for cheese in [True, False]
        }

    @staticmethod
    def _update_energy(energy, cheese):
        """Return the energy after one tick: +1 with cheese, -1 without, clamped to [0, 10]."""
        if cheese:
            energy += 1
        else:
            energy -= 1

        return np.clip(energy, 0, 10)

    @staticmethod
    def _innate_evaluation(current_state, full_point=7):
        """Score a state from the mouse's built-in point of view.

        Args:
            current_state: ``(energy, cheese)`` tuple to evaluate.
            full_point: Energy level at which tasting cheese stops being rewarding.

        Returns:
            ``full_point - energy`` when cheese is tasted (positive below
            ``full_point``, negative above it); ``energy - 4`` (a negative
            number) when there is no cheese and energy is below 4; otherwise 0.
        """
        value = 0
        energy, cheese = current_state
        if cheese:
            value = -energy + full_point
        elif energy < 4:
            value = energy - 4
        return value

    def _update_action_table(self, previous_state, value, learning_rate, debug=True):
        """Adjust the eat-chance of ``previous_state`` by ``learning_rate * value``.

        The table is only changed when the last action (``self.action``) was to
        eat.  The resulting chance is clamped to [0.01, 0.99] so neither choice
        ever becomes impossible.

        Args:
            previous_state: ``(energy, cheese)`` key of the entry to adjust.
            value: Evaluation of the state that followed ``previous_state``.
            learning_rate: Step size applied to ``value``.
            debug: When true, print what was (or was not) changed.
        """
        if not self.action:
            if debug:
                print("Did not act last step. Nothing to change.")
            return

        previous_strength = self._action_table[previous_state]
        new_strength = previous_strength + learning_rate * value
        new_strength = np.clip(new_strength, 0.01, 0.99)
        self._action_table[previous_state] = new_strength
        if debug:
            print("---- Update Action Table")
            print("Previous State:", previous_state)
            print("Previous Strength:", previous_strength)
            print("Value:", value)
            print("New Strength:", new_strength)

    def _learn(self, previous_state, value, debug=False):
        """Apply ``value`` to the action table using the mouse's own learning rate."""
        self._update_action_table(
            previous_state,
            value,
            self._action_learning_rate,
            debug=debug
        )

    def _get_action(self, state):
        """Randomly decide whether to eat, with the probability stored for ``state``.

        Draws one number from NumPy's global random generator.

        Returns:
            True to eat, False otherwise.
        """
        act_chance = self._action_table[state]
        return bool(np.random.random() < act_chance)

    def _dict_to_table(self, d):
        """Turn a ``{(row_key, column_key): value}`` dict into rows ``[row_key, value, ...]``.

        Values are grouped by the first component of their key and appear in
        the order in which they are stored in ``d``.
        """
        int_d = defaultdict(list)
        for k, v in d.items():
            p1, p2 = k
            int_d[p1].append(v)

        return [[k] + v for k, v in int_d.items()]

    def _display_table(self, d_table, headers):
        """Print ``d_table`` as a text table with the given column headers."""
        t = self._dict_to_table(d_table)
        print()
        print(tabulate(t, headers=headers))
        print()

    def display_knowledge(self):
        """Print the current eat-chance for every energy level, with and without cheese."""
        print("---- Action Table")
        self._display_table(
            self._action_table,
            headers=["Energy", "Cheese", "No Cheese"]
        )

    def step(self, obseravation, debug=False):
        """Process one observation, learn from it and choose the next action.

        Args:
            obseravation: Boolean, whether the mouse tastes cheese this tick.
                (The misspelled parameter name is kept so the signature stays
                unchanged for existing callers.)
            debug: When true, print a trace of the step.

        Returns:
            The chosen action: True to eat, False otherwise.
        """
        # Read the argument itself.  The body used to reference the name
        # `observation`, which is not the parameter and only resolved when a
        # global of that name happened to exist.
        cheese = obseravation # Boolean if the mouse tastes cheese or not
        if debug:
            print("Initial energy value", self.energy)
        next_energy = self._update_energy(self.energy, cheese)
        if debug:
            print("Updated energy value", next_energy)
        self.previous_state = self.state
        self.state = (next_energy, cheese)

        if debug:
            print("Previous State: (Energy, Cheese)", self.previous_state)
            print("Previous Action:", self.action)
            print("Current State: (Energy, Cheese)", self.state)
        # Nothing to learn from on the very first step after a reset.
        if self.previous_state is not None:
            if debug:
                print("Evaluating and learning ...")
            value = self._innate_evaluation(self.state)
            if debug:
                print("Value:", value)
            self._learn(self.previous_state, value, debug)
            if debug:
                self.display_knowledge()

        next_action = self._get_action(self.state)
        if debug:
            print("Action Chosen:", next_action)
            print("=" * 50)
        self.action = next_action
        self.cheese = cheese
        self.energy = next_energy
        return self.action
        
        

In [240]:
# Build the environment and the learning agent used by the simulation cell below.
world = World()
# Learning rate 0.1 instead of the class default of 0.01.
mouse = Mouse(action_learning_rate=0.1)

In [242]:
# Mouse._get_action draws from NumPy's global random generator; seeding it
# here makes every run of this cell produce the same trajectory.
np.random.seed(0)

mouse.reset()
observation = False # No cheese to begin with
chances_over_time = []   # snapshots of the action table, one per captured frame
capture_frame_every = 100
n_steps = 10000
actions = Counter()      # number of "eat" decisions, keyed by energy level
for i in range(n_steps):
    if i % capture_frame_every == 0:
        # Rows of the transposed snapshot: energy levels, eat-chance with
        # cheese, eat-chance without cheese.
        chances = np.array(mouse._dict_to_table(mouse._action_table)).T
        chances_over_time.append(chances)
    action = mouse.step(observation, debug=False)
    if action:
        # mouse.energy is already the energy of the state the action was chosen in.
        actions[mouse.energy] += 1
    observation = world.step(action)

In [251]:
# Share of all recorded "eat" decisions that were taken at each energy level 0..10.
total_actions = sum(actions.values())
action_frequency = [
    actions.get(level) / total_actions if actions.get(level) else 0
    for level in range(11)
]

action_frequency

[0,
 0,
 0.0003999200159968006,
 0.0005998800239952009,
 0.005398920215956809,
 0.5108978204359128,
 0.47290541891621674,
 0.008598280343931213,
 0.0009998000399920016,
 0.0001999600079984003,
 0]

In [214]:
# Print the learned eat-chance for every energy level, with and without cheese.
# NOTE(review): the saved output of this cell carries execution count 214, which
# is lower than the simulation cell's (242), so the table shown may not belong
# to the latest simulation run; re-run the notebook top to bottom to refresh it.
mouse.display_knowledge()

---- Action Table

  Energy    Cheese    No Cheese
--------  --------  -----------
       0      0.5          0.5
       1      0.5          0.5
       2      0.5          0.5
       3      0.5          0.99
       4      0.99         0.99
       5      0.99         0.99
       6      0.5          0.5
       7      0.01         0.01
       8      0.01         0.1
       9      0.2          0.2
      10      0.5          0.5



In [215]:
# Transpose the per-energy rows of the action table so that each row of
# `chances` is one series: energy levels, eat-chance with cheese, eat-chance
# without cheese.
chances = np.array(mouse._dict_to_table(mouse._action_table)).T
# The energy-level row is not needed for plotting.
_, ysa_cheese, ysa_no_cheese = chances

In [216]:
# Final eat-chance per energy level.  Traces, title and axes are labelled so
# the figure can be read on its own; names match the animated figure below.
iplot({
    'data': [
        go.Scatter(y=ysa_cheese, name='Cheese'),
        go.Scatter(y=ysa_no_cheese, name='No Cheese'),
    ],
    'layout': {
        'title': 'Chance to Act by Energy Level',
        'xaxis': {'title': 'Energy'},
        'yaxis': {'title': 'Chance to act', 'range': [0, 1], 'autorange': False},
    },
})

In [217]:
print(len(chances_over_time))

# One animation frame per captured snapshot of the action table.  Each
# snapshot unpacks into (energy levels, eat-chance with cheese, eat-chance
# without cheese); the title shows the simulation step the snapshot was taken at.
frames = [
    {
        'data': [
            {'y': with_cheese},
            {'y': without_cheese}
        ],
        'layout': {
            'title': 'Chance to Act by Energy Level - {}'.format(index * capture_frame_every)
        }
    }
    for index, (_, with_cheese, without_cheese) in enumerate(chances_over_time)
]

# "Play" button that steps through the frames without delay or transition.
play_button = {
    'args': [
        None,
        {
            'frame': {'duration': 0, 'redraw': True},
            'mode': 'next',
            'transition': {'duration': 0, 'easing': 'linear'}
        }
    ],
    'label': 'Play',
    'method': 'animate'
}

# The figure starts out showing the very first snapshot.
initial_snapshot = chances_over_time[0]
figure = {
    'data': [
        {'y': initial_snapshot[1], 'name': 'Cheese'},
        {'y': initial_snapshot[2], 'name': 'No Cheese'}
    ],
    'layout': {
        'yaxis': {'range': [0, 1], 'autorange': False},
        'title': 'Chance to Act by Energy Level - 0',
        'updatemenus': [{
            'type': 'buttons',
            'buttons': [play_button]
        }]
    },
    'frames': frames
}

iplot(figure)

100
