### LO - reinforcing functional thinking

Remember the functional idea: look for language features that allow a concept to be modelled functionally in a way that, given the problem context, does not compromise complexity or safety.

# Assessment notes

- Draw a diagram of the DMC
- define P mathematically
- define S mathematically
- define R mathematically
- should non defined states in S be captured by P and R ? Or should P and R assume S is valid ?
- can you keep rolling after you have passed the winning post, or is victory assumed ?
- do you need a max iteration variable ?
- how do I know why it stopped ?
- how much does your code resemble the pseudo code for the "beautiful in its simplicity" algorithm described in (ref goes here) ?
- what features are there in your implementation that are not in the pseudo code ? Do they obscure the purpose of your code ?
- if you were an author of (ref goes here), would you modify the pseudo code in any way ? If so, how and why ? If not, why not ?
- When you were testing your code to see if it works (assuming you did), how did you "know" if it worked or not ? What benchmark did you use to test it ?
- Did you test your implementation of value iteration on any simple test cases ? Could these be made into unit tests ?

### Implement a mapping $S \rightarrow 2^{S}$ associating a state $s$ with its set of reachable states $S^{'} \subseteq S$ (that is, $S^{'} \in 2^{S}$).

For piglet, given a valid state $(i,j,k)$ then $(i,j,k) \rightarrow \left\{(i,j,k+1),(i+k,j,0),(i,j,0) \right\}$

In [1]:
from pymonad.tools import curry

In [31]:
@curry(1)
def piglet_accessability(N) :
    """Build the accessibility relation of two-player piglet for target N.

    Returns a zero-argument generator function.  Iterating its result yields
    pairs ``(s, successors)`` where ``s = (p,i,j,k)`` and ``successors`` is a
    zero-argument generator function over the states reachable from ``s``.

    ``p`` takes the values 0 and 1, ``i`` and ``j`` run over ``range(N)`` and
    ``k`` over ``range(N - i)``.  From ``(p,i,j,k)`` the reachable states are
    ``(p,i,j,k+1)``, ``(q,j,i,0)`` and ``(q,j,i+k,0)`` with ``q`` the other
    value of ``p``.
    """
    def successors_of(p,i,j,k) :
        # Bind the state as arguments so that every generator function handed
        # out keeps its own (p,i,j,k).  Reading the loop variables of
        # codomain directly would make each of them report the successors of
        # whichever state the enumeration had reached when it was consumed.
        q = (p + 1)%2
        def domain() :
            # A set: the last two entries coincide when k == 0.
            yield from {(p,i,j,k+1),(q,j,i,0),(q,j,i+k,0)}
        return domain
    def codomain() :
        for p in {0,1} :
            for i in range(N) :
                for j in range(N) :
                    for k in range(N - i) :
                        yield ((p,i,j,k),successors_of(p,i,j,k))
    return codomain

In [3]:
def consume_iterator_object(access) :
    """Print every state produced by ``access()`` followed by its successors.

    ``access`` is called with no arguments and must return an iterable of
    ``(state, successors)`` pairs; each ``successors`` is in turn called with
    no arguments.  A state is printed as ``<state> :`` and each of its
    successors on a line of its own, indented by four spaces.
    """
    for state,reachable in access() :
        print(f"{state} :")
        for target in reachable() :
            print(f"    {target}")
# Print the accessibility relation for N = 2: each state, then its successors.
consume_iterator_object(piglet_accessability(2))

(0, 0, 0, 0) :
    (0, 0, 0, 1)
    (1, 0, 0, 0)
(0, 0, 0, 1) :
    (0, 0, 0, 2)
    (1, 0, 0, 0)
    (1, 0, 1, 0)
(0, 0, 1, 0) :
    (1, 1, 0, 0)
    (0, 0, 1, 1)
(0, 0, 1, 1) :
    (1, 1, 0, 0)
    (1, 1, 1, 0)
    (0, 0, 1, 2)
(0, 1, 0, 0) :
    (0, 1, 0, 1)
    (1, 0, 1, 0)
(0, 1, 1, 0) :
    (1, 1, 1, 0)
    (0, 1, 1, 1)
(1, 0, 0, 0) :
    (1, 0, 0, 1)
    (0, 0, 0, 0)
(1, 0, 0, 1) :
    (0, 0, 1, 0)
    (1, 0, 0, 2)
    (0, 0, 0, 0)
(1, 0, 1, 0) :
    (0, 1, 0, 0)
    (1, 0, 1, 1)
(1, 0, 1, 1) :
    (1, 0, 1, 2)
    (0, 1, 0, 0)
    (0, 1, 1, 0)
(1, 1, 0, 0) :
    (0, 0, 1, 0)
    (1, 1, 0, 1)
(1, 1, 1, 0) :
    (0, 1, 1, 0)
    (1, 1, 1, 1)


### Model actions

In [4]:
def piglet_actions() :
    """Yield the two piglet actions, "stick" and "roll".

    The actions are drawn from a set, so the order in which they come out is
    not specified.
    """
    yield from {"stick","roll"}

In [5]:
# List the available actions, one per line, in whatever order the generator
# produces them.
for action in piglet_actions() :
    print(action)

roll
stick


### Model the probability $P(s^{'}|s,a)$

In [6]:
def piglet_probability(s,s_dash,action) :
    """Return the probability of moving from ``s`` to ``s_dash`` under ``action``.

    ``s`` is unpacked as ``(p,i,j,k)`` and ``q`` is the other value of the
    0/1 flag ``p``.

    * "stick": ``(q,j,i+k,0)`` has probability 1.0.
    * any other action: ``(p,i,j,k+1)`` and ``(q,j,i,0)`` have probability
      0.5 each.

    Every other ``s_dash`` has probability 0.0.
    """
    p,i,j,k = s
    q = (p + 1)%2
    if action == "stick" :
        return 1.0 if s_dash == (q,j,i+k,0) else 0.0
    # NOTE: the action is never compared with "roll"; whatever is not "stick"
    # is scored as a roll.
    is_roll_outcome = s_dash == (p,i,j,k+1) or s_dash == (q,j,i,0)
    return 0.5 if is_roll_outcome else 0.0
        
    
    

### Model Reward 

In [29]:
@curry(4)
def piglet_reward(N,s,s_dash,action) :
    """Return the reward for the transition ``s -> s_dash`` under ``action``.

    The reward is 1.0 when all of the following hold, and 0.0 otherwise:

    * ``action`` is "roll";
    * ``s_dash`` equals ``s`` with its last component increased by one;
    * the second and fourth components of ``s_dash`` add up to ``N``.

    Both ``s`` and ``s_dash`` must unpack into exactly four components.
    """
    p,i,j,k = s
    q,l,m,n = s_dash
    if action != "roll" :
        return 0.0
    if (q,l,m,n) != (p,i,j,k+1) :
        return 0.0
    return 1.0 if l + n == N else 0.0

In [22]:
# Tabulate the "roll" probability of every (state, successor) pair of the
# N = 2 accessibility relation.
for s,S_dash in piglet_accessability(2)() :
    for s_dash in S_dash() :
        print(f"{s} : {s_dash} : roll -> {piglet_probability(s,s_dash,'roll')}")

(0, 0, 0, 0) : (0, 0, 0, 1) : roll -> 0.5
(0, 0, 0, 0) : (1, 0, 0, 0) : roll -> 0.5
(0, 0, 0, 1) : (0, 0, 0, 2) : roll -> 0.5
(0, 0, 0, 1) : (1, 0, 0, 0) : roll -> 0.5
(0, 0, 0, 1) : (1, 0, 1, 0) : roll -> 0.0
(0, 0, 1, 0) : (1, 1, 0, 0) : roll -> 0.5
(0, 0, 1, 0) : (0, 0, 1, 1) : roll -> 0.5
(0, 0, 1, 1) : (1, 1, 0, 0) : roll -> 0.5
(0, 0, 1, 1) : (1, 1, 1, 0) : roll -> 0.0
(0, 0, 1, 1) : (0, 0, 1, 2) : roll -> 0.5
(0, 1, 0, 0) : (0, 1, 0, 1) : roll -> 0.5
(0, 1, 0, 0) : (1, 0, 1, 0) : roll -> 0.5
(0, 1, 1, 0) : (1, 1, 1, 0) : roll -> 0.5
(0, 1, 1, 0) : (0, 1, 1, 1) : roll -> 0.5
(1, 0, 0, 0) : (1, 0, 0, 1) : roll -> 0.5
(1, 0, 0, 0) : (0, 0, 0, 0) : roll -> 0.5
(1, 0, 0, 1) : (0, 0, 1, 0) : roll -> 0.0
(1, 0, 0, 1) : (1, 0, 0, 2) : roll -> 0.5
(1, 0, 0, 1) : (0, 0, 0, 0) : roll -> 0.5
(1, 0, 1, 0) : (0, 1, 0, 0) : roll -> 0.5
(1, 0, 1, 0) : (1, 0, 1, 1) : roll -> 0.5
(1, 0, 1, 1) : (1, 0, 1, 2) : roll -> 0.5
(1, 0, 1, 1) : (0, 1, 0, 0) : roll -> 0.5
(1, 0, 1, 1) : (0, 1, 1, 0) : roll -> 0.0

In [271]:
# NOTE(review): `its` is not assigned in any cell shown in this notebook;
# presumably an iteration counter left over from a removed cell. Confirm
# before re-running.
its

101

In [114]:
# States whose (value, action) entry in V names "roll" as the action.
[state for state,(_,choice) in V.items() if choice == "roll"]

[]

### Implement value iteration

In [146]:
from copy import deepcopy

In [147]:
@curry(6)
def value_iteration(S,A,P,R,gamma,V) :
    """Perform one sweep of value iteration and return the updated table.

    S     -- zero-argument callable yielding (state, successors) pairs, where
             successors is a zero-argument callable yielding next states
    A     -- zero-argument callable yielding the actions
    P     -- P(s,s_dash,a), the transition probability
    R     -- R(s,s_dash,a), the transition reward
    gamma -- factor applied to the value of the next state
    V     -- mapping from state to a (value, action) pair; only the value
             (index 0) is read

    The result is a deep copy of V in which every state produced by S() maps
    to the largest (expected value, action) pair over A().  The pairs are
    compared as tuples, so equal values are separated by comparing actions.
    V is indexed for every successor, so a defaultdict passed as V gains an
    entry for each successor it did not already hold.
    """
    def backup(s,successors,a) :
        # Expected one-step return of taking a in s, using the values in V.
        return sum(P(s,s_dash,a)*(R(s,s_dash,a) + gamma*V[s_dash][0])
                   for s_dash in successors())
    V_dash = deepcopy(V)
    for s,S_dash in S() :
        V_dash[s] = max((backup(s,S_dash,a),a) for a in A())
    return V_dash
    

#### methods to test for convergence 

In [148]:
@curry(3)
def bounding_box_convergence(epsilon,V,V_dash) :
    """Return True when V and V_dash differ by less than epsilon on every key of V.

    Only the value component (index 0) of each entry is compared, and only
    the keys of V are visited.
    """
    changes = [abs(V_dash[s][0] - V[s][0]) for s in V]
    # Seeding with 0.0 means a table without keys counts as unchanged.
    return max([0.0] + changes) < epsilon
        

### Check the value iteration implementation

See [this article](https://artint.info/2e/html2e/ArtInt2e.Ch9.S5.SS2.html#Ch9.F16) for details of the test problems

#### actions

In [149]:
def lifestyle_actions() :
    """Yield the two lifestyle actions, "party" and "relax".

    The actions are drawn from a set, so the order in which they come out is
    not specified.
    """
    yield from {"party","relax"}

#### state transitions

In [150]:
def lifestyle_transitions() :
    """Yield (state, successors) pairs for the two-state lifestyle problem.

    "healthy" and "sick" are each paired with the same zero-argument
    generator function, which yields both states.  Sets are used throughout,
    so no ordering is promised.
    """
    def either_state() :
        yield from {"healthy","sick"}
    yield from {("healthy",either_state),("sick",either_state)}

#### state transition probabilities

In [151]:
def lifestyle_transition_probabilities(s,s_dash,action) :
    match (s,s_dash,action) :
        case ("healthy","healthy","party") : return 0.7
        case ("healthy","sick","party") : return 0.3
        case ("healthy","healthy","relax") : return 0.95
        case ("healthy","sick","relax") : return 0.05
        case ("sick","healthy","party") : return 0.1
        case ("sick","sick","party") : return 0.9
        case ("sick","healthy","relax") : return 0.5
        case ("sick","sick","relax") : return 0.5

#### state transition rewards

In [152]:
def lifestyle_transition_rewards(s,s_dash,action) :
    match (s,action) :
        case ("healthy","relax") : return 7.0
        case ("healthy","party") : return 10.0
        case ("sick","relax") : return 0.0
        case ("sick","party") : return 2.0
        

#### solution

In [153]:
# defaultdict holds the value table below but is not imported by any earlier
# cell, so import it here to keep this cell runnable on a fresh kernel.
from collections import defaultdict

# Fix the model (S, A, P, R) and the factor 0.8; only the table V stays open.
lifestyle_value_iteration = value_iteration(lifestyle_transitions,
                                            lifestyle_actions,
                                            lifestyle_transition_probabilities,
                                            lifestyle_transition_rewards,
                                            0.8)
lifestyle_converged = bounding_box_convergence(0.001)

# Every state starts with value 0.0 and no action.
V = defaultdict(lambda : (0.0,None))
V_dash = lifestyle_value_iteration(V)
# Keep sweeping until the convergence test accepts two consecutive tables.
while not lifestyle_converged(V,V_dash) :
    V = V_dash
    V_dash = lifestyle_value_iteration(V)
V_dash

defaultdict(<function __main__.<lambda>()>,
            {'sick': (23.80630043163403, 'relax'),
             'healthy': (35.711062336395926, 'party')})

### model ${\rm piglet}_{1}$

#### actions

In [122]:
def piglet_actions() :
    """Return a fresh iterator over the actions "roll" and "stick".

    The actions are held in a set, so their order is not specified.
    """
    return iter({"roll","stick"})

#### transitions

In [166]:
def piglet_1_transitions() :
    """Return an iterator over the (state, successors) pairs of piglet_1.

    There is a single pair: the state ``(0,0)`` together with a zero-argument
    callable returning an iterator over ``(0,1)``, ``(1,0)`` and ``(0,0)``.
    """
    def successors() :
        return iter({(0,1),(1,0),(0,0)})
    return iter({((0,0),successors)})

#### state transition probabilities

In [171]:
def piglet_1_probabilities(s,s_dash,action) :
    match (s,s_dash,action) :
        case ((0,0),(0,1),"roll") : return 0.5 
        case ((0,0),(1,0),"roll") : return 0.25
        case ((0,0),(0,0),"roll") : return 0.25
        case ((0,0),(0,1),"stick") : return 0.5
        case ((0,0),(0,0),"stick") : return 0.5
    return 0.0
    

#### state transition rewards

In [173]:
def piglet_1_rewards(s,s_dash,action) :
    """Return 1.0 for the "roll" transition from ``(0,0)`` to ``(0,1)``, else 0.0."""
    if s == (0,0) and s_dash == (0,1) and action == "roll" :
        return 1.0
    return 0.0

In [180]:
# defaultdict holds the value table below but is not imported by any earlier
# cell, so import it here to keep this cell runnable on a fresh kernel.
from collections import defaultdict

# Fix the piglet_1 model (S, A, P, R) and the factor 1.0; only V stays open.
piglet_1_value_iteration = value_iteration(piglet_1_transitions,
                                           piglet_actions,
                                           piglet_1_probabilities,
                                           piglet_1_rewards,
                                           1.0)
piglet_1_converged = bounding_box_convergence(0.001)

# Every state starts with value 0.0 and no action.
V = defaultdict(lambda : (0.0,None))
V_dash = piglet_1_value_iteration(V)
# Keep sweeping until the convergence test accepts two consecutive tables.
# The newer table is passed first here, so the test walks the keys of V_dash.
while not piglet_1_converged(V_dash,V) :
    V = V_dash
    V_dash = piglet_1_value_iteration(V)

V_dash

defaultdict(<function __main__.<lambda>()>, {(0, 0): (0.66650390625, 'roll')})

### state transitions

In [None]:
@curry(1)
def piglet_accessability(N) :
    """Build the accessibility relation of two-player piglet for target N.

    Returns a zero-argument generator function.  Iterating its result yields
    pairs ``(s, successors)`` where ``s = (p,i,j,k)`` and ``successors`` is a
    zero-argument generator function over the states reachable from ``s``.

    ``p`` takes the values 0 and 1, ``i`` and ``j`` run over ``range(N)`` and
    ``k`` over ``range(N - i)``.  From ``(p,i,j,k)`` the reachable states are
    ``(p,i,j,k+1)``, ``(q,j,i,0)`` and ``(q,j,i+k,0)`` with ``q`` the other
    value of ``p``.
    """
    def successors_of(p,i,j,k) :
        # Bind the state as arguments so that every generator function handed
        # out keeps its own (p,i,j,k).  Reading the loop variables of
        # codomain directly would make each of them report the successors of
        # whichever state the enumeration had reached when it was consumed.
        q = (p + 1)%2
        def domain() :
            # A set: the last two entries coincide when k == 0.
            yield from {(p,i,j,k+1),(q,j,i,0),(q,j,i+k,0)}
        return domain
    def codomain() :
        for p in {0,1} :
            for i in range(N) :
                for j in range(N) :
                    for k in range(N - i) :
                        yield ((p,i,j,k),successors_of(p,i,j,k))
    return codomain

### transition probabilities

### transition rewards