In [1]:
# https://github.com/TikhonJelvis/RL-book


from random import randint

In [2]:
#this is not good as it doesn't:
#- generates a random number, but doesn't reference any type of distribution.


def six_sided_die():
    return randint(1, 6)

def roll_dice():
    return six_sided_die() + six_sided_die()



In [3]:
# Need to create an abstraction for the distribution.
# We use an interface to define the contract for the distribution
# interface: a class that is a definition of what we require for something to qualify as a distribution.
# Any distribution we create in the future will have to implement the sample method.

from abc import ABC, abstractmethod

class Distribution(ABC):
    @abstractmethod
    def sample(self):
        pass


In [4]:
# Now we can create a class that implements the Distribution interface called a concrete class.
# This class will be a concrete implementation of the Distribution interface.
# Added __repr__ method to make it easier to print the object.
# Added __eq__ method to make it easier to compare dice objects correctly using number sides.
class Die(Distribution):
    def __init__(self, sides):
        self.sides = sides

    def __repr__(self):
        return f'Die(sides={self.sides})'
    
    def __eq__(self, other):
        return isinstance(other, Die) and self.sides == other.sides
    
    def sample(self):
        return randint(1, self.sides)

six_sided_die = Die(6)

def roll_dice():
    return six_sided_die.sample() + six_sided_die.sample()

print(six_sided_die)

play_monopoly = roll_dice()
print(f'You rolled: {play_monopoly}')

#test the __eq__ method
assert Die(6) == Die(6) # True

print(Die(6) == Die(6)) # True
print(Die(6) == Die(10)) # False

Die(6) == None


Die(sides=6)
You rolled: 5
True
False


False

In [5]:
# Having to implement, __repr__, __eq__ method is a bit of a pain. That is why python created the dataclass decorator.
# The dataclass decorator will automatically implement __repr__, __eq__ and __init__ methods for us.
# The frozen=True argument to the dataclass decorator will make the class immutable. THis means that we can't change the value 
# of the object after it has been created. This is a good thing because it makes the object easier to reason about, and helps us avoid bugs.
### This will fail if we try to change the value of the object after it has been created.
# '''
# d = Die(6)
# d.sides = 10
# '''

from dataclasses import dataclass
from random import randint

@dataclass(frozen=True)
class Die():
    sides: int # this is a type hint, it tells us that sides is an integer.

    def sample(self):
        return randint(1, self.sides)
    
six_sided_die = Die(6)

def roll_dice():
    return six_sided_die.sample() + six_sided_die.sample()

print(six_sided_die)

play_monopoly = roll_dice()
print(f'You rolled: {play_monopoly}')

#test the __eq__ method
assert Die(6) == Die(6) # True

print(Die(6) == Die(6)) # True
print(Die(6) == Die(10)) # False

Die(6) == None


Die(sides=6)
You rolled: 3
True
False


False

In [6]:
## We can use dataclasses.replace to create a new object with the same value as the old object, but with some changes.
## This creates a copy of the object by creating a new object and allows us to change attributes of the object such as the number of sides.
import dataclasses

d6 = Die(6)
d20 = dataclasses.replace(d6, sides=20)
d20

Die(sides=20)

In [7]:
# Because we made out dataclass immutable, it means it is also hashable. This means we can use it as a key in a dictionary.

d = Die(6)
{d: 'hello die'}


{Die(sides=6): 'hello die'}

In [44]:
# Let's add type hints to our objects.

from abc import ABC, abstractmethod
from typing import Generic, TypeVar
from dataclasses import dataclass
from random import randint

# We use TypeVar to create a generic type. This is a type that can be any type. 
# We use it to define the type of the value that the distribution will return.
# This way we force the distribution to create a return type on sample.


A =TypeVar('A')

class Distribution(ABC, Generic[A]):
    @abstractmethod
    def sample(self) -> A:
        pass

@dataclass(frozen=True)
class Die(Distribution[int]):
    sides: int # this is a type hint, it tells us that sides is an integer.

    def sample(self) -> int:
        return randint(1, self.sides)
    
six_sided_die = Die(6)

def roll_dice():
    return six_sided_die.sample() + six_sided_die.sample()

print(six_sided_die)

play_monopoly = roll_dice()
print(f'You rolled: {play_monopoly}')

#test the __eq__ method
assert Die(6) == Die(6) # True

print(Die(6) == Die(6)) # True
print(Die(6) == Die(10)) # False

Die(6) == None

Die(sides=6)
You rolled: 5
True
False


False

In [45]:
import statistics

def expected_value(d: Distribution[float], n: int = 100) -> float:
    return statistics.mean(d.sample() for _ in range(n))

expected_value(Die(6), 100)




3.35

## 2.3.5 Functionality

What is the point of creating an abstraction for the distribution.

- How does the abstraction help us understand the code? 
- What kind of mistakes does it prevent—and what kind of mistakes does it encourage?
- What kind of added functionality does it give us?

**Reasons:**
- Reinforcemt learning ivolves both a wide range of distributions as well as algorithms that need to work on top of these distributions.
- Distrubition interface will thus:
    - **Unify** different applicatication of RL
    - **Generalize** our RL code to work in different contexts

- In the below code, there could be a more effecient or accurate way for a specific distribution to create samples. By adding it to the interface, We can override the behavior into a new class.


In [62]:
# Examples:

# from collections.abc import Sequence
from typing import Sequence
import timeit 

## This example works for getting samples from a die, but it does not tell us anything when coding more than that it is just looping through something.
## This is also somehting that we want to be able to do with all distributions, so we can create a method in the Distribution interface to do this.
samples = []
for _ in range(10):
    samples.append(Die(6).sample())


print(f'Running as simple loop:{samples[:]}')

## This is a better way to do it, but it is still not ideal. We can create a method in the Distribution interface to do this.

A =TypeVar('A')

class Distribution(ABC, Generic[A]):
    @abstractmethod
    def sample(self) -> A:
        pass
    
    def sample_n(self, n: int) -> Sequence[A]:
        return [self.sample() for _ in range(n)]


@dataclass(frozen=True)
class Die(Distribution[int]):
    sides: int # this is a type hint, it tells us that sides is an integer.

    def sample(self) -> int:
        return randint(1, self.sides)
    

d = Die(6)

samples = d.sample_n(10)
print(f'Running through interface:{samples[:]}')


## Overriding the sample_n method in the distribution class.
## there is an up-front cost to calling numpy.random.normal the first time, but it can quickly generate additional samples after that. The performance impact is significant
import numpy as np

@dataclass
class Gaussian(Distribution[float]):
    μ: float
    σ: float
    
    def sample(self) -> float:
        return np.random.normal(self.μ, self.σ)
    
    def sample_n(self, n: int) -> Sequence[float]:
        return np.random.normal(self.μ, self.σ, size=n)

d = Gaussian(μ=0, σ=1)

#using the list comprehension method that is implemented in our distrubution class.

total_time_to_run = timeit.timeit(lambda: [d.sample() for _ in range(10)])
# timeit.timeit(lambda: [d.sample() for _ in range(1000)])

print(f'Running through list comprehesion took: {total_time_to_run}') 
#using the np method that is implemented in our Gaussian class, which is overriding the sample_n in the distribution class.

total_time_to_run = timeit.timeit(lambda: d.sample_n(10))
print(f'Running through np method {total_time_to_run}')

Running as simple loop:[4, 2, 2, 2, 2, 2, 4, 1, 2, 3]
Running through interface:[2, 5, 6, 1, 5, 2, 1, 4, 6, 4]
Running through list comprehesion took: 10.723766251000598
Running through np method 1.5419414440002583


# 2.4 Abstracting over Computation

- We need to interact with the objects. We can do this through methods.
- Objects are first class, methods are not. 

2.4.1 First-Class Functions
- By using Callable from python we can then use the f() to call whatever action we pass out function. 

```python
for _ in range(10):
do_something()
```

Instead of writing a loop each time, we could factor this logic into a function that took n
and do_something as arguments:

```python
def repeat(action: Callable, n: int):
for _ in range(n):
action()
repeat(do_something, 10)
```
`repeat` takes action and n as arguments, then calls action n times. `action` has the type
Callable which, in Python, covers functions as well as any other objects you can call with
the f() syntax. We can also specify the return type and arguments a Callable should have;
if we wanted the type of a function that took an int and a str as input and returned a bool,
we would write `Callable[[int, str], bool]`.


**The key idea to remember is that functions are values that we can pass around or store just like any other object.**


`lambda`. Lambdas are function literals. We can
write 3.0 and get a number without giving it a name, and we can write a lambda expression
to get a function without giving it a name. Here’s the same example as with the payoff
function but using a lambda instead


In [66]:
## Callable Example not from Book
from typing import Callable

def func(x: int) -> str:
    return int(x) * int(x)

my_callable: Callable[[int], str] = func

my_callable(10)


100

In [119]:
## Examples

#previous expected_value function

import statistics
from typing import Callable

def expected_value(d: Distribution[float], n: int = 100) -> float:
    return statistics.mean(d.sample() for _ in range(n))

expected_value(Die(6), 100)


# by wrapping the function in a callable, we can have expected_value call a function that handles the distribution of coin which returns a heads or tails string. 
# The payoff function maps outcomes to numbers and then we calculate the expected value using that mapping

def expected_value(
        d: Distribution[float], 
        f: Callable[[A], float],
        n: int = 100
    ) -> float:

    return statistics.mean(f(d.sample()) for _ in range(n))


@dataclass(frozen=True)
class Coin(Distribution[str]):

    def sample(self) -> str:
        return 'heads' if randint(0, 1) == 1 else 'tails'

def payoff(coin: Coin) -> float:
    return 1.0 if coin == 'heads' else 0.0

coin = Coin()
print(payoff(coin))
print(expected_value(coin, payoff))

print(expected_value(coin, lambda coin: 1.0 if coin == 'heads' else 0.0))


0.0
0.49
0.47


## 2.4.2 Iterative Algroithms

Algorithms that need to iteratively converge to the correct result are very common in Reinforcement Learning and other machine learning applications.


In [134]:
# Examples

# hard coded stop threshold
def sqrt(a: float) -> float:
    x_n = a # initial guess
    x = a / 2 # initial guess
    while abs(x_n - x) > 0.01:
        x_n = (x + (a / x)) / 2
    return x_n

sqrt(4)


# fix it so it is not a hard coded threshold 
def sqrt(a: float, threshold: float) -> float:
    x_n = a # initial guess
    x = a / 2 # initial guess
    while abs(x_n - x) > threshold:
        x_n = (x + (a / x)) / 2
    return x_n

sqrt(8, .01)



KeyboardInterrupt: 