In [1]:
def is_even(n) -> bool:
    """Return True when the integer ``n`` is even, False when it is odd.

    Parity is read from the least-significant bit, which is clear for even
    numbers and set for odd ones.
    """
    lowest_bit = n & 1
    return False if lowest_bit else True


def is_power(n) -> bool:
    """Return True when ``n`` is a positive power of two (1, 2, 4, 8, ...).

    A power of two has exactly one bit set, so clearing its lowest set bit
    with ``n & (n - 1)`` leaves zero. Zero also satisfies that bit test
    although it is not a power of two, so non-positive values are rejected
    explicitly.
    """
    return n > 0 and n & (n - 1) == 0

In [2]:
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as st
import seaborn as sns
from scipy.optimize import minimize

# Global seaborn styling: the first positional argument of set_theme() is the
# plotting context, so no separate sns.set_context("notebook") call is needed.

sns.set_theme("notebook", style="whitegrid")


In [3]:
# def bsm_call(
#     S: float | np.ndarray, K: float | np.ndarray, T: float | np.ndarray, r: float, sigma: float | np.ndarray
# ) -> float | np.ndarray:
#     """
#     S: stock/underlying
#     K: strike
#     T: TTM
#     r: RFR
#     sigma: volatility of S || underlying
#     """
#     d1 = (np.log(S / K) + (r + 0.5 * sigma**2) * T) / (sigma * np.sqrt(T))
#     d2 = d1 - sigma * np.sqrt(T)
#     call_price = S * st.norm.cdf(d1) - K * np.exp(-r * T) * st.norm.cdf(d2)
#     return call_price
# def calibrated_sigma(params, args):
#     """
#     Params: array of parameters to optimize sigma (in this case)
#     args: tuple (market_prices, S, K, T, r); all but market_prices are passed on to bsm_call
#     """
#     market_prices, S, K, T, r = args
#     sigma = params[0]
#     model_prices = bsm_call(S, K, T, r, sigma)
#     return np.sum((market_prices - model_prices) ** 2)


##### <center>Yves Hilpisch's new book</center>
# <center>Reinforcement Learning for Finance - A Python-Based Introduction</center>
---
## <center>Bayesian Learning</center>
---
---
### Tossing a Biased Coin

In [4]:
# Random generator with a fixed seed; the simulations below all draw from it.
rng = np.random.default_rng(seed=100)

# 1 encodes heads and 0 encodes tails, for tosses and bets alike.
ssp = [1, 0]  # state space: the possible outcomes of a coin toss
asp = [1, 0]  # action space: the possible bets


def epoch():
    """Play 100 rounds of random betting on a fair coin.

    Every round first draws a bet from ``asp`` and then a toss from ``ssp``;
    a bet that matches the toss earns a reward of 1.

    Returns:
        The total reward collected over the 100 rounds.
    """
    total_reward = 0
    for _ in range(100):
        bet = rng.choice(asp)
        toss = rng.choice(ssp)
        total_reward += 1 if bet == toss else 0
    return total_reward


In [5]:
# Play 250 epochs and keep each epoch's total reward.
rl = np.array([epoch() for _ in range(250)])
rl[:10]  # total rewards of the first ten epochs

array([56, 47, 48, 55, 55, 51, 54, 43, 55, 40])

In [6]:
# Average total reward per epoch: a random bet on a fair coin wins about half
# of the 100 rounds.
rl.mean()

49.968

In [7]:
# Biased coin: heads (1) fills four of the five equally likely entries.
ssp = [1, 1, 1, 1, 0]  # state space
asp = [1, 0]           # action space, still one entry per side


def epoch():
    """Play 100 rounds of uniformly random betting against the biased coin.

    Every round first draws a bet from ``asp`` and then a toss from ``ssp``;
    a bet that matches the toss earns a reward of 1.

    Returns:
        The total reward collected over the 100 rounds.
    """
    reward_sum = 0
    for _round in range(100):
        bet = rng.choice(asp)
        toss = rng.choice(ssp)
        if bet != toss:
            continue  # losing bets earn nothing
        reward_sum += 1
    return reward_sum

# Play 250 epochs against the biased coin and keep each epoch's total reward.
rl = np.array([epoch() for _ in range(250)])
rl[:10]  # total rewards of the first ten epochs

array([53, 56, 40, 55, 53, 49, 43, 45, 50, 51])

In [8]:
# Still about 50 of 100: a uniformly random bet wins half the time no matter
# how the coin is biased.
rl.mean()

49.924

In [10]:
ssp = [1, 1, 1, 1, 0]  # biased coin: heads (1) fills four of the five entries


def epoch(n):
    """Play ``n`` rounds, drawing each bet from the outcomes seen so far.

    The action space starts with one entry per side and grows by the observed
    toss after every round, so later bets are drawn in proportion to how often
    each side has come up.

    Args:
        n: Number of rounds to play.

    Returns:
        The total reward, i.e. the number of rounds in which the bet matched
        the toss.
    """
    observed = [0, 1]  # the initial action space
    wins = 0
    for _ in range(n):
        bet, toss = rng.choice(observed), rng.choice(ssp)
        wins += 1 if bet == toss else 0
        observed.append(toss)
    return wins

# Play 250 epochs of 100 rounds each and keep each epoch's total reward.
rl = np.array([epoch(100) for _ in range(250)])
rl[:10]  # total rewards of the first ten epochs

array([71, 65, 67, 69, 68, 72, 68, 68, 77, 73])

In [11]:
# Average total reward when each bet is sampled from the outcomes seen so far.
rl.mean()


66.78

In [12]:
from collections import Counter

ssp = [1, 1, 1, 1, 0]  # biased coin: heads (1) fills four of the five entries


def epoch(n):
    """Play ``n`` rounds, always betting on the most frequent outcome so far.

    The action space starts with one entry per side (0 and 1) and gains the
    observed toss after every round. Each bet is the value with the highest
    count in it; on a tie the value that entered the tally first wins.

    Args:
        n: Number of rounds to play.

    Returns:
        The total reward, i.e. the number of rounds in which the bet matched
        the toss.
    """
    tr = 0
    # Running tally of the action space. Updating it in place gives the same
    # counts as recounting the full history each round, without the
    # quadratic cost.
    counts = Counter([0, 1])  # the initial action space
    for _ in range(n):
        a = counts.most_common(1)[0][0]
        s = rng.choice(ssp)
        if a == s:
            tr += 1
        counts[s] += 1
    return tr

# Play 250 epochs of 100 rounds each and keep each epoch's total reward.
rl = np.array([epoch(100) for _ in range(250)])
rl[:10]  # total rewards of the first ten epochs


array([81, 70, 74, 77, 82, 74, 81, 80, 77, 78])

In [None]:
# Average total reward when each bet is the most frequent outcome seen so far.
rl.mean()

78.828

In [14]:
# Biased die: face 4 fills four of the nine equally likely entries.
ssp = [1, 2, 3, 4, 4, 4, 4, 5, 6]  # state space
asp = [1, 2, 3, 4, 5, 6]           # action space: one bet per face


def epoch():
    """Play 600 rounds of uniformly random betting on the biased die.

    Every round first draws a bet from ``asp`` and then a roll from ``ssp``;
    a bet that matches the roll earns a reward of 1.

    Returns:
        The total reward collected over the 600 rounds.
    """
    hits = 0
    for _ in range(600):
        bet = rng.choice(asp)
        roll = rng.choice(ssp)
        hits += int(bet == roll)
    return hits

# Play 250 epochs with the biased die and keep each epoch's total reward.
rl = np.array([epoch() for _ in range(250)])
rl[:10]  # total rewards of the first ten epochs

array([ 93,  97, 103,  99, 106, 107, 104, 104, 100, 110])

In [15]:
# Average total reward per 600-roll epoch: a uniformly random bet over six
# faces matches the roll about one time in six.
rl.mean()

100.556

In [18]:
def epoch():
    """Play 600 die rolls, drawing each bet from the rolls seen so far.

    The action space starts with one entry per face and gains the observed
    roll after every round, so later bets are drawn in proportion to how often
    each face has come up.

    Returns:
        The total reward, i.e. the number of rounds in which the bet matched
        the roll.
    """
    seen_faces = [1, 2, 3, 4, 5, 6]  # the initial action space
    hits = 0
    for _ in range(600):
        bet, roll = rng.choice(seen_faces), rng.choice(ssp)
        hits += 1 if bet == roll else 0
        seen_faces.append(roll)
    return hits

# Play 250 epochs with the biased die and keep each epoch's total reward.
rl = np.array([epoch() for _ in range(250)])
rl[:10]  # total rewards of the first ten epochs


array([144, 160, 145, 134, 148, 148, 166, 175, 149, 134])

In [19]:
# Average total reward per 600-roll epoch when each bet is sampled from the
# rolls seen so far.
rl.mean()

152.928

In [21]:
def epoch():
    """Play 600 die rolls, always betting on the most frequent face so far.

    The action space starts with one entry per face and gains the observed
    roll after every round. Each bet is the face with the highest count in
    it; on a tie the face that entered the tally first wins.

    Returns:
        The total reward, i.e. the number of rounds in which the bet matched
        the roll.
    """
    tr = 0
    # Running tally of the action space. Updating it in place gives the same
    # counts as recounting the full history each round, without the
    # quadratic cost.
    counts = Counter([1, 2, 3, 4, 5, 6])  # the initial action space
    for _ in range(600):
        a = counts.most_common(1)[0][0]
        s = rng.choice(ssp)
        if a == s:
            tr += 1
        counts[s] += 1
    return tr

# Play 250 epochs with the biased die and keep each epoch's total reward.
rl = np.array([epoch() for _ in range(250)])
rl[:10]  # total rewards of the first ten epochs

array([258, 258, 278, 280, 280, 263, 283, 281, 278, 247])

In [22]:
# Average total reward per 600-roll epoch when each bet is the most frequent
# face seen so far.
rl.mean()

263.168

In [23]:
# Ten to the power of 40, printed with a comma between each group of three digits.
cm = pow(10, 40)
print(f'{cm:,}')

10,000,000,000,000,000,000,000,000,000,000,000,000,000
