# The purpose of this notebook is for experimenting with code snippets

Monte Carlo in MC

In [1]:
from adaptive_time.environment import MountainCar
from adaptive_time.features import Fourier_Features
import numpy as np
from adaptive_time.utils import argmax

In [2]:
phi = Fourier_Features()
phi.init_fourier_features(2,2)
phi.init_state_normalizers(np.array([0.6,0.07]), np.array([-1.2,-0.07]))

phi.get_fourier_feature([-0.5,0.0])

array([ 1.00000000e+00,  6.12323400e-17, -1.00000000e+00,  3.42020143e-01,
       -9.39692621e-01, -3.42020143e-01, -7.66044443e-01, -6.42787610e-01,
        7.66044443e-01])

In [9]:



def foo(xs,tol):
    c = int(len(xs) / 2)
    f = lambda xs: len(xs) * ( xs[0] + xs[-1]) / 2 if len(xs) else 0
    if abs(f(xs) - (r := f(xs[:c]) + f(xs[c:]))) < tol: 
        return 1, r
    else: 
        x, fa = foo(xs[:c], tol / 2)
        y, fb = foo(xs[c:], tol / 2)
        return x + y, fa + fb


def generate_trajectory(env, weights, episode, num_actions = 3):
    trajectory = []
    s = env.reset(episode)
    done = False
    while not done:
        #state_feature = phi.get_fourier_feature(s)
        #action_value = np.zeros(num_actions) #For mountain car
        #for action in range(num_actions):
        #    action_value[action] = np.inner(state_feature, weights[action])
        #a = argmax(action_value)
        a = np.random.choice(3)
        r, s_, _, done = env.step(a)
        trajectory.append([s, a, r, s_])
        s = s_
    return trajectory



def gradient_monte_carlo(env, phi, weights, num_actions, episode, alpha = 0.01):
    
    rewards = []
    trajectory = generate_trajectory(env, weights, episode, num_actions)
    N = len(trajectory)
    G = 0
    for t in range(N-1,-1,-1):
        state, action, reward, _ = trajectory[t]
        G = G + reward
        x = phi.get_fourier_feature(state)

        weights = weights + alpha * (G - min(0,max(-200,np.inner(x,weights))))*x 

        rewards.append(reward)
    return weights, sum(rewards)



num_actions = 3
num_episodes = 10000
env = MountainCar(es=10000)
phi = Fourier_Features()
phi.init_fourier_features(2,2)
phi.init_state_normalizers(np.array([0.6,0.07]), np.array([-1.2,-0.07]))
d = len(phi.get_fourier_feature([0.0,0.0]))
Returns = []

weights_gradient = np.zeros(d)
x = phi.get_fourier_feature([-0.3,0.0])
for episode in range(num_episodes):
    weights_gradient, returns = gradient_monte_carlo(env, phi, weights_gradient, num_actions, episode, alpha=0.01)
    Returns.append(returns)
    print(episode, -200.0 - np.inner(x,weights_gradient))



    

0 -28.09435419371033
1 -15.867203473969909
2 -24.737815093880585
3 -10.19482739459113
4 -8.256263808259718
5 -8.15313065009201
6 -6.1379368447231855
7 -54.39177277078147
8 -22.01312035419096
9 -28.074793289512286
10 -19.793805845589475
11 -25.484098860984062
12 -26.54624716960231
13 -14.846324485748795
14 -71.92607697208649
15 -52.40307328874022
16 -27.887522447448873
17 -48.62666455867128
18 -66.64099613131097
19 -54.888228378743605
20 -10.914325273104367
21 -10.185449902121519
22 -47.23744399558916
23 -80.40747637530569
24 -67.59982718615501
25 -72.35811314060751
26 -73.35479871361237
27 -71.89588818507471
28 -72.31898343841951
29 -82.10899617309309
30 -77.62821727056988
31 -18.96937265739797
32 -13.30560187543233
33 -35.68203023134802
34 -21.980179727750226
35 -24.915976581358876
36 -25.12488028362469
37 -28.673115772730455
38 -26.66849000266012
39 -81.265120146177
40 -21.656386042110967
41 -19.0239193731866
42 -24.029654236648128
43 -50.92248573162519
44 -46.60606904746572
45 -45.0

# Quadrature

-65.6078558294331


-152.4820143884892

In [None]:
def trapeziod_rule(a,b):
    return (b-a) * (function(a) + function(b)) / 2.0

def integral_rule(a,b):
    return trapeziod_rule(a,b)

def function(x):
    return x**5

def adaptive_quadrature(a0, b0, tol0):
    sums = 0.0
    n = 1
    a = np.zeros(100000)
    b = np.zeros(100000)
    tol = np.zeros(100000)
    app = np.zeros(100000)
    iters = 0
    
    a[1] = a0
    b[1] = b0
    tol[1] = tol0
    app[1] = integral_rule(a0,b0)
    
    while n > 0:
        iters += 1
        c = (a[n] + b[n]) / 2
        oldapp = app[n]
        app[n] = integral_rule(a[n], c)
        app[n+1] = integral_rule(c, b[n])
        
        if np.abs(oldapp - (app[n]+app[n+1])) < 3 * tol[n]:
            sums = sums + app[n] + app[n+1] #success
            n = n - 1 #done with interval
            
        else:    #divide into two intervals
            b[n+1] = b[n] #setup new intervals
            b[n] = c  #setup new intervals
            a[n+1] = c #setup new intervals
            tol[n] = tol[n] / 2
            tol[n+1] = tol[n]
            n = n + 1
    return sums,iters
        
        

In [None]:
b = 15
truth = b**5/5

quad,iters = adaptive_quadrature(0, b, 0.00005)

h = iters
x = np.linspace(0, b, num=h)
y = function(x)
trap = np.trapz(y,x)

In [None]:
print(np.abs(truth - quad))
print(np.abs(truth - trap))

print(iters - h)

1746562.5000270708
1746562.5000363498
0


In [16]:
import math
def adaptive_sum(traj, tol0=10**(-2)):
    steps = {}
    N = len(traj)
    app = np.zeros(N+1000)
    tol = np.zeros(N+1000)
    N_begin = np.ones(N+1000,dtype=np.int8) * int(-1)
    N_end = np.ones(N+1000,dtype=np.int8) * int(-1)

    sums = 0.0


    steps[N_begin[0]] = 1
    steps[N_end[0]] = 1

    n = 0
    N_begin[0] = 0
    N_end[0] = N - 1
    tol[0] = tol0
    app[0] = (N_end[0] - N_begin[0] + 1) / (2) * ( traj[N_begin[0]] + traj[N_end[0]] )
    iters = 0
    while n > -1: 
        iters += 1
        if (N_end[n] - N_begin[n] + 1) % 2 == 0:
            N_split = int( (N_end[n] - N_begin[n] + 1) / 2 )

        else:
            N_split = math.ceil((N_end[n] - N_begin[n] + 1) / 2)

        old_app = app[n]
        app[n] = (N_split - N_begin[n]) / (2) * ( traj[N_split - 1] + traj[N_begin[n]] )
        app[n+1] = (N_end[n] - N_split + 1) / (2) * ( traj[N_split] + traj[N_end[n]] )
        
        print(iters, traj[N_begin[n]], traj[N_split - 1], traj[N_split], traj[N_end[n]])

        if abs(old_app - (app[n] + app[n+1])) < 3 * tol[n]:
            sums = sums + app[n] + app[n+1]
            n = n - 1

        else:
            N_end[n+1] = N_end[n]
            N_end[n] = N_split - 1
            N_begin[n+1] = N_split 
            tol[n] = tol[n] / 2
            tol[n+1] = tol[n]
            n = n + 1
            
    return sums


# Quadrature for Discrete Integrals (i.e. sums)

In [53]:
from adaptive_time.samplers import AdaptiveQuadratureSampler

horizon = 20000
dt = 1
num_steps = horizon - 1
tolerance_init = 0.01

sampler = AdaptiveQuadratureSampler(
    dt=dt,
    num_steps=num_steps,
    tolerance_init=tolerance_init,
    update_when_best=False,
)

def foo2(xs,tol,level):
    c=int(np.floor(len(xs)/2))
    #print(xs, c)
    f = lambda xs: len(xs)*(xs[0]+xs[-1])/2 if len(xs) else 0
    if abs(f(xs) - (r:=f(xs[:c]) + f(xs[c:]))) < tol: return 1, r, [level+c]
    else: 
        x, a, cs_a = foo2(xs[:c],tol/2, level)
        y, b, cs_b = foo2(xs[c:],tol/2, level+c)
        return x+y+1, a+b, cs_a + cs_b + [level+c]

def foo(xs,tol):
    c = int(len(xs) / 2)
    f = lambda xs: len(xs) * ( xs[0] + xs[-1]) / 2 if len(xs) else 0
    if abs(f(xs) - (r := f(xs[:c]) + f(xs[c:]))) < tol: 
        return 1, r
    else: 
        x, fa = foo(xs[:c], tol / 2)
        y, fb = foo(xs[c:], tol / 2)
        return x + y, fa + fb
    

In [56]:
for _ in range(1000):
    traj = np.zeros(horizon)
    traj[0:np.random.randint(horizon)] = -1
    pivots = []
    calls, approx_sum = foo(traj, 0.01)
    print(calls, approx_sum - sum(traj))
    approx_sum, total_seg, num_calls = foo2(traj, 0.01, 0)
    print(total_seg - sum(traj))

14 0.0
0.0
14 0.0
0.0
15 0.0
0.0
11 0.0
0.0
15 0.0
0.0
14 0.0
0.0
11 0.0
0.0
11 0.0
0.0
14 0.0
0.0
15 0.0
0.0
13 0.0
0.0
9 0.0
0.0
15 0.0
0.0
15 0.0
0.0
14 0.0
0.0
14 0.0
0.0
11 0.0
0.0
15 0.0
0.0
14 0.0
0.0
15 0.0
0.0
13 0.0
0.0
15 0.0
0.0
15 0.0
0.0
15 0.0
0.0
15 0.0
0.0
14 0.0
0.0
15 0.0
0.0
12 0.0
0.0
14 0.0
0.0
15 0.0
0.0
15 0.0
0.0
14 0.0
0.0
14 0.0
0.0
12 0.0
0.0
14 0.0
0.0
14 0.0
0.0
14 0.0
0.0
11 0.0
0.0
15 0.0
0.0
14 0.0
0.0
14 0.0
0.0
14 0.0
0.0
14 0.0
0.0
14 0.0
0.0
14 0.0
0.0
14 0.0
0.0
15 0.0
0.0
14 0.0
0.0
15 0.0
0.0
14 0.0
0.0
15 0.0
0.0
12 0.0
0.0
14 0.0
0.0
14 0.0
0.0
14 0.0
0.0
15 0.0
0.0
15 0.0
0.0
15 0.0
0.0
14 0.0
0.0
14 0.0
0.0
14 0.0
0.0
15 0.0
0.0
14 0.0
0.0
14 0.0
0.0
15 0.0
0.0
14 0.0
0.0
15 0.0
0.0
15 0.0
0.0
14 0.0
0.0
15 0.0
0.0
12 0.0
0.0
14 0.0
0.0
14 0.0
0.0
15 0.0
0.0
12 0.0
0.0
14 0.0
0.0
3 0.0
0.0
14 0.0
0.0
14 0.0
0.0
15 0.0
0.0
15 0.0
0.0
15 0.0
0.0
14 0.0
0.0
9 0.0
0.0
12 0.0
0.0
15 0.0
0.0
15 0.0
0.0
14 0.0
0.0
14 0.0
0.0
14 0.0
0.0
14 0.0
0.0
14

In [7]:
s = np.ones((10,1))

In [9]:
s.flatten()

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])