In [1]:
import plotly.graph_objects as go
import plotly.express as px

In [7]:
def bandit_matrix(l, h, birth_l, birth_h,e):
    L = zero_matrix(QQ, 3)
    L[0, 0] = 1 - l - h
    L[1, 0] = l
    L[2, 0] = h
    L[0, 1] = birth_l + e*(1-l-h)
    L[1, 1] = (1 - e) + e*l
    L[2, 1] = e*h
    L[0, 2] = birth_h
    L[1, 2] = 0 
    L[2, 2] = 1 
    return L

def is_essentially_real(x):
    if x.imag() == 0:
        return(True)
    else:
        return(False)
    
#Use of "if is_essentiallY_real(e)" rather than "if e in RR" is required since some computational errors
#    seem to come up in the eigenvalue computation, giving us things like x + 0.?e-80*I.

def get_leading_eigenvalue(L):
    evals = L.eigenvalues()
    moduli = [e.n() for e in evals if is_essentially_real(e)]
    moduli = [e for e in moduli if e >= 0]
    r = max(moduli)
    return(r)

def normalize(vec):
    tot = sum(vec)
    vec = vec/tot
    return(vec)

def get_leading_evec(L):
    r = get_leading_eigenvalue(L)
    evecs = L.eigenvectors_right()
    arrs = [e[0].n() for e in evecs]
    i = arrs.index(r)
    leading_evec = evecs[i][1][0]
    leading_evec = normalize(leading_evec).n()
    return(leading_evec)


In [18]:
r = get_leading_eigenvalue(L)

In [8]:
L = bandit_matrix(0.3, 0.03, 1, 10, 0)

In [10]:
L

[52937048251597/79010519778503                             1                            10]
[                         3/10                             1                             0]
[                        3/100                             0                             1]

In [19]:
v = get_leading_evec(L)
v

(0.655163541877161, 0.313487689202581, 0.0313487689202581)

In [12]:
l = 0.3
h = 0.03
birth_l = 1
birth_h = 10

In [106]:
L = bandit_matrix(3/10, 0.03, 1, 10, 85/100)
get_leading_eigenvalue(L)

1.62986274174579

In [7]:
def get_optimal_epsilon(l, h, birth_l, birth_h):
    def fitness_e(e):
        L = bandit_matrix(l, h, birth_l, birth_h,e)
        r_e = get_leading_eigenvalue(L)
        return r_e
    e_opt = find_local_maximum(fitness_e, 0, 1)[1]
    return e_opt
        
        
    

In [245]:
get_optimal_epsilon(3/10, 0.28495/10, 1, 10)

6.507816260210173e-09

In [247]:
get_optimal_epsilon(3/10, 0.0284989625215530, 1, 10)

0.9999999713002414

In [8]:
def is_essentially_zero(x):
    if x < 10^(-6):
        return True
    else:
        return False
    
def is_essentially_one(x):
    if x > 1 - 10^(-6) and  1 >= x:
        return True
    else:
        return False



In [9]:
def find_upper_bound_on_h(l, birth_l, birth_h):
    found_bound = False
    h_bound = 0
    while not found_bound:
        h_bound = h_bound + 1
        attempt = get_optimal_epsilon(l, h, birth_l, birth_h)
        if is_essentially_one(attempt):
            found_bound = True
    return h_bound
    



In [10]:
def find_disc_in_h(l, birth_l, birth_h):
    #If there is a lion in the desert, and you want to catch it, what do you do?
    #Build a fence through the middle of the desert. If it is on the right, repeat there, idem for left. 
    #Eventually, you catch the lion. 
    h_lower = 0
    h_upper = 1 - l
    for n in range(12):
        h_test = (h_lower + h_upper)/2
        opt_epsilon = get_optimal_epsilon(l, h_test, birth_l, birth_h)
        if is_essentially_one(opt_epsilon):
            #going down
            h_lower = h_lower
            h_upper = h_test
        else:
            #going up
            h_lower = h_test
            h_upper = h_upper
    return h_test
        

In [79]:
find_disc_in_h(3/10, 1, 10).n()

0.0285400390625000

In [311]:
find_disc_in_h(4/10, 1, 10).n()

0.0244628906250000

In [239]:
h_vec = []
for birth_h in range(1, 200):
    h_disc = find_disc_in_h(3/10, 1, birth_h/10)
    h_vec.append(h_disc)
    print( 'done with {}'.format(birth_h))
    


done with 99/5000
done with 99/5000
done with 99/5000
done with 99/5000
done with 99/5000
done with 99/5000
done with 99/5000
done with 99/5000
done with 99/5000
done with 99/5000
done with 99/5000
done with 99/5000
done with 99/5000
done with 99/5000
done with 99/5000
done with 99/5000
done with 99/5000
done with 99/5000
done with 99/5000
done with 99/5000
done with 99/5000
done with 99/5000
done with 99/5000
done with 99/5000
done with 99/5000
done with 99/5000
done with 99/5000
done with 99/5000
done with 99/5000
done with 99/5000
done with 99/5000
done with 99/5000
done with 99/5000
done with 99/5000
done with 99/5000
done with 99/5000
done with 99/5000
done with 99/5000
done with 99/5000
done with 99/5000
done with 99/5000
done with 99/5000
done with 99/5000
done with 99/5000
done with 99/5000
done with 99/5000
done with 99/5000
done with 99/5000
done with 99/5000
done with 99/5000
done with 99/5000
done with 99/5000
done with 99/5000
done with 99/5000
done with 99/5000
done with 

KeyboardInterrupt: 

In [246]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=[birth_h/10 for birth_h in range(1, 200)], y=lyapunov_h_vec,
                    mode='markers',
                        marker = dict(color = '#d62728')))

fig.update_layout(xaxis_title = "beta_h", yaxis_title = "h_disc (beta_h)")

fig.show()

#### Onto the reinforcement learning side of things.

In [156]:
def immortal_no_birth_bandit_matrix(l, h, e):
    L = zero_matrix(QQ, 3)
    L[0, 0] = (1 - l - h)
    L[1, 0] = l
    L[2, 0] = h
    L[0, 1] = e*(1-l-h)
    L[1, 1] = (1 - e) + e*l
    L[2, 1] = e*h
    L[0, 2] = 0
    L[1, 2] = 0 
    L[2, 2] = 1 
    return L

def get_expected_findings(l, h, birth_l, birth_h, e, n_days = 1000):
    B = immortal_no_birth_bandit_matrix(l, h, e)
    dist = vector([1, 0, 0])
    exp_find_vec = []
    for d in range(n_days):
        dist = B*dist.n()        
        exp_find = birth_l*dist[1] + birth_h*dist[2]
        exp_find_vec.append(exp_find)
    return exp_find_vec

def rl_obj(l, h, birth_l, birth_h, e, gamma):
    efv = get_expected_findings(l, h, birth_l, birth_h, e)
    discounted_vec = [(gamma^d)*reward for d, reward in enumerate(efv)]
    return sum(discounted_vec)    

In [212]:
def rl_obj_is_increasing_in_epsilon(l, h, birth_l, birth_h, gamma = 0.8):
    
    ofv95 = rl_obj(l, h, birth_l, birth_h, 0.95, gamma)
    ofv100 = rl_obj(l, h, birth_l, birth_h, 1, gamma)
    
    if ofv100 > ofv95:
        return True
    else:
        return False

def rl_opt_epsilon(l, h, birth_l, birth_h, gamma = 0.8):
    #Relies on the assumption that the objective function \sum_t gamma^t E[r_t] is either increasing or
    #decreasing in epsilon. 
    
    if rl_obj_is_increasing_in_epsilon(l, h, birth_l, birth_h, gamma):
        return 1
    else:
        return 0

    
    

In [308]:
h_vec = []
opt_e_vec = []

for h in range(100):
    print('starting {}'.format(h))
    h = h/5000
    h_vec.append(h)
    opt_e = rl_opt_epsilon(0.6, h, 1, 10, 0.99)
    opt_e_vec.append(opt_e)
    

starting 0
starting 1
starting 2
starting 3
starting 4
starting 5
starting 6
starting 7
starting 8
starting 9
starting 10
starting 11
starting 12
starting 13
starting 14
starting 15
starting 16
starting 17
starting 18
starting 19
starting 20
starting 21
starting 22
starting 23
starting 24
starting 25
starting 26
starting 27
starting 28
starting 29
starting 30
starting 31
starting 32
starting 33
starting 34
starting 35
starting 36
starting 37
starting 38
starting 39
starting 40
starting 41
starting 42
starting 43
starting 44
starting 45
starting 46
starting 47
starting 48
starting 49
starting 50
starting 51
starting 52
starting 53
starting 54
starting 55
starting 56
starting 57
starting 58
starting 59
starting 60
starting 61
starting 62
starting 63
starting 64
starting 65
starting 66
starting 67
starting 68
starting 69
starting 70
starting 71
starting 72
starting 73
starting 74
starting 75
starting 76
starting 77
starting 78
starting 79
starting 80
starting 81
starting 82
starting 83
st

In [309]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=h_vec, y=opt_e_vec,
                    mode='markers'))
fig.show()

In [205]:
def rl_find_disc_in_h(l, birth_l, birth_h, gamma = 0.8):
    h_lower = 0
    h_upper = 1 - l
    for n in range(20):
        h_test = (h_lower + h_upper)/2
        opt_epsilon = rl_opt_epsilon(l, h_test, birth_l, birth_h, gamma)
        if is_essentially_one(opt_epsilon):
            #going down
            h_lower = h_lower
            h_upper = h_test
        else:
            #going up
            h_lower = h_test
            h_upper = h_upper
    return h_test

In [312]:
rl_find_disc_in_h(0.3, 1, 10)

0.0152173042297363

In [316]:
rl_find_disc_in_h(0.3, 1, 10)

0.0152173042297363

In [274]:
rl_h_vec_gamma_star = []
for birth_h in range(1, 200):
    rl_h_disc_gamma_star = rl_find_disc_in_h(3/10, 1, birth_h/10, gamma_star)
    
    rl_h_vec_gamma_star.append(rl_h_disc_gamma_star)
    
    print( 'done with {}'.format(birth_h))

done with 1
done with 2
done with 3
done with 4
done with 5
done with 6
done with 7
done with 8
done with 9
done with 10
done with 11
done with 12
done with 13
done with 14
done with 15
done with 16
done with 17
done with 18
done with 19
done with 20
done with 21
done with 22
done with 23
done with 24
done with 25
done with 26
done with 27
done with 28
done with 29
done with 30
done with 31
done with 32
done with 33
done with 34
done with 35
done with 36
done with 37
done with 38
done with 39
done with 40
done with 41
done with 42
done with 43
done with 44
done with 45
done with 46
done with 47
done with 48
done with 49
done with 50
done with 51
done with 52
done with 53
done with 54
done with 55
done with 56
done with 57
done with 58
done with 59
done with 60
done with 61
done with 62
done with 63
done with 64
done with 65
done with 66
done with 67
done with 68
done with 69
done with 70
done with 71
done with 72
done with 73
done with 74
done with 75
done with 76
done with 77
done wit

In [277]:
gamma_star

0.617541503906250

In [227]:
rl_h_vec80 = []
rl_h_vec99 = []
rl_h_vec40 = []
lyapunov_h_vec = []

for birth_h in range(1, 200):
    rl_h_disc80 = rl_find_disc_in_h(3/10, 1, birth_h/10, 0.8)
    rl_h_disc99 = rl_find_disc_in_h(3/10, 1, birth_h/10, 0.99)
    rl_h_disc40 = rl_find_disc_in_h(3/10, 1, birth_h/10, 0.4)

    lyap_h_disc = find_disc_in_h(3/10, 1, birth_h/10)
    
    rl_h_vec80.append(rl_h_disc80)
    rl_h_vec99.append(rl_h_disc99)
    rl_h_vec40.append(rl_h_disc40)

    lyapunov_h_vec.append(lyap_h_disc)
    
    print( 'done with {}'.format(birth_h))
    

done with 1
done with 2
done with 3
done with 4
done with 5
done with 6
done with 7
done with 8
done with 9
done with 10
done with 11
done with 12
done with 13
done with 14
done with 15
done with 16
done with 17
done with 18
done with 19
done with 20
done with 21
done with 22
done with 23
done with 24
done with 25
done with 26
done with 27
done with 28
done with 29
done with 30
done with 31
done with 32
done with 33
done with 34
done with 35
done with 36
done with 37
done with 38
done with 39
done with 40
done with 41
done with 42
done with 43
done with 44
done with 45
done with 46
done with 47
done with 48
done with 49
done with 50
done with 51
done with 52
done with 53
done with 54
done with 55
done with 56
done with 57
done with 58
done with 59
done with 60
done with 61
done with 62
done with 63
done with 64
done with 65
done with 66
done with 67
done with 68
done with 69
done with 70
done with 71
done with 72
done with 73
done with 74
done with 75
done with 76
done with 77
done wit

In [237]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=[birth_h/10 for birth_h in range(1, 200)], y=rl_h_vec40,
                    mode='markers',
                    name = 'RL optimality -- gamma = 0.4'))
fig.add_trace(go.Scatter(x=[birth_h/10 for birth_h in range(1, 200)], y=lyapunov_h_vec,
                    mode='markers',
                    name = 'Evolutionary optimality'))
fig.add_trace(go.Scatter(x=[birth_h/10 for birth_h in range(1, 200)], y=rl_h_vec80,
                    mode='markers',
                    name = 'RL optimality -- gamma = 0.8'))
fig.add_trace(go.Scatter(x=[birth_h/10 for birth_h in range(1, 200)], y=rl_h_vec99,
                    mode='markers',
                    name = 'RL optimality -- gamma = 0.99'))



fig.update_layout(legend=dict(
    yanchor="top",
    y=0.99,
    xanchor="left",
    x=0.68
))

fig.update_layout(xaxis_title = "beta_h", yaxis_title = "h_disc (beta_h)")

fig.show()

#### It looks like, having fixed $l,\beta_l$, there is some $\gamma^*$ satisfying $\varepsilon_{\text{RL}} = \varepsilon^*$ for every $h, \beta_h$. 

In [247]:
def diff_in_h_disc(gamma):
    evol_h_disc = find_disc_in_h(0.3, 1, 1.1)
    rl_h_disc = rl_find_disc_in_h(0.3, 1, 1.1, gamma)
    diff = evol_h_disc - rl_h_disc
    return diff.n()

In [272]:
gamma_star = bisection(diff_in_h_disc, 0.6, 0.7, 10^(-6))

starting round 1
starting round 2
starting round 3
starting round 4
starting round 5
starting round 6
starting round 7
starting round 8
starting round 9
starting round 10
starting round 11
starting round 12


In [273]:
gamma_star

0.617541503906250

In [259]:
%time diff_in_h_disc(0.4)

CPU times: user 9.16 s, sys: 4 ms, total: 9.16 s
Wall time: 9.17 s


-0.0450924873352052

In [262]:
%time diff_in_h_disc(0.7)

CPU times: user 9.03 s, sys: 8 ms, total: 9.03 s
Wall time: 9.03 s


0.0299065589904786

#### Generalize: for any $l, \beta_l$, find $\gamma^*(l, \beta_l)$ for which $\varepsilon^* = \varepsilon_{\text{RL}}^{\gamma^*}$

In [301]:
def get_gamma_star(l, beta_l, prec = 10^(-6)):
    def diff_in_h_disc(gamma):
        evol_h_disc = find_disc_in_h(l, beta_l, beta_l + 0.1)
        rl_h_disc = rl_find_disc_in_h(l, beta_l, beta_l + 0.1, gamma)
        diff = evol_h_disc - rl_h_disc    
        return diff
    gamma_star = bisection(diff_in_h_disc, 0, 1, prec)
    return gamma_star

In [318]:
get_gamma_star(0.5, 1)

starting round 1
starting round 2
starting round 3
starting round 4
starting round 5
starting round 6
starting round 7
starting round 8
starting round 9
starting round 10
starting round 11
starting round 12
starting round 13


10117/16384

In [317]:
get_gamma_star(0.3, 1)

starting round 1
starting round 2
starting round 3
starting round 4
starting round 5
starting round 6
starting round 7
starting round 8
starting round 9
starting round 10
starting round 11
starting round 12
starting round 13


10117/16384

In [305]:
gamma_star_vec = []
beta_l_vec = [x/10 for x in range(1, 50)] + [x for x in range(6, 14)] + [2*x for x in range(7, 15)]

for beta_l in beta_l_vec:
    gamma_star = get_gamma_star(0.3, beta_l)
    gamma_star_vec.append(gamma_star)
    print(beta_l)

starting round 1
starting round 2
starting round 3
starting round 4
starting round 5
starting round 6
starting round 7
starting round 8
starting round 9
starting round 10
starting round 11
starting round 12
starting round 13
starting round 14
starting round 15
starting round 16
starting round 17
1/10
starting round 1
starting round 2
starting round 3
starting round 4
starting round 5
starting round 6
starting round 7
starting round 8
starting round 9
starting round 10
starting round 11
starting round 12
starting round 13
starting round 14
starting round 15
starting round 16
starting round 17
starting round 18
starting round 19
1/5
starting round 1
starting round 2
starting round 3
starting round 4
starting round 5
starting round 6
starting round 7
starting round 8
starting round 9
starting round 10
starting round 11
starting round 12
starting round 13
starting round 14
starting round 15
starting round 16
starting round 17
3/10
starting round 1
starting round 2
starting round 3
starting

starting round 6
starting round 7
starting round 8
starting round 9
starting round 10
starting round 11
starting round 12
starting round 13
starting round 14
starting round 15
31/10
starting round 1
starting round 2
starting round 3
starting round 4
starting round 5
starting round 6
starting round 7
starting round 8
starting round 9
starting round 10
starting round 11
starting round 12
starting round 13
starting round 14
16/5
starting round 1
starting round 2
starting round 3
starting round 4
starting round 5
starting round 6
starting round 7
starting round 8
starting round 9
starting round 10
starting round 11
starting round 12
starting round 13
starting round 14
33/10
starting round 1
starting round 2
starting round 3
starting round 4
starting round 5
starting round 6
starting round 7
starting round 8
starting round 9
starting round 10
starting round 11
starting round 12
starting round 13
starting round 14
17/5
starting round 1
starting round 2
starting round 3
starting round 4
start

In [306]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=beta_l_vec, y=gamma_star_vec,
                    mode='markers',
                    name = 'gamma*'))

fig.update_layout(xaxis_title = "beta_l", yaxis_title = "gamma*")

In [282]:
%time gamma_star20 = get_gamma_star(0.2, 1)

starting round 1
starting round 2
starting round 3
starting round 4
starting round 5
starting round 6
starting round 7
starting round 8
starting round 9
starting round 10
starting round 11
starting round 12
starting round 13
CPU times: user 6min 55s, sys: 180 ms, total: 6min 55s
Wall time: 6min 55s


#### Old: keeping just in case I ever need it again

In [270]:
def bisection(f,a,b,prec, verbose = True):
    '''Approximate solution of f(x)=0 on interval [a,b] by bisection method.

    Parameters
    ----------
    f : function
        The function for which we are trying to approximate a solution f(x)=0.
    a,b : numbers
        The interval in which to search for a solution. The function returns
        None if f(a)*f(b) >= 0 since a solution is not guaranteed.
    N : (positive) integer
        The number of iterations to implement.

    Returns
    -------
    x_N : number
        The midpoint of the Nth interval computed by the bisection method. The
        initial interval [a_0,b_0] is given by [a,b]. If f(m_n) == 0 for some
        midpoint m_n = (a_n + b_n)/2, then the function returns this solution.
        If all signs of values f(a_n), f(b_n) and f(m_n) are the same at any
        iteration, the bisection method fails and return None.

    Examples
    --------
    >>> f = lambda x: x**2 - x - 1
    >>> bisection(f,1,2,25)
    1.618033990263939
    >>> f = lambda x: (2*x - 1)*(x - 3)
    >>> bisection(f,0,1,10)
    0.5
    '''
#    if f(a)*f(b) >= 0:
#        print("Bisection method fails.")
#        return None
    a_n = a
    b_n = b
    m_n = (a_n + b_n)/2
    n = 0
    while abs(f(m_n)) > prec:
        n = n+1
        if verbose:
            print('starting round {}'.format(n))
        m_n = (a_n + b_n)/2
        f_m_n = f(m_n)
        if f(a_n)*f_m_n < 0:
            a_n = a_n
            b_n = m_n
        elif f(b_n)*f_m_n < 0:
            a_n = m_n
            b_n = b_n
        elif f_m_n == 0:
            print("Found exact solution.")
            return m_n
        else:
            print("Bisection method fails.")
            return None
    return (a_n + b_n)/2

In [None]:
def immortal_squirrel_findings(l, h, birth_l, birth_h, e, num_days = 10 ):
    #stochastic elements
    arm_probs = [1 - l - h, l, h]
    stay_search_probs = [1 - e, e]
    arms_dist = GeneralDiscreteDistribution(arm_probs)
    stay_search_dist = GeneralDiscreteDistribution(stay_search_probs)
    
    #environmental
    payout_vec = [0, birth_l, birth_h]
    findings = [0]*num_days
    
    #initialize
    at_null = False
    at_low = False
    at_high = False
    
    #first day
    arm = arms_dist.get_random_element()
    found = payout_vec[arm]
    findings[0] = found
    
    if arm == 0:
        at_null = True
    if arm == 1:
        at_low = True
    if arm == 2:
        at_high = True
    
    #now run through the rest of the days
    for d in range(1, num_days):
        #decide: will I search today?
        if at_high:
            will_search = False
        if at_null:
            will_search = True
            at_null = False
        if at_low:
            will_search = stay_search_dist.get_random_element()
            at_low = False
            
        if will_search:
            arm = arms_dist.get_random_element()
        
        #If I searched, I pick an arm and go. If I didn't my arm is the same as yesterday.
        found = payout_vec[arm]
        if arm == 0:
            at_null = True
        if arm == 1:
            at_low = True
        if arm == 2:
            at_high = True
            
            
        findings[d] = found
        
    return findings
            
        
        