# RL Bandit Notebook

In [None]:
# Notebook for RL Work
# Written By Bryan

# ==== Bandit Imports ====
from Bandits import *    # Custom bandit classes

import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import log_loss
from scipy.optimize import minimize



# ==== Mermaid display imports ====
import base64
from IPython.display import Image, display
import matplotlib.pyplot as plt

def mm(graph):
    """Render a Mermaid diagram inline using the mermaid.ink image service.

    Args:
        graph: Mermaid diagram source as a string.

    The source is UTF-8 encoded, base64-encoded and appended to the
    mermaid.ink image URL; the resulting URL is shown with IPython's
    ``display``. Nothing is returned.
    """
    graph_bytes = graph.encode("utf8")
    # The encoded text becomes a URL path segment, so use the URL-safe
    # alphabet: standard base64 can emit "/" and "+", which split or corrupt
    # the path for some diagram sources.
    encoded = base64.urlsafe_b64encode(graph_bytes).decode("ascii")
    display(Image(url="https://mermaid.ink/img/" + encoded))

## Environment Setup 

#### Trial Configurations

In [None]:
# Create and show the environment shared by the bandit tasks in later cells.
# Testbed is not imported by name; it presumably comes from the star-import of
# the project-local Bandits module.
# NOTE(review): presumably num_problems is the number of independent bandit
# problems and stationary=False makes the reward means drift over time -
# confirm against the Testbed definition.
env = Testbed(num_problems=1000, stationary=False)
env.show_mean()  # called again after the e-greedy simulation in the next cell


In [None]:
# Model params
# start_val is a kernel-global: the Softmax and Softmax_UCB cells below reuse
# it without redefining it, so this cell has to run first.
start_val = 1


# Create eGreedy task
# Legacy call kept for reference; it uses trial_params / reward_values, which
# are not defined anywhere in this notebook.
# eg_01 = create_bandit_task(model_type="EG", model_params=[0,0.1], steps=trial_params, reward_values=reward_values, start_val=start_val)
# NOTE(review): this task receives `env` itself, whereas the later cells pass
# env.new() - confirm that sharing the original environment is intentional.
# NOTE(review): model_params is presumably [alpha, epsilon] - confirm.
eg_01 = create_bandit_task(model_type="EG", env=env, model_params=[0,0.1], steps=500, start_val=start_val )

# Run every task in the list and show its results and action plots.
tasks = [ eg_01]
for task in tasks:
    task.simulate()
    task.show_results()
    task.show_actions()

# Show the environment means again, now that the simulation has run.
env.show_mean()


## Softmax

In [None]:
# Model params for the softmax bandit.
# These names are kernel-globals; the Softmax_UCB cell reassigns
# model_type, alpha, temp and model_params.
model_type = "SM"
alpha = 0.6
temp = 10                       # inverse temp
model_params = [alpha, temp]
# start_val is reused from the e-greedy cell above (it must have been run).
# start_val = 1


# Arguments are passed by keyword, matching the other create_bandit_task
# calls in this notebook, so the call does not depend on positional order.
SM = create_bandit_task(model_type=model_type, env=env.new(), model_params=model_params, steps=500, start_val=start_val)
SM.simulate()
SM.show_actions()
SM.show_results()

## Softmax_UCB

In [None]:
# Model params for the softmax + UCB bandit.
# This overwrites the model_type, alpha, temp and model_params globals set by
# the Softmax cell; the SM task built there already holds its own arguments.
model_type = "SMUCB"
alpha = 0.1
temp = 5
uncertParam = .2
#uncert = 2
model_params = [alpha, temp, uncertParam]
# start_val is reused from the e-greedy cell (it must have been run).
# start_val = 1

# env.new() is passed rather than env itself, as in the Softmax cell.
SMUCB = create_bandit_task(model_type=model_type, env=env.new(), model_params=model_params, steps=500, start_val=start_val)

# Only the simulation is run here; no plots are requested for this task.
SMUCB.simulate()

### Simulate Performance

In [None]:
# Pass the three bandit tasks created in the cells above to the project's
# summary helper; those cells must have been run first.
model_performance_summary(bandits=[eg_01, SM, SMUCB])
# NOTE: the disabled debug line below would not work if re-enabled: print()
# would receive a generator object, and iterating vars(eg_01) yields keys
# only - use vars(eg_01).items() in a for loop instead.
# print(f"Param Name:{key}\n {value}" for key, value in vars(eg_01))
# env.show_mean()

## Log-Likelihood

In [None]:
# Run the log-likelihood simulation on each of the three bandit tasks.
# NOTE(review): presumably this fills the selection_matrix / total_LL_array
# attributes referenced in the disabled prints below - confirm in Bandits.
eg_01.simulate_LL()
SM.simulate_LL()
SMUCB.simulate_LL()
# print(SMUCB.selection_matrix)

# Disabled debug output of the per-model log-likelihood arrays.
# print(eg_01.total_LL_array)
# print(SM.total_LL_array)
# print(SMUCB.total_LL_array)


## Model Validation

#### Param Recovery
Fit the data to the model it was simulated with.
<ol>
<li>Simulate choices and rewards using chosen parameter values (these act as the ground truth).</li>
<li>
Run the fitting procedure (negative log-likelihood function) on the simulated choice AND reward data.
See if we can recover the simulated parameter values (the estimates are the fitted parameter values).
</li>
</ol>

In [None]:
# Run this to display a Mermaid flowchart of the parameter-recovery procedure.
# mm() (defined in the first cell) displays an image served from a
# mermaid.ink URL, so rendering it requires network access.
mm("""
flowchart LR
   subgraph input-parameters
   direction TB
   id1([epsilon])
   id2([alpha])
   id3([etc...])
   id1 ~~~ id2 ~~~ id3
   end

   subgraph output-parameters
   direction TB
   id4([epsilon])
   id5([alpha])
   id6([etc...])
   id4 ~~~ id5 ~~~ id6
   end

   simData[(Simulated Data)]
   model((RL Model))
   n([noise])
   fm{{fit data to model}}

   input-parameters --"(1) input"--> model
   model --"(1) creates"--> simData
   simData -."(2) (optional) Add".-> n
   simData --> fm
   n -.-> fm
   fm ~~~ output-parameters
   fm --"(3) Outputs"--> output-parameters
   input-parameters o-- "(4) compare"  --o output-parameters

""")

#### Model Recovery

Fit data to all models
<ol>
<li>
Use the simulated choice and reward data from the parameter recovery step
</li>

<li>
Calculate negative log likelihood values using each model on the simulated data
</li>
</ol>
