In [9]:
import numpy as np
from numpy import random
from random import choices
import matplotlib.pyplot as plt
import levy
# import plotly.express as px

In [18]:
class Stimuli:
    """A reward source whose samples are normally distributed.

    Parameters
    ----------
    mu :
        Value passed to NumPy as the mean (``loc``) of the distribution.
    dev :
        Value passed to NumPy as the standard deviation (``scale``).
    """

    def __init__(self, mu, dev):
        self.mu, self.dev = mu, dev

    def sample(self):
        """Draw one reward from Normal(mu, dev) using NumPy's global RNG."""
        reward = np.random.normal(loc=self.mu, scale=self.dev)
        return reward

def exploit(candidates, total_time):
    """Explore-once-then-commit strategy.

    Every candidate is sampled exactly once; the candidate with the highest
    first sample is then sampled for each remaining step up to ``total_time``.

    Parameters
    ----------
    candidates :
        Sequence of objects exposing a ``sample()`` method.
    total_time :
        Total number of samples to draw, including the initial round. If it
        is smaller than ``len(candidates)`` the initial round is still drawn
        in full and no further samples are taken.

    Returns
    -------
    The sum of all rewards collected (initial round included).
    """
    first_round = []
    for option in candidates:
        first_round.append(option.sample())

    # Commit to whichever option looked best after a single look at each.
    best = candidates[int(np.argmax(first_round))]

    remaining_steps = range(len(candidates), total_time)
    committed = [best.sample() for _ in remaining_steps]
    return sum(first_round + committed)

def ucb1(candidates, total_time):
    """Run the UCB1 bandit strategy.

    Each candidate is sampled once to seed its history. At every later step
    ``t`` the candidate maximising ``mean + sqrt(2 * ln(t) / n_pulls)`` is
    sampled, where ``n_pulls`` is the number of samples recorded for it so far.

    Parameters
    ----------
    candidates :
        Sequence of objects exposing a ``sample()`` method.
    total_time :
        Step count at which the loop stops; steps run from
        ``len(candidates)`` up to (not including) ``total_time``.

    Returns
    -------
    (history, total) :
        ``history`` maps each candidate's position to the list of rewards
        drawn from it (initial sample first). ``total`` is the sum of the
        rewards drawn *after* the initial round.

    Notes
    -----
    NOTE(review): the initial round is stored in ``history`` but is not
    added to the returned total.
    """
    n_arms = len(candidates)
    scores = []
    # Seed the history with one sample per option.
    history = {idx: [c.sample()] for idx, c in enumerate(candidates)}
    for t in range(n_arms, total_time):
        # Empirical mean plus exploration bonus for every arm, in index order.
        mu_plus_ucb = [
            np.mean(pulls) + np.sqrt(2 * np.log(t) / len(pulls))
            for pulls in history.values()
        ]
        # Use the argmax position directly: looking the object up again with
        # candidates.index() credits the wrong arm when the same (or an equal)
        # candidate occurs more than once in the list.
        chosen_idx = int(np.argmax(mu_plus_ucb))
        score = candidates[chosen_idx].sample()
        scores.append(score)
        history[chosen_idx].append(score)
    return history, sum(scores)

def softmax(candidates, total_time):
    """Run a softmax (Boltzmann) exploration strategy.

    Each candidate is sampled once to seed its history. At every later step
    a candidate is drawn at random with probability proportional to
    ``exp(mean reward so far)``.

    Parameters
    ----------
    candidates :
        Sequence of objects exposing a ``sample()`` method.
    total_time :
        Step count at which the loop stops; steps run from
        ``len(candidates)`` up to (not including) ``total_time``.

    Returns
    -------
    (history, total) :
        ``history`` maps each candidate's position to the list of rewards
        drawn from it (initial sample first). ``total`` is the sum of the
        rewards drawn *after* the initial round.

    Notes
    -----
    NOTE(review): the initial round is stored in ``history`` but is not
    added to the returned total.
    """
    arm_ids = list(range(len(candidates)))
    scores = []
    # Sample each option once.
    history = {idx: [c.sample()] for idx, c in enumerate(candidates)}
    for _ in range(len(candidates), total_time):
        expected_rewards = np.array([np.mean(pulls) for pulls in history.values()])
        # Proper softmax: normalise by the sum of the exponentials (the
        # previous code divided by the sum of the raw means, which yields
        # infinite weights when the means sum to zero). Subtracting the max
        # first leaves the probabilities unchanged but prevents exp() overflow.
        exp_rewards = np.exp(expected_rewards - np.max(expected_rewards))
        weights = exp_rewards / np.sum(exp_rewards)
        # Draw the arm *index* so the reward is credited to the right history
        # entry even when the same candidate appears more than once.
        chosen_idx = choices(arm_ids, weights=weights.tolist())[0]
        score = candidates[chosen_idx].sample()
        scores.append(score)
        history[chosen_idx].append(score)
    return history, sum(scores)

def logistic_noise(candidates, total_time, sigma):  # Should specify the noise separately
    """Greedy selection on mean rewards perturbed by logistic noise.

    Each candidate is sampled once to seed its history. At every later step
    one logistic draw per candidate is taken, centred on that candidate's
    mean reward so far with scale ``sigma``, and the candidate with the
    largest noisy value is sampled.

    Parameters
    ----------
    candidates :
        Sequence of objects exposing a ``sample()`` method.
    total_time :
        Step count at which the loop stops; steps run from
        ``len(candidates)`` up to (not including) ``total_time``.
    sigma :
        Scale passed to ``numpy.random.logistic``.
        TODO: what scale and shape are appropriate is still open.

    Returns
    -------
    (history, total) :
        ``history`` maps each candidate's position to the list of rewards
        drawn from it (initial sample first). ``total`` is the sum of the
        rewards drawn *after* the initial round.

    Notes
    -----
    NOTE(review): the initial round is stored in ``history`` but is not
    added to the returned total.
    """
    scores = []
    history = {idx: [c.sample()] for idx, c in enumerate(candidates)}
    for _ in range(len(candidates), total_time):
        expected_rewards = [np.mean(pulls) for pulls in history.values()]
        # One independent logistic perturbation per arm.
        noisy_rewards = np.random.logistic(expected_rewards, scale=sigma)
        # Use the argmax position directly: candidates.index() would credit
        # the first matching entry when a candidate is listed more than once.
        chosen_idx = int(np.argmax(noisy_rewards))
        score = candidates[chosen_idx].sample()
        scores.append(score)
        history[chosen_idx].append(score)
    return history, sum(scores)

def levy_noise(candidates, total_time, sig):  # Consider pregenerating noise
    """Greedy selection on mean rewards perturbed by pre-generated Levy noise.

    Each candidate is sampled once to seed its history. A noise matrix with
    one row per candidate and one column per time step is generated up front
    via ``levy.random(alpha=1.3, beta=0, mu=0, sigma=sig, ...)``. At every
    later step ``t`` column ``t`` of that matrix is added to the candidates'
    mean rewards so far, and the candidate with the largest noisy value is
    sampled.

    Parameters
    ----------
    candidates :
        Sequence of objects exposing a ``sample()`` method.
    total_time :
        Step count at which the loop stops; steps run from
        ``len(candidates)`` up to (not including) ``total_time``. Also the
        number of noise columns generated.
    sig :
        Forwarded as ``sigma`` to ``levy.random``
        (presumably the scale of the stable distribution -- confirm against
        the ``levy`` package documentation).

    Returns
    -------
    (history, total) :
        ``history`` maps each candidate's position to the list of rewards
        drawn from it (initial sample first). ``total`` is the sum of the
        rewards drawn *after* the initial round.

    Notes
    -----
    NOTE(review): the initial round is stored in ``history`` but is not
    added to the returned total. The first ``len(candidates)`` noise columns
    are generated but never read.
    """
    n_arms = len(candidates)
    scores = []
    # Initialise history and sample options once.
    history = {idx: [c.sample()] for idx, c in enumerate(candidates)}
    # Named `noise` rather than `levy_noise` so the local does not shadow
    # this function's own name.
    noise = levy.random(alpha=1.3, beta=0, mu=0, sigma=sig, shape=(n_arms, total_time))
    for t in range(n_arms, total_time):
        expected_rewards = np.array([np.mean(pulls) for pulls in history.values()])
        noisy_rewards = expected_rewards + noise[:, t]
        # Use the argmax position directly: candidates.index() would credit
        # the first matching entry when a candidate is listed more than once.
        chosen_idx = int(np.argmax(noisy_rewards))
        score = candidates[chosen_idx].sample()
        scores.append(score)
        history[chosen_idx].append(score)
    return history, sum(scores)

2