# [KDD Cup|Humanities Track Tutorial Q-Learning](https://compete.hexagon-ml.com/tutorial/kdd-cuphumanities-track-tutorial/)

### KDD Cup|Humanities Track Tutorial Q-Learning
This Tutorial builds on the previous tutorial to demonstrate a baseline implementation of a standard Reinforcement Learning (RL) Algorithm

### State

$S \in \{1,2,3,4,5\}$

### Action
$A_S = [a_{ITN},a_{IRS}]$

where  $a_{ITN} \in [0,1]$ and $a_{IRS} \in [0,1]$

### Reward
$R_{\pi} \in (- \infty,\infty)$

![](image/rewards2.png)

In [130]:
import numpy as np
from collections import defaultdict
import random
# !pip3 install git+https://github.com/slremy/netsapi --user --upgrade
from netsapi.challenge import * 

### Creating a Valid Submission from Agent Code:

In [140]:
class BanditRPM(object):
    def __init__(self,env):
        self.env = env
        self.action_resolution = 0.1
        self.actions = self.actionSpace()    
        
        self.ActionValue = {}
        self.init = (2,5)
        for key in self.actions:
            self.ActionValue[key] = self.init
        print("self.ActionValue=",self.ActionValue)        
        
    def actionSpace(self):
        x = np.arange(0,1+self.action_resolution,self.action_resolution)
        y = 1-x
        x = x.reshape(len(x),1)
        y = y.reshape(len(y),1)
        xy = np.concatenate((x, y), axis=1)
        xy = [tuple(row) for row in xy]
        
        return xy
    
    def choose_action(self):
        """
        Use Thompson sampling to choose action. Sample from each posterior and choose the max of the samples.
        """
        samples = {}
        print(i)
        for key in self.ActionValue:
            print("key=",key)
#             print("key=",self.ActionValue[key][0])
#             print("self.ActionValue[key][1]=",self.ActionValue[key][1])
            samples[key] = np.random.beta(self.ActionValue[key][0], self.ActionValue[key][1])
            print("samples[key]=",samples[key])
        max_value =  max(samples, key=samples.get)
        print(max_value)
        return max_value    

    def update(self,action,reward):
        """
        Update parameters of posteriors, which are Beta distributions
        """
        a, b = self.ActionValue[action]
        a = a+reward/100
        b = b + 1 - reward/100
        a = 0.001 if a <= 0 else a
        b = 0.001 if b <= 0 else b
        
        self.ActionValue[action] = (a, b)
        
    def train(self):
        for _ in range(20): #Do not change
            self.env.reset()
            while True:
                action =  self.choose_action()
                print("action=",list(action))
                nextstate, reward, done, _ = self.env.evaluateAction(list(action))
                self.update(action,reward)
                if done:
                    break


    def generate(self):
        best_policy = None
        best_reward = -float('Inf'
        self.train()
        best_policy = {state: list(self.choose_action()) for state in range(1,6)}
        best_reward = self.env.evaluatePolicy(best_policy)
        
        print(best_policy, best_reward)
        
        return best_policy, best_reward                    

### Run the EvaluateChallengeSubmission Method with your Agent Class

In [None]:
EvaluateChallengeSubmission(ChallengeSeqDecEnvironment, BanditRPM, "BanditRPM_submission.csv")

self.ActionValue= {(0.0, 1.0): (2, 5), (0.1, 0.9): (2, 5), (0.2, 0.8): (2, 5), (0.30000000000000004, 0.7): (2, 5), (0.4, 0.6): (2, 5), (0.5, 0.5): (2, 5), (0.6000000000000001, 0.3999999999999999): (2, 5), (0.7000000000000001, 0.29999999999999993): (2, 5), (0.8, 0.19999999999999996): (2, 5), (0.9, 0.09999999999999998): (2, 5), (1.0, 0.0): (2, 5)}
[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.24791396764610288
key= (0.1, 0.9)
samples[key]= 0.15375726962764952
key= (0.2, 0.8)
samples[key]= 0.14558791911483812
key= (0.30000000000000004, 0.7)
samples[key]= 0.13250792309228285
key= (0.4, 0.6)
samples[key]= 0.15527600050303247
key= (0.5, 0.5)
samples[key]= 0.25692874392645926
key= (0.6000000000000001, 0.3999999999999999)
samples[key]= 0.2606856103734172
key= (0.7000000000000001, 0.29999999999999993)
samples[key]= 0.030966668433419407
key= (0.8, 0.19999999999999996)
samples[key]= 0.3784666861583946
key= (0.9, 0.09999999999999998)
samples[key]= 0.29622397671592304
key= (1.0, 0.0)
samples[key]= 0.1

[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.25532376516220984
key= (0.1, 0.9)
samples[key]= 0.7009868076312823
key= (0.2, 0.8)
samples[key]= 0.23732929806085618
key= (0.30000000000000004, 0.7)
samples[key]= 0.03783279788668924
key= (0.4, 0.6)
samples[key]= 0.3590705952816539
key= (0.5, 0.5)
samples[key]= 0.1555686538122572
key= (0.6000000000000001, 0.3999999999999999)
samples[key]= 0.3312009678090808
key= (0.7000000000000001, 0.29999999999999993)
samples[key]= 0.028366261130286383
key= (0.8, 0.19999999999999996)
samples[key]= 0.6635069378111463
key= (0.9, 0.09999999999999998)
samples[key]= 0.2979398406996261
key= (1.0, 0.0)
samples[key]= 0.30091626382524955
(0.1, 0.9)
action= [0.1, 0.9]
94  Evaluations Remaining
[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.23817999668284034
key= (0.1, 0.9)
samples[key]= 0.1012743995933566
key= (0.2, 0.8)
samples[key]= 0.147407332925472
key= (0.30000000000000004, 0.7)
samples[key]= 0.11199850216661604
key= (0.4, 0.6)
samples[key]= 0.09513749799547294
key= 

[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.5644245616925003
key= (0.1, 0.9)
samples[key]= 0.30698314602908056
key= (0.2, 0.8)
samples[key]= 0.7235758987762984
key= (0.30000000000000004, 0.7)
samples[key]= 0.43434229508040256
key= (0.4, 0.6)
samples[key]= 0.2879674314998536
key= (0.5, 0.5)
samples[key]= 0.09341142371825423
key= (0.6000000000000001, 0.3999999999999999)
samples[key]= 0.16934954728824456
key= (0.7000000000000001, 0.29999999999999993)
samples[key]= 0.28183748924125035
key= (0.8, 0.19999999999999996)
samples[key]= 0.14818961534885802
key= (0.9, 0.09999999999999998)
samples[key]= 0.3399723909712852
key= (1.0, 0.0)
samples[key]= 0.3193429384201904
(0.2, 0.8)
action= [0.2, 0.8]
83  Evaluations Remaining
[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.798978067894717
key= (0.1, 0.9)
samples[key]= 0.43338116529157517
key= (0.2, 0.8)
samples[key]= 0.36054526605761594
key= (0.30000000000000004, 0.7)
samples[key]= 0.2611193916353701
key= (0.4, 0.6)
samples[key]= 0.46198670819134885
key= 

[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.42243337400921566
key= (0.1, 0.9)
samples[key]= 0.2721918833086624
key= (0.2, 0.8)
samples[key]= 0.07567980760219126
key= (0.30000000000000004, 0.7)
samples[key]= 0.13653416664824738
key= (0.4, 0.6)
samples[key]= 0.019199930467335116
key= (0.5, 0.5)
samples[key]= 0.26686108592783464
key= (0.6000000000000001, 0.3999999999999999)
samples[key]= 0.062339861387106996
key= (0.7000000000000001, 0.29999999999999993)
samples[key]= 0.3939659133815787
key= (0.8, 0.19999999999999996)
samples[key]= 0.3114348322886676
key= (0.9, 0.09999999999999998)
samples[key]= 0.10342557430170969
key= (1.0, 0.0)
samples[key]= 0.13640025184110327
(0.0, 1.0)
action= [0.0, 1.0]
71  Evaluations Remaining
[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.40041514135061307
key= (0.1, 0.9)
samples[key]= 0.5812932782043708
key= (0.2, 0.8)
samples[key]= 0.6683576848399027
key= (0.30000000000000004, 0.7)
samples[key]= 0.27721523914713675
key= (0.4, 0.6)
samples[key]= 0.16598904741342982


[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.47730801259946015
key= (0.1, 0.9)
samples[key]= 0.15197793155631933
key= (0.2, 0.8)
samples[key]= 0.09203471130580167
key= (0.30000000000000004, 0.7)
samples[key]= 0.09364093656165244
key= (0.4, 0.6)
samples[key]= 0.17842149391025341
key= (0.5, 0.5)
samples[key]= 0.13249312279974648
key= (0.6000000000000001, 0.3999999999999999)
samples[key]= 0.041409323966943294
key= (0.7000000000000001, 0.29999999999999993)
samples[key]= 0.2169956852189532
key= (0.8, 0.19999999999999996)
samples[key]= 0.5349282602866151
key= (0.9, 0.09999999999999998)
samples[key]= 0.12021697603012817
key= (1.0, 0.0)
samples[key]= 0.15263144012102192
(0.8, 0.19999999999999996)
action= [0.8, 0.19999999999999996]
59  Evaluations Remaining
[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.5504869654050857
key= (0.1, 0.9)
samples[key]= 0.16009389806262636
key= (0.2, 0.8)
samples[key]= 0.2549164268723986
key= (0.30000000000000004, 0.7)
samples[key]= 0.12199418554175856
key= (0.4, 0.6)
sa

[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.4394171797971293
key= (0.1, 0.9)
samples[key]= 0.2660468682716079
key= (0.2, 0.8)
samples[key]= 0.41456737182588915
key= (0.30000000000000004, 0.7)
samples[key]= 0.15074164691984274
key= (0.4, 0.6)
samples[key]= 0.15434029280859596
key= (0.5, 0.5)
samples[key]= 0.12878155531698773
key= (0.6000000000000001, 0.3999999999999999)
samples[key]= 0.07345858720579851
key= (0.7000000000000001, 0.29999999999999993)
samples[key]= 0.1914224431989606
key= (0.8, 0.19999999999999996)
samples[key]= 0.36853288344973145
key= (0.9, 0.09999999999999998)
samples[key]= 0.30172961269340814
key= (1.0, 0.0)
samples[key]= 0.4835972591251031
(1.0, 0.0)
action= [1.0, 0.0]
47  Evaluations Remaining
[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.3417274991057407
key= (0.1, 0.9)
samples[key]= 0.11144530309396147
key= (0.2, 0.8)
samples[key]= 0.3186364930557621
key= (0.30000000000000004, 0.7)
samples[key]= 0.03698344520179876
key= (0.4, 0.6)
samples[key]= 0.04325704776417518
key

[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.3552290660655773
key= (0.1, 0.9)
samples[key]= 0.21856468663399428
key= (0.2, 0.8)
samples[key]= 0.2124693467517888
key= (0.30000000000000004, 0.7)
samples[key]= 0.03461788227706402
key= (0.4, 0.6)
samples[key]= 0.4705307567661494
key= (0.5, 0.5)
samples[key]= 0.28851547469530187
key= (0.6000000000000001, 0.3999999999999999)
samples[key]= 0.30788747684763945
key= (0.7000000000000001, 0.29999999999999993)
samples[key]= 0.25851865118767836
key= (0.8, 0.19999999999999996)
samples[key]= 0.35110436821233965
key= (0.9, 0.09999999999999998)
samples[key]= 0.08402859332675358
key= (1.0, 0.0)
samples[key]= 0.43053956315616965
(0.4, 0.6)
action= [0.4, 0.6]
35  Evaluations Remaining
[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.4513187902111079
key= (0.1, 0.9)
samples[key]= 0.13586279486473735
key= (0.2, 0.8)
samples[key]= 0.38239197731912683
key= (0.30000000000000004, 0.7)
samples[key]= 0.12192767971535118
key= (0.4, 0.6)
samples[key]= 0.2690676416842388
ke

[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.46415169026231984
key= (0.1, 0.9)
samples[key]= 0.2825128410993654
key= (0.2, 0.8)
samples[key]= 0.2706427577986182
key= (0.30000000000000004, 0.7)
samples[key]= 0.1524760076310406
key= (0.4, 0.6)
samples[key]= 0.3656310403808286
key= (0.5, 0.5)
samples[key]= 0.1228472285371332
key= (0.6000000000000001, 0.3999999999999999)
samples[key]= 0.2968298676610723
key= (0.7000000000000001, 0.29999999999999993)
samples[key]= 0.3139403297572255
key= (0.8, 0.19999999999999996)
samples[key]= 0.2321649935147215
key= (0.9, 0.09999999999999998)
samples[key]= 0.22521745416575617
key= (1.0, 0.0)
samples[key]= 0.49935186714147717
(1.0, 0.0)
action= [1.0, 0.0]
23  Evaluations Remaining
[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.4829134683494375
key= (0.1, 0.9)
samples[key]= 0.24908628760280918
key= (0.2, 0.8)
samples[key]= 0.08797154337444621
key= (0.30000000000000004, 0.7)
samples[key]= 0.25927537108117044
key= (0.4, 0.6)
samples[key]= 0.2120903459327147
key= (0

[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.6331070527761011
key= (0.1, 0.9)
samples[key]= 0.12568870401880639
key= (0.2, 0.8)
samples[key]= 0.47211541272664725
key= (0.30000000000000004, 0.7)
samples[key]= 0.016105422791267865
key= (0.4, 0.6)
samples[key]= 0.20677139854487606
key= (0.5, 0.5)
samples[key]= 0.041924920275015785
key= (0.6000000000000001, 0.3999999999999999)
samples[key]= 0.16729015248956175
key= (0.7000000000000001, 0.29999999999999993)
samples[key]= 0.05475531971202096
key= (0.8, 0.19999999999999996)
samples[key]= 0.4459552327616914
key= (0.9, 0.09999999999999998)
samples[key]= 0.13877835362230187
key= (1.0, 0.0)
samples[key]= 0.5104273918325887
(0.0, 1.0)
action= [0.0, 1.0]
11  Evaluations Remaining
[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.4891649247081691
key= (0.1, 0.9)
samples[key]= 0.265419361181543
key= (0.2, 0.8)
samples[key]= 0.31372661063180496
key= (0.30000000000000004, 0.7)
samples[key]= 0.32251956367520473
key= (0.4, 0.6)
samples[key]= 0.18124625776092987
k

[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.1988783969655584
key= (0.1, 0.9)
samples[key]= 0.11922616520358519
key= (0.2, 0.8)
samples[key]= 0.09489136189403027
key= (0.30000000000000004, 0.7)
samples[key]= 0.30406314355845243
key= (0.4, 0.6)
samples[key]= 0.04718670888602371
key= (0.5, 0.5)
samples[key]= 0.36894540556034505
key= (0.6000000000000001, 0.3999999999999999)
samples[key]= 0.8063623472191477
key= (0.7000000000000001, 0.29999999999999993)
samples[key]= 0.12520562471433097
key= (0.8, 0.19999999999999996)
samples[key]= 0.2027337402885081
key= (0.9, 0.09999999999999998)
samples[key]= 0.48748022939250396
key= (1.0, 0.0)
samples[key]= 0.30032183506891924
(0.6000000000000001, 0.3999999999999999)
action= [0.6000000000000001, 0.3999999999999999]
104  Evaluations Remaining
[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.6465692965407677
key= (0.1, 0.9)
samples[key]= 0.49697279644349507
key= (0.2, 0.8)
samples[key]= 0.16808005852216237
key= (0.30000000000000004, 0.7)
samples[key]= 0.29863178

[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.3060124079114455
key= (0.1, 0.9)
samples[key]= 0.24295444005404268
key= (0.2, 0.8)
samples[key]= 0.19118411174110564
key= (0.30000000000000004, 0.7)
samples[key]= 0.011100466544681728
key= (0.4, 0.6)
samples[key]= 0.08603800984883969
key= (0.5, 0.5)
samples[key]= 0.21122798569432513
key= (0.6000000000000001, 0.3999999999999999)
samples[key]= 0.3677021322663326
key= (0.7000000000000001, 0.29999999999999993)
samples[key]= 0.06842980316844474
key= (0.8, 0.19999999999999996)
samples[key]= 0.2060818124560643
key= (0.9, 0.09999999999999998)
samples[key]= 0.269572915821967
key= (1.0, 0.0)
samples[key]= 0.22556603356885907
(0.6000000000000001, 0.3999999999999999)
action= [0.6000000000000001, 0.3999999999999999]
92  Evaluations Remaining
[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.22521941406747806
key= (0.1, 0.9)
samples[key]= 0.36285805965469764
key= (0.2, 0.8)
samples[key]= 0.37736831211922134
key= (0.30000000000000004, 0.7)
samples[key]= 0.037803962

[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.4995666129939037
key= (0.1, 0.9)
samples[key]= 0.6723044470516659
key= (0.2, 0.8)
samples[key]= 0.35422969351190053
key= (0.30000000000000004, 0.7)
samples[key]= 0.11499183469197301
key= (0.4, 0.6)
samples[key]= 0.04522400446700674
key= (0.5, 0.5)
samples[key]= 0.3075896413509885
key= (0.6000000000000001, 0.3999999999999999)
samples[key]= 0.3266754614504834
key= (0.7000000000000001, 0.29999999999999993)
samples[key]= 0.17250290433800555
key= (0.8, 0.19999999999999996)
samples[key]= 0.48240096435588553
key= (0.9, 0.09999999999999998)
samples[key]= 0.5168676531246735
key= (1.0, 0.0)
samples[key]= 0.3177727798627694
(0.1, 0.9)
action= [0.1, 0.9]
80  Evaluations Remaining
[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.2807107657454968
key= (0.1, 0.9)
samples[key]= 0.6346560583655255
key= (0.2, 0.8)
samples[key]= 0.4132899864626108
key= (0.30000000000000004, 0.7)
samples[key]= 0.24684936425351264
key= (0.4, 0.6)
samples[key]= 0.05382479799407039
key= (

[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.35577457271975904
key= (0.1, 0.9)
samples[key]= 0.37030517318262157
key= (0.2, 0.8)
samples[key]= 0.17689284911781022
key= (0.30000000000000004, 0.7)
samples[key]= 0.0502910809974144
key= (0.4, 0.6)
samples[key]= 0.051930071144005954
key= (0.5, 0.5)
samples[key]= 0.19878644060692385
key= (0.6000000000000001, 0.3999999999999999)
samples[key]= 0.3244111009401552
key= (0.7000000000000001, 0.29999999999999993)
samples[key]= 0.07150445475502414
key= (0.8, 0.19999999999999996)
samples[key]= 0.599350830113779
key= (0.9, 0.09999999999999998)
samples[key]= 0.38570464630312384
key= (1.0, 0.0)
samples[key]= 0.5330778719668355
(0.8, 0.19999999999999996)
action= [0.8, 0.19999999999999996]
68  Evaluations Remaining
[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.473587744766685
key= (0.1, 0.9)
samples[key]= 0.45939307946663316
key= (0.2, 0.8)
samples[key]= 0.04693390840236518
key= (0.30000000000000004, 0.7)
samples[key]= 0.16907988954619144
key= (0.4, 0.6)
sampl

[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.37941536714603646
key= (0.1, 0.9)
samples[key]= 0.45679780918923113
key= (0.2, 0.8)
samples[key]= 0.2937317718600088
key= (0.30000000000000004, 0.7)
samples[key]= 0.1293238200315344
key= (0.4, 0.6)
samples[key]= 0.138351753164789
key= (0.5, 0.5)
samples[key]= 0.16838262629984468
key= (0.6000000000000001, 0.3999999999999999)
samples[key]= 0.177956976179801
key= (0.7000000000000001, 0.29999999999999993)
samples[key]= 0.13426356687539287
key= (0.8, 0.19999999999999996)
samples[key]= 0.34365972490355656
key= (0.9, 0.09999999999999998)
samples[key]= 0.24401783488603723
key= (1.0, 0.0)
samples[key]= 0.3012917219790053
(0.1, 0.9)
action= [0.1, 0.9]
56  Evaluations Remaining
[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.10790672184439522
key= (0.1, 0.9)
samples[key]= 0.5122566121199277
key= (0.2, 0.8)
samples[key]= 0.3613466803335046
key= (0.30000000000000004, 0.7)
samples[key]= 0.009103139181932549
key= (0.4, 0.6)
samples[key]= 0.06829283874533901
key= 

[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.22593564206925973
key= (0.1, 0.9)
samples[key]= 0.4058740520529583
key= (0.2, 0.8)
samples[key]= 0.25439884852741723
key= (0.30000000000000004, 0.7)
samples[key]= 0.3921753237756077
key= (0.4, 0.6)
samples[key]= 0.13664515509120992
key= (0.5, 0.5)
samples[key]= 0.43109213329923296
key= (0.6000000000000001, 0.3999999999999999)
samples[key]= 0.2023315248763686
key= (0.7000000000000001, 0.29999999999999993)
samples[key]= 0.13429833880552716
key= (0.8, 0.19999999999999996)
samples[key]= 0.34010673764381555
key= (0.9, 0.09999999999999998)
samples[key]= 0.11176600520947266
key= (1.0, 0.0)
samples[key]= 0.4087105167936022
(0.5, 0.5)
action= [0.5, 0.5]
44  Evaluations Remaining
[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.17472929651293756
key= (0.1, 0.9)
samples[key]= 0.36304829896229973
key= (0.2, 0.8)
samples[key]= 0.04074291106410421
key= (0.30000000000000004, 0.7)
samples[key]= 0.14010371075731326
key= (0.4, 0.6)
samples[key]= 0.2686342086396496
ke

[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.33928111815222894
key= (0.1, 0.9)
samples[key]= 0.4812115535070317
key= (0.2, 0.8)
samples[key]= 0.5241653876868712
key= (0.30000000000000004, 0.7)
samples[key]= 0.018714516328985173
key= (0.4, 0.6)
samples[key]= 0.05587187237216509
key= (0.5, 0.5)
samples[key]= 0.3879115484610846
key= (0.6000000000000001, 0.3999999999999999)
samples[key]= 0.1505703517046305
key= (0.7000000000000001, 0.29999999999999993)
samples[key]= 0.14211475343783822
key= (0.8, 0.19999999999999996)
samples[key]= 0.332802469589972
key= (0.9, 0.09999999999999998)
samples[key]= 0.20998021857757881
key= (1.0, 0.0)
samples[key]= 0.6227852131665683
(1.0, 0.0)
action= [1.0, 0.0]
32  Evaluations Remaining
[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.3543896032813052
key= (0.1, 0.9)
samples[key]= 0.3365654419295208
key= (0.2, 0.8)
samples[key]= 0.1597383174269555
key= (0.30000000000000004, 0.7)
samples[key]= 0.17708087452439816
key= (0.4, 0.6)
samples[key]= 0.16450541059837562
key= (

[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.47967396809065094
key= (0.1, 0.9)
samples[key]= 0.5154901565509432
key= (0.2, 0.8)
samples[key]= 0.15785247707383465
key= (0.30000000000000004, 0.7)
samples[key]= 0.08512556841789944
key= (0.4, 0.6)
samples[key]= 0.1641094750516721
key= (0.5, 0.5)
samples[key]= 0.19510471015154496
key= (0.6000000000000001, 0.3999999999999999)
samples[key]= 0.05034619681516476
key= (0.7000000000000001, 0.29999999999999993)
samples[key]= 0.056150590776086126
key= (0.8, 0.19999999999999996)
samples[key]= 0.2035496911030602
key= (0.9, 0.09999999999999998)
samples[key]= 0.2419651672304939
key= (1.0, 0.0)
samples[key]= 0.3980343438184971
(0.1, 0.9)
action= [0.1, 0.9]
20  Evaluations Remaining
[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.5355800211361823
key= (0.1, 0.9)
samples[key]= 0.49216115869963184
key= (0.2, 0.8)
samples[key]= 0.182178184480469
key= (0.30000000000000004, 0.7)
samples[key]= 0.04467653697439848
key= (0.4, 0.6)
samples[key]= 0.04405016144308023
key=

[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.5680938929678823
key= (0.1, 0.9)
samples[key]= 0.4932446856615598
key= (0.2, 0.8)
samples[key]= 0.29531650898603157
key= (0.30000000000000004, 0.7)
samples[key]= 0.017317763063658627
key= (0.4, 0.6)
samples[key]= 0.11398994570501883
key= (0.5, 0.5)
samples[key]= 0.524188447001081
key= (0.6000000000000001, 0.3999999999999999)
samples[key]= 0.3968390411713324
key= (0.7000000000000001, 0.29999999999999993)
samples[key]= 0.018683566263086725
key= (0.8, 0.19999999999999996)
samples[key]= 0.4437017383924874
key= (0.9, 0.09999999999999998)
samples[key]= 0.20496132700624736
key= (1.0, 0.0)
samples[key]= 0.4420699364731995
(0.0, 1.0)
action= [0.0, 1.0]
8  Evaluations Remaining
[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.6992749969108634
key= (0.1, 0.9)
samples[key]= 0.36115318467896845
key= (0.2, 0.8)
samples[key]= 0.2548305931538976
key= (0.30000000000000004, 0.7)
samples[key]= 0.5633698170382521
key= (0.4, 0.6)
samples[key]= 0.09826310519668194
key= (

[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.2558193906926787
key= (0.1, 0.9)
samples[key]= 0.6534686667667019
key= (0.2, 0.8)
samples[key]= 0.3051548184037727
key= (0.30000000000000004, 0.7)
samples[key]= 0.16122661599472274
key= (0.4, 0.6)
samples[key]= 0.533792283186652
key= (0.5, 0.5)
samples[key]= 0.4900560903470846
key= (0.6000000000000001, 0.3999999999999999)
samples[key]= 0.2465591917869225
key= (0.7000000000000001, 0.29999999999999993)
samples[key]= 0.10187561249354005
key= (0.8, 0.19999999999999996)
samples[key]= 0.6522882878963254
key= (0.9, 0.09999999999999998)
samples[key]= 0.028749720757902376
key= (1.0, 0.0)
samples[key]= 0.15977690499221459
(0.1, 0.9)
action= [0.1, 0.9]
102  Evaluations Remaining
[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.30300784470116265
key= (0.1, 0.9)
samples[key]= 0.10850302147120255
key= (0.2, 0.8)
samples[key]= 0.2992125775491936
key= (0.30000000000000004, 0.7)
samples[key]= 0.49096397057310615
key= (0.4, 0.6)
samples[key]= 0.34991733035186495
key=

[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.45071650876890645
key= (0.1, 0.9)
samples[key]= 0.20010446284485886
key= (0.2, 0.8)
samples[key]= 0.0947997606298343
key= (0.30000000000000004, 0.7)
samples[key]= 0.1441020659742248
key= (0.4, 0.6)
samples[key]= 0.2999616521216237
key= (0.5, 0.5)
samples[key]= 0.3396331019738856
key= (0.6000000000000001, 0.3999999999999999)
samples[key]= 0.16929119293049283
key= (0.7000000000000001, 0.29999999999999993)
samples[key]= 0.3515778709397254
key= (0.8, 0.19999999999999996)
samples[key]= 0.22713567142258997
key= (0.9, 0.09999999999999998)
samples[key]= 0.2150874344838279
key= (1.0, 0.0)
samples[key]= 0.39136041071109984
(0.0, 1.0)
action= [0.0, 1.0]
90  Evaluations Remaining
[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.5628224005101568
key= (0.1, 0.9)
samples[key]= 0.06284366682080852
key= (0.2, 0.8)
samples[key]= 0.2038466193036176
key= (0.30000000000000004, 0.7)
samples[key]= 0.1689510579234813
key= (0.4, 0.6)
samples[key]= 0.3283183720409748
key= (0

[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.491579359219377
key= (0.1, 0.9)
samples[key]= 0.27970852135375646
key= (0.2, 0.8)
samples[key]= 0.10945986535420979
key= (0.30000000000000004, 0.7)
samples[key]= 0.40269312990639033
key= (0.4, 0.6)
samples[key]= 0.26808743244470085
key= (0.5, 0.5)
samples[key]= 0.14569681185724365
key= (0.6000000000000001, 0.3999999999999999)
samples[key]= 0.11903917250826065
key= (0.7000000000000001, 0.29999999999999993)
samples[key]= 0.22355445610885505
key= (0.8, 0.19999999999999996)
samples[key]= 0.2861575932631263
key= (0.9, 0.09999999999999998)
samples[key]= 0.13600510041482772
key= (1.0, 0.0)
samples[key]= 0.30503820524710273
(0.0, 1.0)
action= [0.0, 1.0]
78  Evaluations Remaining
[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.321146675538913
key= (0.1, 0.9)
samples[key]= 0.2140018999679975
key= (0.2, 0.8)
samples[key]= 0.1513581055284725
key= (0.30000000000000004, 0.7)
samples[key]= 0.12389181535643778
key= (0.4, 0.6)
samples[key]= 0.11090979864807793
key=

[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.4749264866285882
key= (0.1, 0.9)
samples[key]= 0.32533453931202017
key= (0.2, 0.8)
samples[key]= 0.09941976470343634
key= (0.30000000000000004, 0.7)
samples[key]= 0.061630945588231324
key= (0.4, 0.6)
samples[key]= 0.2635379516332387
key= (0.5, 0.5)
samples[key]= 0.38935035762325876
key= (0.6000000000000001, 0.3999999999999999)
samples[key]= 0.33401370771726824
key= (0.7000000000000001, 0.29999999999999993)
samples[key]= 0.2670834792227588
key= (0.8, 0.19999999999999996)
samples[key]= 0.12264663591216479
key= (0.9, 0.09999999999999998)
samples[key]= 0.2070809203236735
key= (1.0, 0.0)
samples[key]= 0.2775759124210564
(0.0, 1.0)
action= [0.0, 1.0]
66  Evaluations Remaining
[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.4761966964914629
key= (0.1, 0.9)
samples[key]= 0.12569756248913452
key= (0.2, 0.8)
samples[key]= 0.13002842950057533
key= (0.30000000000000004, 0.7)
samples[key]= 0.08626016413069248
key= (0.4, 0.6)
samples[key]= 0.19620043804952297
ke

[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.5366707726532183
key= (0.1, 0.9)
samples[key]= 0.1781297921894498
key= (0.2, 0.8)
samples[key]= 0.2515163389115374
key= (0.30000000000000004, 0.7)
samples[key]= 0.5914926930036002
key= (0.4, 0.6)
samples[key]= 0.1379460432640095
key= (0.5, 0.5)
samples[key]= 0.4632906123948703
key= (0.6000000000000001, 0.3999999999999999)
samples[key]= 0.10908286155811542
key= (0.7000000000000001, 0.29999999999999993)
samples[key]= 0.24951037666863693
key= (0.8, 0.19999999999999996)
samples[key]= 0.1719331615318668
key= (0.9, 0.09999999999999998)
samples[key]= 0.1125606147501486
key= (1.0, 0.0)
samples[key]= 0.1867350928179293
(0.30000000000000004, 0.7)
action= [0.30000000000000004, 0.7]
54  Evaluations Remaining
[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.46562866356099025
key= (0.1, 0.9)
samples[key]= 0.27633243582682016
key= (0.2, 0.8)
samples[key]= 0.17024716878802681
key= (0.30000000000000004, 0.7)
samples[key]= 0.0831704794933086
key= (0.4, 0.6)
samples[k

[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.6041927814341969
key= (0.1, 0.9)
samples[key]= 0.20018790227679734
key= (0.2, 0.8)
samples[key]= 0.1989933320048327
key= (0.30000000000000004, 0.7)
samples[key]= 0.3000658254837288
key= (0.4, 0.6)
samples[key]= 0.024249971353744165
key= (0.5, 0.5)
samples[key]= 0.42823192689598755
key= (0.6000000000000001, 0.3999999999999999)
samples[key]= 0.07424993587739302
key= (0.7000000000000001, 0.29999999999999993)
samples[key]= 0.26076548122541054
key= (0.8, 0.19999999999999996)
samples[key]= 0.40774640369756043
key= (0.9, 0.09999999999999998)
samples[key]= 0.20980681706980497
key= (1.0, 0.0)
samples[key]= 0.4403640294951896
(0.0, 1.0)
action= [0.0, 1.0]
42  Evaluations Remaining
[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.43266883462660344
key= (0.1, 0.9)
samples[key]= 0.25091875125867874
key= (0.2, 0.8)
samples[key]= 0.31325056353221625
key= (0.30000000000000004, 0.7)
samples[key]= 0.23996313310696077
key= (0.4, 0.6)
samples[key]= 0.11350189720878072


[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.41221486652182526
key= (0.1, 0.9)
samples[key]= 0.3230689342854227
key= (0.2, 0.8)
samples[key]= 0.27420898679180433
key= (0.30000000000000004, 0.7)
samples[key]= 0.24724538785664524
key= (0.4, 0.6)
samples[key]= 0.11663307864661554
key= (0.5, 0.5)
samples[key]= 0.047203229102594745
key= (0.6000000000000001, 0.3999999999999999)
samples[key]= 0.10740316453398265
key= (0.7000000000000001, 0.29999999999999993)
samples[key]= 0.22918127722555387
key= (0.8, 0.19999999999999996)
samples[key]= 0.27124721448169514
key= (0.9, 0.09999999999999998)
samples[key]= 0.026396001492572693
key= (1.0, 0.0)
samples[key]= 0.5046771680537683
(1.0, 0.0)
action= [1.0, 0.0]
30  Evaluations Remaining
[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.5003462678012351
key= (0.1, 0.9)
samples[key]= 0.19088291688382752
key= (0.2, 0.8)
samples[key]= 0.2050904357830317
key= (0.30000000000000004, 0.7)
samples[key]= 0.16294734769014166
key= (0.4, 0.6)
samples[key]= 0.14981450368573515

[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.4418416258283576
key= (0.1, 0.9)
samples[key]= 0.3215488696573963
key= (0.2, 0.8)
samples[key]= 0.3149678779037456
key= (0.30000000000000004, 0.7)
samples[key]= 0.212191882967802
key= (0.4, 0.6)
samples[key]= 0.07243866187559948
key= (0.5, 0.5)
samples[key]= 0.35636443956123914
key= (0.6000000000000001, 0.3999999999999999)
samples[key]= 0.23787342939913908
key= (0.7000000000000001, 0.29999999999999993)
samples[key]= 0.15811512518941465
key= (0.8, 0.19999999999999996)
samples[key]= 0.2966325321387569
key= (0.9, 0.09999999999999998)
samples[key]= 0.2009517823456082
key= (1.0, 0.0)
samples[key]= 0.5186452146195935
(1.0, 0.0)
action= [1.0, 0.0]
18  Evaluations Remaining
[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.39319005831739356
key= (0.1, 0.9)
samples[key]= 0.17206987225560902
key= (0.2, 0.8)
samples[key]= 0.3663775832944179
key= (0.30000000000000004, 0.7)
samples[key]= 0.28252484469444167
key= (0.4, 0.6)
samples[key]= 0.061638693347966074
key= 

[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.4427603901832295
key= (0.1, 0.9)
samples[key]= 0.0781931617774978
key= (0.2, 0.8)
samples[key]= 0.11595384902532711
key= (0.30000000000000004, 0.7)
samples[key]= 0.038289438561578244
key= (0.4, 0.6)
samples[key]= 0.1768012983037237
key= (0.5, 0.5)
samples[key]= 0.32731632759552415
key= (0.6000000000000001, 0.3999999999999999)
samples[key]= 0.436504365656709
key= (0.7000000000000001, 0.29999999999999993)
samples[key]= 0.26675611846395436
key= (0.8, 0.19999999999999996)
samples[key]= 0.41996209889504044
key= (0.9, 0.09999999999999998)
samples[key]= 0.23502617577303428
key= (1.0, 0.0)
samples[key]= 0.7205278650589741
(1.0, 0.0)
action= [1.0, 0.0]
6  Evaluations Remaining
[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.592706421522907
key= (0.1, 0.9)
samples[key]= 0.08971188179363053
key= (0.2, 0.8)
samples[key]= 0.47903298579968073
key= (0.30000000000000004, 0.7)
samples[key]= 0.1384493831870443
key= (0.4, 0.6)
samples[key]= 0.40310898865470485
key= (

[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.5095192425502911
key= (0.1, 0.9)
samples[key]= 0.48186793189232796
key= (0.2, 0.8)
samples[key]= 0.09580676799941412
key= (0.30000000000000004, 0.7)
samples[key]= 0.23343081464158472
key= (0.4, 0.6)
samples[key]= 0.3094719407989576
key= (0.5, 0.5)
samples[key]= 0.26570999282029983
key= (0.6000000000000001, 0.3999999999999999)
samples[key]= 0.3027533441371589
key= (0.7000000000000001, 0.29999999999999993)
samples[key]= 0.10797315036410367
key= (0.8, 0.19999999999999996)
samples[key]= 0.2694104983504373
key= (0.9, 0.09999999999999998)
samples[key]= 0.3425387219831751
key= (1.0, 0.0)
samples[key]= 0.4770377879487109
(0.0, 1.0)
action= [0.0, 1.0]
100  Evaluations Remaining
[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.10728284837358155
key= (0.1, 0.9)
samples[key]= 0.10142336062516522
key= (0.2, 0.8)
samples[key]= 0.41190022421452593
key= (0.30000000000000004, 0.7)
samples[key]= 0.34740888517556834
key= (0.4, 0.6)
samples[key]= 0.16625075467265937
ke

[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.3323232122248774
key= (0.1, 0.9)
samples[key]= 0.27720614383847364
key= (0.2, 0.8)
samples[key]= 0.1370777786464324
key= (0.30000000000000004, 0.7)
samples[key]= 0.04446236261893876
key= (0.4, 0.6)
samples[key]= 0.23555394405923782
key= (0.5, 0.5)
samples[key]= 0.4026456927062513
key= (0.6000000000000001, 0.3999999999999999)
samples[key]= 0.1647969516016607
key= (0.7000000000000001, 0.29999999999999993)
samples[key]= 0.2764352514153857
key= (0.8, 0.19999999999999996)
samples[key]= 0.3512322416770722
key= (0.9, 0.09999999999999998)
samples[key]= 0.6179028041634878
key= (1.0, 0.0)
samples[key]= 0.0969403287247769
(0.9, 0.09999999999999998)
action= [0.9, 0.09999999999999998]
88  Evaluations Remaining
[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.321369691089598
key= (0.1, 0.9)
samples[key]= 0.1604814587506442
key= (0.2, 0.8)
samples[key]= 0.3057499687451081
key= (0.30000000000000004, 0.7)
samples[key]= 0.013694675868451854
key= (0.4, 0.6)
samples[ke

[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.6601275433476103
key= (0.1, 0.9)
samples[key]= 0.1828170501523853
key= (0.2, 0.8)
samples[key]= 0.2409051934916033
key= (0.30000000000000004, 0.7)
samples[key]= 0.35476418508693625
key= (0.4, 0.6)
samples[key]= 0.39456229615375005
key= (0.5, 0.5)
samples[key]= 0.5313213997315653
key= (0.6000000000000001, 0.3999999999999999)
samples[key]= 0.23528872848134763
key= (0.7000000000000001, 0.29999999999999993)
samples[key]= 0.22493573656964383
key= (0.8, 0.19999999999999996)
samples[key]= 0.060533397356454195
key= (0.9, 0.09999999999999998)
samples[key]= 0.1828905089277951
key= (1.0, 0.0)
samples[key]= 0.06041064960192807
(0.0, 1.0)
action= [0.0, 1.0]
77  Evaluations Remaining
[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.27817018655440306
key= (0.1, 0.9)
samples[key]= 0.2557301017698955
key= (0.2, 0.8)
samples[key]= 0.10565343428366013
key= (0.30000000000000004, 0.7)
samples[key]= 0.10610921110088911
key= (0.4, 0.6)
samples[key]= 0.11591674283556795
ke

[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.40679039279207946
key= (0.1, 0.9)
samples[key]= 0.445049927176036
key= (0.2, 0.8)
samples[key]= 0.23246087523006045
key= (0.30000000000000004, 0.7)
samples[key]= 0.08351336368736167
key= (0.4, 0.6)
samples[key]= 0.05217178433496472
key= (0.5, 0.5)
samples[key]= 0.15105817746820885
key= (0.6000000000000001, 0.3999999999999999)
samples[key]= 0.45061399072359537
key= (0.7000000000000001, 0.29999999999999993)
samples[key]= 0.2381957029062402
key= (0.8, 0.19999999999999996)
samples[key]= 0.7287034487751646
key= (0.9, 0.09999999999999998)
samples[key]= 0.3157032228996098
key= (1.0, 0.0)
samples[key]= 0.7579228730783512
(1.0, 0.0)
action= [1.0, 0.0]
65  Evaluations Remaining
[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.30689238777585665
key= (0.1, 0.9)
samples[key]= 0.25758285066691733
key= (0.2, 0.8)
samples[key]= 0.07350935348904981
key= (0.30000000000000004, 0.7)
samples[key]= 0.14208469995231662
key= (0.4, 0.6)
samples[key]= 0.24331508256565998
key

[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.5405811511804364
key= (0.1, 0.9)
samples[key]= 0.5902137181571142
key= (0.2, 0.8)
samples[key]= 0.0916671537793757
key= (0.30000000000000004, 0.7)
samples[key]= 0.02143693112742284
key= (0.4, 0.6)
samples[key]= 0.10857956339250158
key= (0.5, 0.5)
samples[key]= 0.48631515924468266
key= (0.6000000000000001, 0.3999999999999999)
samples[key]= 0.23742357758046376
key= (0.7000000000000001, 0.29999999999999993)
samples[key]= 0.0014882028385916617
key= (0.8, 0.19999999999999996)
samples[key]= 0.0819013841188356
key= (0.9, 0.09999999999999998)
samples[key]= 0.240116990003748
key= (1.0, 0.0)
samples[key]= 0.532475659354611
(0.1, 0.9)
action= [0.1, 0.9]
53  Evaluations Remaining
[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.3282094683070482
key= (0.1, 0.9)
samples[key]= 0.6032168812365348
key= (0.2, 0.8)
samples[key]= 0.03806285436475882
key= (0.30000000000000004, 0.7)
samples[key]= 0.07491464983610596
key= (0.4, 0.6)
samples[key]= 0.018403921134001824
key=

[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.47577145513986757
key= (0.1, 0.9)
samples[key]= 0.1994698178426413
key= (0.2, 0.8)
samples[key]= 0.3991778218044594
key= (0.30000000000000004, 0.7)
samples[key]= 0.0353124197087768
key= (0.4, 0.6)
samples[key]= 0.04314129923627294
key= (0.5, 0.5)
samples[key]= 0.26154084460373445
key= (0.6000000000000001, 0.3999999999999999)
samples[key]= 0.12456507704916528
key= (0.7000000000000001, 0.29999999999999993)
samples[key]= 0.10212071963349842
key= (0.8, 0.19999999999999996)
samples[key]= 0.3158487267674179
key= (0.9, 0.09999999999999998)
samples[key]= 0.2089993983949139
key= (1.0, 0.0)
samples[key]= 0.39124860277385964
(0.0, 1.0)
action= [0.0, 1.0]
41  Evaluations Remaining
[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.34874212101927676
key= (0.1, 0.9)
samples[key]= 0.29283120352329284
key= (0.2, 0.8)
samples[key]= 0.29547527194318196
key= (0.30000000000000004, 0.7)
samples[key]= 0.19261442643087254
key= (0.4, 0.6)
samples[key]= 0.08263016185022382
ke

[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.4825607594221865
key= (0.1, 0.9)
samples[key]= 0.39140738741962067
key= (0.2, 0.8)
samples[key]= 0.10657669681571183
key= (0.30000000000000004, 0.7)
samples[key]= 0.2002362327637276
key= (0.4, 0.6)
samples[key]= 0.06133419289772079
key= (0.5, 0.5)
samples[key]= 0.09002320877082161
key= (0.6000000000000001, 0.3999999999999999)
samples[key]= 0.18775564375291198
key= (0.7000000000000001, 0.29999999999999993)
samples[key]= 0.02788752751093488
key= (0.8, 0.19999999999999996)
samples[key]= 0.2062539681988543
key= (0.9, 0.09999999999999998)
samples[key]= 0.42503369498969473
key= (1.0, 0.0)
samples[key]= 0.6423924383439182
(1.0, 0.0)
action= [1.0, 0.0]
29  Evaluations Remaining
[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.5100982132598898
key= (0.1, 0.9)
samples[key]= 0.5817397026083164
key= (0.2, 0.8)
samples[key]= 0.1467267749919813
key= (0.30000000000000004, 0.7)
samples[key]= 0.10057788718397959
key= (0.4, 0.6)
samples[key]= 0.17761074881005542
key=

[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.5461580143641545
key= (0.1, 0.9)
samples[key]= 0.46881684184624367
key= (0.2, 0.8)
samples[key]= 0.1390333389571852
key= (0.30000000000000004, 0.7)
samples[key]= 0.192982929377769
key= (0.4, 0.6)
samples[key]= 0.0941102126855829
key= (0.5, 0.5)
samples[key]= 0.14119922174142838
key= (0.6000000000000001, 0.3999999999999999)
samples[key]= 0.10034952082273671
key= (0.7000000000000001, 0.29999999999999993)
samples[key]= 0.14231260787074537
key= (0.8, 0.19999999999999996)
samples[key]= 0.5190185136883634
key= (0.9, 0.09999999999999998)
samples[key]= 0.22668120715906406
key= (1.0, 0.0)
samples[key]= 0.525330318078671
(0.0, 1.0)
action= [0.0, 1.0]
17  Evaluations Remaining
[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.6267250710445811
key= (0.1, 0.9)
samples[key]= 0.5352549657896575
key= (0.2, 0.8)
samples[key]= 0.13629500010697781
key= (0.30000000000000004, 0.7)
samples[key]= 0.013704346861749421
key= (0.4, 0.6)
samples[key]= 0.1414124515197186
key= (0

[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.5465045434713891
key= (0.1, 0.9)
samples[key]= 0.547422195032946
key= (0.2, 0.8)
samples[key]= 0.1300123244223334
key= (0.30000000000000004, 0.7)
samples[key]= 0.1351776438391542
key= (0.4, 0.6)
samples[key]= 0.15723793231965288
key= (0.5, 0.5)
samples[key]= 0.20694726726057902
key= (0.6000000000000001, 0.3999999999999999)
samples[key]= 0.27336540599978365
key= (0.7000000000000001, 0.29999999999999993)
samples[key]= 0.13778743695915044
key= (0.8, 0.19999999999999996)
samples[key]= 0.3135341599978691
key= (0.9, 0.09999999999999998)
samples[key]= 0.10280458930391265
key= (1.0, 0.0)
samples[key]= 0.41304713143843214
(0.1, 0.9)
[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.601056622244084
key= (0.1, 0.9)
samples[key]= 0.4543620804516803
key= (0.2, 0.8)
samples[key]= 0.20776109445541013
key= (0.30000000000000004, 0.7)
samples[key]= 0.5411482696098106
key= (0.4, 0.6)
samples[key]= 0.17678585446337935
key= (0.5, 0.5)
samples[key]= 0.20113947759384607
ke

[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.3440714314808062
key= (0.1, 0.9)
samples[key]= 0.3813289614816885
key= (0.2, 0.8)
samples[key]= 0.1378724196747545
key= (0.30000000000000004, 0.7)
samples[key]= 0.31181882720925885
key= (0.4, 0.6)
samples[key]= 0.13074894721250416
key= (0.5, 0.5)
samples[key]= 0.16202946365820367
key= (0.6000000000000001, 0.3999999999999999)
samples[key]= 0.024659155505764578
key= (0.7000000000000001, 0.29999999999999993)
samples[key]= 0.2603425426896466
key= (0.8, 0.19999999999999996)
samples[key]= 0.14847857947827398
key= (0.9, 0.09999999999999998)
samples[key]= 0.17454619601180912
key= (1.0, 0.0)
samples[key]= 0.038584119641582516
(0.1, 0.9)
action= [0.1, 0.9]
99  Evaluations Remaining
[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.07664445406844793
key= (0.1, 0.9)
samples[key]= 0.4398455670954507
key= (0.2, 0.8)
samples[key]= 0.2993533839776685
key= (0.30000000000000004, 0.7)
samples[key]= 0.26318145549791183
key= (0.4, 0.6)
samples[key]= 0.16734509312822146
k

[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.4680776036649899
key= (0.1, 0.9)
samples[key]= 0.09319036668138618
key= (0.2, 0.8)
samples[key]= 0.14618069138373233
key= (0.30000000000000004, 0.7)
samples[key]= 0.3382000244057788
key= (0.4, 0.6)
samples[key]= 0.2512137109411345
key= (0.5, 0.5)
samples[key]= 0.33856509716166433
key= (0.6000000000000001, 0.3999999999999999)
samples[key]= 0.23120527541630603
key= (0.7000000000000001, 0.29999999999999993)
samples[key]= 0.23890606327957825
key= (0.8, 0.19999999999999996)
samples[key]= 0.31520406680250374
key= (0.9, 0.09999999999999998)
samples[key]= 0.16917432236605892
key= (1.0, 0.0)
samples[key]= 0.28443897105935945
(0.0, 1.0)
action= [0.0, 1.0]
87  Evaluations Remaining
[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.40381475964360347
key= (0.1, 0.9)
samples[key]= 0.4395161471810085
key= (0.2, 0.8)
samples[key]= 0.25670530086269483
key= (0.30000000000000004, 0.7)
samples[key]= 0.2019455519385289
key= (0.4, 0.6)
samples[key]= 0.4680079515723467
key

[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.5116434240336197
key= (0.1, 0.9)
samples[key]= 0.3658038304449372
key= (0.2, 0.8)
samples[key]= 0.461015618841085
key= (0.30000000000000004, 0.7)
samples[key]= 0.06980478638096607
key= (0.4, 0.6)
samples[key]= 0.17042015737090996
key= (0.5, 0.5)
samples[key]= 0.6021997910871049
key= (0.6000000000000001, 0.3999999999999999)
samples[key]= 0.2556553826672273
key= (0.7000000000000001, 0.29999999999999993)
samples[key]= 0.15069019297718103
key= (0.8, 0.19999999999999996)
samples[key]= 0.2935748616341658
key= (0.9, 0.09999999999999998)
samples[key]= 0.21840284486952502
key= (1.0, 0.0)
samples[key]= 0.44797232257772335
(0.5, 0.5)
action= [0.5, 0.5]
75  Evaluations Remaining
[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.4032874331546877
key= (0.1, 0.9)
samples[key]= 0.2545489932893374
key= (0.2, 0.8)
samples[key]= 0.09876914830928706
key= (0.30000000000000004, 0.7)
samples[key]= 0.3360315564336463
key= (0.4, 0.6)
samples[key]= 0.10306345013867904
key= (0

[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.6857754393867045
key= (0.1, 0.9)
samples[key]= 0.08926710032245973
key= (0.2, 0.8)
samples[key]= 0.20043177293723205
key= (0.30000000000000004, 0.7)
samples[key]= 0.14548543904888908
key= (0.4, 0.6)
samples[key]= 0.3588548546793407
key= (0.5, 0.5)
samples[key]= 0.057733261512906545
key= (0.6000000000000001, 0.3999999999999999)
samples[key]= 0.10088281662025798
key= (0.7000000000000001, 0.29999999999999993)
samples[key]= 0.00017167143691574206
key= (0.8, 0.19999999999999996)
samples[key]= 0.2325816476411483
key= (0.9, 0.09999999999999998)
samples[key]= 0.23306409953399712
key= (1.0, 0.0)
samples[key]= 0.40743054311052057
(0.0, 1.0)
action= [0.0, 1.0]
63  Evaluations Remaining
[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.5146761486897505
key= (0.1, 0.9)
samples[key]= 0.2222421998594319
key= (0.2, 0.8)
samples[key]= 0.15005021956909353
key= (0.30000000000000004, 0.7)
samples[key]= 0.20372509493268823
key= (0.4, 0.6)
samples[key]= 0.0550889805082260

[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.33686803598253257
key= (0.1, 0.9)
samples[key]= 0.35230582099961155
key= (0.2, 0.8)
samples[key]= 0.1692437424855852
key= (0.30000000000000004, 0.7)
samples[key]= 0.09625323902070744
key= (0.4, 0.6)
samples[key]= 0.3764161402224191
key= (0.5, 0.5)
samples[key]= 0.17962629979214415
key= (0.6000000000000001, 0.3999999999999999)
samples[key]= 0.34650421895549194
key= (0.7000000000000001, 0.29999999999999993)
samples[key]= 0.10893718084838248
key= (0.8, 0.19999999999999996)
samples[key]= 0.33439966804422666
key= (0.9, 0.09999999999999998)
samples[key]= 0.2959932762411764
key= (1.0, 0.0)
samples[key]= 0.39763392453537066
(1.0, 0.0)
action= [1.0, 0.0]
51  Evaluations Remaining
[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.4279201349988884
key= (0.1, 0.9)
samples[key]= 0.13247341625016618
key= (0.2, 0.8)
samples[key]= 0.36013267498784696
key= (0.30000000000000004, 0.7)
samples[key]= 0.24902224739832796
key= (0.4, 0.6)
samples[key]= 0.2809902265178757
ke

[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.40643660499797224
key= (0.1, 0.9)
samples[key]= 0.35409988033575945
key= (0.2, 0.8)
samples[key]= 0.1855567775841109
key= (0.30000000000000004, 0.7)
samples[key]= 0.051449522749749885
key= (0.4, 0.6)
samples[key]= 0.26109593739384346
key= (0.5, 0.5)
samples[key]= 0.20617605414871587
key= (0.6000000000000001, 0.3999999999999999)
samples[key]= 0.12662026310799307
key= (0.7000000000000001, 0.29999999999999993)
samples[key]= 0.011827319840324741
key= (0.8, 0.19999999999999996)
samples[key]= 0.08808031330924852
key= (0.9, 0.09999999999999998)
samples[key]= 0.1479960985961937
key= (1.0, 0.0)
samples[key]= 0.25847167416454336
(0.0, 1.0)
action= [0.0, 1.0]
40  Evaluations Remaining
[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.4837734832802515
key= (0.1, 0.9)
samples[key]= 0.3329954754173333
key= (0.2, 0.8)
samples[key]= 0.39871148718110544
key= (0.30000000000000004, 0.7)
samples[key]= 0.04510728753054686
key= (0.4, 0.6)
samples[key]= 0.5850096354051013


[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.5880435545101307
key= (0.1, 0.9)
samples[key]= 0.22897051830128295
key= (0.2, 0.8)
samples[key]= 0.49373033920808834
key= (0.30000000000000004, 0.7)
samples[key]= 0.08756238683972646
key= (0.4, 0.6)
samples[key]= 0.3155197732399182
key= (0.5, 0.5)
samples[key]= 0.18417762925676043
key= (0.6000000000000001, 0.3999999999999999)
samples[key]= 0.3663457839421997
key= (0.7000000000000001, 0.29999999999999993)
samples[key]= 0.1232233792769308
key= (0.8, 0.19999999999999996)
samples[key]= 0.46426550617581813
key= (0.9, 0.09999999999999998)
samples[key]= 0.260675589692913
key= (1.0, 0.0)
samples[key]= 0.397251615795301
(0.0, 1.0)
action= [0.0, 1.0]
28  Evaluations Remaining
[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.5246068311232917
key= (0.1, 0.9)
samples[key]= 0.2247745531463103
key= (0.2, 0.8)
samples[key]= 0.1550882129570719
key= (0.30000000000000004, 0.7)
samples[key]= 0.4073599464791608
key= (0.4, 0.6)
samples[key]= 0.5014504014066244
key= (0.5,

[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.5336282862042316
key= (0.1, 0.9)
samples[key]= 0.16071492226327605
key= (0.2, 0.8)
samples[key]= 0.2839147249609656
key= (0.30000000000000004, 0.7)
samples[key]= 0.2045262031332251
key= (0.4, 0.6)
samples[key]= 0.27351340186979084
key= (0.5, 0.5)
samples[key]= 0.09655193821676604
key= (0.6000000000000001, 0.3999999999999999)
samples[key]= 0.1425875115998125
key= (0.7000000000000001, 0.29999999999999993)
samples[key]= 0.10167723532717131
key= (0.8, 0.19999999999999996)
samples[key]= 0.5711900191686196
key= (0.9, 0.09999999999999998)
samples[key]= 0.1775893973523441
key= (1.0, 0.0)
samples[key]= 0.603429508155633
(1.0, 0.0)
action= [1.0, 0.0]
16  Evaluations Remaining
[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.5643809928436506
key= (0.1, 0.9)
samples[key]= 0.21924042507653893
key= (0.2, 0.8)
samples[key]= 0.24535418357509903
key= (0.30000000000000004, 0.7)
samples[key]= 0.1755014574147751
key= (0.4, 0.6)
samples[key]= 0.13000332606602782
key= (0

{1: [1.0, 0.0], 2: [1.0, 0.0], 3: [0.0, 1.0], 4: [0.0, 1.0], 5: [0.9, 0.09999999999999998]} 283.7632501826192
self.ActionValue= {(0.0, 1.0): (2, 5), (0.1, 0.9): (2, 5), (0.2, 0.8): (2, 5), (0.30000000000000004, 0.7): (2, 5), (0.4, 0.6): (2, 5), (0.5, 0.5): (2, 5), (0.6000000000000001, 0.3999999999999999): (2, 5), (0.7000000000000001, 0.29999999999999993): (2, 5), (0.8, 0.19999999999999996): (2, 5), (0.9, 0.09999999999999998): (2, 5), (1.0, 0.0): (2, 5)}
[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.03242592769857173
key= (0.1, 0.9)
samples[key]= 0.3248324222493679
key= (0.2, 0.8)
samples[key]= 0.13765880814969036
key= (0.30000000000000004, 0.7)
samples[key]= 0.32171363174780937
key= (0.4, 0.6)
samples[key]= 0.3975553225631364
key= (0.5, 0.5)
samples[key]= 0.2774821107381542
key= (0.6000000000000001, 0.3999999999999999)
samples[key]= 0.3275566948285206
key= (0.7000000000000001, 0.29999999999999993)
samples[key]= 0.38447988318590737
key= (0.8, 0.19999999999999996)
samples[key]= 0.4072037214

[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.310848018793878
key= (0.1, 0.9)
samples[key]= 0.6285655166266014
key= (0.2, 0.8)
samples[key]= 0.07498619932460762
key= (0.30000000000000004, 0.7)
samples[key]= 0.2181447109943283
key= (0.4, 0.6)
samples[key]= 0.4824640461533589
key= (0.5, 0.5)
samples[key]= 0.23408434506498124
key= (0.6000000000000001, 0.3999999999999999)
samples[key]= 0.3804833538617523
key= (0.7000000000000001, 0.29999999999999993)
samples[key]= 0.39722680122190546
key= (0.8, 0.19999999999999996)
samples[key]= 0.3832288478348979
key= (0.9, 0.09999999999999998)
samples[key]= 0.5696991594606693
key= (1.0, 0.0)
samples[key]= 0.28386679817964805
(0.1, 0.9)
action= [0.1, 0.9]
94  Evaluations Remaining
[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.2658135650504183
key= (0.1, 0.9)
samples[key]= 0.3243674104973895
key= (0.2, 0.8)
samples[key]= 0.27809346353264136
key= (0.30000000000000004, 0.7)
samples[key]= 0.4676488067893675
key= (0.4, 0.6)
samples[key]= 0.43708942978746196
key= (0.

[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.15598034111305492
key= (0.1, 0.9)
samples[key]= 0.2198047870439194
key= (0.2, 0.8)
samples[key]= 0.1881598109487209
key= (0.30000000000000004, 0.7)
samples[key]= 0.3756854478968397
key= (0.4, 0.6)
samples[key]= 0.301252269470176
key= (0.5, 0.5)
samples[key]= 0.25784106043636346
key= (0.6000000000000001, 0.3999999999999999)
samples[key]= 0.2771882780905324
key= (0.7000000000000001, 0.29999999999999993)
samples[key]= 0.21840298212311113
key= (0.8, 0.19999999999999996)
samples[key]= 0.24309600925967947
key= (0.9, 0.09999999999999998)
samples[key]= 0.1730981422049398
key= (1.0, 0.0)
samples[key]= 0.35073551044351964
(0.30000000000000004, 0.7)
action= [0.30000000000000004, 0.7]
82  Evaluations Remaining
[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.09742764349942909
key= (0.1, 0.9)
samples[key]= 0.34521185078905164
key= (0.2, 0.8)
samples[key]= 0.5628973500680335
key= (0.30000000000000004, 0.7)
samples[key]= 0.13460880174395384
key= (0.4, 0.6)
samples

[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.08074941358549344
key= (0.1, 0.9)
samples[key]= 0.21159164786109708
key= (0.2, 0.8)
samples[key]= 0.32427239853762035
key= (0.30000000000000004, 0.7)
samples[key]= 0.1627583112133874
key= (0.4, 0.6)
samples[key]= 0.08501169252854326
key= (0.5, 0.5)
samples[key]= 0.3583677365631337
key= (0.6000000000000001, 0.3999999999999999)
samples[key]= 0.13626241945724532
key= (0.7000000000000001, 0.29999999999999993)
samples[key]= 0.256397961637779
key= (0.8, 0.19999999999999996)
samples[key]= 0.1948523715283096
key= (0.9, 0.09999999999999998)
samples[key]= 0.31320474999867465
key= (1.0, 0.0)
samples[key]= 0.12070730349330942
(0.5, 0.5)
action= [0.5, 0.5]
70  Evaluations Remaining
[1.0, 0.0]
key= (0.0, 1.0)
samples[key]= 0.4996308427543967
key= (0.1, 0.9)
samples[key]= 0.3367553279422984
key= (0.2, 0.8)
samples[key]= 0.430214744283648
key= (0.30000000000000004, 0.7)
samples[key]= 0.43851629337478504
key= (0.4, 0.6)
samples[key]= 0.16836130454747023
key= (

In [134]:
ActionValue= {(0.0, 1.0): (10, 50), (0.1, 0.9): (10, 50), (0.2, 0.8): (10, 50), (0.30000000000000004, 0.7): (10, 50), (0.4, 0.6): (10, 50), (0.5, 0.5): (10, 50), (0.6000000000000001, 0.3999999999999999): (10, 50), (0.7000000000000001, 0.29999999999999993): (10, 50), (0.8, 0.19999999999999996): (10, 50), (0.9, 0.09999999999999998): (10, 50), (1.0, 0.0): (10, 50)}

In [136]:
for key in ActionValue:
    print("key=",key)
    print("self.ActionValue[key][0]=",ActionValue[key][0])
    print("self.ActionValue[key][1]=",ActionValue[key][1])

key= (0.0, 1.0)
self.ActionValue[key][0]= 10
self.ActionValue[key][1]= 50
key= (0.1, 0.9)
self.ActionValue[key][0]= 10
self.ActionValue[key][1]= 50
key= (0.2, 0.8)
self.ActionValue[key][0]= 10
self.ActionValue[key][1]= 50
key= (0.30000000000000004, 0.7)
self.ActionValue[key][0]= 10
self.ActionValue[key][1]= 50
key= (0.4, 0.6)
self.ActionValue[key][0]= 10
self.ActionValue[key][1]= 50
key= (0.5, 0.5)
self.ActionValue[key][0]= 10
self.ActionValue[key][1]= 50
key= (0.6000000000000001, 0.3999999999999999)
self.ActionValue[key][0]= 10
self.ActionValue[key][1]= 50
key= (0.7000000000000001, 0.29999999999999993)
self.ActionValue[key][0]= 10
self.ActionValue[key][1]= 50
key= (0.8, 0.19999999999999996)
self.ActionValue[key][0]= 10
self.ActionValue[key][1]= 50
key= (0.9, 0.09999999999999998)
self.ActionValue[key][0]= 10
self.ActionValue[key][1]= 50
key= (1.0, 0.0)
self.ActionValue[key][0]= 10
self.ActionValue[key][1]= 50


In [137]:
class BanditRPM(object):
    def __init__(self,keys,init):
        self.ActionValue = {}
        for key in keys:
            self.ActionValue[key] = init
    
    def get_reward(self,action,text):
        print("action=",action)
        print("text=",text)
        if any(x in text for x in action):
            return 1
        else:
            return 0
    
    def choose_action(self):
        """
        Use Thompson sampling to choose action. Sample from each posterior and choose the max of the samples.
        """
        samples = {}
        for key in self.ActionValue:
            print("key=",key)
#             print("key=",self.ActionValue[key][0])
#             print("self.ActionValue[key][1]=",self.ActionValue[key][1])
            
            samples[key] = np.random.beta(self.ActionValue[key][0], self.ActionValue[key][1])
#             print("samples[key]=",samples[key])
        max_value =  max(samples, key=samples.get)
        print("max_value=",max_value)
        return max_value

    def update(self,action,reward):
        """
        Update parameters of posteriors, which are Beta distributions
        """
        print("action=",action)
        print("reward=",reward)
        a, b = self.ActionValue[action]
        a = a+reward
        b = b + 1 - reward
        a = 0.0001 if a <= 0 else a
        b = 0.0001 if b <= 0 else b
        print("a=",a)
        print("b=",b)
        self.ActionValue[action] = (a, b)

In [138]:
bandit = BanditRPM([('hillary','clinton'),('donald','trump'),('bernie','sanders')],(1,5))

In [111]:
bandit.ActionValue

{('hillary', 'clinton'): (1, 5),
 ('donald', 'trump'): (1, 5),
 ('bernie', 'sanders'): (1, 5)}

In [112]:
action = bandit.choose_action()
reward= bandit.get_reward(action)
bandit.update(action,reward)

max_value= ('hillary', 'clinton')


TypeError: get_reward() missing 1 required positional argument: 'text'