# Paying Attention

Basic game where the agent has to learn to divide attention between two attackers.  I don't know yet what will signal him to change.

In [1]:
import random
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neural_network import MLPRegressor

In [2]:
N_EPOCHS = 500
COMPASS_POINTS = 12
STATE_ROUNDS_SINCE_LAST_ATTACK = [0] * COMPASS_POINTS
MAX_MEMORY = 15
HISTORY = []

In [3]:
class Defender():
    def __init__(self, hp=None, attention_points=None):
        self.hp = hp
        self.attention_points = attention_points
        self.attention = [0] * self.attention_points
        self.attention[0] = 1
    
    def divide_attention(self, state):
        self.attention = [0] * self.attention_points
        self.attention[random.randint(a=0, b=self.attention_points-1)] = 1


In [4]:
class Attacker():
    def __init__(self, attack_interval=None, damage=None, pos=None):
        self.attack_interval = attack_interval
        self.damage = damage
        self.pos = pos
        
    def attack(self, epoch):
        if epoch % self.attack_interval == 0:
            return True
        else:
            return False

In [5]:
def melee(attacker, defender):
    d = (attacker.pos - defender.attention.index(1)) % COMPASS_POINTS
    print("\t position: {pos}  attention: {att}  diff: {d}".format(pos=attacker.pos, att=defender.attention.index(1), d=d))
    dam = -round(attacker.damage * d / COMPASS_POINTS)
    return dam

In [6]:
attackers = []
attackers.append(Attacker(attack_interval=3, pos=3, damage=1))
attackers.append(Attacker(attack_interval=7, pos=9, damage=6))

In [7]:
darkBlade = Defender(hp=50, attention_points=COMPASS_POINTS)

In [8]:
for e in range(1, N_EPOCHS):
    dam = 0
    for attacker in attackers:
        if attacker.attack(e):
            #print("{e}: attack!".format(e=e))
            STATE_ROUNDS_SINCE_LAST_ATTACK[attacker.pos] = 0
            dam += melee(attacker, darkBlade)
    darkBlade.hp += dam
    if darkBlade.hp <= 0:
        #print("Darkblade is dead!")
        break
    #print ("Darkblade's hp: {hp}".format(hp=darkBlade.hp))
    STATE_ROUNDS_SINCE_LAST_ATTACK = [min(x+1, MAX_MEMORY) for x in STATE_ROUNDS_SINCE_LAST_ATTACK]
    state = STATE_ROUNDS_SINCE_LAST_ATTACK + darkBlade.attention + [dam]
    HISTORY.append(state)
    darkBlade.divide_attention(state)

	 position: 3  attention: 11  diff: 4
	 position: 3  attention: 10  diff: 5
	 position: 9  attention: 11  diff: 10
	 position: 3  attention: 11  diff: 4
	 position: 3  attention: 1  diff: 2
	 position: 9  attention: 9  diff: 0
	 position: 3  attention: 8  diff: 7
	 position: 3  attention: 4  diff: 11
	 position: 3  attention: 6  diff: 9
	 position: 9  attention: 6  diff: 3
	 position: 3  attention: 7  diff: 8
	 position: 3  attention: 4  diff: 11
	 position: 9  attention: 1  diff: 8
	 position: 3  attention: 6  diff: 9
	 position: 3  attention: 3  diff: 0
	 position: 9  attention: 0  diff: 9
	 position: 3  attention: 5  diff: 10
	 position: 3  attention: 7  diff: 8
	 position: 3  attention: 9  diff: 6
	 position: 9  attention: 9  diff: 0
	 position: 3  attention: 11  diff: 4
	 position: 3  attention: 2  diff: 1
	 position: 9  attention: 2  diff: 7
	 position: 3  attention: 6  diff: 9
	 position: 3  attention: 10  diff: 5
	 position: 9  attention: 1  diff: 8
	 position: 3  attention: 8 

In [9]:
STATE_ROUNDS_SINCE_LAST_ATTACK

[15, 15, 15, 1, 15, 15, 15, 15, 15, 0, 15, 15]

In [10]:
x = [1, 0]
y = [3,2]

In [11]:
z = x + y

In [12]:
z

[1, 0, 3, 2]

In [13]:
clf = MLPRegressor(alpha=0.001, hidden_layer_sizes = (10,), max_iter = 100000, 
                 activation = 'logistic', verbose = 'False', learning_rate = 'adaptive')
n = len(STATE_ROUNDS_SINCE_LAST_ATTACK) + COMPASS_POINTS
y = [x[n] for x in HISTORY]
X = [x[0:n] for x in HISTORY]
a = clf.fit(X, y)

Iteration 1, loss = 1.82704684
Iteration 2, loss = 1.79139935
Iteration 3, loss = 1.75682675
Iteration 4, loss = 1.72335463
Iteration 5, loss = 1.69096196
Iteration 6, loss = 1.65958612
Iteration 7, loss = 1.62913086
Iteration 8, loss = 1.59947592
Iteration 9, loss = 1.57048755
Iteration 10, loss = 1.54202875
Iteration 11, loss = 1.51396843
Iteration 12, loss = 1.48618911
Iteration 13, loss = 1.45859292
Iteration 14, loss = 1.43110592
Iteration 15, loss = 1.40368109
Iteration 16, loss = 1.37630017
Iteration 17, loss = 1.34897478
Iteration 18, loss = 1.32174683
Iteration 19, loss = 1.29468847
Iteration 20, loss = 1.26790098
Iteration 21, loss = 1.24151232
Iteration 22, loss = 1.21567255
Iteration 23, loss = 1.19054640
Iteration 24, loss = 1.16630303
Iteration 25, loss = 1.14310328
Iteration 26, loss = 1.12108583
Iteration 27, loss = 1.10035435
Iteration 28, loss = 1.08096774
Iteration 29, loss = 1.06293511
Iteration 30, loss = 1.04621624
Iteration 31, loss = 1.03072682
Iteration 32, los

Iteration 303, loss = 0.60409710
Iteration 304, loss = 0.60375034
Iteration 305, loss = 0.60340359
Iteration 306, loss = 0.60305699
Iteration 307, loss = 0.60271069
Iteration 308, loss = 0.60236489
Iteration 309, loss = 0.60201973
Iteration 310, loss = 0.60167539
Iteration 311, loss = 0.60133202
Iteration 312, loss = 0.60098973
Iteration 313, loss = 0.60064862
Iteration 314, loss = 0.60030876
Iteration 315, loss = 0.59997020
Iteration 316, loss = 0.59963292
Iteration 317, loss = 0.59929692
Iteration 318, loss = 0.59896215
Iteration 319, loss = 0.59862852
Iteration 320, loss = 0.59829593
Iteration 321, loss = 0.59796428
Iteration 322, loss = 0.59763343
Iteration 323, loss = 0.59730322
Iteration 324, loss = 0.59697346
Iteration 325, loss = 0.59664396
Iteration 326, loss = 0.59631445
Iteration 327, loss = 0.59598461
Iteration 328, loss = 0.59565402
Iteration 329, loss = 0.59532208
Iteration 330, loss = 0.59498788
Iteration 331, loss = 0.59465001
Iteration 332, loss = 0.59430613
Iteration 

Iteration 566, loss = 0.49503412
Iteration 567, loss = 0.49447050
Iteration 568, loss = 0.49391350
Iteration 569, loss = 0.49335930
Iteration 570, loss = 0.49280181
Iteration 571, loss = 0.49223819
Iteration 572, loss = 0.49167085
Iteration 573, loss = 0.49110471
Iteration 574, loss = 0.49054263
Iteration 575, loss = 0.48998309
Iteration 576, loss = 0.48942214
Iteration 577, loss = 0.48885744
Iteration 578, loss = 0.48829026
Iteration 579, loss = 0.48772370
Iteration 580, loss = 0.48715948
Iteration 581, loss = 0.48659641
Iteration 582, loss = 0.48603205
Iteration 583, loss = 0.48546536
Iteration 584, loss = 0.48489754
Iteration 585, loss = 0.48433059
Iteration 586, loss = 0.48376511
Iteration 587, loss = 0.48319986
Iteration 588, loss = 0.48263343
Iteration 589, loss = 0.48206578
Iteration 590, loss = 0.48149813
Iteration 591, loss = 0.48093141
Iteration 592, loss = 0.48036532
Iteration 593, loss = 0.47979884
Iteration 594, loss = 0.47923152
Iteration 595, loss = 0.47866395
Iteration 

Iteration 824, loss = 0.35871603
Iteration 825, loss = 0.35825435
Iteration 826, loss = 0.35779330
Iteration 827, loss = 0.35733287
Iteration 828, loss = 0.35687306
Iteration 829, loss = 0.35641388
Iteration 830, loss = 0.35595532
Iteration 831, loss = 0.35549738
Iteration 832, loss = 0.35504007
Iteration 833, loss = 0.35458339
Iteration 834, loss = 0.35412733
Iteration 835, loss = 0.35367189
Iteration 836, loss = 0.35321708
Iteration 837, loss = 0.35276290
Iteration 838, loss = 0.35230934
Iteration 839, loss = 0.35185640
Iteration 840, loss = 0.35140409
Iteration 841, loss = 0.35095240
Iteration 842, loss = 0.35050134
Iteration 843, loss = 0.35005090
Iteration 844, loss = 0.34960109
Iteration 845, loss = 0.34915190
Iteration 846, loss = 0.34870333
Iteration 847, loss = 0.34825539
Iteration 848, loss = 0.34780808
Iteration 849, loss = 0.34736138
Iteration 850, loss = 0.34691531
Iteration 851, loss = 0.34646987
Iteration 852, loss = 0.34602505
Iteration 853, loss = 0.34558085
Iteration 

Iteration 1088, loss = 0.25743152
Iteration 1089, loss = 0.25711965
Iteration 1090, loss = 0.25680825
Iteration 1091, loss = 0.25649734
Iteration 1092, loss = 0.25618690
Iteration 1093, loss = 0.25587695
Iteration 1094, loss = 0.25556747
Iteration 1095, loss = 0.25525847
Iteration 1096, loss = 0.25494995
Iteration 1097, loss = 0.25464191
Iteration 1098, loss = 0.25433434
Iteration 1099, loss = 0.25402725
Iteration 1100, loss = 0.25372063
Iteration 1101, loss = 0.25341448
Iteration 1102, loss = 0.25310881
Iteration 1103, loss = 0.25280361
Iteration 1104, loss = 0.25249889
Iteration 1105, loss = 0.25219463
Iteration 1106, loss = 0.25189085
Iteration 1107, loss = 0.25158753
Iteration 1108, loss = 0.25128469
Iteration 1109, loss = 0.25098231
Iteration 1110, loss = 0.25068040
Iteration 1111, loss = 0.25037896
Iteration 1112, loss = 0.25007798
Iteration 1113, loss = 0.24977747
Iteration 1114, loss = 0.24947742
Iteration 1115, loss = 0.24917784
Iteration 1116, loss = 0.24887873
Iteration 1117

Iteration 1351, loss = 0.19008722
Iteration 1352, loss = 0.18988071
Iteration 1353, loss = 0.18967445
Iteration 1354, loss = 0.18946855
Iteration 1355, loss = 0.18926300
Iteration 1356, loss = 0.18905772
Iteration 1357, loss = 0.18885281
Iteration 1358, loss = 0.18864822
Iteration 1359, loss = 0.18844393
Iteration 1360, loss = 0.18824000
Iteration 1361, loss = 0.18803638
Iteration 1362, loss = 0.18783307
Iteration 1363, loss = 0.18763011
Iteration 1364, loss = 0.18742745
Iteration 1365, loss = 0.18722512
Iteration 1366, loss = 0.18702312
Iteration 1367, loss = 0.18682143
Iteration 1368, loss = 0.18662006
Iteration 1369, loss = 0.18641902
Iteration 1370, loss = 0.18621829
Iteration 1371, loss = 0.18601788
Iteration 1372, loss = 0.18581780
Iteration 1373, loss = 0.18561803
Iteration 1374, loss = 0.18541858
Iteration 1375, loss = 0.18521945
Iteration 1376, loss = 0.18502063
Iteration 1377, loss = 0.18482213
Iteration 1378, loss = 0.18462395
Iteration 1379, loss = 0.18442608
Iteration 1380

Iteration 1614, loss = 0.14575933
Iteration 1615, loss = 0.14562446
Iteration 1616, loss = 0.14548982
Iteration 1617, loss = 0.14535540
Iteration 1618, loss = 0.14522120
Iteration 1619, loss = 0.14508723
Iteration 1620, loss = 0.14495347
Iteration 1621, loss = 0.14481994
Iteration 1622, loss = 0.14468662
Iteration 1623, loss = 0.14455353
Iteration 1624, loss = 0.14442065
Iteration 1625, loss = 0.14428800
Iteration 1626, loss = 0.14415556
Iteration 1627, loss = 0.14402334
Iteration 1628, loss = 0.14389134
Iteration 1629, loss = 0.14375956
Iteration 1630, loss = 0.14362800
Iteration 1631, loss = 0.14349665
Iteration 1632, loss = 0.14336552
Iteration 1633, loss = 0.14323461
Iteration 1634, loss = 0.14310391
Iteration 1635, loss = 0.14297343
Iteration 1636, loss = 0.14284316
Iteration 1637, loss = 0.14271311
Iteration 1638, loss = 0.14258327
Iteration 1639, loss = 0.14245365
Iteration 1640, loss = 0.14232425
Iteration 1641, loss = 0.14219505
Iteration 1642, loss = 0.14206607
Iteration 1643

In [14]:
y = [x[len(STATE_ROUNDS_SINCE_LAST_ATTACK) + COMPASS_POINTS] for x in HISTORY]

In [15]:
for i in range(COMPASS_POINTS):
    a = [0] * COMPASS_POINTS
    a[i] = 1
    state = STATE_ROUNDS_SINCE_LAST_ATTACK + a
    print("attention: {a}  prediction {p}".format(a=i, p = clf.predict([state])))

attention: 0  prediction [-4.0661326]
attention: 1  prediction [-4.2180661]
attention: 2  prediction [-4.31612528]
attention: 3  prediction [-3.76148466]
attention: 4  prediction [-2.99204681]
attention: 5  prediction [-4.07596106]
attention: 6  prediction [-4.10167376]
attention: 7  prediction [-3.86499738]
attention: 8  prediction [-3.99155304]
attention: 9  prediction [-1.63032703]
attention: 10  prediction [-2.28470027]
attention: 11  prediction [-4.19685821]


In [16]:
STATE_ROUNDS_SINCE_LAST_ATTACK

[15, 15, 15, 1, 15, 15, 15, 15, 15, 0, 15, 15]

In [17]:
darkBlade.hp

0

In [18]:
e

91