# Paying Attention

Basic game where the agent has to learn to divide attention between two attackers.  I don't know yet what will signal him to change.

In [1]:
import random
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neural_network import MLPRegressor

In [2]:
N_EPOCHS = 500
COMPASS_POINTS = 12
STATE_ROUNDS_SINCE_LAST_ATTACK = [0] * COMPASS_POINTS
MAX_MEMORY = 15
HISTORY = []

In [3]:
class Defender():
    def __init__(self, hp=None, attention_points=None):
        self.hp = hp
        self.attention_points = attention_points
        self.attention = [0] * self.attention_points
        self.attention[0] = 1
    
    def divide_attention(self, state):
        self.attention = [0] * self.attention_points
        self.attention[random.randint(a=0, b=self.attention_points-1)] = 1


In [4]:
class Attacker():
    def __init__(self, attack_interval=None, damage=None, pos=None):
        self.attack_interval = attack_interval
        self.damage = damage
        self.pos = pos
        
    def attack(self, epoch):
        if epoch % self.attack_interval == 0:
            return True
        else:
            return False

In [5]:
def melee(attacker, defender):
    d = (attacker.pos - defender.attention.index(1)) % COMPASS_POINTS
    print("\t position: {pos}  attention: {att}  diff: {d}".format(pos=attacker.pos, att=defender.attention.index(1), d=d))
    dam = -round(attacker.damage * d / COMPASS_POINTS)
    return dam

In [6]:
attackers = []
attackers.append(Attacker(attack_interval=3, pos=3, damage=1))
attackers.append(Attacker(attack_interval=7, pos=9, damage=6))

In [7]:
darkBlade = Defender(hp=50, attention_points=COMPASS_POINTS)

In [8]:
for e in range(1, N_EPOCHS):
    dam = 0
    for attacker in attackers:
        if attacker.attack(e):
            #print("{e}: attack!".format(e=e))
            STATE_ROUNDS_SINCE_LAST_ATTACK[attacker.pos] = 0
            dam += melee(attacker, darkBlade)
    darkBlade.hp += dam
    if darkBlade.hp <= 0:
        #print("Darkblade is dead!")
        break
    #print ("Darkblade's hp: {hp}".format(hp=darkBlade.hp))
    STATE_ROUNDS_SINCE_LAST_ATTACK = [min(x+1, MAX_MEMORY) for x in STATE_ROUNDS_SINCE_LAST_ATTACK]
    state = STATE_ROUNDS_SINCE_LAST_ATTACK + darkBlade.attention + [dam]
    HISTORY.append(state)
    darkBlade.divide_attention(state)

	 position: 3  attention: 8  diff: 7
	 position: 3  attention: 1  diff: 2
	 position: 9  attention: 4  diff: 5
	 position: 3  attention: 3  diff: 0
	 position: 3  attention: 11  diff: 4
	 position: 9  attention: 7  diff: 2
	 position: 3  attention: 3  diff: 0
	 position: 3  attention: 9  diff: 6
	 position: 3  attention: 5  diff: 10
	 position: 9  attention: 5  diff: 4
	 position: 3  attention: 6  diff: 9
	 position: 3  attention: 1  diff: 2
	 position: 9  attention: 11  diff: 10
	 position: 3  attention: 9  diff: 6
	 position: 3  attention: 4  diff: 11
	 position: 9  attention: 10  diff: 11
	 position: 3  attention: 11  diff: 4
	 position: 3  attention: 10  diff: 5
	 position: 3  attention: 2  diff: 1
	 position: 9  attention: 2  diff: 7
	 position: 3  attention: 0  diff: 3
	 position: 3  attention: 3  diff: 0
	 position: 9  attention: 10  diff: 11
	 position: 3  attention: 6  diff: 9
	 position: 3  attention: 5  diff: 10
	 position: 9  attention: 5  diff: 4
	 position: 3  attention: 

In [9]:
STATE_ROUNDS_SINCE_LAST_ATTACK

[15, 15, 15, 2, 15, 15, 15, 15, 15, 0, 15, 15]

In [10]:
x = [1, 0]
y = [3,2]

In [11]:
z = x + y

In [12]:
z

[1, 0, 3, 2]

In [13]:
clf = MLPRegressor(alpha=0.001, hidden_layer_sizes = (10,), max_iter = 100000, 
                 activation = 'logistic', verbose = 'False', learning_rate = 'adaptive')
n = len(STATE_ROUNDS_SINCE_LAST_ATTACK) + COMPASS_POINTS
y = [x[n] for x in HISTORY]
X = [x[0:n] for x in HISTORY]
a = clf.fit(X, y)

Iteration 1, loss = 1.12147478
Iteration 2, loss = 1.10964303
Iteration 3, loss = 1.09895998
Iteration 4, loss = 1.08935634
Iteration 5, loss = 1.08075084
Iteration 6, loss = 1.07306137
Iteration 7, loss = 1.06620916
Iteration 8, loss = 1.06011987
Iteration 9, loss = 1.05472458
Iteration 10, loss = 1.04996117
Iteration 11, loss = 1.04577334
Iteration 12, loss = 1.04210790
Iteration 13, loss = 1.03891159
Iteration 14, loss = 1.03612858
Iteration 15, loss = 1.03369923
Iteration 16, loss = 1.03156062
Iteration 17, loss = 1.02964840
Iteration 18, loss = 1.02789935
Iteration 19, loss = 1.02625357
Iteration 20, loss = 1.02465594
Iteration 21, loss = 1.02305679
Iteration 22, loss = 1.02141180
Iteration 23, loss = 1.01968153
Iteration 24, loss = 1.01783075
Iteration 25, loss = 1.01582783
Iteration 26, loss = 1.01364446
Iteration 27, loss = 1.01125578
Iteration 28, loss = 1.00864108
Iteration 29, loss = 1.00578505
Iteration 30, loss = 1.00267948
Iteration 31, loss = 0.99932536
Iteration 32, los

Iteration 296, loss = 0.66482215
Iteration 297, loss = 0.66381676
Iteration 298, loss = 0.66281297
Iteration 299, loss = 0.66181077
Iteration 300, loss = 0.66081015
Iteration 301, loss = 0.65981112
Iteration 302, loss = 0.65881365
Iteration 303, loss = 0.65781776
Iteration 304, loss = 0.65682344
Iteration 305, loss = 0.65583067
Iteration 306, loss = 0.65483946
Iteration 307, loss = 0.65384981
Iteration 308, loss = 0.65286170
Iteration 309, loss = 0.65187514
Iteration 310, loss = 0.65089011
Iteration 311, loss = 0.64990662
Iteration 312, loss = 0.64892465
Iteration 313, loss = 0.64794421
Iteration 314, loss = 0.64696529
Iteration 315, loss = 0.64598789
Iteration 316, loss = 0.64501199
Iteration 317, loss = 0.64403760
Iteration 318, loss = 0.64306471
Iteration 319, loss = 0.64209332
Iteration 320, loss = 0.64112342
Iteration 321, loss = 0.64015501
Iteration 322, loss = 0.63918808
Iteration 323, loss = 0.63822262
Iteration 324, loss = 0.63725864
Iteration 325, loss = 0.63629613
Iteration 

Iteration 630, loss = 0.39912573
Iteration 631, loss = 0.39851763
Iteration 632, loss = 0.39791063
Iteration 633, loss = 0.39730471
Iteration 634, loss = 0.39669989
Iteration 635, loss = 0.39609616
Iteration 636, loss = 0.39549352
Iteration 637, loss = 0.39489197
Iteration 638, loss = 0.39429151
Iteration 639, loss = 0.39369214
Iteration 640, loss = 0.39309386
Iteration 641, loss = 0.39249668
Iteration 642, loss = 0.39190058
Iteration 643, loss = 0.39130558
Iteration 644, loss = 0.39071167
Iteration 645, loss = 0.39011885
Iteration 646, loss = 0.38952711
Iteration 647, loss = 0.38893647
Iteration 648, loss = 0.38834692
Iteration 649, loss = 0.38775846
Iteration 650, loss = 0.38717109
Iteration 651, loss = 0.38658481
Iteration 652, loss = 0.38599962
Iteration 653, loss = 0.38541552
Iteration 654, loss = 0.38483251
Iteration 655, loss = 0.38425060
Iteration 656, loss = 0.38366982
Iteration 657, loss = 0.38309023
Iteration 658, loss = 0.38251212
Iteration 659, loss = 0.38193632
Iteration 

Iteration 948, loss = 0.25732904
Iteration 949, loss = 0.25702511
Iteration 950, loss = 0.25672188
Iteration 951, loss = 0.25641935
Iteration 952, loss = 0.25611753
Iteration 953, loss = 0.25581642
Iteration 954, loss = 0.25551600
Iteration 955, loss = 0.25521628
Iteration 956, loss = 0.25491726
Iteration 957, loss = 0.25461894
Iteration 958, loss = 0.25432131
Iteration 959, loss = 0.25402437
Iteration 960, loss = 0.25372812
Iteration 961, loss = 0.25343255
Iteration 962, loss = 0.25313767
Iteration 963, loss = 0.25284348
Iteration 964, loss = 0.25254997
Iteration 965, loss = 0.25225714
Iteration 966, loss = 0.25196498
Iteration 967, loss = 0.25167351
Iteration 968, loss = 0.25138270
Iteration 969, loss = 0.25109257
Iteration 970, loss = 0.25080311
Iteration 971, loss = 0.25051432
Iteration 972, loss = 0.25022620
Iteration 973, loss = 0.24993874
Iteration 974, loss = 0.24965194
Iteration 975, loss = 0.24936580
Iteration 976, loss = 0.24908033
Iteration 977, loss = 0.24879551
Iteration 

Iteration 1230, loss = 0.19329302
Iteration 1231, loss = 0.19312343
Iteration 1232, loss = 0.19295412
Iteration 1233, loss = 0.19278510
Iteration 1234, loss = 0.19261637
Iteration 1235, loss = 0.19244792
Iteration 1236, loss = 0.19227975
Iteration 1237, loss = 0.19211187
Iteration 1238, loss = 0.19194427
Iteration 1239, loss = 0.19177695
Iteration 1240, loss = 0.19160990
Iteration 1241, loss = 0.19144314
Iteration 1242, loss = 0.19127666
Iteration 1243, loss = 0.19111045
Iteration 1244, loss = 0.19094452
Iteration 1245, loss = 0.19077886
Iteration 1246, loss = 0.19061347
Iteration 1247, loss = 0.19044836
Iteration 1248, loss = 0.19028352
Iteration 1249, loss = 0.19011895
Iteration 1250, loss = 0.18995465
Iteration 1251, loss = 0.18979062
Iteration 1252, loss = 0.18962686
Iteration 1253, loss = 0.18946336
Iteration 1254, loss = 0.18930013
Iteration 1255, loss = 0.18913717
Iteration 1256, loss = 0.18897447
Iteration 1257, loss = 0.18881203
Iteration 1258, loss = 0.18864986
Iteration 1259

Iteration 1509, loss = 0.15414421
Iteration 1510, loss = 0.15402472
Iteration 1511, loss = 0.15390521
Iteration 1512, loss = 0.15378587
Iteration 1513, loss = 0.15366676
Iteration 1514, loss = 0.15354772
Iteration 1515, loss = 0.15342868
Iteration 1516, loss = 0.15330973
Iteration 1517, loss = 0.15319095
Iteration 1518, loss = 0.15307231
Iteration 1519, loss = 0.15295372
Iteration 1520, loss = 0.15283518
Iteration 1521, loss = 0.15271676
Iteration 1522, loss = 0.15259849
Iteration 1523, loss = 0.15248031
Iteration 1524, loss = 0.15236219
Iteration 1525, loss = 0.15224416
Iteration 1526, loss = 0.15212624
Iteration 1527, loss = 0.15200845
Iteration 1528, loss = 0.15189074
Iteration 1529, loss = 0.15177310
Iteration 1530, loss = 0.15165555
Iteration 1531, loss = 0.15153812
Iteration 1532, loss = 0.15142079
Iteration 1533, loss = 0.15130355
Iteration 1534, loss = 0.15118639
Iteration 1535, loss = 0.15106932
Iteration 1536, loss = 0.15095236
Iteration 1537, loss = 0.15083550
Iteration 1538

In [14]:
y = [x[len(STATE_ROUNDS_SINCE_LAST_ATTACK) + COMPASS_POINTS] for x in HISTORY]

In [15]:
for i in range(COMPASS_POINTS):
    a = [0] * COMPASS_POINTS
    a[i] = 1
    state = STATE_ROUNDS_SINCE_LAST_ATTACK + a
    print("attention: {a}  prediction {p}".format(a=i, p = clf.predict([state])))

attention: 0  prediction [-5.13733747]
attention: 1  prediction [-5.93883932]
attention: 2  prediction [-5.88876848]
attention: 3  prediction [-4.36960319]
attention: 4  prediction [-5.51765073]
attention: 5  prediction [-5.11125976]
attention: 6  prediction [-6.15712182]
attention: 7  prediction [-4.27681281]
attention: 8  prediction [-5.1152085]
attention: 9  prediction [-4.36865458]
attention: 10  prediction [-7.14998333]
attention: 11  prediction [-6.08883032]


In [16]:
STATE_ROUNDS_SINCE_LAST_ATTACK

[15, 15, 15, 2, 15, 15, 15, 15, 15, 0, 15, 15]

In [17]:
darkBlade.hp

0

In [18]:
e

77