# Linear TD($\lambda$) Agent
As a first-pass at constructing an agent to play Connect 4 in the Kaggle [ConnectX tournament](https://www.kaggle.com/c/connectx), I'll construct an agent that acts greedily via a linear value function that is approximated using coarse coding and the TD($\lambda$) algorithm.

In [1]:
from linear_TD_agent import TDAgent
from RL_utils import train, watch_play, evaluate
from eval_agents import BaseAgent, StepPlay
import numpy as np

The following weights were learned after 20000 self play episodes.

In [43]:
w1 = np.copy(agent1.w)
w2 = np.copy(agent2.w)
w3 = np.copy(agent3.w)

In [44]:
agent1 = TDAgent()
agent1.agent_init({"w": w1})

agent2 = TDAgent()
agent2.agent_init({"w": w2})

agent3 = TDAgent()
agent3.agent_init({"w" : w3})

players = [agent1, agent2, agent3]

record = train(10000, players)

In [45]:
print(len(record))
print(np.average( np.asarray(record) == 1 ) )

10000
0.4925


In [58]:
player1 = BaseAgent()
player1.agent_init()

players = [agent3, player1]

record = evaluate(1000, players)
#watch_play([player1, agent1])

In [59]:
print(np.average( np.asarray(record) == 1 ) )

0.618


In [None]:
player1 = StepPlay()
player1.agent_init()

In [62]:
watch_play([player1, agent3])

+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 1 | 0 | 0 |
+---+---+---+---+---+---+---+

+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 2 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 1 | 0 | 0 |
+---+---+---+---+---+---+---+

+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 

In [46]:
print(agent1.w)

[ 15.44970335 -31.53553972  15.27800159 -10.93214797   4.10162943
  23.39855714   3.4437143  -12.14129155 -16.95243594  -0.39253215
 -18.61573168  -5.96644278  -3.98780021 -14.17828489  -9.59736143
   0.617924     3.73867363   6.81377764  28.11889061  21.87349724
  14.59843284  11.64023825   7.55224071  -2.87048637   3.03705224
  -7.79956973  -6.58383401  -9.80595944 -10.47479278   2.39522924
  14.16346102  -0.63496999  -0.5361538  -21.23184798 -18.41897942
 -11.93968669 -12.75197111  27.120184   -22.98875034  -4.36447423
 -11.30395938 -16.23474845 -40.68682543 -24.31526179  31.85029166
  32.20238896   6.68429874  17.58883697  30.89223075   6.70380795
  22.70221048  15.25270103  17.71352982  12.54567356   9.77847077
   6.21129812  13.73247424  30.90480402  21.95236212  30.2008981
  38.84591569  16.33192405  14.21846185  23.32952289   4.55427879
   9.30237445   8.12405102  -5.09097744 -33.03013604 -22.24421758
 -12.14657445  10.37902607  -0.67244867   7.78618825]


In [47]:
print(agent2.w)

[-2.69940571e+00 -5.26332493e+01 -2.23994427e+00 -5.63525122e+01
 -4.38903590e+01  7.24649144e+00  2.98255753e+00  1.03293075e+00
  1.39255013e+00 -2.49293265e+01 -1.89330899e+01  1.64095775e+01
  3.67154545e+01 -8.95106752e+00 -6.19786648e+00 -3.81744605e+00
  1.41448929e+01  2.42091656e+01  1.48894072e+01  7.59093841e+00
  5.62246242e+00  7.33293348e+00 -4.31504715e+00 -1.32928384e+00
  5.25476556e+00 -3.67573583e+00 -1.15640861e+01 -1.87107399e+01
 -9.05192420e-03 -4.32721462e-02 -1.19011185e+00 -2.15476320e+00
  3.10638883e+00 -2.94796921e+00 -7.42629848e+00 -1.23635981e+01
 -1.46627380e+01  4.34471442e+01 -8.50474412e+00  1.64208200e+01
 -3.07376181e+00  2.31436506e+01 -2.55139770e+01  2.66550448e+01
  1.83159099e+01  2.89032321e+00 -3.17959563e+00  2.38191257e+01
  6.66189858e+00 -1.09268451e+01  1.07564220e+00  2.92079111e+00
 -2.65722654e+00 -1.85881572e+01  9.10347722e+00  2.09369962e+01
  3.71844556e+01  8.46666962e+00  1.10258250e+01  1.18668516e+00
  1.02143392e+01  5.29832

In [48]:
print(agent3.w)

[ 43.65177503 -11.03677002  32.55963632   4.01264105  14.36752159
  -1.08952691  23.76485244  -3.22370421 -11.62172698 -28.81629329
 -19.19900897 -22.40257057 -33.93851056  -6.77716978 -14.74672506
  -5.78576781 -15.76611588   7.689347    -8.1581809    1.02286712
 -13.6151467  -13.9286816  -17.96095651   5.36022741  -2.57284175
   6.37656909   2.43519259  17.43404149   0.38737591   4.73757696
   4.97085166  27.41411238  17.05858519  17.97930601  -3.56365139
  -3.07979985   2.34831277  32.15385111  -9.20896251   6.5967602
 -17.08379333 -19.77475497 -43.84508699  -8.10400142  28.8025441
  34.50221784  30.09912392  26.90891185  22.08347814   1.42689675
  10.60321682  17.20248069  -0.08614797  -1.86827505  -6.25237398
 -17.41778204 -18.0448256   -3.09601968  -3.81951841  11.65352418
  26.39538378  11.36650307  28.39553811  24.67165738   4.27996927
   4.95995938   4.12071245  -7.79824053  -7.34879303  -5.56295571
   4.3596655   21.36479176  20.78170238  13.30924163]


In [65]:
import h5py

with h5py.File('linear_weights.h5', 'a') as hf:
    hf.create_dataset("w1",  data=agent1.w)

with h5py.File('linear_weights.h5', 'a') as hf:
    hf.create_dataset("w2",  data=agent2.w)
    
#with h5py.File('linear_weights.h5', 'a') as hf:
#    hf.create_dataset("w3",  data=agent3.w)



RuntimeError: Unable to create link (name already exists)

In [68]:
    
with h5py.File('linear_weights.h5', 'r') as hf:
    data = hf['w3'][:]
    
print(data)

[ 43.65177503 -11.03677002  32.55963632   4.01264105  14.36752159
  -1.08952691  23.76485244  -3.22370421 -11.62172698 -28.81629329
 -19.19900897 -22.40257057 -33.93851056  -6.77716978 -14.74672506
  -5.78576781 -15.76611588   7.689347    -8.1581809    1.02286712
 -13.6151467  -13.9286816  -17.96095651   5.36022741  -2.57284175
   6.37656909   2.43519259  17.43404149   0.38737591   4.73757696
   4.97085166  27.41411238  17.05858519  17.97930601  -3.56365139
  -3.07979985   2.34831277  32.15385111  -9.20896251   6.5967602
 -17.08379333 -19.77475497 -43.84508699  -8.10400142  28.8025441
  34.50221784  30.09912392  26.90891185  22.08347814   1.42689675
  10.60321682  17.20248069  -0.08614797  -1.86827505  -6.25237398
 -17.41778204 -18.0448256   -3.09601968  -3.81951841  11.65352418
  26.39538378  11.36650307  28.39553811  24.67165738   4.27996927
   4.95995938   4.12071245  -7.79824053  -7.34879303  -5.56295571
   4.3596655   21.36479176  20.78170238  13.30924163]
