# 🚀 RL Football - FAST Training

**Optimized for speed** - 10-50x faster.

1. Runtime → GPU
2. Run all cells
3. Download weights when done

In [None]:
# Numerics, serialization, timing, replay-buffer container and sampling.
import numpy as np
import json
import time
from collections import deque
import random
import os
# Must be set before Keras/TensorFlow are imported below.
# NOTE(review): '2' is presumably meant to hide TensorFlow's INFO and WARNING
# log lines — confirm against the TensorFlow logging documentation.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import keras
from keras import layers, ops
# Report the Keras version in the cell output.
print(f'Keras: {keras.__version__}')

import tensorflow as tf
# Show which GPU devices TensorFlow reports (an empty list means none found).
print(f'GPU: {tf.config.list_physical_devices("GPU")}')

In [None]:
# Fast numpy game
class Game:
    """Two-player football simulation on a 720x420 pitch.

    ``blip``, ``bloop`` and ``ball`` are 4-element float arrays laid out as
    ``[x, y, vx, vy]``.  ``score`` is ``[blip_goals, bloop_goals]``, ``time``
    counts down from 30.0 by 1/60 per step, and ``kick`` holds one per-step
    kick flag for each player.
    """

    def __init__(self):
        self.W = 720
        self.H = 420
        self.reset()

    def reset(self):
        """Put everything back at kickoff: positions, score, clock, kick flags."""
        self.score = [0, 0]
        self.kick = [0, 0]
        self.time = 30.0
        self.ball = np.array([360., 210., 0., 0.])
        self.blip = np.array([120., 210., 0., 0.])
        self.bloop = np.array([600., 210., 0., 0.])

    def step(self, a1, a2):
        """Advance the simulation by one tick.

        Args:
            a1: action index for ``blip``.
            a2: action index for ``bloop``.
                Actions 0-7 accelerate in one of eight directions, 8 arms a
                kick for this tick, and any other value (e.g. 9) is a no-op.

        Returns:
            ``(event, done)`` where ``event`` is ``'W'`` when the ball leaves
            through the right goal (blip scores), ``'L'`` when it leaves
            through the left goal (bloop scores), otherwise ``None``; ``done``
            is true once ``time`` has reached zero or below.
        """
        directions = [(0, -1), (0, 1), (-1, 0), (1, 0),
                      (-1, -1), (1, -1), (-1, 1), (1, 1), (0, 0), (0, 0)]
        players = (self.blip, self.bloop)
        ball = self.ball

        # Apply the chosen action to each player, then cap speed at 4.
        for slot, (body, action) in enumerate(zip(players, (a1, a2))):
            if action < 8:
                ax, ay = directions[action]
                body[2] += ax * 2
                body[3] += ay * 2
            elif action == 8:
                self.kick[slot] = 1
            speed = np.sqrt(body[2]**2 + body[3]**2)
            if speed > 4:
                body[2:4] *= 4/speed

        # Move the players, apply friction and keep them 25 px inside the pitch.
        for body in players:
            body[0:2] += body[2:4]
            body[2:4] *= 0.85
            body[0] = np.clip(body[0], 25, self.W-25)
            body[1] = np.clip(body[1], 25, self.H-25)

        # Move the ball with light drag.
        ball[0:2] += ball[2:4]
        ball[2:4] *= 0.98

        # Bounce off the top/bottom walls, losing 20% of the vertical speed.
        if ball[1] < 12:
            ball[3] = -ball[3]*0.8
            ball[1] = 12
        if ball[1] > self.H-12:
            ball[3] = -ball[3]*0.8
            ball[1] = self.H-12

        # The goal mouth is a 120 px tall band centred vertically; outside
        # that band the left/right walls reflect the ball as well.
        goal_top = (self.H-120)//2
        in_goal_mouth = goal_top < ball[1] < goal_top+120
        if not in_goal_mouth:
            if ball[0] < 12:
                ball[2] = -ball[2]*0.8
                ball[0] = 12
            if ball[0] > self.W-12:
                ball[2] = -ball[2]*0.8
                ball[0] = self.W-12

        # Player/ball contact: push the ball out to 37 px from the player and
        # launch it along the contact normal (harder when a kick is armed).
        for body, kicking in zip(players, self.kick):
            dist = np.sqrt((ball[0]-body[0])**2 + (ball[1]-body[1])**2)
            if not (0 < dist < 37):
                continue  # no contact; dist == 0 is skipped to avoid dividing by zero
            normal = (ball[0:2] - body[0:2]) / dist
            ball[0:2] = body[0:2] + normal*37
            power = 12 if kicking else 6
            ball[2:4] = normal*power + body[2:4]*0.5
        self.kick = [0, 0]

        # Goal detection uses the goal-mouth test made before the contact step.
        ev = None
        if in_goal_mouth and ball[0] < 0:
            self.score[1] += 1
            ev = 'L'
            self._rp()
        elif in_goal_mouth and ball[0] > self.W:
            self.score[0] += 1
            ev = 'W'
            self._rp()

        self.time -= 1/60
        return ev, self.time <= 0

    def _rp(self):
        """Reset positions after a goal (player velocities are left untouched)."""
        self.blip[0:2] = [120, 210]
        self.bloop[0:2] = [600, 210]
        self.ball[:] = [360, 210, 0, 0]

    def state(self, t):
        """Return the 12-feature float32 observation for player ``t``.

        ``t == 0`` observes as ``blip``; any other value observes as ``bloop``.
        Layout: own x, own y, ball x, ball y (each divided by pitch size),
        ball vx, ball vy (divided by 15 and clipped to [-1, 1]), opponent x,
        opponent y, own distance to the ball divided by 830, then three zeros.
        """
        if t == 0:
            me, other = self.blip, self.bloop
        else:
            me, other = self.bloop, self.blip
        ball = self.ball
        gap = np.sqrt((me[0]-ball[0])**2+(me[1]-ball[1])**2)/830
        features = [
            me[0]/self.W, me[1]/self.H,
            ball[0]/self.W, ball[1]/self.H,
            np.clip(ball[2]/15, -1, 1), np.clip(ball[3]/15, -1, 1),
            other[0]/self.W, other[1]/self.H,
            gap, 0, 0, 0,
        ]
        return np.array(features, dtype=np.float32)

# Cell-completion marker (the check-mark emoji was stored mis-encoded).
print('✅ Game ready')

In [None]:
def ai_act(s):
    """Scripted opponent policy: chase the ball and kick when touching it.

    Args:
        s: observation indexed as own x/y at 0/1, ball x/y at 2/3 and
           normalised distance to the ball at 8.

    Returns:
        An action index: 8 (kick) when the distance is below 0.04, one of the
        eight movement actions 0-7 otherwise, or 9 (no-op) when neither axis
        needs a move.
    """
    dx = s[2] - s[0]
    dy = s[3] - s[1]
    dist = s[8]

    if dist < 0.04:
        return 8

    # Horizontal intent: when close to the ball always push left, otherwise
    # head toward the ball with a +/-0.02 dead zone.
    if dist < 0.08:
        mx = -1
    elif dx > 0.02:
        mx = 1
    elif dx < -0.02:
        mx = -1
    else:
        mx = 0

    # Vertical intent: follow the ball with the same dead zone.
    if dy > 0.02:
        my = 1
    elif dy < -0.02:
        my = -1
    else:
        my = 0

    action_for = {
        (0, -1): 0, (0, 1): 1, (-1, 0): 2, (1, 0): 3,
        (-1, -1): 4, (1, -1): 5, (-1, 1): 6, (1, 1): 7,
    }
    return action_for.get((mx, my), 9)

# Cell-completion marker (the check-mark emoji was stored mis-encoded).
print('✅ AI ready')

In [None]:
# DQN with Keras 3 compatible architecture
class DQN:
    """Dueling double-DQN agent over 12-feature states and 10 discrete actions.

    Attributes:
        eps: current epsilon for epsilon-greedy exploration.
        mem: replay buffer of ``(s, a, r, s2, done)`` tuples (max 50000).
        step: number of completed training updates.
        ld: ball distance seen by the previous ``reward`` call, or ``None``
            at the start of an episode.
        model: online network; ``target`` is its periodically synced copy.
    """

    def __init__(self):
        self.eps = 1.0
        self.mem = deque(maxlen=50000)
        self.step = 0
        self.ld = None
        self.model = self._build()
        self.target = self._build()
        self.target.set_weights(self.model.get_weights())

    def _build(self):
        """Build and compile the dueling Q-network (value + advantage heads)."""
        inp = layers.Input(shape=(12,))
        x = layers.Dense(256, activation='relu')(inp)
        x = layers.Dense(256, activation='relu')(x)
        x = layers.Dense(128, activation='relu')(x)
        # Value stream: one scalar per state.
        v = layers.Dense(64, activation='relu')(x)
        v = layers.Dense(1)(v)
        # Advantage stream: one output per action.
        a = layers.Dense(64, activation='relu')(x)
        a = layers.Dense(10)(a)
        # Keras 3 compatible: use Lambda with ops.mean
        m = layers.Lambda(lambda t: ops.mean(t, axis=1, keepdims=True))(a)
        # Q = V + (A - mean(A)).
        ac = layers.Subtract()([a, m])
        q = layers.Add()([v, ac])
        model = keras.Model(inp, q)
        model.compile(optimizer=keras.optimizers.Adam(0.0005), loss='mse')
        return model

    def act(self, s):
        """Pick an action for state ``s``: random with probability ``eps``, else greedy."""
        if np.random.random() < self.eps:
            return np.random.randint(10)
        return int(np.argmax(self.model(s[np.newaxis], training=False)[0]))

    def remember(self, s, a, r, s2, d):
        """Append one transition ``(s, a, r, s2, done)`` to the replay buffer."""
        self.mem.append((s, a, r, s2, d))

    def train(self):
        """Run one double-DQN update on a random batch of 64 transitions.

        Does nothing until the buffer holds at least 500 transitions.  The
        target network is synced with the online network every 500 updates.
        """
        if len(self.mem) < 500:
            return
        batch = random.sample(self.mem, 64)
        S = np.array([x[0] for x in batch])
        S2 = np.array([x[3] for x in batch])
        Q = self.model.predict(S, verbose=0)
        Q2 = self.model.predict(S2, verbose=0)
        QT = self.target.predict(S2, verbose=0)
        # Double DQN: the online net chooses the next action, the target net
        # scores it; only the taken action's Q-value is moved toward the target.
        for i, (_, a, r, _, d) in enumerate(batch):
            Q[i][a] = r if d else r + 0.995*QT[i][np.argmax(Q2[i])]
        self.model.fit(S, Q, verbose=0)
        self.step += 1
        if self.step % 500 == 0:
            self.target.set_weights(self.model.get_weights())

    def reward(self, g, ev):
        """Compute the shaped reward for ``blip`` after a game step.

        Args:
            g: game object exposing ``blip``, ``ball`` and ``bloop`` arrays
               laid out as ``[x, y, vx, vy]``.
            ev: ``'W'`` (goal scored), ``'L'`` (goal conceded) or ``None``.

        Returns:
            The scalar reward.  Also stores the current ball distance in
            ``self.ld`` for the next call's approach bonus.
        """
        p, b, o = g.blip, g.ball, g.bloop
        d = np.sqrt((p[0]-b[0])**2+(p[1]-b[1])**2)
        r = 500 if ev == 'W' else (-300 if ev == 'L' else 0)
        # Proximity to the ball.
        r += (1-d/830)*5
        if d < 40:
            r += 10
        # Approach bonus relative to the previous step.  Compare against None
        # explicitly so that a stored distance of 0.0 still counts as history.
        if self.ld is not None:
            dd = self.ld - d
            r += dd*0.5
            if dd > 2:
                r += 3
        self.ld = d
        # Discourage standing still away from the ball; reward movement.
        sp = np.sqrt(p[2]**2+p[3]**2)
        if sp < 0.5 and d > 50:
            r -= 8
        if sp > 1:
            r += 1
        # Ball travelling right while the agent is near it.
        if b[2] > 2 and d < 80:
            r += 8
        # Ball within 100 px of x = 720 (the right edge).
        if abs(b[0]-720) < 100:
            r += 5
        # Penalise hiding in a corner, more so when far from the ball.
        if (p[0] < 80 or p[0] > 640) and (p[1] < 80 or p[1] > 340):
            r -= 5
            if d > 100:
                r -= 5
        # Tiered penalty for being far from the ball.
        if d > 300:
            r -= 5
        elif d > 200:
            r -= 3
        elif d > 150:
            r -= 1
        # Opponent is closer to the ball than the agent.
        od = np.sqrt((o[0]-b[0])**2+(o[1]-b[1])**2)
        if od < d and d > 60:
            r -= 2
        # Small constant per-step cost.
        return r - 0.1

    def reset(self):
        """End-of-episode hook: clear distance history and decay epsilon (floor 0.02)."""
        self.ld = None
        self.eps = max(0.02, self.eps * 0.9999)

    def save(self, f):
        """Write the online network's weights and training counters to JSON file ``f``."""
        payload = {
            'weights': [{'shape': list(x.shape), 'data': x.flatten().tolist()}
                        for x in self.model.get_weights()],
            'epsilon': self.eps,
            'trainStepCount': self.step,
        }
        # Context manager guarantees the file is flushed and closed on return.
        with open(f, 'w') as fh:
            json.dump(payload, fh)

# Cell-completion marker (the check-mark emoji was stored mis-encoded).
print('✅ DQN ready')

In [None]:
def train(n=100000):
    """Train a DQN agent as ``blip`` against the scripted ``ai_act`` opponent.

    Runs ``n`` episodes, training on every 4th game step, printing progress
    on episode 1 and every 100 episodes, saving a checkpoint ``w_<ep>.json``
    every 5000 episodes and ``final.json`` at the end.

    Args:
        n: number of episodes to play.

    Returns:
        ``(agent, stats)`` where ``stats`` counts episodes won (``'W'``),
        lost (``'L'``) and drawn (``'D'``) by final score, plus the total
        number of goal events by either side (``'G'``).
    """
    g, a = Game(), DQN()
    st = {'W': 0, 'L': 0, 'D': 0, 'G': 0}
    t0 = time.time()
    print('='*50+'\n🚀 Training\n'+'='*50)

    for ep in range(1, n+1):
        g.reset()
        done = False
        c = 0
        s1 = g.state(0)
        while not done:
            c += 1
            s2 = g.state(1)
            a1 = a.act(s1)
            a2 = ai_act(s2)
            ev, done = g.step(a1, a2)
            r = a.reward(g, ev)
            # The post-step observation is both this transition's next state
            # and the next iteration's current state; compute it once.
            nxt = g.state(0)
            a.remember(s1, a1, r, nxt, done)
            s1 = nxt
            if ev:
                st['G'] += 1
            if c % 4 == 0:
                a.train()

        w = 'W' if g.score[0] > g.score[1] else ('L' if g.score[1] > g.score[0] else 'D')
        st[w] += 1
        a.reset()

        if ep % 100 == 0 or ep == 1:
            el = time.time() - t0
            # Guard against a zero elapsed time on very coarse clocks.
            sp = ep/el if el > 0 else 0.0
            eta = (n-ep)/sp if sp > 0 else 0
            print(f'Ep {ep}/{n} | ε:{a.eps:.3f} | W:{st["W"]} L:{st["L"]} D:{st["D"]} | G:{st["G"]} | {sp:.1f}/s | ETA:{eta/3600:.1f}h')

        if ep % 5000 == 0:
            a.save(f'w_{ep}.json')
            print('💾 Saved')

    a.save('final.json')
    print('✅ Done!')
    return a, st

# Cell-completion marker (the check-mark emoji was stored mis-encoded).
print('✅ Ready')

In [None]:
# Run the 100000-episode training; keeps the trained agent and the
# W/L/D/G tallies returned by train() for the export cell.
agent, stats = train(100000)

In [None]:
from google.colab import files

# Re-read the final weights saved by train() and wrap them in the export
# format; the same weights are stored under every agent key.
with open('final.json') as fh:
    w = json.load(fh)
out = {'version':2,'blipAgent':w,'bloopAgent':w,'blip':w,'bloop':w,'stats':stats}
# Close (and thereby flush) the file before handing it to the downloader.
with open('trained.json', 'w') as fh:
    json.dump(out, fh)
files.download('trained.json')