# 🚀 RL Football - FAST Training

**Optimized version** - runs 10-50x faster than the standard version.

Key optimizations:
- Compiled TF functions
- Numpy-based game simulation
- Reduced overhead

**Instructions:**
1. Runtime → Change runtime type → GPU
2. Run all cells
3. Download weights when done

In [None]:
import numpy as np
import json
import time
from collections import deque
import random
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import tensorflow as tf
print(f'TensorFlow: {tf.__version__}')
print(f'GPU: {tf.config.list_physical_devices("GPU")}')

# Ask TensorFlow to grow GPU memory on demand for every visible GPU.
enable_growth = tf.config.experimental.set_memory_growth
for device in tf.config.list_physical_devices('GPU'):
    enable_growth(device, True)

In [None]:
# Fast numpy-based game simulation
class FastGame:
    """Minimal two-player football simulation stored in NumPy arrays.

    ``blip``, ``bloop`` and ``ball`` are each a 4-vector ``[x, y, vx, vy]``.
    Every call to :meth:`step` advances the physics once and takes 1/60 off
    the match clock.
    """

    # Direction per action id: 0-7 move, 8 (kick) and 9 (stay) have no direction.
    _MOVES = (
        (0, -1), (0, 1), (-1, 0), (1, 0),
        (-1, -1), (1, -1), (-1, 1), (1, 1),
        (0, 0), (0, 0),
    )
    _MAX_SPEED = 4.0          # cap on a player's velocity magnitude
    _PLAYER_DAMPING = 0.85    # per-step velocity multiplier for players
    _BALL_DAMPING = 0.98      # per-step velocity multiplier for the ball
    _PLAYER_MARGIN = 25       # players are kept this far inside the field
    _BALL_MARGIN = 12         # the ball bounces this far inside the field
    _CONTACT_DIST = 37        # player/ball centre distance that counts as contact
    _BOUNCE = -0.8            # velocity multiplier applied on a wall bounce

    def __init__(self, match_time=30):
        """Create a pitch and put everything at its kickoff position.

        Args:
            match_time: Initial value of the match clock.
        """
        self.field_width = 720
        self.field_height = 420
        self.goal_height = 120
        self.match_time = match_time
        self.reset()

    def reset(self):
        """Start a new match: kickoff positions, zero scores, full clock."""
        # Bodies are [x, y, vx, vy].
        self.blip = np.array([120.0, 250.0, 0.0, 0.0])
        self.bloop = np.array([600.0, 250.0, 0.0, 0.0])
        self.ball = np.array([360.0, 250.0, 0.0, 0.0])
        self.blip_score = 0
        self.bloop_score = 0
        self.time = self.match_time
        self.blip_kick = 0
        self.bloop_kick = 0

    def step(self, blip_action, bloop_action):
        """Advance the simulation by one tick.

        Args:
            blip_action: Action id for blip (0-7 move, 8 kick, 9 stay).
            bloop_action: Action id for bloop, same encoding.

        Returns:
            ``(event, done)`` where ``event`` is ``'blip'`` or ``'bloop'``
            when that side scored on this tick (otherwise ``None``) and
            ``done`` tells whether the clock has reached zero.
        """
        if self._push(self.blip, blip_action):
            self.blip_kick = 1
        if self._push(self.bloop, bloop_action):
            self.bloop_kick = 1

        for player in (self.blip, self.bloop):
            self._limit_speed(player)

        # Integrate, then damp. Bodies are independent at this stage.
        moving = (
            (self.blip, self._PLAYER_DAMPING),
            (self.bloop, self._PLAYER_DAMPING),
            (self.ball, self._BALL_DAMPING),
        )
        for body, damping in moving:
            body[0:2] += body[2:4]
            body[2:4] *= damping

        margin = self._PLAYER_MARGIN
        for player in (self.blip, self.bloop):
            player[0] = np.clip(player[0], margin, self.field_width - margin)
            player[1] = np.clip(player[1], margin, self.field_height - margin)

        # The goal-mouth test uses the ball's y before player contact moves it.
        in_goal = self._bounce_ball()
        self._collide_ball()

        # Kick requests only last for the tick in which they were issued.
        self.blip_kick = 0
        self.bloop_kick = 0

        event = self._check_goal(in_goal)

        self.time -= 1/60
        return event, self.time <= 0

    def _push(self, body, action):
        """Accelerate ``body`` for a move action; return truthy for a kick."""
        if action < 8:
            dir_x, dir_y = self._MOVES[action]
            body[2] += dir_x * self._MAX_SPEED * 0.5
            body[3] += dir_y * self._MAX_SPEED * 0.5
            return False
        return action == 8

    def _limit_speed(self, body):
        """Scale ``body``'s velocity down so its magnitude is at most the cap."""
        magnitude = np.sqrt(body[2]**2 + body[3]**2)
        if magnitude > self._MAX_SPEED:
            body[2:4] *= self._MAX_SPEED / magnitude

    def _bounce_ball(self):
        """Bounce the ball off the walls; return whether it is level with the goal."""
        ball = self.ball
        margin = self._BALL_MARGIN

        if ball[1] < margin:
            ball[1] = margin
            ball[3] *= self._BOUNCE
        if ball[1] > self.field_height - margin:
            ball[1] = self.field_height - margin
            ball[3] *= self._BOUNCE

        goal_y_min = (self.field_height - self.goal_height) / 2
        goal_y_max = goal_y_min + self.goal_height
        in_goal = goal_y_min < ball[1] < goal_y_max
        if in_goal:
            # Side walls are open across the goal mouth.
            return in_goal

        if ball[0] < margin:
            ball[0] = margin
            ball[2] *= self._BOUNCE
        if ball[0] > self.field_width - margin:
            ball[0] = self.field_width - margin
            ball[2] *= self._BOUNCE
        return in_goal

    def _collide_ball(self):
        """Push the ball away from any player it is touching."""
        ball = self.ball
        contacts = ((self.blip, self.blip_kick), (self.bloop, self.bloop_kick))
        for player, kicking in contacts:
            dx = ball[0] - player[0]
            dy = ball[1] - player[1]
            gap = np.sqrt(dx*dx + dy*dy)
            if not (0 < gap < self._CONTACT_DIST):
                continue
            ux, uy = dx/gap, dy/gap
            ball[0] = player[0] + ux * self._CONTACT_DIST
            ball[1] = player[1] + uy * self._CONTACT_DIST
            strength = 12 if kicking else 6
            ball[2] = ux * strength + player[2] * 0.5
            ball[3] = uy * strength + player[3] * 0.5

    def _check_goal(self, in_goal):
        """Award a goal if the ball left the field through a goal mouth."""
        if not in_goal:
            return None
        if self.ball[0] < 0:
            self.bloop_score += 1
            scorer = 'bloop'
        elif self.ball[0] > self.field_width:
            self.blip_score += 1
            scorer = 'blip'
        else:
            return None
        self._reset_positions()
        return scorer

    def _reset_positions(self):
        """Return players and ball to kickoff spots (player velocities are kept)."""
        self.blip[0:2] = [120, 250]
        self.bloop[0:2] = [600, 250]
        self.ball[:] = [360, 250, 0, 0]

    def get_state(self, team='blip'):
        """Build the 12-feature float32 observation for one side.

        Args:
            team: ``'blip'`` for blip's point of view; any other value
                yields bloop's.

        Returns:
            ``[own x, own y, ball x, ball y, ball vx, ball vy, rival x,
            rival y, ball distance, ball angle, goal distance, goal angle]``
            with positions divided by the field size, ball velocity divided
            by 15 and clipped to [-1, 1], distances divided by the field
            diagonal and angles mapped from [-pi, pi] onto [0, 1].
        """
        if team == 'blip':
            me, rival, goal_x = self.blip, self.bloop, self.field_width
        else:
            me, rival, goal_x = self.bloop, self.blip, 0

        w, h, ball = self.field_width, self.field_height, self.ball
        diagonal = np.sqrt(w**2 + h**2)

        ball_dist = np.sqrt((me[0]-ball[0])**2 + (me[1]-ball[1])**2) / diagonal
        ball_angle = (np.arctan2(ball[1]-me[1], ball[0]-me[0]) + np.pi) / (2*np.pi)
        goal_dist = np.sqrt((me[0]-goal_x)**2 + (me[1]-250)**2) / diagonal
        goal_angle = (np.arctan2(250-me[1], goal_x-me[0]) + np.pi) / (2*np.pi)

        features = [
            me[0]/w, me[1]/h,
            ball[0]/w, ball[1]/h,
            np.clip(ball[2]/15, -1, 1), np.clip(ball[3]/15, -1, 1),
            rival[0]/w, rival[1]/h,
            ball_dist, ball_angle, goal_dist, goal_angle,
        ]
        return np.array(features, dtype=np.float32)

# Confirmation that the cell ran; the check mark was mis-encoded as 'âœ…'.
print('✅ Fast game ready')

In [None]:
# Simple AI - always chases ball
def simple_ai_action(state):
    """Pick an action for the scripted opponent from its observation.

    Reads own position (indices 0-1), ball position (indices 2-3) and the
    normalised ball distance (index 8).

    Returns:
        8 (kick) when the ball is very close, a movement id 0-7 otherwise,
        or 9 (stay) when no movement is needed.
    """
    ball_dist = state[8]
    if ball_dist < 0.04:
        return 8  # kick

    def sign(value, dead_zone):
        # -1 / 0 / +1 with a dead zone around zero.
        if value > dead_zone:
            return 1
        if value < -dead_zone:
            return -1
        return 0

    offset_x = state[2] - state[0]
    offset_y = state[3] - state[1]

    if ball_dist < 0.08:
        # Near the ball: always head left (bloop attacks left), tracking ball y.
        step_x = -1
        step_y = sign(offset_y, 0.01)
    else:
        step_x = sign(offset_x, 0.02)
        step_y = sign(offset_y, 0.02)

    action_for_direction = {
        (0, -1): 0, (0, 1): 1, (-1, 0): 2, (1, 0): 3,
        (-1, -1): 4, (1, -1): 5, (-1, 1): 6, (1, 1): 7,
    }
    return action_for_direction.get((step_x, step_y), 9)

# Confirmation that the cell ran; the check mark was mis-encoded as 'âœ…'.
print('✅ Simple AI ready')

In [None]:
# Fast DQN with compiled predict
class FastDQN:
    """Dueling double-DQN agent that controls the ``blip`` player.

    Keeps an online network, a periodically synchronised target network and
    a bounded replay buffer. Relies on the module-level names ``tf``, ``np``,
    ``random``, ``json`` and ``deque``.
    """

    def __init__(self):
        """Set hyper-parameters and build the online and target networks."""
        self.state_size = 12          # observation length fed to the network
        self.action_size = 10         # number of discrete actions
        self.lr = 0.0005              # Adam learning rate
        self.gamma = 0.995            # discount factor
        self.epsilon = 1.0            # current exploration probability
        self.epsilon_min = 0.02       # exploration floor
        self.epsilon_decay = 0.9999   # multiplicative decay applied in reset()
        self.memory = deque(maxlen=50000)  # replay buffer of (s, a, r, s2, done)
        self.batch_size = 64
        self.min_mem = 500            # transitions required before training starts
        self.train_step = 0           # number of completed train() updates
        self.target_freq = 500        # target sync interval, in train steps
        self.last_dist = None         # previous player-ball distance, for shaping

        self.model = self._build()
        self.target = self._build()
        self.target.set_weights(self.model.get_weights())

        # Compiled forward pass used by act(). act() always feeds a (1, 12)
        # batch, so the deprecated `experimental_relax_shapes` option is not
        # needed.
        self._predict = tf.function(self.model)

    def _build(self):
        """Build and compile one dueling Q-network.

        Returns:
            A compiled Keras model mapping ``(batch, state_size)`` inputs to
            ``(batch, action_size)`` Q-values.
        """
        inp = tf.keras.Input(shape=(self.state_size,))
        x = tf.keras.layers.Dense(256, activation='relu')(inp)
        x = tf.keras.layers.Dense(256, activation='relu')(x)
        x = tf.keras.layers.Dense(128, activation='relu')(x)

        # State-value stream.
        v = tf.keras.layers.Dense(64, activation='relu')(x)
        v = tf.keras.layers.Dense(1)(v)

        # Advantage stream.
        a = tf.keras.layers.Dense(64, activation='relu')(x)
        a = tf.keras.layers.Dense(self.action_size)(a)

        # Q = V + (A - mean(A)). The aggregation lives in a Lambda layer
        # because Keras 3 rejects raw tf ops on symbolic Keras tensors; the
        # layer has no weights, so get_weights() ordering is unaffected.
        q = tf.keras.layers.Lambda(
            lambda streams: streams[0] + (
                streams[1] - tf.reduce_mean(streams[1], axis=1, keepdims=True)
            )
        )([v, a])

        m = tf.keras.Model(inp, q)
        m.compile(optimizer=tf.keras.optimizers.Adam(self.lr), loss='mse')
        return m

    def act(self, state):
        """Choose an action epsilon-greedily.

        Args:
            state: 1-D observation array of length ``state_size``.

        Returns:
            Integer action id in ``[0, action_size)``.
        """
        if np.random.random() < self.epsilon:
            return np.random.randint(self.action_size)
        # Use the compiled forward pass that __init__ prepares for this purpose.
        q = self._predict(tf.convert_to_tensor(state[np.newaxis]), training=False)
        return int(tf.argmax(q[0]))

    def remember(self, s, a, r, s2, done):
        """Append one transition ``(s, a, r, s2, done)`` to the replay buffer."""
        self.memory.append((s, a, r, s2, done))

    def train(self):
        """Run one double-DQN update on a random replay batch.

        Does nothing until the buffer holds at least ``min_mem`` transitions.
        Every ``target_freq`` updates the target network is overwritten with
        the online network's weights.
        """
        if len(self.memory) < self.min_mem:
            return

        batch = random.sample(self.memory, self.batch_size)
        states = np.array([t[0] for t in batch], dtype=np.float32)
        actions = np.array([t[1] for t in batch], dtype=np.int64)
        rewards = np.array([t[2] for t in batch], dtype=np.float32)
        next_states = np.array([t[3] for t in batch], dtype=np.float32)
        not_done = np.array([not t[4] for t in batch], dtype=np.float32)

        # Call the models directly: Model.predict has per-call overhead that
        # dominates on a 64-row batch. .numpy() hands back a writable copy.
        q_curr = self.model(states, training=False).numpy()
        q_next = self.model(next_states, training=False).numpy()
        q_targ = self.target(next_states, training=False).numpy()

        # Double DQN: the online net picks the next action, the target net
        # scores it. Terminal transitions keep only the immediate reward.
        rows = np.arange(self.batch_size)
        best_next = np.argmax(q_next, axis=1)
        q_curr[rows, actions] = (
            rewards + self.gamma * q_targ[rows, best_next] * not_done
        )

        self.model.fit(states, q_curr, epochs=1, verbose=0)
        self.train_step += 1

        if self.train_step % self.target_freq == 0:
            self.target.set_weights(self.model.get_weights())

    def reward(self, game, event):
        """Compute the shaped reward for blip after a game step.

        Args:
            game: Object exposing ``blip``, ``bloop`` and ``ball`` as
                ``[x, y, vx, vy]`` sequences.
            event: ``'blip'`` / ``'bloop'`` when that side just scored,
                otherwise a falsy value.

        Returns:
            The scalar reward. Also stores the current player-ball distance
            in ``last_dist`` for the next call.
        """
        p = game.blip
        b = game.ball
        o = game.bloop
        r = 0
        dist = np.sqrt((p[0]-b[0])**2 + (p[1]-b[1])**2)

        # Goal outcome.
        if event == 'blip': r += 500
        elif event == 'bloop': r -= 300

        # Proximity to the ball.
        r += (1 - dist/830) * 5
        if dist < 40: r += 10

        # Progress toward the ball since the previous call. Compare against
        # None explicitly so a previous distance of exactly 0.0 still counts.
        # NOTE(review): on a goal step the game has already reset positions,
        # so this delta spans the reset - confirm that is intended.
        if self.last_dist is not None:
            delta = self.last_dist - dist
            r += delta * 0.5
            if delta > 2: r += 3
        self.last_dist = dist

        # Discourage standing still far from the ball; reward moving.
        spd = np.sqrt(p[2]**2 + p[3]**2)
        if spd < 0.5 and dist > 50: r -= 8
        if spd > 1: r += 1

        # Ball travelling right while blip is near it / ball near x = 720.
        if (b[2] > 2 and dist < 80): r += 8
        if abs(b[0] - 720) < 100: r += 5

        # Penalise sitting in a corner, more so when far from the ball.
        if (p[0] < 80 or p[0] > 640) and (p[1] < 80 or p[1] > 340):
            r -= 5
            if dist > 100: r -= 5

        # Graduated penalty for being far from the ball.
        if dist > 300: r -= 5
        elif dist > 200: r -= 3
        elif dist > 150: r -= 1

        # Opponent is closer to the ball than we are.
        odist = np.sqrt((o[0]-b[0])**2 + (o[1]-b[1])**2)
        if odist < dist and dist > 60: r -= 2

        # Small constant per-step cost.
        return r - 0.1

    def reset(self):
        """End-of-episode hook: clear shaping memory and decay epsilon."""
        self.last_dist = None
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def save(self, path):
        """Write the online network's weights and progress counters as JSON.

        Args:
            path: Destination file path.
        """
        w = self.model.get_weights()
        data = {'weights': [{'shape': list(x.shape), 'data': x.flatten().tolist()} for x in w],
                'epsilon': self.epsilon, 'trainStepCount': self.train_step}
        with open(path, 'w') as f:
            json.dump(data, f)

# Confirmation that the cell ran; the check mark was mis-encoded as 'âœ…'.
print('✅ Fast DQN ready')

In [None]:
def train_fast(episodes=100000, save_every=5000):
    """Train a FastDQN agent (blip) against the scripted opponent (bloop).

    Args:
        episodes: Number of matches to play.
        save_every: Write ``weights_<ep>.json`` every this many episodes.

    Returns:
        ``(agent, stats)`` where ``stats`` counts match results under
        ``'blip'``, ``'bloop'`` and ``'draw'`` and total goals under
        ``'goals'``. ``weights_final.json`` is written before returning.
    """
    game = FastGame(match_time=30)
    agent = FastDQN()
    stats = {'blip': 0, 'bloop': 0, 'draw': 0, 'goals': 0}
    start = time.time()

    print('='*50)
    print('🚀 FAST Training Started')
    print('='*50)

    for ep in range(1, episodes + 1):
        game.reset()
        done = False
        steps = 0
        s1 = game.get_state('blip')

        while not done:
            steps += 1
            s2 = game.get_state('bloop')

            a1 = agent.act(s1)
            a2 = simple_ai_action(s2)

            event, done = game.step(a1, a2)

            r = agent.reward(game, event)
            s1_new = game.get_state('blip')
            agent.remember(s1, a1, r, s1_new, done)

            if event: stats['goals'] += 1

            # Learn on every fourth environment step.
            if steps % 4 == 0:
                agent.train()

            # The post-step observation is the next pre-step observation, so
            # reuse it instead of rebuilding it at the top of the loop.
            s1 = s1_new

        winner = 'blip' if game.blip_score > game.bloop_score else ('bloop' if game.bloop_score > game.blip_score else 'draw')
        stats[winner] += 1
        agent.reset()

        if ep % 100 == 0 or ep == 1:
            elapsed = time.time() - start
            speed = ep / elapsed
            eta = (episodes - ep) / speed if speed > 0 else 0
            eta_str = f'{eta/60:.1f}m' if eta < 3600 else f'{eta/3600:.1f}h'
            print(f'Ep {ep}/{episodes} | ε:{agent.epsilon:.3f} | B:{stats["blip"]} L:{stats["bloop"]} D:{stats["draw"]} | Goals:{stats["goals"]} | {speed:.1f} ep/s | ETA:{eta_str}')

        if ep % save_every == 0:
            agent.save(f'weights_{ep}.json')
            print(f'💾 Saved at {ep}')

    agent.save('weights_final.json')
    print('='*50)
    print('✅ Done!')
    print(f'Blip:{stats["blip"]} Bloop:{stats["bloop"]} Draw:{stats["draw"]}')
    return agent, stats

# Confirmation that the cell ran; the check mark was mis-encoded as 'âœ…'.
print('✅ Training function ready')

In [None]:
# 🚀 START TRAINING
# Runs the whole training loop; `agent` is the trained FastDQN and `stats`
# holds the result tallies that the download cell embeds in its export.
agent, stats = train_fast(episodes=100000, save_every=5000)

In [None]:
# Download weights
from google.colab import files
import json

# Load the final checkpoint written by train_fast().
with open('weights_final.json') as f:
    w = json.load(f)

# Every finished episode adds exactly one to blip/bloop/draw, so their sum is
# the number of episodes actually played. Deriving it keeps the export correct
# when the training cell is run with a different `episodes` value.
episodes_played = stats['blip'] + stats['bloop'] + stats['draw']

# The same weights are stored under all four agent keys.
browser = {
    'version': 2, 'aiType': 'dqn', 'episodeCount': episodes_played,
    'blipAgent': w, 'bloopAgent': w, 'blip': w, 'bloop': w, 'stats': stats
}

with open('rl_football_trained.json', 'w') as f:
    json.dump(browser, f)

# Trigger a browser download of the export from the Colab runtime.
files.download('rl_football_trained.json')