# 🤖 RL Football Champions - GPU Training

This notebook trains the DQN agent on Google Colab's free GPU.

**Instructions:**
1. Go to Runtime → Change runtime type → Select **T4 GPU**
2. Run all cells (Runtime → Run all)
3. Training will save weights to your Google Drive
4. Download `weights-latest.json` (or one of the `weights-<episode>.json` checkpoints) when done

In [None]:
# Mount Google Drive and prepare the folder the trainer writes weights into.
import os

from google.colab import drive

# Folder on the mounted Drive that receives the weight files.
OUTPUT_DIR = '/content/drive/MyDrive/rl-football-weights'

# The Drive has to be mounted before the folder can be created on it.
drive.mount('/content/drive')
os.makedirs(OUTPUT_DIR, exist_ok=True)

print(f'âœ… Weights will be saved to: {OUTPUT_DIR}')

In [None]:
# Install Node.js and npm (TensorFlow.js itself is installed later by `npm install`).
# Refresh the apt package index quietly.
!apt-get update -qq
# Install the distribution's nodejs and npm packages.
!apt-get install -qq nodejs npm
# Install the `n` version manager globally, then use it to install Node.js 20.
!npm install -g n
!n 20
# Print the version of the `node` binary that is now found on PATH.
!node --version

In [None]:
# Create the trainer directory and make it the working directory, so the
# relative paths used by the %%writefile cells below land inside it.
!mkdir -p /content/trainer
%cd /content/trainer

In [None]:
%%writefile package.json
{
    "name": "rl-football-trainer",
    "version": "1.0.0",
    "main": "train.js",
    "dependencies": {
        "@tensorflow/tfjs-node-gpu": "^4.17.0"
    }
}

In [None]:
# Install the dependencies declared in package.json
# (@tensorflow/tfjs-node-gpu, the GPU build of TensorFlow.js for Node).
!npm install

In [None]:
%%writefile dqn-agent.js
/**
 * Advanced DQN Agent - Dueling Architecture with GPU support
 */
const tf = require('@tensorflow/tfjs-node-gpu');

/**
 * Last layer of the dueling head.
 *
 * Takes a tensor whose first column is the state value V and whose next
 * `actionSize` columns are the advantages A, and returns
 * Q = V + (A - mean(A)) with one column per action.
 */
class DuelingCombineLayer extends tf.layers.Layer {
    /**
     * @param {Object} config - Layer config; `config.actionSize` is the number of actions.
     */
    constructor(config) {
        super(config);
        this.actionSize = config.actionSize;
    }

    /** Keeps the leading (batch) dimension and emits `actionSize` columns. */
    computeOutputShape(inputShape) {
        const batchDim = inputShape[0];
        return [batchDim, this.actionSize];
    }

    /**
     * @param {tf.Tensor|tf.Tensor[]} inputs - The packed [V | A] tensor, or an array whose first entry is it.
     * @returns {tf.Tensor} Q-values.
     */
    call(inputs) {
        const packed = Array.isArray(inputs) ? inputs[0] : inputs;
        return tf.tidy(() => {
            const stateValue = packed.slice([0, 0], [-1, 1]);
            const adv = packed.slice([0, 1], [-1, this.actionSize]);
            // Centre the advantages on their per-row mean before adding V.
            const centredAdv = adv.sub(adv.mean(-1, true));
            return stateValue.add(centredAdv);
        });
    }

    /** Adds `actionSize` to the base config so the layer can be re-created from it. */
    getConfig() {
        const base = super.getConfig();
        base.actionSize = this.actionSize;
        return base;
    }

    /** Name under which the class is registered for serialization. */
    static get className() {
        return 'DuelingCombineLayer';
    }
}

tf.serialization.registerClass(DuelingCombineLayer);

/**
 * Double-DQN agent with a dueling Q-network.
 *
 * The agent observes a 12-value state (see getState), chooses one of 10
 * discrete actions (8 movement directions, kick, stay), stores transitions in
 * a replay buffer, and fits the online network toward targets whose action is
 * picked by the online network and whose value comes from a periodically
 * synchronised target network.
 */
class DQNAgent {
    /**
     * @param {string} name - Label used in log output.
     * @param {string} team - 'blip' attacks the goal at x = fieldWidth, 'bloop' the goal at x = 0.
     */
    constructor(name, team) {
        this.name = name;
        this.team = team;

        // Network dimensions: length of the getState() vector and of `actions`.
        this.stateSize = 12;
        this.actionSize = 10;

        // Optimisation and exploration hyper-parameters.
        this.learningRate = 0.0005;
        this.gamma = 0.995;
        this.epsilon = 1.0;
        this.epsilonMin = 0.02;
        this.epsilonDecay = 0.9999; // applied once per reset()

        // Experience replay.
        this.replayBuffer = [];
        this.bufferSize = 50000;
        this.batchSize = 64;
        this.minBufferSize = 500; // train() is a no-op until this many transitions exist

        // The target network is re-synchronised every `targetUpdateFreq` training steps.
        this.targetUpdateFreq = 500;
        this.trainStepCount = 0;

        this.model = null;
        this.targetModel = null;

        // Index in this array is the action id predicted by the network.
        this.actions = [
            { dx: 0, dy: -1, kick: false, name: 'up' },
            { dx: 0, dy: 1, kick: false, name: 'down' },
            { dx: -1, dy: 0, kick: false, name: 'left' },
            { dx: 1, dy: 0, kick: false, name: 'right' },
            { dx: -1, dy: -1, kick: false, name: 'up-left' },
            { dx: 1, dy: -1, kick: false, name: 'up-right' },
            { dx: -1, dy: 1, kick: false, name: 'down-left' },
            { dx: 1, dy: 1, kick: false, name: 'down-right' },
            { dx: 0, dy: 0, kick: true, name: 'kick' },
            { dx: 0, dy: 0, kick: false, name: 'stay' }
        ];

        this.lastState = null;
        this.lastAction = null;
        this.lastDistToBall = null;
        this.initNetworks();
    }

    /** Builds the online and target networks and makes their weights equal. */
    initNetworks() {
        this.model = this.createNetwork();
        this.targetModel = this.createNetwork();
        this.updateTargetNetwork();
        console.log(`${this.name} DQN initialized (GPU)!`);
    }

    /**
     * Builds and compiles one dueling Q-network: a shared dense trunk, a value
     * stream and an advantage stream, merged by DuelingCombineLayer.
     *
     * @returns {tf.LayersModel} Model mapping [batch, stateSize] to [batch, actionSize].
     */
    createNetwork() {
        const input = tf.input({ shape: [this.stateSize] });
        let shared = tf.layers.dense({ units: 256, activation: 'relu', kernelInitializer: 'heNormal' }).apply(input);
        shared = tf.layers.dense({ units: 256, activation: 'relu', kernelInitializer: 'heNormal' }).apply(shared);
        shared = tf.layers.dense({ units: 128, activation: 'relu', kernelInitializer: 'heNormal' }).apply(shared);
        let valueStream = tf.layers.dense({ units: 64, activation: 'relu', kernelInitializer: 'heNormal' }).apply(shared);
        const value = tf.layers.dense({ units: 1, activation: 'linear', kernelInitializer: 'heNormal' }).apply(valueStream);
        let advantageStream = tf.layers.dense({ units: 64, activation: 'relu', kernelInitializer: 'heNormal' }).apply(shared);
        const advantage = tf.layers.dense({ units: this.actionSize, activation: 'linear', kernelInitializer: 'heNormal' }).apply(advantageStream);
        // Pack [V | A] into one tensor; the combine layer slices it apart again.
        const combined = tf.layers.concatenate().apply([value, advantage]);
        const qValues = new DuelingCombineLayer({ actionSize: this.actionSize }).apply(combined);
        const model = tf.model({ inputs: input, outputs: qValues });
        model.compile({ optimizer: tf.train.adam(this.learningRate), loss: 'meanSquaredError' });
        return model;
    }

    /**
     * Copies the online network's weights into the target network.
     *
     * The temporary clones are disposed once they have been assigned (the same
     * pattern importWeights uses); otherwise every synchronisation would leak
     * one tensor per weight.
     */
    updateTargetNetwork() {
        const snapshot = this.model.getWeights().map(w => w.clone());
        try {
            this.targetModel.setWeights(snapshot);
        } finally {
            snapshot.forEach(w => w.dispose());
        }
    }

    /**
     * Encodes the situation as the 12-value network input.
     *
     * Layout: player x/y, ball x/y (divided by field size), ball vx/vy (divided
     * by 15 and clamped to [-1, 1]), opponent x/y, distance and angle to the
     * ball, distance and angle to the attacked goal. Distances are divided by
     * the field diagonal; angles are mapped from [-PI, PI] to [0, 1].
     *
     * @returns {number[]} State vector of length 12.
     */
    getState(player, ball, opponent, fieldWidth, fieldHeight) {
        const distBall = this.distance(player, ball) / Math.sqrt(fieldWidth ** 2 + fieldHeight ** 2);
        const angleBall = (this.angle(player, ball) + Math.PI) / (2 * Math.PI);
        const goalX = this.team === 'blip' ? fieldWidth : 0;
        const goalY = fieldHeight / 2;
        const distGoal = Math.sqrt((player.x - goalX) ** 2 + (player.y - goalY) ** 2) / Math.sqrt(fieldWidth ** 2 + fieldHeight ** 2);
        const angleGoal = (Math.atan2(goalY - player.y, goalX - player.x) + Math.PI) / (2 * Math.PI);
        return [player.x / fieldWidth, player.y / fieldHeight, ball.x / fieldWidth, ball.y / fieldHeight,
                Math.max(-1, Math.min(1, ball.vx / 15)), Math.max(-1, Math.min(1, ball.vy / 15)),
                opponent.x / fieldWidth, opponent.y / fieldHeight, distBall, angleBall, distGoal, angleGoal];
    }

    /** Euclidean distance between two objects with `x` and `y`. */
    distance(a, b) { return Math.sqrt((a.x - b.x) ** 2 + (a.y - b.y) ** 2); }

    /** Angle in radians of the vector from `a` to `b`. */
    angle(a, b) { return Math.atan2(b.y - a.y, b.x - a.x); }

    /**
     * Epsilon-greedy action selection. Records the state and chosen action
     * index in `lastState` / `lastAction`.
     *
     * @param {number[]} state - Vector produced by getState().
     * @param {boolean} [training=true] - When false the greedy action is always taken.
     * @returns {{dx: number, dy: number, kick: boolean, name: string}} Entry of `actions`.
     */
    chooseAction(state, training = true) {
        let actionIndex;
        if (training && Math.random() < this.epsilon) {
            actionIndex = Math.floor(Math.random() * this.actionSize);
        } else {
            // The index is read back to JS inside tidy so every tensor created here is released.
            actionIndex = tf.tidy(() => {
                const stateTensor = tf.tensor2d([state], [1, this.stateSize]);
                return this.model.predict(stateTensor).argMax(1).dataSync()[0];
            });
        }
        this.lastState = state;
        this.lastAction = actionIndex;
        return this.actions[actionIndex];
    }

    /** Appends a transition to the replay buffer, evicting the oldest one when full. */
    remember(state, action, reward, nextState, done) {
        this.replayBuffer.push({ state, action, reward, nextState, done });
        if (this.replayBuffer.length > this.bufferSize) this.replayBuffer.shift();
    }

    /**
     * Performs one training step on a batch sampled uniformly (with
     * replacement) from the replay buffer, then synchronises the target
     * network every `targetUpdateFreq` steps. Does nothing until the buffer
     * holds `minBufferSize` transitions.
     */
    async train() {
        if (this.replayBuffer.length < this.minBufferSize) return;

        const batch = [];
        for (let i = 0; i < this.batchSize; i++) {
            batch.push(this.replayBuffer[Math.floor(Math.random() * this.replayBuffer.length)]);
        }
        const states = batch.map(e => e.state);
        const nextStates = batch.map(e => e.nextState);

        // Pull all predictions back as plain arrays; tidy releases the tensors.
        const { currentQsArray, nextQsMainArray, nextQsTargetArray } = tf.tidy(() => {
            const statesTensor = tf.tensor2d(states, [this.batchSize, this.stateSize]);
            const nextStatesTensor = tf.tensor2d(nextStates, [this.batchSize, this.stateSize]);
            return {
                currentQsArray: this.model.predict(statesTensor).arraySync(),
                nextQsMainArray: this.model.predict(nextStatesTensor).arraySync(),
                nextQsTargetArray: this.targetModel.predict(nextStatesTensor).arraySync()
            };
        });

        // Only the taken action's Q-value is overwritten, so the loss on the
        // other outputs is zero.
        for (let i = 0; i < this.batchSize; i++) {
            const { action, reward, done } = batch[i];
            if (done) {
                currentQsArray[i][action] = reward;
            } else {
                // Double DQN: the online network picks the action, the target network scores it.
                const bestAction = nextQsMainArray[i].indexOf(Math.max(...nextQsMainArray[i]));
                currentQsArray[i][action] = reward + this.gamma * nextQsTargetArray[i][bestAction];
            }
        }

        const statesTensor = tf.tensor2d(states, [this.batchSize, this.stateSize]);
        const targetTensor = tf.tensor2d(currentQsArray, [this.batchSize, this.actionSize]);
        try {
            await this.model.fit(statesTensor, targetTensor, { epochs: 1, verbose: 0 });
        } finally {
            // Release the inputs even when fit() rejects.
            statesTensor.dispose();
            targetTensor.dispose();
        }

        this.trainStepCount++;
        if (this.trainStepCount % this.targetUpdateFreq === 0) this.updateTargetNetwork();
    }

    /**
     * Shaped reward for the step that just happened. Updates `lastDistToBall`.
     *
     * @param {{x: number, y: number, vx: number, vy: number}} player - The agent's player.
     * @param {{x: number, y: number, vx: number, vy: number}} ball
     * @param {{x: number, y: number}} opponent
     * @param {?string} event - 'scored', 'conceded' or null.
     * @param {number} fieldWidth
     * @param {number} [fieldHeight=420] - Defaults to the value that used to be hard-coded here.
     * @returns {number} Reward.
     */
    calculateReward(player, ball, opponent, event, fieldWidth, fieldHeight = 420) {
        let reward = 0;
        const distToBall = this.distance(player, ball);
        const goalEvent = event === 'scored' || event === 'conceded';

        if (event === 'scored') reward += 500;
        else if (event === 'conceded') reward -= 300;

        // Proximity to the ball.
        const maxDist = Math.sqrt(fieldWidth ** 2 + fieldHeight ** 2);
        reward += (1 - distToBall / maxDist) * 5;
        if (distToBall < 40) reward += 10;

        // Progress toward the ball since the previous call. Skipped on goal
        // events: the trainer computes the reward after the game has moved
        // ball and players back to their start positions, so the change in
        // distance would measure that teleport, not the agent's movement.
        if (this.lastDistToBall !== null && !goalEvent) {
            const distDelta = this.lastDistToBall - distToBall;
            reward += distDelta * 0.5;
            if (distDelta > 2) reward += 3;
        }
        this.lastDistToBall = distToBall;

        // Discourage standing still away from the ball; reward movement.
        const playerSpeed = Math.sqrt(player.vx * player.vx + player.vy * player.vy);
        if (playerSpeed < 0.5 && distToBall > 50) reward -= 8;
        if (playerSpeed > 1) reward += 1;

        // Ball heading toward, or already near, the attacked goal.
        const attackingGoalX = this.team === 'blip' ? fieldWidth : 0;
        const ballMovingTowardGoal = (this.team === 'blip' && ball.vx > 2) || (this.team === 'bloop' && ball.vx < -2);
        if (ballMovingTowardGoal && distToBall < 80) reward += 8;
        if (Math.abs(ball.x - attackingGoalX) < 100) reward += 5;

        // Penalise hiding in a corner, more so when the ball is far away.
        const cornerMargin = 80;
        const inCorner = (player.x < cornerMargin || player.x > fieldWidth - cornerMargin) && (player.y < cornerMargin || player.y > fieldHeight - cornerMargin);
        if (inCorner) { reward -= 5; if (distToBall > 100) reward -= 5; }

        // Graduated penalty for being far from the ball.
        if (distToBall > 300) reward -= 5;
        else if (distToBall > 200) reward -= 3;
        else if (distToBall > 150) reward -= 1;

        // Opponent is closer to the ball than we are.
        const opponentDistToBall = this.distance(opponent, ball);
        if (opponentDistToBall < distToBall && distToBall > 60) reward -= 2;

        // Constant per-step cost.
        reward -= 0.1;
        return reward;
    }

    /** Clears per-episode memory and decays epsilon once, down to `epsilonMin`. */
    reset() {
        this.lastState = null;
        this.lastAction = null;
        this.lastDistToBall = null;
        if (this.epsilon > this.epsilonMin) this.epsilon *= this.epsilonDecay;
    }

    /**
     * Serialises the online network's weights to plain arrays.
     *
     * @returns {Promise<{weights: {shape: number[], data: number[]}[], epsilon: number, trainStepCount: number}>}
     */
    async exportWeights() {
        const weights = this.model.getWeights();
        const weightData = await Promise.all(weights.map(async w => ({ shape: w.shape, data: Array.from(await w.data()) })));
        return { weights: weightData, epsilon: this.epsilon, trainStepCount: this.trainStepCount };
    }

    /**
     * Restores weights produced by exportWeights() into both networks.
     * Does nothing when `data` or `data.weights` is missing.
     */
    async importWeights(data) {
        if (!data || !data.weights) return;
        const weights = data.weights.map(w => tf.tensor(w.data, w.shape));
        try {
            this.model.setWeights(weights);
            this.updateTargetNetwork();
        } finally {
            // Release the temporaries even if the shapes do not match the model.
            weights.forEach(w => w.dispose());
        }
        // Accept any finite stored epsilon, including 0, which `||` would have discarded.
        if (Number.isFinite(data.epsilon)) this.epsilon = data.epsilon;
        this.trainStepCount = data.trainStepCount || 0;
    }
}

module.exports = DQNAgent;

In [None]:
%%writefile simple-ai.js
/**
 * Scripted opponent that runs at the ball and kicks when it is very close.
 *
 * It offers the same methods as the learning agent so the trainer can treat
 * both alike; everything related to learning is a no-op.
 */
class SimpleAI {
    /**
     * @param {string} name - Display name.
     * @param {string} team - 'bloop' pushes toward x = 0, anything else toward larger x.
     */
    constructor(name, team) {
        this.name = name;
        this.team = team;
        // [name, dx, dy, kick] rows; the row index is the action id.
        const rows = [
            ['up', 0, -1, false],
            ['down', 0, 1, false],
            ['left', -1, 0, false],
            ['right', 1, 0, false],
            ['up-left', -1, -1, false],
            ['up-right', 1, -1, false],
            ['down-left', -1, 1, false],
            ['down-right', 1, 1, false],
            ['kick', 0, 0, true],
            ['stay', 0, 0, false]
        ];
        this.actions = rows.map(([name, dx, dy, kick]) => ({ dx, dy, kick, name }));
        this.lastAction = 0;
    }

    /**
     * Picks an action from the state vector (indices 0-3 are player and ball
     * position, index 8 the normalised distance to the ball) and records its
     * index in `lastAction`.
     *
     * @param {number[]} state - Vector produced by getState().
     * @param {boolean} [training=true] - Accepted for interface parity; not used.
     * @returns {{dx: number, dy: number, kick: boolean, name: string}} Entry of `actions`.
     */
    chooseAction(state, training = true) {
        const gapX = state[2] - state[0];
        const gapY = state[3] - state[1];
        const ballDist = state[8];

        const commit = (index) => {
            this.lastAction = index;
            return this.actions[index];
        };
        // Index of the pure-movement action (ids 0-7) with this direction, or -1.
        const movementIndex = (wantDx, wantDy) => {
            for (let k = 0; k < 8; k += 1) {
                const candidate = this.actions[k];
                if (candidate.dx === wantDx && candidate.dy === wantDy) return k;
            }
            return -1;
        };

        if (ballDist < 0.08) {
            // Touching distance: kick.
            if (ballDist < 0.04) return commit(8);
            // Close: drive toward the attacked side while lining up vertically.
            const towardGoal = this.team === 'bloop' ? -1 : 1;
            let vertical = 0;
            if (gapY > 0.01) vertical = 1;
            else if (gapY < -0.01) vertical = -1;
            const closeIndex = movementIndex(towardGoal, vertical);
            if (closeIndex !== -1) return commit(closeIndex);
        }

        // Otherwise chase the ball along each axis that is off by more than 0.02.
        let stepX = 0;
        let stepY = 0;
        if (Math.abs(gapX) > 0.02) stepX = gapX > 0 ? 1 : -1;
        if (Math.abs(gapY) > 0.02) stepY = gapY > 0 ? 1 : -1;
        const chaseIndex = movementIndex(stepX, stepY);
        if (chaseIndex !== -1) return commit(chaseIndex);

        // No movement action matches (both steps are 0): move right.
        return commit(3);
    }

    /**
     * Builds the 12-value state vector: player, ball and opponent positions
     * divided by the field size, ball velocity divided by 15, and distance and
     * angle to the ball and to the attacked goal.
     *
     * @returns {number[]} State vector of length 12.
     */
    getState(player, ball, opponent, fieldWidth, fieldHeight) {
        const diagonal = Math.sqrt(fieldWidth**2+fieldHeight**2);
        const goal = { x: this.team === 'blip' ? fieldWidth : 0, y: fieldHeight / 2 };
        const toUnit = (radians) => (radians + Math.PI) / (2 * Math.PI);
        return [
            player.x/fieldWidth,
            player.y/fieldHeight,
            ball.x/fieldWidth,
            ball.y/fieldHeight,
            ball.vx/15,
            ball.vy/15,
            opponent.x/fieldWidth,
            opponent.y/fieldHeight,
            this.distance(player, ball) / diagonal,
            toUnit(this.angle(player, ball)),
            this.distance(player, goal) / diagonal,
            toUnit(this.angle(player, goal))
        ];
    }

    /** Euclidean distance between two objects with `x` and `y`. */
    distance(a, b) {
        return Math.sqrt((a.x-b.x)**2+(a.y-b.y)**2);
    }

    /** Angle in radians of the vector from `a` to `b`. */
    angle(a, b) {
        return Math.atan2(b.y-a.y, b.x-a.x);
    }

    // Learning hooks, kept as no-ops for interface parity with the DQN agent.
    remember() {}
    async train() {}
    reset() {}
    calculateReward() { return 0; }
    async exportWeights() { return null; }
    async importWeights() {}
}
module.exports = SimpleAI;

In [None]:
%%writefile game.js
/**
 * A circular player with simple velocity/friction physics.
 */
class Player {
    /**
     * @param {number} x - Start x; also the position restored by reset().
     * @param {number} y - Start y; also the position restored by reset().
     * @param {string} team - Team label.
     */
    constructor(x, y, team) {
        this.x = x;
        this.y = y;
        this.startX = x;
        this.startY = y;
        this.team = team;
        this.vx = 0;
        this.vy = 0;
        this.radius = 25;
        this.speed = 4;        // maximum speed enforced by move()
        this.friction = 0.85;  // velocity multiplier applied on every update()
        this.kickAnimation = 0; // set to 1 by kick(); read and cleared by the game on ball contact
    }

    /**
     * Returns the player to its start position, at rest and with no pending
     * kick, so a kick requested before the reset cannot power the first ball
     * contact after it.
     */
    reset() {
        this.x = this.startX;
        this.y = this.startY;
        this.vx = 0;
        this.vy = 0;
        this.kickAnimation = 0;
    }

    /** Advances the position by `dt` steps of velocity, then applies friction once. */
    update(dt = 1) {
        this.x += this.vx * dt;
        this.y += this.vy * dt;
        this.vx *= this.friction;
        this.vy *= this.friction;
    }

    /**
     * Accelerates in direction (dx, dy) and caps the resulting speed at `speed`.
     */
    move(dx, dy) {
        this.vx += dx * this.speed * 0.5;
        this.vy += dy * this.speed * 0.5;
        const speed = Math.sqrt(this.vx**2 + this.vy**2);
        if (speed > this.speed) {
            // Rescale the velocity to the maximum speed, keeping its direction.
            this.vx = this.vx/speed*this.speed;
            this.vy = this.vy/speed*this.speed;
        }
    }

    /** Marks a kick as pending. */
    kick() {
        this.kickAnimation = 1;
    }
}

/**
 * Headless two-player football match: two players, one ball, a rectangular
 * pitch with a goal mouth in the middle of each short side.
 */
class Game {
    constructor() {
        // World size and the border between the world edge and the pitch.
        this.width = 800;
        this.height = 500;
        this.padding = 40;

        // Pitch boundaries in world coordinates.
        this.fieldLeft = this.padding;
        this.fieldRight = this.width - this.padding;
        this.fieldTop = this.padding;
        this.fieldBottom = this.height - this.padding;
        this.fieldWidth = this.fieldRight - this.fieldLeft;
        this.fieldHeight = this.fieldBottom - this.fieldTop;

        // Goal mouth: `goalHeight` tall, centred vertically on `goalY`.
        this.goalWidth = 15;
        this.goalHeight = 120;
        this.goalY = this.height / 2;

        this.blip = new Player(this.fieldLeft + 80, this.height / 2, 'blip');
        this.bloop = new Player(this.fieldRight - 80, this.height / 2, 'bloop');
        this.ball = { x: this.width/2, y: this.height/2, vx: 0, vy: 0, radius: 12, startX: this.width/2, startY: this.height/2 };

        this.blipScore = 0;
        this.bloopScore = 0;
        this.matchTime = 30;
        this.timeRemaining = this.matchTime;
    }

    /** Puts the ball back on its start spot, at rest. */
    _resetBall() {
        this.ball.x = this.ball.startX;
        this.ball.y = this.ball.startY;
        this.ball.vx = 0;
        this.ball.vy = 0;
    }

    /** True when the ball centre is vertically inside the goal mouth. */
    _ballInGoalMouth() {
        const half = this.goalHeight/2;
        return this.ball.y > this.goalY - half && this.ball.y < this.goalY + half;
    }

    /** Restores players, ball and clock for a new match; scores are untouched. */
    reset() {
        this.blip.reset();
        this.bloop.reset();
        this._resetBall();
        this.timeRemaining = this.matchTime;
    }

    resetScores() {
        this.blipScore = 0;
        this.bloopScore = 0;
    }

    /**
     * Advances the simulation by `dt` steps; the clock drops by dt / 60.
     *
     * @returns {{event: ?string, done: boolean}} `event` is 'blip_scored',
     *     'bloop_scored' or null; `done` is true once the clock has run out.
     */
    update(dt = 1) {
        this.timeRemaining -= dt / 60;
        this.blip.update(dt);
        this.bloop.update(dt);
        this.updateBall(dt);
        this.checkPlayerBallCollision(this.blip);
        this.checkPlayerBallCollision(this.bloop);
        this.constrainPlayer(this.blip);
        this.constrainPlayer(this.bloop);
        return { event: this.checkGoal(), done: this.timeRemaining <= 0 };
    }

    /** Moves the ball, applies drag, and bounces it off the pitch walls. */
    updateBall(dt) {
        const ball = this.ball;
        ball.x += ball.vx * dt;
        ball.y += ball.vy * dt;
        ball.vx *= 0.98;
        ball.vy *= 0.98;

        // Top and bottom walls: clamp and reflect with 20% energy loss.
        if (ball.y - ball.radius < this.fieldTop) { ball.y = this.fieldTop + ball.radius; ball.vy *= -0.8; }
        if (ball.y + ball.radius > this.fieldBottom) { ball.y = this.fieldBottom - ball.radius; ball.vy *= -0.8; }

        // Side walls only exist outside the goal mouth, so the ball can cross the goal line.
        if (!this._ballInGoalMouth()) {
            if (ball.x - ball.radius < this.fieldLeft) { ball.x = this.fieldLeft + ball.radius; ball.vx *= -0.8; }
            if (ball.x + ball.radius > this.fieldRight) { ball.x = this.fieldRight - ball.radius; ball.vx *= -0.8; }
        }
    }

    /**
     * Resolves an overlap between `player` and the ball: pushes the ball out
     * along the contact normal and launches it, harder if a kick is pending.
     *
     * @returns {boolean} True when a collision was handled.
     */
    checkPlayerBallCollision(player) {
        const dx = this.ball.x - player.x, dy = this.ball.y - player.y;
        const dist = Math.sqrt(dx*dx + dy*dy), minDist = player.radius + this.ball.radius;
        // dist === 0 is skipped because there is no contact normal to push along.
        if (dist < minDist && dist > 0) {
            const nx = dx/dist, ny = dy/dist;
            this.ball.x = player.x + nx * minDist;
            this.ball.y = player.y + ny * minDist;
            const kickPower = player.kickAnimation > 0 ? 12 : 6;
            this.ball.vx = nx * kickPower + player.vx * 0.5;
            this.ball.vy = ny * kickPower + player.vy * 0.5;
            player.kickAnimation = 0; // the pending kick is consumed by this contact
            return true;
        }
        return false;
    }

    /** Keeps the whole player circle inside the pitch, stopping it at the walls. */
    constrainPlayer(player) {
        const m = player.radius;
        if (player.x - m < this.fieldLeft) { player.x = this.fieldLeft + m; player.vx = 0; }
        if (player.x + m > this.fieldRight) { player.x = this.fieldRight - m; player.vx = 0; }
        if (player.y - m < this.fieldTop) { player.y = this.fieldTop + m; player.vy = 0; }
        if (player.y + m > this.fieldBottom) { player.y = this.fieldBottom - m; player.vy = 0; }
    }

    /**
     * Detects the ball crossing a goal line inside the goal mouth, updates the
     * score and restarts play.
     *
     * @returns {?string} 'bloop_scored', 'blip_scored' or null.
     */
    checkGoal() {
        const inGoalY = this._ballInGoalMouth();
        if (inGoalY && this.ball.x < this.fieldLeft) { this.bloopScore++; this.resetBallAfterGoal(); return 'bloop_scored'; }
        if (inGoalY && this.ball.x > this.fieldRight) { this.blipScore++; this.resetBallAfterGoal(); return 'blip_scored'; }
        return null;
    }

    /**
     * Restarts play after a goal. The players are reset through their own
     * reset() so they restart at rest, exactly as Game.reset() leaves them,
     * rather than keeping their pre-goal velocity at the start position.
     */
    resetBallAfterGoal() {
        this._resetBall();
        this.blip.reset();
        this.bloop.reset();
    }

    /** Applies an action object ({dx, dy, kick}) to `player`. */
    applyAction(player, action) {
        if (action.dx||action.dy) player.move(action.dx, action.dy);
        if (action.kick) player.kick();
    }

    /** @returns {string} 'blip', 'bloop' or 'draw', based on the current scores. */
    getWinner() {
        if (this.blipScore > this.bloopScore) return 'blip';
        if (this.bloopScore > this.blipScore) return 'bloop';
        return 'draw';
    }
}
module.exports = { Game, Player };

In [None]:
%%writefile train.js
const fs = require('fs');
const DQNAgent = require('./dqn-agent');
const SimpleAI = require('./simple-ai');
const { Game } = require('./game');

// Training schedule.
const targetEpisodes = 100000;   // number of matches to play
const saveEvery = 5000;          // write a checkpoint every this many episodes
// Assigned to game.matchTime below; the game clock drops by 1/60 per update(1).
const matchTime = 30;
// Checkpoint folder; same path the notebook's Drive-mount cell creates.
const OUTPUT_DIR = '/content/drive/MyDrive/rl-football-weights';

// Startup banner.
console.log('=' .repeat(50));
console.log('ðŸ¤– RL Football - GPU Training on Colab');
console.log('=' .repeat(50));
console.log(`Episodes: ${targetEpisodes}`);
console.log(`Save every: ${saveEvery}`);
console.log('=' .repeat(50));

// One shared game instance; Blip is the learning agent, Bloop the scripted opponent.
const game = new Game();
game.matchTime = matchTime;
const blipAgent = new DQNAgent('Blip', 'blip');
const bloopAgent = new SimpleAI('Bloop', 'bloop');

// Running totals across all episodes; also stored in every checkpoint.
let stats = { blipWins: 0, bloopWins: 0, draws: 0, totalGoals: 0 };

/**
 * Runs the whole training schedule: `targetEpisodes` matches of the DQN agent
 * (Blip) against the scripted opponent (Bloop). Progress is logged after the
 * first episode and every 100 episodes; a checkpoint is written every
 * `saveEvery` episodes and once more at the end if the last episode was not
 * itself a checkpoint.
 */
async function train() {
    const startTime = Date.now();
    let lastSavedEpisode = 0;

    for (let episode = 1; episode <= targetEpisodes; episode++) {
        game.reset();
        game.resetScores();
        let steps = 0, done = false;

        while (!done) {
            steps++;
            // Each side observes the game from its own perspective.
            const blipState = blipAgent.getState(game.blip, game.ball, game.bloop, game.fieldWidth, game.fieldHeight);
            const bloopState = bloopAgent.getState(game.bloop, game.ball, game.blip, game.fieldWidth, game.fieldHeight);
            const blipAction = blipAgent.chooseAction(blipState, true);
            const bloopAction = bloopAgent.chooseAction(bloopState, true);
            game.applyAction(game.blip, blipAction);
            game.applyAction(game.bloop, bloopAction);

            const result = game.update(1);
            done = result.done;

            // Translate the game event into Blip's point of view; Bloop does not learn.
            let blipEvent = null;
            if (result.event === 'blip_scored') { blipEvent = 'scored'; stats.totalGoals++; }
            else if (result.event === 'bloop_scored') { blipEvent = 'conceded'; stats.totalGoals++; }

            const blipReward = blipAgent.calculateReward(game.blip, game.ball, game.bloop, blipEvent, game.fieldWidth);
            const newBlipState = blipAgent.getState(game.blip, game.ball, game.bloop, game.fieldWidth, game.fieldHeight);
            blipAgent.remember(blipState, blipAgent.lastAction, blipReward, newBlipState, done);

            // One training step for every four environment steps.
            if (steps % 4 === 0) await blipAgent.train();
        }

        const winner = game.getWinner();
        if (winner === 'blip') stats.blipWins++;
        else if (winner === 'bloop') stats.bloopWins++;
        else stats.draws++;

        // End of episode: clears per-episode memory and decays epsilon.
        blipAgent.reset();

        if (episode % 100 === 0 || episode === 1) {
            const elapsed = (Date.now() - startTime) / 1000;
            const epsPerSec = episode / elapsed;
            const remaining = (targetEpisodes - episode) / epsPerSec;
            const hrs = Math.floor(remaining/3600), mins = Math.floor((remaining%3600)/60);
            console.log(`Ep ${episode}/${targetEpisodes} | Îµ: ${blipAgent.epsilon.toFixed(3)} | Blip: ${stats.blipWins} | Bloop: ${stats.bloopWins} | Draws: ${stats.draws} | Goals: ${stats.totalGoals} | ETA: ${hrs}h${mins}m`);
        }

        if (episode % saveEvery === 0) {
            await saveWeights(episode);
            lastSavedEpisode = episode;
        }
    }

    // Avoid writing the same final checkpoint twice when the last episode was already saved.
    if (lastSavedEpisode !== targetEpisodes) await saveWeights(targetEpisodes);
    console.log('âœ… Training complete!');
}

/**
 * Writes a checkpoint for `episode` to OUTPUT_DIR, both as
 * `weights-<episode>.json` and as `weights-latest.json`.
 *
 * The Blip agent's weights are stored under all four agent keys
 * (blipAgent, bloopAgent, blip, bloop).
 *
 * @param {number} episode - Episode number recorded in the file and its name.
 */
async function saveWeights(episode) {
    const blipWeights = await blipAgent.exportWeights();
    const saveData = {
        version: 2, aiType: 'dqn', episode, episodeCount: episode,
        timestamp: new Date().toISOString(), trainedAgainst: 'SimpleAI', stats,
        blipAgent: blipWeights, bloopAgent: blipWeights, blip: blipWeights, bloop: blipWeights
    };
    // Serialise once; the payload holds four copies of the weights and both files get identical content.
    const payload = JSON.stringify(saveData);
    fs.writeFileSync(`${OUTPUT_DIR}/weights-${episode}.json`, payload);
    fs.writeFileSync(`${OUTPUT_DIR}/weights-latest.json`, payload);
    console.log(`ðŸ’¾ Saved to Google Drive: weights-${episode}.json`);
}

// Entry point: run the training; if it rejects, log the error and exit with status 1.
train().catch(err => { console.error('Error:', err); process.exit(1); });

In [None]:
# 🚀 Start training: run train.js with Node from the current directory
# (/content/trainer, set by the %cd cell above).
!node train.js

## 📥 Download Weights

After training completes, download your weights from Google Drive:
- Navigate to `My Drive > rl-football-weights`
- Download `weights-latest.json`
- Load it in the browser app!