In [None]:
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Terra Scout - Kaggle GPU Training\n",
    "\n",
    "Train the diamond-finding agent using Kaggle's free GPU.\n",
    "\n",
    "**Setup:**\n",
    "1. Enable GPU: Settings → Accelerator → GPU T4 x2\n",
    "2. Run all cells\n",
    "3. Download trained model when complete"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cell 1: Install dependencies\n",
    "# %pip (rather than !pip) installs into the environment of the running kernel.\n",
    "# The requirement is quoted so the shell cannot glob-expand \"[extra]\".\n",
    "%pip install \"stable-baselines3[extra]\" gymnasium torch numpy tensorboard"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cell 2: Check GPU\n",
    "# Prints the PyTorch version and whether CUDA is usable; the device name\n",
    "# is queried only when CUDA is available. `torch` is reused by Cell 4.\n",
    "import torch\n",
    "print(f\"PyTorch: {torch.__version__}\")\n",
    "print(f\"CUDA available: {torch.cuda.is_available()}\")\n",
    "if torch.cuda.is_available():\n",
    "    print(f\"GPU: {torch.cuda.get_device_name(0)}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cell 3: Create simplified environment for Kaggle\n",
    "# (No Minecraft connection - simulated environment)\n",
    "\n",
    "import gymnasium as gym\n",
    "from gymnasium import spaces\n",
    "import numpy as np\n",
    "\n",
    "class TerraScoutSimEnv(gym.Env):\n",
    "    \"\"\"Simulated Terra Scout environment for offline training.\n",
    "\n",
    "    No Minecraft connection is made: only the agent's depth is simulated,\n",
    "    and a diamond is rolled at random when mining inside the optimal Y band.\n",
    "\n",
    "    Observation: float32 vector of length 35. Indices 0-2 hold the scaled\n",
    "    x/y/z position, 3-4 are constant health/food values, 5-6 are depth\n",
    "    flags; all remaining entries stay zero.\n",
    "    Actions: Discrete(20). Only 7 (descend), 8-10 (mine) and 12 (explore\n",
    "    cave) have an action-specific effect; the per-step penalty, depth\n",
    "    rewards and exploration bonus apply to every action.\n",
    "    \"\"\"\n",
    "\n",
    "    START_Y = 64            # Height at which every episode starts\n",
    "    MIN_Y = -64             # World floor (bottom build limit in Minecraft 1.18+)\n",
    "    DIAMOND_LEVEL = -50     # At or below this Y the agent is at diamond level\n",
    "    OPTIMAL_Y = (-59, -54)  # Inclusive Y band in which diamonds can be found\n",
    "\n",
    "    def __init__(self):\n",
    "        super().__init__()\n",
    "\n",
    "        self.action_space = spaces.Discrete(20)\n",
    "        self.observation_space = spaces.Box(-np.inf, np.inf, (35,), np.float32)\n",
    "\n",
    "        self.max_steps = 2000\n",
    "        self.current_step = 0\n",
    "        self.y_level = self.START_Y\n",
    "        # Diamond probability per mining action inside the optimal band.\n",
    "        # step() reads this attribute, so it is the single source of truth.\n",
    "        self.diamond_chance = 0.005\n",
    "        self.visited = set()\n",
    "        self.position = [0, self.START_Y, 0]\n",
    "\n",
    "    def reset(self, seed=None, options=None):\n",
    "        \"\"\"Start a new episode at the starting height.\n",
    "\n",
    "        Passing ``seed`` re-seeds ``self.np_random``, which drives the\n",
    "        diamond roll in step().\n",
    "\n",
    "        Returns:\n",
    "            (observation, info) where info is an empty dict.\n",
    "        \"\"\"\n",
    "        super().reset(seed=seed)\n",
    "        self.current_step = 0\n",
    "        self.y_level = self.START_Y\n",
    "        self.visited.clear()\n",
    "        self.position = [0, self.START_Y, 0]\n",
    "        return self._get_obs(), {}\n",
    "\n",
    "    def _get_obs(self):\n",
    "        \"\"\"Build the 35-element float32 observation vector.\"\"\"\n",
    "        low, high = self.OPTIMAL_Y\n",
    "        obs = np.zeros(35, dtype=np.float32)\n",
    "        obs[0] = self.position[0] / 100\n",
    "        obs[1] = self.position[1] / 64\n",
    "        obs[2] = self.position[2] / 100\n",
    "        obs[3] = 1.0  # Health\n",
    "        obs[4] = 1.0  # Food\n",
    "        obs[5] = float(self.y_level <= self.DIAMOND_LEVEL)  # At diamond level\n",
    "        obs[6] = float(low <= self.y_level <= high)  # Optimal Y\n",
    "        return obs\n",
    "\n",
    "    def step(self, action):\n",
    "        \"\"\"Advance the simulation by one action.\n",
    "\n",
    "        Returns:\n",
    "            (observation, reward, terminated, truncated, info).\n",
    "            ``terminated`` is True only when a diamond is found;\n",
    "            ``truncated`` is True once ``max_steps`` steps have elapsed;\n",
    "            ``info`` carries the current \"y_level\".\n",
    "        \"\"\"\n",
    "        # model.predict() returns a NumPy array; normalise to a plain int.\n",
    "        action = int(action)\n",
    "        self.current_step += 1\n",
    "        reward = -0.001  # Small per-step penalty\n",
    "        terminated = False\n",
    "        low, high = self.OPTIMAL_Y\n",
    "\n",
    "        # Movement simulation\n",
    "        if action == 7:  # descend\n",
    "            # Clamp at the world floor so depth (and obs[1]) stays bounded.\n",
    "            if self.y_level > self.MIN_Y:\n",
    "                self.y_level -= 1\n",
    "                self.position[1] = self.y_level\n",
    "                # Every successful descent ends below the starting height.\n",
    "                reward += 0.01\n",
    "        elif action in (8, 9, 10):  # mining actions\n",
    "            if self.y_level <= self.DIAMOND_LEVEL:\n",
    "                reward += 0.02\n",
    "                # Diamonds can only be found inside the optimal band. The\n",
    "                # env's own generator is used so reset(seed=...) makes the\n",
    "                # roll reproducible.\n",
    "                if low <= self.y_level <= high:\n",
    "                    if self.np_random.random() < self.diamond_chance:\n",
    "                        reward += 1000\n",
    "                        terminated = True\n",
    "                        print(\"💎 Diamond found!\")\n",
    "        elif action == 12:  # explore cave\n",
    "            if self.y_level <= 0:\n",
    "                reward += 0.01\n",
    "\n",
    "        # Y-level rewards (evaluated after any movement this step)\n",
    "        if self.y_level <= self.DIAMOND_LEVEL:\n",
    "            reward += 0.02\n",
    "        if low <= self.y_level <= high:\n",
    "            reward += 0.05\n",
    "\n",
    "        # Exploration: bonus the first time a position is visited\n",
    "        pos_key = tuple(int(coord) for coord in self.position)\n",
    "        if pos_key not in self.visited:\n",
    "            reward += 0.01\n",
    "            self.visited.add(pos_key)\n",
    "\n",
    "        truncated = self.current_step >= self.max_steps\n",
    "\n",
    "        return self._get_obs(), reward, terminated, truncated, {\"y_level\": self.y_level}\n",
    "\n",
    "# Test environment: build one instance, reset it, and print the observation\n",
    "# shape and action space as a quick sanity check.\n",
    "env = TerraScoutSimEnv()\n",
    "obs, _ = env.reset()\n",
    "print(f\"Observation shape: {obs.shape}\")\n",
    "print(f\"Action space: {env.action_space}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cell 4: Training with PPO\n",
    "from stable_baselines3 import PPO\n",
    "from stable_baselines3.common.callbacks import BaseCallback\n",
    "import os\n",
    "\n",
    "class TrainingCallback(BaseCallback):\n",
    "    \"\"\"Tracks per-episode reward totals during training and logs progress.\n",
    "\n",
    "    Only index 0 of the `rewards` / `dones` entries in `self.locals` is read.\n",
    "    An episode whose summed reward exceeds 500 is counted as a diamond find.\n",
    "    A summary line is printed every 100 finished episodes.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, verbose=0):\n",
    "        super().__init__(verbose)\n",
    "        self.diamonds_found = 0\n",
    "        self.current_reward = 0\n",
    "        self.episode_count = 0\n",
    "        self.episode_rewards = []\n",
    "\n",
    "    def _on_step(self):\n",
    "        \"\"\"Add the step reward; when the episode ends, record it and maybe log.\n",
    "\n",
    "        Always returns True (in Stable-Baselines3 a False return aborts training).\n",
    "        \"\"\"\n",
    "        step_reward = self.locals.get('rewards', [0])[0]\n",
    "        self.current_reward += step_reward\n",
    "\n",
    "        episode_over = self.locals.get('dones', [False])[0]\n",
    "        if not episode_over:\n",
    "            return True\n",
    "\n",
    "        # Episode finished: store its total and update the counters.\n",
    "        self.episode_count += 1\n",
    "        self.episode_rewards.append(self.current_reward)\n",
    "\n",
    "        if self.current_reward > 500:\n",
    "            self.diamonds_found += 1\n",
    "\n",
    "        if self.episode_count % 100 == 0:\n",
    "            # Rolling mean over the most recent 100 episodes.\n",
    "            avg = np.mean(self.episode_rewards[-100:])\n",
    "            print(f\"Episode {self.episode_count}: avg_reward={avg:.2f}, diamonds={self.diamonds_found}\")\n",
    "\n",
    "        self.current_reward = 0\n",
    "        return True\n",
    "\n",
    "# Create model\n",
    "# A fixed seed makes weight initialisation and rollout sampling repeatable;\n",
    "# Stable-Baselines3 also forwards it to the environment.\n",
    "SEED = 42\n",
    "\n",
    "env = TerraScoutSimEnv()\n",
    "model = PPO(\n",
    "    \"MlpPolicy\",\n",
    "    env,\n",
    "    learning_rate=3e-4,\n",
    "    n_steps=2048,\n",
    "    batch_size=64,\n",
    "    n_epochs=10,\n",
    "    gamma=0.99,\n",
    "    verbose=0,\n",
    "    seed=SEED,\n",
    "    # `torch` comes from the GPU-check cell above.\n",
    "    device=\"cuda\" if torch.cuda.is_available() else \"cpu\",\n",
    "    tensorboard_log=\"./logs/\"\n",
    ")\n",
    "\n",
    "print(f\"Training on: {model.device}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cell 5: Train!\n",
    "TOTAL_TIMESTEPS = 100_000  # Increase for better results\n",
    "\n",
    "callback = TrainingCallback()\n",
    "model.learn(\n",
    "    total_timesteps=TOTAL_TIMESTEPS,\n",
    "    callback=callback,\n",
    "    progress_bar=True\n",
    ")\n",
    "\n",
    "print(\"\\nTraining complete!\")\n",
    "print(f\"Total episodes: {callback.episode_count}\")\n",
    "print(f\"Diamonds found: {callback.diamonds_found}\")\n",
    "# np.mean([]) is NaN and emits a RuntimeWarning, so guard the case where no\n",
    "# episode finished (e.g. a run shorter than one episode after tuning\n",
    "# TOTAL_TIMESTEPS, n_steps or max_steps).\n",
    "if callback.episode_rewards:\n",
    "    print(f\"Average reward: {np.mean(callback.episode_rewards):.2f}\")\n",
    "else:\n",
    "    print(\"Average reward: n/a (no episode finished)\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cell 6: Save model\n",
    "# Relies on `os` (imported in Cell 4) and on `model` as trained in Cell 5.\n",
    "# The directory is created first; exist_ok=True makes re-running the cell safe.\n",
    "os.makedirs(\"models\", exist_ok=True)\n",
    "model.save(\"models/terra_scout_kaggle\")\n",
    "print(\"Model saved to models/terra_scout_kaggle.zip\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cell 7: Evaluate\n",
    "# Reload the saved policy and play it deterministically on a fresh env.\n",
    "env = TerraScoutSimEnv()\n",
    "model = PPO.load(\"models/terra_scout_kaggle\")\n",
    "\n",
    "eval_episodes = 100\n",
    "rewards = []\n",
    "\n",
    "for ep in range(eval_episodes):\n",
    "    obs, _ = env.reset()\n",
    "    total_reward = 0\n",
    "\n",
    "    # Step until the env reports either termination or truncation.\n",
    "    while True:\n",
    "        action, _ = model.predict(obs, deterministic=True)\n",
    "        obs, reward, terminated, truncated, info = env.step(action)\n",
    "        total_reward += reward\n",
    "        if terminated or truncated:\n",
    "            break\n",
    "\n",
    "    rewards.append(total_reward)\n",
    "\n",
    "# An episode total above 500 is taken to mean a diamond was found.\n",
    "diamonds = sum(1 for episode_total in rewards if episode_total > 500)\n",
    "\n",
    "print(f\"Evaluation Results ({eval_episodes} episodes):\")\n",
    "print(f\"  Mean reward: {np.mean(rewards):.2f}\")\n",
    "print(f\"  Diamond rate: {diamonds}/{eval_episodes} ({100*diamonds/eval_episodes:.1f}%)\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cell 8: Download model\n",
    "# FileLink is the cell's last expression, so the notebook displays it as a\n",
    "# link to the archive written by Cell 6.\n",
    "from IPython.display import FileLink\n",
    "FileLink('models/terra_scout_kaggle.zip')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}