# Supply Chain RL Training with PPO

This notebook trains a PPO agent for supply chain optimization and exports it to TensorFlow Lite for edge deployment.

**Run this in Google Colab for free GPU training!**

In [None]:
# Install required packages
!pip install gymnasium==0.29.1 stable-baselines3==2.2.1 tensorflow==2.15.0
!pip install matplotlib numpy

In [None]:
# Clone the repository (replace with your actual repo URL)
!git clone https://github.com/your-username/supply-chain-rl.git
%cd supply-chain-rl/rl_trainer

In [None]:
# Import the environment and training script
from supply_chain_env import SupplyChainEnv
from train import train_model, convert_to_tflite, test_model
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Test the environment: reset it, print its spaces, then take a few random steps.
env = SupplyChainEnv()

# Seed the action sampler so the random actions below are the same on every run.
# NOTE(review): this does not seed the environment itself; if its dynamics are
# stochastic, rewards/observations may still vary between runs.
env.action_space.seed(42)

obs, _ = env.reset()
print(f"Initial observation: {obs}")
print(f"Action space: {env.action_space}")
print(f"Observation space: {env.observation_space}")

# Human-readable labels, indexed by the sampled action value.
# Defined once here rather than being rebuilt on every loop iteration.
action_names = ["Hold", "Produce", "Ship"]

# Take a few random actions
for i in range(5):
    action = env.action_space.sample()
    obs, reward, terminated, truncated, info = env.step(action)
    print(f"Step {i+1}: Action={action_names[action]}, Reward={reward:.2f}, Obs={obs}")
    # Stop early if the episode ends before all five steps are taken.
    if terminated or truncated:
        break

In [None]:
# Train the PPO agent.
# The step budget is named here so it is easy to spot and tune.
TRAIN_TIMESTEPS = 10000

print("🚀 Starting PPO training...")
model = train_model(timesteps=TRAIN_TIMESTEPS)
print("✅ Training completed!")

In [None]:
# Test the trained model over a small number of episodes.
# NOTE(review): `model` from the training cell is not passed in; presumably
# test_model loads the saved model itself - confirm in train.py.
N_TEST_EPISODES = 3

print("🧪 Testing trained model...")
test_model(episodes=N_TEST_EPISODES)

In [None]:
# Convert the trained model to TensorFlow Lite and report the result's size.
print("📱 Converting to TensorFlow Lite...")
tflite_model = convert_to_tflite()

# len() of the returned object is what gets reported as the size in bytes.
tflite_size = len(tflite_model)
print(f"✅ TFLite model size: {tflite_size} bytes")

In [None]:
# Download the TensorFlow Lite model through the browser (Google Colab only).
from google.colab import files

# Path is relative to the current working directory.
tflite_path = '../edge_agent/models/supply_chain_model.tflite'
files.download(tflite_path)

# Confirmation followed by the follow-up instructions, one message per line.
next_step_messages = (
    "📥 TensorFlow Lite model downloaded!",
    "📋 Next steps:",
    "1. Upload the .tflite file to your edge_agent/models/ directory",
    "2. Run: docker-compose up --build",
    "3. Open http://localhost:3000 to see the dashboard",
)
for message in next_step_messages:
    print(message)

In [None]:
# Visualize training progress (if tensorboard logs exist)
import os
if os.path.exists('tensorboard_logs'):
    %load_ext tensorboard
    %tensorboard --logdir tensorboard_logs
else:
    print("No tensorboard logs found. Training logs would appear here.")