# 🧠 Training PPO on DiabetesEnv (Simulated Healthcare RL)
Let's train a reinforcement learning agent to recommend treatments for a simulated diabetic patient using PPO!

In [None]:
# 📦 Install dependencies (Colab only)
# Installs Stable-Baselines3, Gymnasium and Weights & Biases with pip.
# The leading `!` is the IPython/Colab shell escape; this line is not valid
# in a plain Python script.
# NOTE(review): versions are unpinned, and `wandb` is not imported by any of
# the cells visible here — confirm it is still needed.
!pip install stable-baselines3 gymnasium wandb

In [None]:
# 🔁 Import libraries
import gym
import numpy as np
from stable_baselines3 import PPO
from stable_baselines3.common.env_checker import check_env
from gym.envs.registration import register
from env.diabetes_env import DiabetesEnv

In [None]:
# ✅ Make the environment available under a Gym ID (optional)
# The entry point is given as a "module:ClassName" string.
# NOTE(review): `register` is imported from `gym` above, while the install
# cell pulls in `gymnasium` — confirm which registry is intended.
register(id='DiabetesEnv-v0', entry_point='env.diabetes_env:DiabetesEnv')

# Build the environment straight from its class; the registered ID is not
# used by this cell.
env = DiabetesEnv()

# Validate the environment against the interface Stable-Baselines3 expects
# before spending any time on training.
check_env(env)

In [None]:
# 🎯 Fit a PPO agent on the environment
# "MlpPolicy" selects the multilayer-perceptron policy; verbose=1 turns on
# Stable-Baselines3's informational training output.
model = PPO(policy="MlpPolicy", env=env, verbose=1)
model.learn(total_timesteps=10_000)

# 💾 Persist the trained agent under the name "ppo_diabetes"
model.save("ppo_diabetes")

In [None]:
# ▶️ Test the trained agent
# Runs one evaluation episode of at most 30 steps with the greedy
# (deterministic) policy, rendering the environment after every step.
#
# This cell uses the Gymnasium API, which is what the Stable-Baselines3
# release installed above validates in `check_env`:
#   * reset() returns (observation, info), not just the observation;
#   * step() returns (obs, reward, terminated, truncated, info).
# Passing the raw reset() tuple to model.predict(), or unpacking step() into
# four names, fails under that API.
obs, info = env.reset()
for _ in range(30):
    action, _states = model.predict(obs, deterministic=True)
    obs, reward, terminated, truncated, info = env.step(action)
    # Keep a single `done` flag so code written against the older
    # four-value API can still read it.
    done = terminated or truncated
    env.render()
    if done:
        break