A collection of deep reinforcement learning experiments on the Crafter environment, featuring DQN, PPO, and genetic algorithm (GA) implementations with various improvements.
Crafter is a challenging open-world survival game designed for benchmarking RL agents. It requires learning complex behaviors like resource gathering, crafting, and combat across diverse terrains.
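For context, here is a minimal sketch of interacting with Crafter's raw Python API (assuming the standard `crafter` package; the wrappers and custom env IDs used by the scripts below, such as `CrafterPartial-v1`, are defined in this repo):

```python
# Minimal random-agent rollout on raw Crafter (a sketch, not repo code).
import crafter

env = crafter.Env(seed=42)       # 64x64 RGB observations, discrete actions
obs = env.reset()
done, total_reward = False, 0.0
while not done:
    action = env.action_space.sample()          # random policy for illustration
    obs, reward, done, info = env.step(action)
    total_reward += reward
unlocked = [k for k, v in info['achievements'].items() if v > 0]
print(f'Return: {total_reward:.1f}, achievements: {unlocked}')
```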
Prerequisites:

- Python 3.10
- CUDA-capable GPU (optional, but recommended for faster training)
- Conda or Miniconda
Installation:

```bash
# Create and activate the main environment
conda env create -f environment.yml
conda activate crafter_env

# Verify the setup
python -c "import crafter; import stable_baselines3; print('Setup successful!')"

# Create and activate the separate environment for the GA experiments
conda env create -f crafterGA/environment.yml
conda activate crafter_env_ga
```

Training:
Baseline PPO:

```bash
python train.py --outdir logdir/ppo_baseline --steps 1000000
```
Improved PPO:

```bash
python train2.py --outdir logdir/ppo_improved --steps 1000000
```
PPO with Curiosity:

```bash
python train3.py --outdir logdir/ppo_curiosity --steps 1000000
```
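For orientation, a baseline PPO run amounts to roughly the following stable-baselines3 setup. This is a hypothetical sketch assuming the `CrafterReward-v1` ID registered by `import crafter`; see train.py for the actual wrappers and hyperparameters:

```python
# Rough shape of a baseline PPO run (illustrative; not the repo's exact code).
import gym
import crafter  # noqa: F401 -- importing registers the Crafter gym IDs
from stable_baselines3 import PPO

env = gym.make('CrafterReward-v1')
model = PPO('CnnPolicy', env, verbose=1, tensorboard_log='logdir/ppo_baseline')
model.learn(total_timesteps=1_000_000)
model.save('ppo_crafter_baseline')
```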
Baseline DQN:

```bash
python train_dqn.py --env_id CrafterPartial-v1 --total_timesteps 1000000
```
DQN with Reward Shaping:

```bash
python train_dqn_RShape.py --env_id CrafterPartial-v1 --total_timesteps 1000000
```
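Reward shaping densifies Crafter's sparse reward. One plausible scheme, sketched below, pays a small one-time bonus per achievement unlocked in an episode; the `AchievementBonus` wrapper is illustrative, not the actual code in train_dqn_RShape.py:

```python
import gym

class AchievementBonus(gym.Wrapper):
    """Adds a small one-time bonus per achievement unlocked in an episode.
    A plausible shaping scheme only; train_dqn_RShape.py defines the real one."""

    def __init__(self, env, bonus=0.1):
        super().__init__(env)
        self.bonus = bonus
        self._unlocked = set()

    def reset(self, **kwargs):
        self._unlocked.clear()
        return self.env.reset(**kwargs)

    def step(self, action):
        obs, reward, done, info = self.env.step(action)
        for name, count in info.get('achievements', {}).items():
            if count > 0 and name not in self._unlocked:
                self._unlocked.add(name)
                reward += self.bonus   # shaped reward seen by the agent
        return obs, reward, done, info
```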
DQN with N-step Learning:

```bash
python train_dqn_per_nstep1.py --env_id CrafterPartial-v1 --total_timesteps 1000000
```
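N-step learning bootstraps the TD target after n accumulated rewards instead of one, which speeds credit assignment for Crafter's delayed rewards. A sketch of collapsing a window of 1-step transitions (the helper name is illustrative, not from train_dqn_per_nstep1.py):

```python
# Collapse a window of 1-step transitions into one n-step transition.
def make_nstep_transition(window, gamma=0.99):
    """window: list of (obs, action, reward, next_obs, done), length n."""
    obs, action = window[0][0], window[0][1]
    n_reward, next_obs, done = 0.0, window[-1][3], window[-1][4]
    for i, (_, _, r, nobs, d) in enumerate(window):
        n_reward += (gamma ** i) * r
        if d:                      # episode ended inside the window
            next_obs, done = nobs, True
            break
    return obs, action, n_reward, next_obs, done
```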
DQN with Noisy Networks + N-step:

```bash
python train_dqn_noisy_nstep.py --env_id CrafterPartial-v1 --total_timesteps 1000000
```
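Noisy Networks replace epsilon-greedy exploration with learned, parameterised noise on the Q-network's linear layers. Below is a compact factorised-Gaussian layer in PyTorch, after Fortunato et al. (2018); the layer in train_dqn_noisy_nstep.py may differ:

```python
import math
import torch
import torch.nn as nn
import torch.nn.functional as F

class NoisyLinear(nn.Module):
    """Factorised-Gaussian noisy layer (sketch, after Fortunato et al., 2018)."""

    def __init__(self, in_features, out_features, sigma0=0.5):
        super().__init__()
        self.mu_w = nn.Parameter(torch.empty(out_features, in_features))
        self.sigma_w = nn.Parameter(torch.empty(out_features, in_features))
        self.mu_b = nn.Parameter(torch.empty(out_features))
        self.sigma_b = nn.Parameter(torch.empty(out_features))
        self.register_buffer('eps_in', torch.zeros(in_features))
        self.register_buffer('eps_out', torch.zeros(out_features))
        bound = 1.0 / math.sqrt(in_features)
        nn.init.uniform_(self.mu_w, -bound, bound)
        nn.init.uniform_(self.mu_b, -bound, bound)
        nn.init.constant_(self.sigma_w, sigma0 * bound)
        nn.init.constant_(self.sigma_b, sigma0 * bound)
        self.reset_noise()

    @staticmethod
    def _scale(x):
        # f(x) = sign(x) * sqrt(|x|), the factorised-noise transform
        return x.sign() * x.abs().sqrt()

    def reset_noise(self):
        self.eps_in.normal_()
        self.eps_out.normal_()

    def forward(self, x):
        if self.training:
            eps_w = torch.outer(self._scale(self.eps_out), self._scale(self.eps_in))
            weight = self.mu_w + self.sigma_w * eps_w
            bias = self.mu_b + self.sigma_b * self._scale(self.eps_out)
        else:  # deterministic (mean) weights at evaluation time
            weight, bias = self.mu_w, self.mu_b
        return F.linear(x, weight, bias)
```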
GA Baseline Training:

```bash
python crafterGA/train_ga.py --outdir crafterGA/logdir/crafter_ga_base/final --steps 200
```
GA Improvement 1 Training:

```bash
python crafterGA/train_ga_surv_v3.py --outdir crafterGA/logdir/crafter_ga_imp2/final --steps 200
```
GA Improvement 2 Training:

```bash
python crafterGA/train_ga_surv_v4.py --outdir crafterGA/logdir/crafter_ga_imp3/final --steps 200
```
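The GA scripts evolve policy weights directly rather than by gradient descent. A hypothetical PyGAD sketch of that loop is below; the policy encoding, helper names, and GA settings are illustrative, not taken from train_ga.py:

```python
# Neuroevolution sketch: each chromosome is a flat weight vector for a tiny
# linear policy, and fitness is one episode's return. Illustrative only;
# train_ga.py's policy, encoding, and GA settings will differ.
import crafter
import numpy as np
import pygad

NUM_ACTIONS = 17                      # Crafter's discrete action count
NUM_GENES = NUM_ACTIONS * 8 * 8 * 3   # weights for a crudely downsampled frame

def act(solution, obs):
    feats = obs[::8, ::8].reshape(-1) / 255.0      # 8x8x3 downsample
    weights = np.asarray(solution).reshape(NUM_ACTIONS, feats.size)
    return int(np.argmax(weights @ feats))

def fitness_func(ga_instance, solution, solution_idx):
    # Note: PyGAD >= 3.0 passes the GA instance as the first argument.
    env = crafter.Env(seed=0)
    obs, done, ret = env.reset(), False, 0.0
    while not done:
        obs, reward, done, _ = env.step(act(solution, obs))
        ret += reward
    return ret

ga = pygad.GA(num_generations=200, num_parents_mating=8, sol_per_pop=32,
              num_genes=NUM_GENES, fitness_func=fitness_func,
              mutation_percent_genes=5)
ga.run()
best, best_fitness, _ = ga.best_solution()
print(f'Best fitness: {best_fitness:.1f}')
```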
Evaluation:

Baseline DQN:

```bash
python eval_crafter_metrics.py --model dqn_crafter_baseline.zip --episodes 20 --logdir logs/dqn_csv
python plot_learning_curve.py --csv logs/dqn_csv/progress.csv --out dqn_base_curve.png
python crafter_eval_summary.py --path logs/dqn_csv/stats.jsonl --out dqn_summary.png --title "DQN Baseline Evaluation"
```
DQN with Reward Shaping:

```bash
python eval_crafter_metrics.py --model dqn_crafter_rShape.zip --episodes 20 --logdir logs/dqn_rShape_csv
python plot_learning_curve.py --csv logs/dqn_rShape_csv/progress.csv --out dqn_rShape_curve.png
python crafter_eval_summary.py --path logs/dqn_rShape_csv/stats.jsonl --out dqn_rShape_summary.png --title "DQN Reward Shaping Evaluation"
```
DQN with N-step Learning:

```bash
python eval_crafter_metrics.py --model dqn_nstep1.zip --episodes 20 --logdir eval_logs/dqn_nstep
python plot_learning_curve.py --csv logs/dqn_nstep_csv/progress.csv --out dqn_nstep_curve.png
python crafter_eval_summary.py --path eval_logs/dqn_nstep/stats.jsonl --out dqn_nstep_summary.png --title "DQN N-step Evaluation"
```
DQN with Noisy Networks + N-step:

```bash
python eval_crafter_metrics.py --model dqn_noisy_nstep.zip --episodes 20 --logdir logs/dqn_noisy_nstep_csv
python plot_learning_curve.py --csv logs/dqn_noisy_nstep_csv/progress.csv --out dqn_noisy_nstep_curve.png
python crafter_eval_summary.py --path logs/dqn_noisy_nstep_csv/stats.jsonl --out dqn_noisy_nstep_summary.png --title "DQN Noisy + N-step Evaluation"
```
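If the saved `.zip` checkpoints follow the standard stable-baselines3 format, a quick return-only sanity check is also possible without the repo's scripts (a sketch; eval_crafter_metrics.py presumably also records Crafter's per-achievement stats):

```python
# Quick return-only check of a saved DQN checkpoint (illustrative).
import gym
import crafter  # noqa: F401 -- registers the Crafter gym IDs
from stable_baselines3 import DQN
from stable_baselines3.common.evaluation import evaluate_policy

model = DQN.load('dqn_crafter_baseline.zip')
# NOTE: the env must match the wrappers used at training time; the
# CrafterPartial-v1 ID used above is registered by this repo's code.
env = gym.make('CrafterReward-v1')
mean_r, std_r = evaluate_policy(model, env, n_eval_episodes=20)
print(f'Mean return over 20 episodes: {mean_r:.2f} +/- {std_r:.2f}')
```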
GA Baseline Evaluation:

```bash
python crafterGA/pygadEval.py --model_path crafterGA/logdir/crafter_ga_base/final/best_ga_policy.pth --outdir crafterGA/logdir/crafter_ga_eval/base --seed 42
```
GA Improvement 1 Evaluation:

```bash
python crafterGA/pygadEvalImp1.py --model_path crafterGA/logdir/crafter_ga_imp1/final/best_ga_policy.pth --outdir crafterGA/logdir/crafter_ga_eval/imp2/final --seed 42
```
GA Improvement 2 Evaluation:

```bash
python crafterGA/pygadEvalImp1.py --model_path crafterGA/logdir/crafter_ga_imp2/final/best_ga_policy.pth --outdir crafterGA/logdir/crafter_ga_eval/imp3/final --seed 42
```
Animation Creation:

```bash
python crafterGA/viewEpisode.py --filename crafterGA/logdir/crafter_ga_eval/base/episode.npz
python crafterGA/viewEpisode.py --filename crafterGA/logdir/crafter_ga_eval/imp2/final/episode.npz
python crafterGA/viewEpisode.py --filename crafterGA/logdir/crafter_ga_eval/imp3/final/episode.npz
```
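As a quick alternative to viewEpisode.py, a recorded episode can be turned into a GIF directly, assuming the `.npz` follows crafter's `Recorder` layout with frames under an `image` key (a guess; inspect the keys first):

```python
# Turn a recorded episode into a GIF (illustrative; viewEpisode.py is the
# repo's supported path).
import numpy as np
import imageio

data = np.load('crafterGA/logdir/crafter_ga_eval/base/episode.npz')
print(list(data.keys()))       # check what was actually saved
frames = data['image']         # assumed (T, 64, 64, 3) uint8 frames
imageio.mimsave('episode.gif', frames, fps=15)
```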
Graph Creation:

```bash
python crafterGA/pygadPlots.py
```

Monitor PPO training with TensorBoard:
```bash
tensorboard --logdir logdir/
```

Project Structure:

```
.
├── environment.yml            # Conda environment
├── train.py                   # Baseline PPO
├── train2.py                  # Improved PPO
├── train3.py                  # PPO + Curiosity
├── train_dqn.py               # Baseline DQN
├── train_dqn_RShape.py        # DQN + Reward Shaping
├── train_dqn_per_nstep1.py    # DQN + N-step
├── train_dqn_noisy_nstep.py   # DQN + Noisy + N-step
├── crafterGA/                 # GA files
└── logdir/                    # Training outputs
```
Happy Training! 🚀