Added experiment specification examples

arnomoonens · Mar 15, 2018 · a7b98f4 · a7b98f4
1 parent 7155abe
commit a7b98f4
Show file tree

Hide file tree

Showing 20 changed files with 373 additions and 8 deletions.
diff --git a/.gitignore b/.gitignore
@@ -93,7 +93,7 @@ ENV/
 /DeepRL.sublime-workspace
 Environment/*.json
 *.orig
-*-experiment.json
+/*-experiment.json
 *~
 .*.swp
 .vscode/

diff --git a/README.md b/README.md
@@ -41,15 +41,15 @@ pip install -r requirements.txt
 
 ### Algorithms/experiments
 
-You can run algorithms by passing an experiment specification (in _json_ format) to `main.py`:
+You can run algorithms by passing the path to an experiment specification (which is a file in _json_ format) to `main.py`:
 
 ```Shell
 
-python main.py <experiment_description>
+python main.py <path_to_experiment_specification>
 
 ```
 
-[Example of an experiment specification](./experiment_spec_example.json)
+Examples of experiment specifications can be found in the [_experiment_specs_](./experiment_specs) folder.
 
 ### Statistics
 

diff --git a/experiment_specs/CartPole-v0-A2C-experiment.json b/experiment_specs/CartPole-v0-A2C-experiment.json
@@ -0,0 +1,16 @@
+{
+    "experiment_name": "CartPole-REINFORCE",
+    "environments": {
+        "type": "single",
+        "source": "CartPole-v0"
+    },
+    "agent": {
+        "name": "REINFORCE",
+        "args": {
+            "monitor_path": "/tmp/CartPole-v0-RE",
+            "video": false,
+            "save_model": true,
+            "monitor": true
+        }
+    }
+}
diff --git a/experiment_specs/DPPO-experiment.json b/experiment_specs/DPPO-experiment.json
@@ -0,0 +1,27 @@
+{
+    "experiment_name": "CartPole-DPPO",
+    "environments": {
+        "type": "single",
+        "source": "CartPole-v0"
+    },
+    "agent": {
+        "name": "DPPO",
+        "args": {
+            "monitor_path": "/tmp/CartPole-v0-DPPO",
+            "n_hidden_units": 10,
+            "n_hidden_layers": 1,
+            "gradient_clip_value": 50.0,
+            "batch_size": 64,
+            "learning_rate": 3e-4,
+            "vf_coef": 1.0,
+            "n_local_steps": 512,
+            "n_workers": 4,
+            "cso_epsilon": 0.2,
+            "n_epochs": 10,
+            "video": false,
+            "save_model": true,
+            "monitor": true,
+	        "n_iter": 400000
+        }
+    }
+}
diff --git a/experiment_specs/DTRPO-experiment.json b/experiment_specs/DTRPO-experiment.json
@@ -0,0 +1,27 @@
+{
+    "experiment_name": "CartPole-TRPO",
+    "environments": {
+        "type": "single",
+        "source": "CartPole-v0"
+    },
+    "agent": {
+        "name": "TRPO",
+        "args": {
+            "monitor_path": "/tmp/CartPole-v0-TRPO",
+            "n_hidden_units": 10,
+            "n_hidden_layers": 1,
+            "gradient_clip_value": 50.0,
+            "batch_size": 64,
+            "learning_rate": 3e-4,
+            "vf_coef": 1.0,
+            "n_local_steps": 512,
+            "n_workers": 4,
+            "cso_epsilon": 0.2,
+            "n_epochs": 10,
+            "video": false,
+            "save_model": true,
+            "monitor": true,
+	        "n_iter": 400000
+        }
+    }
+}
diff --git a/experiment_specs/FrozenLake-DPPO-experiment.json b/experiment_specs/FrozenLake-DPPO-experiment.json
@@ -0,0 +1,27 @@
+{
+    "experiment_name": "FrozenLake-DPPO",
+    "environments": {
+        "type": "single",
+        "source": "FrozenLake8x8-v0"
+    },
+    "agent": {
+        "name": "DPPO",
+        "args": {
+            "monitor_path": "/tmp/FrozenLake8x8-v0-DPPO",
+            "n_hidden_units": 10,
+            "n_hidden_layers": 1,
+            "gradient_clip_value": 50.0,
+            "batch_size": 64,
+            "learning_rate": 3e-4,
+            "vf_coef": 1.0,
+            "n_local_steps": 512,
+            "n_workers": 4,
+            "cso_epsilon": 0.2,
+            "n_epochs": 10,
+            "video": false,
+            "save_model": true,
+            "monitor": true,
+	        "n_iter": 400000
+        }
+    }
+}
diff --git a/experiment_specs/FrozenLake-PPO-experiment.json b/experiment_specs/FrozenLake-PPO-experiment.json
@@ -0,0 +1,23 @@
+{
+    "experiment_name": "FrozenLake-PPO",
+    "environments": {
+        "type": "single",
+        "source": "FrozenLake8x8-v0"
+    },
+    "agent": {
+        "name": "PPO",
+        "args": {
+            "monitor_path": "/tmp/FrozenLake8x8-v0-PPO",
+			"n_hidden": 100,
+			"gradient_clip_value": 50.0,
+			"batch_size": 256,
+			"learning_rate": 3e-4,
+			"vf_coef": 1.0,
+			"n_local_steps": 256,
+            "video": false,
+            "save_model": true,
+            "monitor": true,
+	    "n_iter": 400000
+        }
+    }
+}
diff --git a/experiment_spec_example.json → ...cs/MountainCar-v0-SarsaFA-experiment.json b/experiment_spec_example.json → ...cs/MountainCar-v0-SarsaFA-experiment.json
@@ -1,16 +1,16 @@
 {
-    "experiment_name": "CartPole-SarsaFA",
+    "experiment_name": "MountainCar-SarsaFA",
     "environments": {
         "type": "single",
         "source": "MountainCar-v0"
     },
     "agent": {
         "name": "SarsaFA",
         "args": {
-            "monitor_path": "/tmp/CartPole-v0-SarsaFA",
-            "video": false,
+            "monitor_path": "/tmp/MountainCar-SarsaFA",
+            "video": true,
             "save_model": true,
             "monitor": true
         }
     }
-}
+}
diff --git a/experiment_specs/PPO-experiment.json b/experiment_specs/PPO-experiment.json
@@ -0,0 +1,23 @@
+{
+    "experiment_name": "CartPole-PPO",
+    "environments": {
+        "type": "single",
+        "source": "CartPole-v0"
+    },
+    "agent": {
+        "name": "PPO",
+        "args": {
+            "monitor_path": "/tmp/CartPole-v0-PPO",
+			"n_hidden": 10,
+			"gradient_clip_value": 50.0,
+			"batch_size": 64,
+			"learning_rate": 3e-4,
+			"vf_coef": 1.0,
+			"n_local_steps": 2048,
+            "video": false,
+            "save_model": true,
+            "monitor": true,
+	    "n_iter": 400000
+        }
+    }
+}
diff --git a/experiment_specs/Pendulum-A3C-experiment.json b/experiment_specs/Pendulum-A3C-experiment.json
@@ -0,0 +1,20 @@
+{
+    "experiment_name": "Pendulum-A3C",
+    "environments": {
+        "type": "single",
+        "source": "Pendulum-v0"
+    },
+    "agent": {
+        "name": "A3C",
+        "args": {
+            "monitor_path": "/tmp/Pendulum-v0-A3C",
+			"n_tasks": 2,
+            "video": false,
+            "save_model": false,
+            "monitor": true,
+			"T_max": 100000,
+     	    "n_iter": 1000,
+			"shared_optimizer": true
+        }
+    }
+}
diff --git a/experiment_specs/Pendulum-DDPG-experiment.json b/experiment_specs/Pendulum-DDPG-experiment.json
@@ -0,0 +1,18 @@
+{
+    "experiment_name": "Pendulum-DDPG",
+    "environments": {
+        "type": "single",
+        "source": "Pendulum-v0"
+    },
+    "agent": {
+        "name": "DDPG",
+        "args": {
+            "monitor_path": "/tmp/Pendulum-v0-DDPG",
+            "video": false,
+            "n_hidden_units": 64,
+            "save_model": true,
+            "monitor": true,
+	        "n_episodes": 400000
+        }
+    }
+}
diff --git a/experiment_specs/Pendulum-PPO-experiment.json b/experiment_specs/Pendulum-PPO-experiment.json
@@ -0,0 +1,24 @@
+{
+    "experiment_name": "Pendulum-PPO",
+    "environments": {
+        "type": "single",
+        "source": "Pendulum-v0"
+    },
+    "agent": {
+        "name": "PPO",
+        "args": {
+            "monitor_path": "/tmp/Pendulum-v0-PPO",
+			"n_hidden_units": 20,
+			"n_hidden_layers": 2,
+			"batch_size": 64,
+			"learning_rate": 3e-4,
+			"vf_coef": 1.0,
+			"entropy_coef": 0.0,
+			"n_local_steps": 2048,
+            "video": false,
+            "save_model": true,
+            "monitor": true,
+	    "n_iter": 400000
+        }
+    }
+}
diff --git a/experiment_specs/Pendulum-v0-DPPO-experiment.json b/experiment_specs/Pendulum-v0-DPPO-experiment.json
@@ -0,0 +1,27 @@
+{
+    "experiment_name": "Pendulum-DPPO",
+    "environments": {
+        "type": "single",
+        "source": "Pendulum-v0"
+    },
+    "agent": {
+        "name": "DPPO",
+        "args": {
+            "monitor_path": "/tmp/Pendulum-v0-DPPO",
+            "n_hidden_units": 10,
+            "n_hidden_layers": 1,
+            "gradient_clip_value": 50.0,
+            "batch_size": 64,
+            "learning_rate": 3e-4,
+            "vf_coef": 1.0,
+            "n_local_steps": 512,
+            "n_workers": 4,
+            "cso_epsilon": 0.2,
+            "n_epochs": 10,
+            "video": false,
+            "save_model": true,
+            "monitor": true,
+	        "n_iter": 400000
+        }
+    }
+}
diff --git a/experiment_specs/PongDeterministic-v4-A2C-experiment.json b/experiment_specs/PongDeterministic-v4-A2C-experiment.json
@@ -0,0 +1,18 @@
+{
+    "experiment_name": "Pong-A2C",
+    "environments": {
+        "type": "single",
+        "source": "PongDeterministic-v4"
+    },
+    "agent": {
+        "name": "A2C",
+        "args": {
+            "monitor_path": "/tmp/PongDeterministic-v4-A2C",
+			"RNN": true,
+            "video": false,
+            "save_model": true,
+            "monitor": true,
+			"n_iter": 800000
+        }
+    }
+}
diff --git a/experiment_specs/PongDeterministic-v4-A3C-experiment.json b/experiment_specs/PongDeterministic-v4-A3C-experiment.json
@@ -0,0 +1,20 @@
+{
+    "experiment_name": "Pong-A3C",
+    "environments": {
+        "type": "single",
+        "source": "PongDeterministic-v4"
+    },
+    "agent": {
+        "name": "A3C",
+        "args": {
+            "monitor_path": "/tmp/PongDeterministic-v4-A3C",
+			"RNN": true, 
+            "video": false,
+            "save_model": true,
+            "monitor": true,
+			"T_max": 8000,
+			"n_tasks": 1,
+			"shared_optimizer": false 
+        }
+    }
+}
diff --git a/experiment_specs/PongDeterministic-v4-DPPO-experiment.json b/experiment_specs/PongDeterministic-v4-DPPO-experiment.json
@@ -0,0 +1,18 @@
+{
+    "experiment_name": "Pong-DPPO",
+    "environments": {
+        "type": "single",
+        "source": "PongDeterministic-v4"
+    },
+    "agent": {
+        "name": "DPPO",
+        "args": {
+            "monitor_path": "/tmp/PongDeterministic-v4-DPPO",
+			"gradient_clip_value": 1.0,
+            "video": false,
+            "save_model": true,
+            "monitor": true,
+	    "n_iter": 400000
+        }
+    }
+}
diff --git a/experiment_specs/PongDeterministic-v4-PPO-experiment.json b/experiment_specs/PongDeterministic-v4-PPO-experiment.json
@@ -0,0 +1,18 @@
+{
+    "experiment_name": "Pong-PPO",
+    "environments": {
+        "type": "single",
+        "source": "PongDeterministic-v4"
+    },
+    "agent": {
+        "name": "PPO",
+        "args": {
+            "monitor_path": "/tmp/PongDeterministic-v4-PPO",
+			"RNN": true,
+            "video": false,
+            "save_model": true,
+            "monitor": true,
+			"T_max": 8000
+        }
+    }
+}
diff --git a/experiment_specs/PongDeterministic-v4-RE-experiment.json b/experiment_specs/PongDeterministic-v4-RE-experiment.json
@@ -0,0 +1,19 @@
+{
+    "experiment_name": "Pong-REINFORCE",
+    "environments": {
+        "type": "single",
+        "source": "PongDeterministic-v4"
+    },
+    "agent": {
+        "name": "REINFORCE",
+        "args": {
+            "RNN": false,
+            "monitor_path": "/tmp/PongDeterministic-v4-RE",
+            "video": false,
+            "save_model": true,
+            "monitor": true,
+            "n_iter": 200,
+            "switch_at_iter": 10
+        }
+    }
+}