This repository has been archived by the owner on Mar 27, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
MultiLearningRunner.scala
160 lines (153 loc) · 7.82 KB
/
MultiLearningRunner.scala
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
package it.unibo.launcher
import ch.qos.logback.classic.Level
import it.unibo.Logging
import it.unibo.alchemist.Alchemist
import it.unibo.learning.Q.MutableQ
import it.unibo.scafi.casestudy.GlobalStore
import it.unibo.scafi.casestudy.algorithm.gradient.TemporalGradientRL.{Action, History}
import it.unibo.storage.LocalStorage
import org.slf4j.LoggerFactory
import org.yaml.snakeyaml.Yaml
import upickle.default.read
import java.io.FileInputStream
import java.util.concurrent.{CountDownLatch, Executors, Semaphore}
import scala.collection.compat.immutable.ArraySeq
import scala.concurrent.{ExecutionContext, ExecutionContextExecutor, Future}
import scala.jdk.CollectionConverters.{CollectionHasAsScala, MapHasAsJava, SeqHasAsJava}
/** Main used to run multiple simulation with reinforcement learning. It could be configured using a file or via main
* args. The file should be structured as: ["key", "value", "key-2", "value-2", "key-3", "value-3"] The accepted key
* are: "greedyEpisodes", "10", "gamma", "[0.9]", "alphaBeta", "[[0.5,0.01]]", "epsilonCombination",
* "[[0.5,50],[0.01,40]]", "bucketsMax", "[[32,4]]"]
* - program: with the base alchemist configuration file. e.g "program", "test.yaml"
* - learningEpisodes: how many episodes are used to train the Q table?. e.g "learningEpisodes", "100"
* - greedyEpisodes: in how many episodes the agent follow a greedy policy? e.g. "greedyEpisodes", "10"
* - gamma: value for Q learning. e.g "gamma", [0.9, 0.1] ==> accept an array of double
* - alphaBeta: value to tune Hysteric Q learning. e.g. "alphaBeta", "[[0.5, 0.1], [0.1, 0.01]]" ==> accept an array
* of tuple
* - epsilonCombination: value to rule the epsilon decay. e.g. "epislonCombination", "[[0.5, 50]]" ==> accept an
* array of tuple. each tuple expresses [epsilon_0, decay factor]
* - bucketsMax: used to tune the discretisation process. "bucketsMax", "[[32, 4]]" ==> accept an array of tuple.
* each tuple expresses: [buckets, radiusMultiplier]
* - useFile: it is used to load the Q table from file or not. The q table file should have the same simulation id
* of the current simulation
*
* This will launch |alphaBeta| * |gamma| * |epsilonCombination| * |bucketsMax| simulations
*
* An example of that file is in launch.txt.
*
* If you use gradle, you can launch these simulation with: ./gradlew startMultipleLearning -PXfile="launch.txt" If you
* launch this application using java, the argument should be passed as: name value
*/
@SuppressWarnings(Array("org.wartremover.warts.All")) //because we have to deal with java world
object MultiLearningRunner extends App {
LoggerFactory.getLogger(org.slf4j.Logger.ROOT_LOGGER_NAME).as[ch.qos.logback.classic.Logger].setLevel(Level.WARN)
val configuration: Seq[String] = {
if (args.contains("file")) {
val fileToRead = args(args.indexOf("file") + 1)
upickle.default.read[List[String]](os.read(os.pwd / fileToRead))
} else {
ArraySeq.unsafeWrapArray(args)
}
}
private val storage = new LocalStorage[String]("table")
private val yaml = new Yaml()
private val baseFolder = "src/main/yaml/"
private val experimentName = findInArgs("program").getOrElse("swapSourceGradientMultiEnvironment.yml")
private val startingFile = s"$baseFolder$experimentName"
val dir = os.temp.dir(prefix = "simulations")
val gamma = findInArgs("gamma").map(read[List[Double]](_)).getOrElse(List(0.9, 0.5, 0.99))
val alphaBetaCombination =
findInArgs("alphaBeta").map(read[List[(Double, Double)]](_)).getOrElse(List((0.5, 0.1), (0.1, 0.01)))
val epsilonCombination = findInArgs("epsilonCombination")
.map(read[List[(Double, Int)]](_))
.getOrElse(List((0.01, 1000), (0.03, 1000), (0.1, 500), (0.2, 400)))
val bucketsAndMax =
findInArgs("bucketsMax").map(read[List[(Int, Int)]](_)).getOrElse(List((32, 4), (64, 4), (128, 5)))
val learningEpisodes = findInArgs("learningEpisodes")
val greedyEpisodes = findInArgs("greedyEpisodes")
Logging().warn("---- RECAP -----")
Logging().warn(s"- Program = $startingFile")
Logging().warn(s"- Gamma = $gamma")
Logging().warn(s"- Alpha and Beta = $alphaBetaCombination")
Logging().warn(s"- epsilon = $epsilonCombination")
Logging().warn(s"- bucket count and max range = $bucketsAndMax")
Logging().warn(s"- has learning episodes?: ${learningEpisodes.nonEmpty}")
Logging().warn(s"- has greedy episodes?: ${greedyEpisodes.nonEmpty}")
Logging().warn("--- END ------")
val allSimulations = for {
((alpha, beta), i) <- alphaBetaCombination.zipWithIndex
((epsilon, decay), j) <- epsilonCombination.zipWithIndex
((buckets, max), k) <- bucketsAndMax.zipWithIndex
(gamma, z) <- gamma.zipWithIndex
} yield {
def suffix = s"$alpha-$beta-$epsilon-$decay-$buckets-$max-$gamma"
def suffixNumber = s"$i$j$k$z"
Logging().warn(s"Prepare: $suffix")
val base = baseYaml
val molecules = base.dict.get("deployments").head.dict.get("contents").list.asScala.map(_.dict)
val variables = base.dict.get("variables").dict
val exportsSection = base.get("export").list.get(0).dict
learningEpisodes.foreach { length =>
variables.dict.get("learning_episodes").dict.put("formula", length.toDouble)
}
greedyEpisodes.foreach { length =>
variables.dict.get("last_greedy_episodes").dict.put("formula", length.toDouble)
}
def findMoleculeAndUpdate(name: String, value: String): Unit =
molecules.filter(_.get("molecule") == name).foreach(_.put("concentration", value))
/// Put molecules
findMoleculeAndUpdate("beta", independent(beta))
findMoleculeAndUpdate("alpha", independent(alpha))
findMoleculeAndUpdate("epsilon", decayWith(epsilon, decay))
findMoleculeAndUpdate("buckets", buckets.toDouble.toString)
findMoleculeAndUpdate("gamma", gamma.toString)
findMoleculeAndUpdate("maxRadiusMultiplier", max.toDouble.toString)
// Put simulation maps
val simulationId = Map("molecule" -> "simulation_id", "concentration" -> s"\"$suffixNumber\"").asJava
base.dict.get("deployments").head.dict.get("contents").list.add(simulationId)
exportsSection.put(
"parameters",
List(s"gradientExperiments-$suffix-$suffixNumber", 1.0, s"./data/$suffixNumber").asJava
)
val file = dir / s"sim-$suffix-$suffixNumber.yml"
os.write.over(file, yaml.dump(base))
findInArgs("useFile") match {
case Some("true") =>
GlobalStore.store(suffixNumber, storage.load[MutableQ[History, Action]](suffixNumber).withDefault(0.0))
case _ => GlobalStore.store(suffixNumber, new MutableQ[History, Action](Map.empty.withDefault(_ => 0.0)))
}
file
}
val howMany = Runtime.getRuntime.availableProcessors
implicit val executionContext: ExecutionContextExecutor =
ExecutionContext.fromExecutor(Executors.newFixedThreadPool(howMany))
val synch = new Semaphore(howMany)
val latch = new CountDownLatch(allSimulations.size)
allSimulations
.foreach { file =>
synch.acquire()
Logging().warn(s"Launch :${file.toIO.getName}")
Future {
Alchemist.main(Array("-y", file.toString(), "-var", "episode", "-p", "1", "-i", "1", "-hl"))
synch.release()
latch.countDown()
Logging().warn(s"End: $file")
}
}
latch.await()
System.exit(0)
private def findInArgs(name: String): Option[String] = {
configuration.zipWithIndex
.find(_._1 == name)
.flatMap { case (_, index) => configuration.zipWithIndex.map { case (v, k) => k -> v }.toMap.get(index + 1) }
}
private def baseYaml = {
val loader = new FileInputStream(startingFile)
val result = yaml.load[java.util.Map[String, Object]](loader)
loader.close()
result
}
private def independent(value: Any): String = s"it.unibo.learning.TimeVariable.independent($value)"
private def decayWith(value: Any, decay: Any): String =
s"it.unibo.learning.TimeVariable.exponentialDecayFunction($value, $decay)"
}