-
Notifications
You must be signed in to change notification settings - Fork 4.1k
/
Academy.cs
638 lines (552 loc) · 22.4 KB
/
Academy.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
using System;
using UnityEngine;
using System.Collections.Generic;
#if UNITY_EDITOR
using UnityEditor;
#endif
using Unity.MLAgents.Inference;
using Unity.MLAgents.Policies;
using Unity.MLAgents.SideChannels;
using Unity.Barracuda;
/**
* Welcome to Unity Machine Learning Agents (ML-Agents).
*
* The ML-Agents toolkit contains four entities: Academy, Agent, Communicator and
* Python API. The academy and connected agents live within
* a learning environment (herein called Environment), while the communicator
* manages the communication between the learning environment and the Python
* API. For more information on each of these entities, in addition to how to
* set-up a learning environment and train the behavior of characters in a
* Unity scene, please browse our documentation pages on GitHub:
* https://github.com/Unity-Technologies/ml-agents/tree/release_2_verified_docs/docs/
*/
namespace Unity.MLAgents
{
/// <summary>
/// MonoBehaviour that steps the Academy from Unity's FixedUpdate callback.
/// </summary>
internal class AcademyFixedUpdateStepper : MonoBehaviour
{
    void FixedUpdate()
    {
        // Only the stepper that the current Academy registered is allowed to step it.
        var ownedByCurrentAcademy =
            Academy.IsInitialized && Academy.Instance.IsStepperOwner(this);
        if (ownedByCurrentAcademy)
        {
            Academy.Instance.EnvironmentStep();
        }
        else
        {
            // This stepper is a leftover from a previous run; destroy it so it does not leak.
            Destroy(gameObject);
        }
    }
}
/// <summary>
/// The Academy singleton manages agent training and decision making.
/// </summary>
/// <remarks>
/// Access the Academy singleton through the <see cref="Instance"/>
/// property. The Academy instance is initialized the first time it is accessed (which will
/// typically be by the first <see cref="Agent"/> initialized in a scene).
///
/// At initialization, the Academy attempts to connect to the Python training process through
/// the external communicator. If successful, the training process can train <see cref="Agent"/>
/// instances. When you set an agent's <see cref="BehaviorParameters.BehaviorType"/> setting
/// to <see cref="BehaviorType.Default"/>, the agent exchanges data with the training process
/// to make decisions. If no training process is available, agents with the default behavior
/// fall back to inference or heuristic decisions. (You can also set agents to always use
/// inference or heuristics.)
/// </remarks>
[HelpURL("https://github.com/Unity-Technologies/ml-agents/tree/release_2_verified_docs/" +
"docs/Learning-Environment-Design.md")]
public class Academy : IDisposable
{
/// <summary>
/// Communication protocol version.
/// When connecting to python, this must be compatible with UnityEnvironment.API_VERSION.
/// We follow semantic versioning on the communication version, so existing
/// functionality will work as long as the major versions match.
/// This should be changed whenever a change is made to the communication protocol.
/// </summary>
const string k_ApiVersion = "1.0.0";
/// <summary>
/// Unity package version of com.unity.ml-agents.
/// This must match the version string in package.json and is checked in a unit test.
/// </summary>
internal const string k_PackageVersion = "1.0.6";
const int k_EditorTrainingPort = 5004;
const string k_PortCommandLineFlag = "--mlagents-port";
// Lazy initializer pattern, see https://csharpindepth.com/articles/singleton#lazy
static Lazy<Academy> s_Lazy = new Lazy<Academy>(() => new Academy());
/// <summary>
/// Reports whether the Academy singleton has been created yet.
/// </summary>
/// <value><c>True</c> if the Academy is initialized, <c>false</c> otherwise.</value>
public static bool IsInitialized => s_Lazy.IsValueCreated;
/// <summary>
/// The singleton Academy object.
/// </summary>
/// <value>Reading this property initializes the Academy, if necessary.</value>
public static Academy Instance => s_Lazy.Value;
// Fields not provided in the Inspector.
/// <summary>
/// Reports whether a communicator is currently attached to the Academy.
/// </summary>
/// <seealso cref="ICommunicator"/>
/// <value>
/// <c>True</c> when the communicator reference is non-null, <c>false</c> otherwise.
/// </value>
public bool IsCommunicatorOn => Communicator != null;
/// The number of episodes completed by the environment. Incremented
/// each time the environment is reset.
int m_EpisodeCount;
/// The number of steps completed within the current episode. Incremented
/// each time a step is taken in the environment. Is reset to 0 during
/// <see cref="EnvironmentReset"/>.
int m_StepCount;
/// The total number of steps completed during the whole simulation. Incremented
/// each time a step is taken in the environment.
int m_TotalStepCount;
/// Pointer to the communicator currently in use by the Academy.
internal ICommunicator Communicator;
bool m_Initialized;
List<ModelRunner> m_ModelRunners = new List<ModelRunner>();
// Flag used to keep track of the first time the Academy is reset.
bool m_HadFirstReset;
// Detect an Academy step called by user code that is also called by the Academy.
private RecursionChecker m_StepRecursionChecker = new RecursionChecker("EnvironmentStep");
// Random seed used for inference.
int m_InferenceSeed;
/// <summary>
/// Sets the random seed used for inference. Assign it before any Agents are added to the
/// scene: the current value is handed to each newly created ModelRunner and is incremented
/// after every creation.
/// </summary>
public int InferenceSeed
{
    set
    {
        m_InferenceSeed = value;
    }
}
/// <summary>
/// Returns the RLCapabilities of the python client that the unity process is connected to.
/// </summary>
internal UnityRLCapabilities TrainerCapabilities { get; set; }
// The Academy uses a series of events to communicate with agents
// to facilitate synchronization. More specifically, it ensures
// that all the agents perform their steps in a consistent order (i.e. no
// agent can act based on a decision before another agent has had a chance
// to request a decision).
// Signals to all the Agents at each environment step so they can use
// their Policy to decide on their next action.
internal event Action DecideAction;
// Signals to all the listeners that the academy is being destroyed
internal event Action DestroyAction;
// Signals to the Agent that a new step is about to start.
// This will mark the Agent as Done if it has reached its maxSteps.
internal event Action AgentIncrementStep;
/// <summary>
/// Signals to all of the <see cref="Agent"/>s that their step is about to begin.
/// This is a good time for an <see cref="Agent"/> to decide if it would like to
/// call <see cref="Agent.RequestDecision"/> or <see cref="Agent.RequestAction"/>
/// for this step. Any other pre-step setup could be done during this event as well.
/// </summary>
public event Action<int> AgentPreStep;
// Signals to all the agents at each environment step so they can send
// their state to their Policy if they have requested a decision.
internal event Action AgentSendState;
// Signals to all the agents at each environment step so they can act if
// they have requested a decision.
internal event Action AgentAct;
// Signals to all the agents each time the Academy force resets.
internal event Action AgentForceReset;
/// <summary>
/// Signals that the Academy has been reset by the training process.
/// </summary>
public event Action OnEnvironmentReset;
AcademyFixedUpdateStepper m_FixedUpdateStepper;
GameObject m_StepperObject;
/// <summary>
/// Private constructor, called the first time the Academy singleton is used.
/// It registers <see cref="Dispose"/> with <c>Application.quitting</c>, initializes the
/// environment through <see cref="LazyInitialize"/> (which also looks for a communicator)
/// and, in the Editor only, subscribes to play mode state changes.
/// </summary>
Academy()
{
// Shut the Academy down when the application quits.
Application.quitting += Dispose;
LazyInitialize();
#if UNITY_EDITOR
// Editor only: HandleOnPlayModeChanged disposes the Academy when edit mode is exited.
EditorApplication.playModeStateChanged += HandleOnPlayModeChanged;
#endif
}
#if UNITY_EDITOR
/// <summary>
/// Editor callback that disposes the Academy when the Editor is leaving edit mode
/// (i.e. switching from edit mode to play mode).
/// </summary>
/// <param name="state">The play mode transition reported by the Editor.</param>
void HandleOnPlayModeChanged(PlayModeStateChange state)
{
    // Every other transition is ignored.
    if (state != PlayModeStateChange.ExitingEditMode)
    {
        return;
    }

    Dispose();
}
#endif
/// <summary>
/// Initializes the Academy unless that has already happened.
/// Calling this repeatedly is safe: once the Academy is initialized the call does nothing.
/// </summary>
internal void LazyInitialize()
{
    if (m_Initialized)
    {
        return;
    }

    InitializeEnvironment();
    // Only flagged after InitializeEnvironment has returned.
    m_Initialized = true;
}
/// <summary>
/// Turns on stepping of the Academy during the FixedUpdate phase by creating a hidden
/// GameObject that carries an <see cref="AcademyFixedUpdateStepper"/>, which in turn calls
/// Academy.EnvironmentStep(). Does nothing when a stepper already exists.
/// </summary>
void EnableAutomaticStepping()
{
    if (m_FixedUpdateStepper == null)
    {
        m_StepperObject = new GameObject("AcademyFixedUpdateStepper");
        // Keep the helper object out of the hierarchy view.
        m_StepperObject.hideFlags = HideFlags.HideInHierarchy;
        m_FixedUpdateStepper = m_StepperObject.AddComponent<AcademyFixedUpdateStepper>();
        try
        {
            // DontDestroyOnLoad cannot be used in Editor Tests, hence the try-catch.
            UnityEngine.Object.DontDestroyOnLoad(m_StepperObject);
        }
        catch
        {
            // Deliberately ignored: the stepper still works without DontDestroyOnLoad.
        }
    }
}
/// <summary>
/// Turns off stepping of the Academy during the FixedUpdate phase and destroys the helper
/// GameObject. Afterwards the user must step the Academy manually by calling
/// Academy.EnvironmentStep(). Does nothing when no stepper exists.
/// </summary>
void DisableAutomaticStepping()
{
    if (m_FixedUpdateStepper != null)
    {
        m_FixedUpdateStepper = null;
        // The Editor uses the immediate variant; everything else uses the regular Destroy.
        if (!Application.isEditor)
        {
            UnityEngine.Object.Destroy(m_StepperObject);
        }
        else
        {
            UnityEngine.Object.DestroyImmediate(m_StepperObject);
        }
        m_StepperObject = null;
    }
}
/// <summary>
/// Controls whether the Academy is stepped automatically during the FixedUpdate phase.
/// </summary>
/// <value>
/// Reads <c>true</c> while a FixedUpdate stepper exists. Set <c>true</c> to enable automatic
/// stepping; <c>false</c> to disable.
/// </value>
public bool AutomaticSteppingEnabled
{
    get { return m_FixedUpdateStepper != null; }
    set
    {
        if (!value)
        {
            DisableAutomaticStepping();
            return;
        }

        EnableAutomaticStepping();
    }
}
// Used to read Python-provided environment parameters.
// Scans the process command line for k_PortCommandLineFlag and parses the argument that
// follows it (the last occurrence wins). When the flag is absent, has no value after it, or
// the value is not a valid integer, the fallback is k_EditorTrainingPort in the Editor and
// -1 in a built player.
static int ReadPortFromArgs()
{
    var args = Environment.GetCommandLineArgs();
    var inputPort = "";
    // Stop one element early: a flag in the final position has no value to read, and
    // indexing args[i + 1] there would run past the end of the array.
    for (var i = 0; i < args.Length - 1; i++)
    {
        if (args[i] == k_PortCommandLineFlag)
        {
            inputPort = args[i + 1];
        }
    }

    // TryParse rejects the same inputs int.Parse would throw on (empty, malformed,
    // out of range) without using exceptions for control flow.
    int parsedPort;
    if (int.TryParse(inputPort, out parsedPort))
    {
        return parsedPort;
    }

    // No arg passed, or malformed port number.
#if UNITY_EDITOR
    // Try connecting on the default editor port
    return k_EditorTrainingPort;
#else
    // This is an executable, so we don't try to connect.
    return -1;
#endif
}
EnvironmentParameters m_EnvironmentParameters;
StatsRecorder m_StatsRecorder;
/// <summary>
/// Returns the <see cref="EnvironmentParameters"/> instance. If training
/// features such as Curriculum Learning or Environment Parameter Randomization are used,
/// the parameter values generated by the training process can be retrieved from it.
/// </summary>
/// <value>The instance created when the environment was initialized.</value>
public EnvironmentParameters EnvironmentParameters => m_EnvironmentParameters;
/// <summary>
/// Returns the <see cref="StatsRecorder"/> instance, which can be used
/// to record any statistics from the Unity environment.
/// </summary>
/// <value>The instance created when the environment was initialized.</value>
public StatsRecorder StatsRecorder => m_StatsRecorder;
/// <summary>
/// Initializes the environment, configures it and initializes the Academy: records version
/// metadata, enables automatic stepping, registers the engine configuration side channel,
/// creates the environment parameters and stats recorder, and tries to connect to a trainer.
/// </summary>
void InitializeEnvironment()
{
    TimerStack.Instance.AddMetadata("communication_protocol_version", k_ApiVersion);
    TimerStack.Instance.AddMetadata("com.unity.ml-agents_version", k_PackageVersion);

    EnableAutomaticStepping();

    SideChannelsManager.RegisterSideChannel(new EngineConfigurationChannel());
    m_EnvironmentParameters = new EnvironmentParameters();
    m_StatsRecorder = new StatsRecorder();

    // Use the arguments passed at launch to decide whether to launch a communicator.
    var trainerPort = ReadPortFromArgs();
    if (trainerPort > 0)
    {
        var connectionParameters = new CommunicatorInitParameters
        {
            port = trainerPort
        };
        Communicator = new RpcCommunicator(connectionParameters);
    }

    if (Communicator != null)
    {
        // Exchange the first message with Python. A failure here means no Python process is
        // ready to train the environment, in which case the environment must use inference.
        try
        {
            var handshakeParameters = new CommunicatorInitParameters
            {
                unityCommunicationVersion = k_ApiVersion,
                unityPackageVersion = k_PackageVersion,
                name = "AcademySingleton",
                CSharpCapabilities = new UnityRLCapabilities()
            };
            var trainerReply = Communicator.Initialize(handshakeParameters);
            UnityEngine.Random.InitState(trainerReply.seed);
            // Inference-only Agents may exist as well, so they get the same seed.
            m_InferenceSeed = trainerReply.seed;
            TrainerCapabilities = trainerReply.TrainerCapabilities;
            TrainerCapabilities.WarnOnPythonMissingBaseRLCapabilities();
        }
        catch
        {
            Debug.Log(
                $"Couldn't connect to trainer on port {trainerPort} using API version {k_ApiVersion}. Will perform inference instead.");
            Communicator = null;
        }

        // Only a communicator that survived the handshake gets its commands wired up.
        if (Communicator != null)
        {
            Communicator.QuitCommandReceived += OnQuitCommandReceived;
            Communicator.ResetCommandReceived += OnResetCommand;
        }
    }

    // With a communicator we assume training mode; without one we assume inference mode.
    // Either way the Academy events start from their empty defaults.
    ResetActions();
}
/// <summary>
/// Replaces every Academy event with an empty handler, dropping all current subscribers so
/// the Academy holds no references to old objects.
/// </summary>
void ResetActions()
{
    DecideAction = () => {};
    DestroyAction = () => {};
    AgentPreStep = i => {};
    // AgentIncrementStep has to be cleared with the others; otherwise its subscribers
    // stay referenced after the Academy is disposed.
    AgentIncrementStep = () => {};
    AgentSendState = () => {};
    AgentAct = () => {};
    AgentForceReset = () => {};
    OnEnvironmentReset = () => {};
}
/// <summary>
/// Handler for the communicator's QuitCommandReceived event. In the Editor it stops play
/// mode first; it then calls Application.Quit().
/// </summary>
static void OnQuitCommandReceived()
{
#if UNITY_EDITOR
// Editor only: leave play mode.
EditorApplication.isPlaying = false;
#endif
Application.Quit();
}
/// <summary>
/// Handler for the communicator's ResetCommandReceived event; performs a forced full reset.
/// </summary>
void OnResetCommand()
{
ForcedFullReset();
}
/// <summary>
/// The current episode count, incremented each time the environment is reset.
/// </summary>
/// <value>
/// Current episode number.
/// </value>
public int EpisodeCount => m_EpisodeCount;
/// <summary>
/// The current step count (within the current episode). It returns to 0 whenever the
/// environment is reset.
/// </summary>
/// <value>
/// Current step count.
/// </value>
public int StepCount => m_StepCount;
/// <summary>
/// Returns the total step count, i.e. the number of environment steps taken so far.
/// </summary>
/// <value>
/// Total step count.
/// </value>
public int TotalStepCount => m_TotalStepCount;
/// <summary>
/// Forces a full reset: resets the environment, then raises the AgentForceReset event.
/// The done flags are not affected. It is called on the first environment step, and every
/// time the communicator raises a reset command.
/// </summary>
void ForcedFullReset()
{
EnvironmentReset();
AgentForceReset?.Invoke();
// Lets EnvironmentStep know that the initial reset has already happened.
m_HadFirstReset = true;
}
/// <summary>
/// Performs a single environment update of the Academy and Agent
/// objects within the environment.
/// </summary>
/// <remarks>
/// The phases run in a fixed order: a forced full reset on the very first step, the
/// <see cref="AgentPreStep"/> event, the step counters, and then the AgentIncrementStep,
/// AgentSendState, DecideAction and AgentAct events.
/// </remarks>
public void EnvironmentStep()
{
// Detects an Academy step called by user code that is also called by the Academy.
using (m_StepRecursionChecker.Start())
{
// The first step ever taken triggers the initial reset.
if (!m_HadFirstReset)
{
ForcedFullReset();
}
// Listeners receive the step count as it was before this step is counted.
AgentPreStep?.Invoke(m_StepCount);
m_StepCount += 1;
m_TotalStepCount += 1;
AgentIncrementStep?.Invoke();
// Agents that requested a decision send their state to their Policy.
using (TimerStack.Instance.Scoped("AgentSendState"))
{
AgentSendState?.Invoke();
}
// Agents use their Policy to decide on their next action.
using (TimerStack.Instance.Scoped("DecideAction"))
{
DecideAction?.Invoke();
}
// If the communicator is not on, we need to clear the SideChannel sending queue
if (!IsCommunicatorOn)
{
SideChannelsManager.GetSideChannelMessage();
}
// Agents that requested a decision act.
using (TimerStack.Instance.Scoped("AgentAct"))
{
AgentAct?.Invoke();
}
}
}
/// <summary>
/// Resets the environment, including the Academy: starts a new episode, puts the
/// per-episode step count back to zero and raises <see cref="OnEnvironmentReset"/>.
/// </summary>
void EnvironmentReset()
{
    m_EpisodeCount += 1;
    m_StepCount = 0;
    // Listeners are notified after both counters have been updated.
    OnEnvironmentReset?.Invoke();
}
/// <summary>
/// Retrieves the ModelRunner that already uses the given NNModel and InferenceDevice,
/// or creates one when no such runner exists.
/// </summary>
/// <param name="model">The NNModel the ModelRunner must use.</param>
/// <param name="brainParameters">The BrainParameters used to create the ModelRunner.</param>
/// <param name="inferenceDevice">
/// The inference device (CPU or GPU) the ModelRunner will use.
/// </param>
/// <returns> The ModelRunner compatible with the input settings.</returns>
internal ModelRunner GetOrCreateModelRunner(
    NNModel model, BrainParameters brainParameters, InferenceDevice inferenceDevice)
{
    var existingRunner = m_ModelRunners.Find(
        runner => runner.HasModel(model, inferenceDevice));
    if (existingRunner != null)
    {
        return existingRunner;
    }

    var createdRunner = new ModelRunner(
        model, brainParameters, inferenceDevice, m_InferenceSeed);
    m_ModelRunners.Add(createdRunner);
    // Each new runner advances the seed handed to the next one.
    m_InferenceSeed++;
    return createdRunner;
}
/// <summary>
/// Shut down the Academy: stops automatic stepping, notifies listeners, disposes the
/// communicator, environment parameters, stats recorder and model runners, unregisters the
/// side channels, saves the timers and resets the singleton so that the next access to
/// <see cref="Instance"/> creates a fresh Academy.
/// </summary>
public void Dispose()
{
    // Detach from the static Unity events the constructor subscribed to. Otherwise a
    // disposed Academy stays referenced by them and is disposed again later, which would
    // tear down the side channels and the singleton of whichever Academy is live then.
    Application.quitting -= Dispose;
#if UNITY_EDITOR
    EditorApplication.playModeStateChanged -= HandleOnPlayModeChanged;
#endif

    DisableAutomaticStepping();

    // Signal to listeners that the academy is being destroyed now
    DestroyAction?.Invoke();

    Communicator?.Dispose();
    Communicator = null;

    // These are still null if initialization failed part-way; Dispose must not throw then.
    m_EnvironmentParameters?.Dispose();
    m_StatsRecorder?.Dispose();
    SideChannelsManager.UnregisterAllSideChannels(); // unregister custom side channels

    if (m_ModelRunners != null)
    {
        foreach (var mr in m_ModelRunners)
        {
            mr.Dispose();
        }
        m_ModelRunners = null;
    }

    // Clear out the actions so we're not keeping references to any old objects
    ResetActions();

    // TODO - Pass worker ID or some other identifier,
    // so that multiple envs won't overwrite each others stats.
    TimerStack.Instance.SaveJsonTimers();
    m_Initialized = false;

    // Reset the Lazy instance
    s_Lazy = new Lazy<Academy>(() => new Academy());
}
/// <summary>
/// Checks whether the given AcademyFixedUpdateStepper lives on the stepper GameObject held
/// by the current Academy singleton.
/// </summary>
/// <param name="stepper">The stepper to check.</param>
/// <returns><c>true</c> if both refer to the same GameObject instance.</returns>
internal bool IsStepperOwner(AcademyFixedUpdateStepper stepper)
{
    var candidateObject = stepper.gameObject;
    var registeredObject = Academy.Instance.m_StepperObject;
    return object.ReferenceEquals(candidateObject, registeredObject);
}
}
}