-
-
Notifications
You must be signed in to change notification settings - Fork 138
Expand file tree
/
Copy pathenv_player.py
More file actions
551 lines (488 loc) · 21.4 KB
/
env_player.py
File metadata and controls
551 lines (488 loc) · 21.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
"""This module defines a player class exposing the Open AI Gym API with utility functions.
"""
from abc import ABC
from threading import Lock
from typing import Dict, List, Optional, Union
from poke_env.environment.abstract_battle import AbstractBattle
from poke_env.player.battle_order import BattleOrder, ForfeitBattleOrder
from poke_env.player.openai_api import ActType, ObsType, OpenAIGymEnv
from poke_env.player.player import Player
from poke_env.ps_client.account_configuration import AccountConfiguration
from poke_env.ps_client.server_configuration import ServerConfiguration
from poke_env.teambuilder.teambuilder import Teambuilder
class EnvPlayer(OpenAIGymEnv[ObsType, ActType], ABC):
    """Base player exposing the Open AI Gym Env API, plus reward/opponent helpers.

    Subclasses override ``_ACTION_SPACE`` and ``_DEFAULT_BATTLE_FORMAT``.
    """

    # Discrete action ids understood by the concrete subclass; empty here.
    _ACTION_SPACE: List[int] = []
    # Battle format used when the constructor receives no (or an empty) format.
    _DEFAULT_BATTLE_FORMAT = "gen8randombattle"

    def __init__(
        self,
        opponent: Optional[Union[Player, str]],
        account_configuration: Optional[AccountConfiguration] = None,
        *,
        avatar: Optional[int] = None,
        battle_format: Optional[str] = None,
        log_level: Optional[int] = None,
        save_replays: Union[bool, str] = False,
        server_configuration: Optional[ServerConfiguration] = None,
        start_listening: bool = True,
        start_timer_on_battle_start: bool = False,
        ping_interval: Optional[float] = 20.0,
        ping_timeout: Optional[float] = 20.0,
        team: Optional[Union[str, Teambuilder]] = None,
        start_challenging: bool = True,
    ):
        """
        :param opponent: Opponent to challenge. When None, the challenge loop is
            not started regardless of ``start_challenging``.
        :type opponent: Player or str, optional
        :param account_configuration: Player configuration. If empty, defaults to an
            automatically generated username with no password. This option must be
            set if the server configuration requires authentication.
        :type account_configuration: AccountConfiguration, optional
        :param avatar: Player avatar id. Optional.
        :type avatar: int, optional
        :param battle_format: Name of the battle format this player plays. When
            omitted or empty, the class-level ``_DEFAULT_BATTLE_FORMAT`` is used.
        :type battle_format: str, optional
        :param log_level: The player's logger level.
        :type log_level: int. Defaults to logging's default level.
        :param save_replays: Whether to save battle replays. Can be a boolean, where
            True will lead to replays being saved in a potentially new /replay
            folder, or a string representing a folder where replays will be saved.
        :type save_replays: bool or str
        :param server_configuration: Server configuration. Defaults to Localhost
            Server Configuration.
        :type server_configuration: ServerConfiguration, optional
        :param start_listening: Whether to start listening to the server. Defaults
            to True.
        :type start_listening: bool
        :param start_timer_on_battle_start: Whether to automatically start the
            battle timer on battle start. Defaults to False.
        :type start_timer_on_battle_start: bool
        :param ping_interval: How long between keepalive pings (important for
            backend websockets). If None, disables keepalive entirely.
        :type ping_interval: float, optional
        :param ping_timeout: How long to wait for a timeout of a specific ping
            (important for backend websockets; increase only if timeouts occur
            during runtime). If None pings will never time out.
        :type ping_timeout: float, optional
        :param team: The team to use for formats requiring a team. Can be a showdown
            team string, a showdown packed team string, or a Teambuilder object.
            Defaults to None.
        :type team: str or Teambuilder, optional
        :param start_challenging: Whether to automatically start the challenge loop
            or leave it inactive.
        :type start_challenging: bool
        """
        # Own state is set up before delegating to the parent constructor.
        self._reward_buffer: Dict[AbstractBattle, float] = {}
        self._opponent_lock = Lock()
        self._opponent = opponent

        # An empty/None format falls back to the class default.
        chosen_format = battle_format if battle_format else self._DEFAULT_BATTLE_FORMAT
        # Without an opponent there is nobody to challenge.
        should_challenge = False if opponent is None else start_challenging

        super().__init__(
            account_configuration=account_configuration,
            avatar=avatar,
            battle_format=chosen_format,
            log_level=log_level,
            save_replays=save_replays,
            server_configuration=server_configuration,
            start_listening=start_listening,
            start_timer_on_battle_start=start_timer_on_battle_start,
            team=team,
            ping_interval=ping_interval,
            ping_timeout=ping_timeout,
            start_challenging=should_challenge,
        )

    def reward_computing_helper(
        self,
        battle: AbstractBattle,
        *,
        fainted_value: float = 0.0,
        hp_value: float = 0.0,
        number_of_pokemons: int = 6,
        starting_value: float = 0.0,
        status_value: float = 0.0,
        victory_value: float = 1.0,
    ) -> float:
        """Compute a reward as the change in state value since the previous call.

        The value of the current battle state is a weighted sum of several
        factors (remaining HP, fainted pokemons, inflicted statuses, victory).
        The reward returned is that value minus the value stored for the same
        battle on the previous call; the first call for a battle compares against
        ``starting_value``. For instance, if the previous call for battle A gave
        a state value of 8 and this call gives 9, the reward is 9 - 8 = 1.

        Example: each player has 6 pokemons. No opposing pokemon has fainted, but
        three of them are burned. On our side one pokemon has fainted (no HP left)
        and another is missing half of its HP. The state value is then:

        - With fainted value: 1, status value: 0.5, hp value: 1:
            = - 1 (fainted) + 3 * 0.5 (status) - 1.5 (our hp) = -1
        - With fainted value: 3, status value: 0, hp value: 1:
            = - 3 + 3 * 0 - 1.5 = -4.5

        :param battle: The battle for which to compute rewards.
        :type battle: AbstractBattle
        :param fainted_value: The reward weight for fainted pokemons. Defaults to 0.
        :type fainted_value: float
        :param hp_value: The reward weight for hp per pokemon. Defaults to 0.
        :type hp_value: float
        :param number_of_pokemons: The number of pokemons per team. Defaults to 6.
        :type number_of_pokemons: int
        :param starting_value: The default reference value evaluation. Defaults to 0.
        :type starting_value: float
        :param status_value: The reward value per non-fainted status. Defaults to 0.
        :type status_value: float
        :param victory_value: The reward value for winning. Defaults to 1.
        :type victory_value: float
        :return: The reward.
        :rtype: float
        """
        # Register the battle on first sight and fetch the reference value.
        previous_value = self._reward_buffer.setdefault(battle, starting_value)

        state_value = 0

        # Our side: HP counts for us, faints and statuses count against us.
        for own_mon in battle.team.values():
            state_value += own_mon.current_hp_fraction * hp_value
            if own_mon.fainted:
                state_value -= fainted_value
                continue
            if own_mon.status is not None:
                state_value -= status_value

        # Team slots not present in battle.team are credited as full HP.
        missing_own = number_of_pokemons - len(battle.team)
        state_value += missing_own * hp_value

        # Opposing side: exactly the mirror image of the above.
        for foe_mon in battle.opponent_team.values():
            state_value -= foe_mon.current_hp_fraction * hp_value
            if foe_mon.fainted:
                state_value += fainted_value
                continue
            if foe_mon.status is not None:
                state_value += status_value

        missing_foe = number_of_pokemons - len(battle.opponent_team)
        state_value -= missing_foe * hp_value

        if battle.won:
            state_value += victory_value
        elif battle.lost:
            state_value -= victory_value

        # Remember this value as the reference for the next call.
        self._reward_buffer[battle] = state_value
        return state_value - previous_value

    def action_space_size(self) -> int:
        """Return the number of actions in this class's action space."""
        return len(self._ACTION_SPACE)

    def get_opponent(self) -> Union[Player, str, List[Player], List[str]]:
        """Return the currently configured opponent.

        :raises RuntimeError: If no opponent has been specified.
        """
        # Only the read needs the lock; validation works on the local copy.
        with self._opponent_lock:
            current = self._opponent
        if current is None:
            raise RuntimeError(
                "Unspecified opponent. "
                "Specify it in the constructor or use set_opponent"
            )
        return current

    def set_opponent(self, opponent: Union[Player, str]):
        """
        Sets the next opponent to the specified opponent.

        :param opponent: The next opponent to challenge
        :type opponent: Player or str
        """
        with self._opponent_lock:
            self._opponent = opponent

    def reset_env(
        self, opponent: Optional[Union[Player, str]] = None, restart: bool = True
    ):
        """
        Resets the environment to an inactive state: it will forfeit all unfinished
        battles, reset the internal battle tracker and optionally change the next
        opponent and restart the challenge loop.

        :param opponent: The opponent to use for the next battles. If empty it
            will not change opponent.
        :type opponent: Player or str, optional
        :param restart: If True the challenge loop will be restarted before
            returning, otherwise the challenge loop will be left inactive and can
            be started manually.
        :type restart: bool
        """
        self.close(purge=False)
        self.reset_battles()
        # NOTE(review): _reward_buffer is not cleared here, so entries for past
        # battles are kept across resets - confirm whether that is intended.
        if opponent:
            self.set_opponent(opponent)
        if restart:
            self.start_challenging()
class Gen4EnvSinglePlayer(EnvPlayer[ObsType, ActType], ABC):
    """Single-battle env player for generation 4.

    The action space has 4 move slots followed by 6 switch slots.
    """

    _ACTION_SPACE = list(range(4 + 6))
    _DEFAULT_BATTLE_FORMAT = "gen4randombattle"

    def action_to_move(self, action: int, battle: AbstractBattle) -> BattleOrder:
        """Converts actions to move orders.

        The conversion is done as follows:

        action = -1:
            The battle will be forfeited.
        0 <= action < 4:
            The actionth available move in battle.available_moves is executed.
        4 <= action < 10
            The action - 4th available switch in battle.available_switches is
            executed.

        If the proposed action is illegal (out of range, including any negative
        value other than -1, or not currently available), a random legal move is
        performed.

        :param action: The action to convert.
        :type action: int
        :param battle: The battle in which to act.
        :type battle: AbstractBattle
        :return: the order to send to the server.
        :rtype: BattleOrder
        """
        if action == -1:
            return ForfeitBattleOrder()
        elif (
            # Lower bound matters: without it a negative action would silently
            # index available_moves from the end instead of being rejected.
            0 <= action < 4
            and action < len(battle.available_moves)
            and not battle.force_switch
        ):
            return self.agent.create_order(battle.available_moves[action])
        elif 0 <= action - 4 < len(battle.available_switches):
            return self.agent.create_order(battle.available_switches[action - 4])
        else:
            return self.agent.choose_random_move(battle)
class Gen5EnvSinglePlayer(Gen4EnvSinglePlayer[ObsType, ActType], ABC):
    """Single-battle env player for generation 5.

    Inherits everything from Gen4EnvSinglePlayer; only the default battle
    format is overridden.
    """

    _DEFAULT_BATTLE_FORMAT = "gen5randombattle"
class Gen6EnvSinglePlayer(EnvPlayer[ObsType, ActType], ABC):
    """Single-battle env player for generation 6.

    The action space has 4 move slots, 4 move-with-mega-evolution slots and
    6 switch slots.
    """

    _ACTION_SPACE = list(range(2 * 4 + 6))
    _DEFAULT_BATTLE_FORMAT = "gen6randombattle"

    def action_to_move(self, action: int, battle: AbstractBattle) -> BattleOrder:
        """Converts actions to move orders.

        The conversion is done as follows:

        action = -1:
            The battle will be forfeited.
        0 <= action < 4:
            The actionth available move in battle.available_moves is executed.
        4 <= action < 8:
            The action - 4th available move in battle.available_moves is executed,
            with mega-evolution.
        8 <= action < 14
            The action - 8th available switch in battle.available_switches is
            executed.

        If the proposed action is illegal (out of range, including any negative
        value other than -1, or not currently available), a random legal move is
        performed.

        :param action: The action to convert.
        :type action: int
        :param battle: The battle in which to act.
        :type battle: AbstractBattle
        :return: the order to send to the server.
        :rtype: BattleOrder
        """
        if action == -1:
            return ForfeitBattleOrder()
        elif (
            # Lower bound matters: without it a negative action would silently
            # index available_moves from the end instead of being rejected.
            0 <= action < 4
            and action < len(battle.available_moves)
            and not battle.force_switch
        ):
            return self.agent.create_order(battle.available_moves[action])
        elif (
            battle.can_mega_evolve
            and 0 <= action - 4 < len(battle.available_moves)
            and not battle.force_switch
        ):
            return self.agent.create_order(
                battle.available_moves[action - 4], mega=True
            )
        elif 0 <= action - 8 < len(battle.available_switches):
            return self.agent.create_order(battle.available_switches[action - 8])
        else:
            return self.agent.choose_random_move(battle)
class Gen7EnvSinglePlayer(EnvPlayer[ObsType, ActType], ABC):
    """Single-battle env player for generation 7.

    The action space has 4 move slots, 4 z-move slots, 4
    move-with-mega-evolution slots and 6 switch slots.
    """

    _ACTION_SPACE = list(range(3 * 4 + 6))
    _DEFAULT_BATTLE_FORMAT = "gen7randombattle"

    def action_to_move(self, action: int, battle: AbstractBattle) -> BattleOrder:
        """Converts actions to move orders.

        The conversion is done as follows:

        action = -1:
            The battle will be forfeited.
        0 <= action < 4:
            The actionth available move in battle.available_moves is executed.
        4 <= action < 8:
            The action - 4th move in battle.active_pokemon.available_z_moves is
            executed as a z-move.
        8 <= action < 12:
            The action - 8th available move in battle.available_moves is executed,
            with mega-evolution.
        12 <= action < 18
            The action - 12th available switch in battle.available_switches is
            executed.

        If the proposed action is illegal (out of range, including any negative
        value other than -1, or not currently available), a random legal move is
        performed.

        :param action: The action to convert.
        :type action: int
        :param battle: The battle in which to act.
        :type battle: AbstractBattle
        :return: the order to send to the server.
        :rtype: BattleOrder
        """
        if action == -1:
            return ForfeitBattleOrder()
        elif (
            # Lower bound matters: without it a negative action would silently
            # index available_moves from the end instead of being rejected.
            0 <= action < 4
            and action < len(battle.available_moves)
            and not battle.force_switch
        ):
            return self.agent.create_order(battle.available_moves[action])
        elif (
            not battle.force_switch
            and battle.can_z_move
            and battle.active_pokemon
            and 0 <= action - 4 < len(battle.active_pokemon.available_z_moves)
        ):
            return self.agent.create_order(
                battle.active_pokemon.available_z_moves[action - 4], z_move=True
            )
        elif (
            battle.can_mega_evolve
            and 0 <= action - 8 < len(battle.available_moves)
            and not battle.force_switch
        ):
            return self.agent.create_order(
                battle.available_moves[action - 8], mega=True
            )
        elif 0 <= action - 12 < len(battle.available_switches):
            return self.agent.create_order(battle.available_switches[action - 12])
        else:
            return self.agent.choose_random_move(battle)
class Gen8EnvSinglePlayer(EnvPlayer[ObsType, ActType], ABC):
    """Single-battle env player for generation 8.

    The action space has 4 move slots, 4 z-move slots, 4
    move-with-mega-evolution slots, 4 move-with-dynamax slots and 6 switch
    slots.
    """

    _ACTION_SPACE = list(range(4 * 4 + 6))
    _DEFAULT_BATTLE_FORMAT = "gen8randombattle"

    def action_to_move(self, action: int, battle: AbstractBattle) -> BattleOrder:
        """Converts actions to move orders.

        The conversion is done as follows:

        action = -1:
            The battle will be forfeited.
        0 <= action < 4:
            The actionth available move in battle.available_moves is executed.
        4 <= action < 8:
            The action - 4th move in battle.active_pokemon.available_z_moves is
            executed as a z-move.
        8 <= action < 12:
            The action - 8th available move in battle.available_moves is executed,
            with mega-evolution.
        12 <= action < 16:
            The action - 12th available move in battle.available_moves is executed,
            while dynamaxing.
        16 <= action < 22
            The action - 16th available switch in battle.available_switches is
            executed.

        If the proposed action is illegal (out of range, including any negative
        value other than -1, or not currently available), a random legal move is
        performed.

        :param action: The action to convert.
        :type action: int
        :param battle: The battle in which to act.
        :type battle: AbstractBattle
        :return: the order to send to the server.
        :rtype: BattleOrder
        """
        if action == -1:
            return ForfeitBattleOrder()
        elif (
            # Lower bound matters: without it a negative action would silently
            # index available_moves from the end instead of being rejected.
            0 <= action < 4
            and action < len(battle.available_moves)
            and not battle.force_switch
        ):
            return self.agent.create_order(battle.available_moves[action])
        elif (
            not battle.force_switch
            and battle.can_z_move
            and battle.active_pokemon
            and 0 <= action - 4 < len(battle.active_pokemon.available_z_moves)
        ):
            return self.agent.create_order(
                battle.active_pokemon.available_z_moves[action - 4], z_move=True
            )
        elif (
            battle.can_mega_evolve
            and 0 <= action - 8 < len(battle.available_moves)
            and not battle.force_switch
        ):
            return self.agent.create_order(
                battle.available_moves[action - 8], mega=True
            )
        elif (
            battle.can_dynamax
            and 0 <= action - 12 < len(battle.available_moves)
            and not battle.force_switch
        ):
            return self.agent.create_order(
                battle.available_moves[action - 12], dynamax=True
            )
        elif 0 <= action - 16 < len(battle.available_switches):
            return self.agent.create_order(battle.available_switches[action - 16])
        else:
            return self.agent.choose_random_move(battle)
class Gen9EnvSinglePlayer(EnvPlayer[ObsType, ActType], ABC):
    """Single-battle env player for generation 9.

    The action space has 4 move slots, 4 z-move slots, 4
    move-with-mega-evolution slots, 4 move-with-dynamax slots, 4
    move-with-terastallization slots and 6 switch slots.
    """

    _ACTION_SPACE = list(range(5 * 4 + 6))
    _DEFAULT_BATTLE_FORMAT = "gen9randombattle"

    def action_to_move(self, action: int, battle: AbstractBattle) -> BattleOrder:
        """Converts actions to move orders.

        The conversion is done as follows:

        action = -1:
            The battle will be forfeited.
        0 <= action < 4:
            The actionth available move in battle.available_moves is executed.
        4 <= action < 8:
            The action - 4th move in battle.active_pokemon.available_z_moves is
            executed as a z-move.
        8 <= action < 12:
            The action - 8th available move in battle.available_moves is executed,
            with mega-evolution.
        12 <= action < 16:
            The action - 12th available move in battle.available_moves is executed,
            while dynamaxing.
        16 <= action < 20:
            The action - 16th available move in battle.available_moves is executed,
            while terastallizing.
        20 <= action < 26
            The action - 20th available switch in battle.available_switches is
            executed.

        If the proposed action is illegal (out of range, including any negative
        value other than -1, or not currently available), a random legal move is
        performed.

        :param action: The action to convert.
        :type action: int
        :param battle: The battle in which to act.
        :type battle: AbstractBattle
        :return: the order to send to the server.
        :rtype: BattleOrder
        """
        if action == -1:
            return ForfeitBattleOrder()
        elif (
            # Lower bound matters: without it a negative action would silently
            # index available_moves from the end instead of being rejected.
            0 <= action < 4
            and action < len(battle.available_moves)
            and not battle.force_switch
        ):
            return self.agent.create_order(battle.available_moves[action])
        elif (
            not battle.force_switch
            and battle.can_z_move
            and battle.active_pokemon
            and 0 <= action - 4 < len(battle.active_pokemon.available_z_moves)
        ):
            return self.agent.create_order(
                battle.active_pokemon.available_z_moves[action - 4], z_move=True
            )
        elif (
            battle.can_mega_evolve
            and 0 <= action - 8 < len(battle.available_moves)
            and not battle.force_switch
        ):
            return self.agent.create_order(
                battle.available_moves[action - 8], mega=True
            )
        elif (
            battle.can_dynamax
            and 0 <= action - 12 < len(battle.available_moves)
            and not battle.force_switch
        ):
            return self.agent.create_order(
                battle.available_moves[action - 12], dynamax=True
            )
        elif (
            battle.can_tera
            and 0 <= action - 16 < len(battle.available_moves)
            and not battle.force_switch
        ):
            return self.agent.create_order(
                battle.available_moves[action - 16], terastallize=True
            )
        elif 0 <= action - 20 < len(battle.available_switches):
            return self.agent.create_order(battle.available_switches[action - 20])
        else:
            return self.agent.choose_random_move(battle)