## Game Plan:

We'll have 2 bots play each other 10000 times. Full matches. A couple things I'll need to do for each battle (each side):

1. Generate 6 Pokemon with random base stats (e.g. from 40 to 180); distribute EVs accordingly.

2. Generate 4 random moves for each Pokemon, with random stats (e.g. type, power, accuracy, stat boost, status inflicted, recoil).

    2a. Stat boost 000000 = +1 attack, 000001 = -1 attack, 000010 = +1 attack, etc.
    
    2b. Status 000 = nothing, 001 = sleep, 010 = para, etc.
    
    2c. For recoil, factor in Recover/Roost/etc. as negative recoil
    
3. Generate typings for the Pokemon. Will need a lot more bits for dual types (e.g. Dark + Fighting).
    
4. Each Pokemon will have 9 choices at max: one of the 4 moves, or switch to another Pokemon in the party.

In [19]:
import random
def generate_random_pokemon():
    stats = ['HP', 'Attack', 'Defense', 'Special Attack', 'Special Defense', 'Speed']
    pokemon = {stat: {'Base': random.randint(40, 180), 'EV': 0} for stat in stats}
    
    total_evs = 510
    while total_evs > 0:
        stat = random.choice(stats)
        ev_increase = min(random.randint(1, 252), total_evs, 252 - pokemon[stat]['EV'])
        pokemon[stat]['EV'] += ev_increase
        total_evs -= ev_increase

    return pokemon

type_effectiveness = {
    'Normal': {'Rock': 0.5, 'Ghost': 0, 'Steel': 0.5},
    'Fire': {'Fire': 0.5, 'Water': 0.5, 'Grass': 2, 'Ice': 2, 'Bug': 2, 'Rock': 0.5, 'Dragon': 0.5, 'Steel': 2},
    'Water': {'Fire': 2, 'Water': 0.5, 'Grass': 0.5, 'Ground': 2, 'Rock': 2, 'Dragon': 0.5},
    'Electric': {'Water': 2, 'Electric': 0.5, 'Grass': 0.5, 'Ground': 0, 'Flying': 2, 'Dragon': 0.5},
    'Grass': {'Fire': 0.5, 'Water': 2, 'Grass': 0.5, 'Poison': 0.5, 'Ground': 2, 'Flying': 0.5, 'Bug': 0.5, 
              'Rock': 2, 'Dragon': 0.5, 'Steel': 0.5},
    'Ice': {'Fire': 0.5, 'Water': 0.5, 'Grass': 2, 'Ice': 0.5, 'Ground': 2, 'Flying': 2, 'Dragon': 2, 'Steel': 0.5},
    'Fighting': {'Normal': 2, 'Ice': 2, 'Poison': 0.5, 'Flying': 0.5, 'Psychic': 0.5, 'Bug': 0.5, 'Rock': 2, 
                 'Ghost': 0, 'Dark': 2, 'Steel': 2},
    'Poison': {'Grass': 2, 'Poison': 0.5, 'Ground': 0.5, 'Rock': 0.5, 'Ghost': 0.5, 'Steel': 0},
    'Ground': {'Fire': 2, 'Electric': 2, 'Grass': 0.5, 'Poison': 2, 'Flying': 0, 'Bug': 0.5, 'Rock': 2, 'Steel': 2},
    'Flying': {'Electric': 0.5, 'Grass': 2, 'Fighting': 2, 'Bug': 2, 'Rock': 0.5, 'Steel': 0.5},
    'Psychic': {'Fighting': 2, 'Poison': 2, 'Psychic': 0.5, 'Dark': 0, 'Steel': 0.5},
    'Bug': {'Fire': 0.5, 'Grass': 2, 'Fighting': 0.5, 'Poison': 0.5, 'Flying': 0.5, 'Psychic': 2, 'Ghost': 0.5, 
            'Dark': 2, 'Steel': 0.5},
    'Rock': {'Fire': 2, 'Ice': 2, 'Fighting': 0.5, 'Ground': 0.5, 'Flying': 2, 'Bug': 2, 'Steel': 0.5},
    'Ghost': {'Normal': 0, 'Psychic': 2, 'Ghost': 2, 'Dark': 0.5, 'Steel': 0.5},
    'Dragon': {'Dragon': 2, 'Steel': 0.5},
    'Dark': {'Fighting': 0.5, 'Psychic': 2, 'Ghost': 2, 'Dark': 0.5, 'Steel': 0.5},
    'Steel': {'Fire': 0.5, 'Water': 0.5, 'Electric': 0.5, 'Ice': 2, 'Rock': 2, 'Steel': 0.5}
}

def calculate_damage(move_power, attacker_attack, defender_defense, attack_type, defender_types, level, type_effectiveness):
    # Base damage calculation.
    base_damage = (((((2 * level) / 5) + 2) * move_power * (attacker_attack / defender_defense)) / 50) + 2

    effectiveness = 1
    for defender_type in defender_types:
        effectiveness *= type_effectiveness.get(attack_type, {}).get(defender_type, 1)

    # Factor in typing here mate.
    total_damage = base_damage * effectiveness

    return total_damage

## OK, Now What

I've got the random Pokemon generated. I've got an accurate calculator.

Now, I need to simulate matches.

1. Create scoring function.

2. Define rules (don't care about PP here):

    2a. Accuracy, 
    
    2b. Status, 
    
    2c. Speed tiers,
    
    2d. Stat raises,
    
    2e. Switching out,
    
    2f. Making a pokemon 'faint'.

3. Generate 6 random Pokemon for both agent and bots.

4. Crack on 10000 times, letting it know when to stop (e.g. if len(party_1) == 0).

In [20]:
BURN = 1
FREEZE = 2
PARALYSIS = 3
POISON = 4
SLEEP = 5

class Pokemon:
    def __init__(self, hp, att, defence, sp_att, sp_defence, speed, move_1, move_2, move_3, move_4, type_1, type_2):
        self.hp = hp
        self.att = att
        self.defence = defence
        self.sp_att = sp_att
        self.sp_defence = sp_defence
        self.speed = speed
        self.move_1 = move_1
        self.move_2 = move_2
        self.move_3 = move_3
        self.move_4 = move_4
        self.type_1 = type_1
        self.type_2 = type_2
        
        self.status = None
        self.att_change = 0
        self.def_change = 0
        self.sp_att_change = 0
        self.sp_def_change = 0
        self.speed_change = 0
        
        self.remaining_hp = hp
        
    def __str__(self):
        types = f"{self.type_1}" if self.type_2 is None else f"{self.type_1}/{self.type_2}"
        return (
            f"Pokemon: \n"
            f"  Types: {types}\n"
            f"  HP: {self.hp}, Remaining HP: {self.remaining_hp}\n"
            f"  Attack: {self.att} (Change: {self.att_change})\n"
            f"  Defence: {self.defence} (Change: {self.def_change})\n"
            f"  Special Attack: {self.sp_att} (Change: {self.sp_att_change})\n"
            f"  Special Defence: {self.sp_defence} (Change: {self.sp_def_change})\n"
            f"  Speed: {self.speed} (Change: {self.speed_change})\n"
            f"  Status: {self.status or 'None'}\n"
            f"  Moves:\n"
            f"    1. {self.move_1}\n"
            f"    2. {self.move_2}\n"
            f"    3. {self.move_3}\n"
            f"    4. {self.move_4}\n"
        )
    
class Move:
    def __init__(self, move_type, phys=0, power=0, acc=100, att_raise=0, def_raise=0, 
                 sp_att_raise=0, sp_def_raise=0, speed_raise=0):
        self.move_type = move_type
        self.power = power
        self.acc = acc
        self.phys = phys # 0 is false
        
        self.att_raise = att_raise
        self.def_raise = def_raise
        self.sp_att_raise = sp_att_raise
        self.sp_def_raise = sp_def_raise
        self.speed_raise = speed_raise

    def __str__(self):
        stats = []
        if self.att_raise: stats.append(f"Attack: {self.att_raise:+}")
        if self.def_raise: stats.append(f"Defense: {self.def_raise:+}")
        if self.sp_att_raise: stats.append(f"Sp. Attack: {self.sp_att_raise:+}")
        if self.sp_def_raise: stats.append(f"Sp. Defense: {self.sp_def_raise:+}")
        if self.speed_raise: stats.append(f"Speed: {self.speed_raise:+}")

        stat_changes = ", ".join(stats) if stats else "No stat changes"
        phys_str = "Physical" if self.phys else "Special"
        return (f"Type: {self.move_type}, ({phys_str}), Power: {self.power}, Accuracy: {self.acc}%, "
        f"Stat Changes: {stat_changes}")

In [21]:
class Party:
    def __init__(self, pokemons):
        """
        Initializes a new Party with a list of Pokémon.

        :param pokemons: A list of Pokémon instances.
        """
        self.pokemons = pokemons
        self.current_pokemon_index = 0  # Index to track the current Pokémon in battle
        self.dead = set() # set of dead indices

    def get_current_pokemon(self):
        """
        Returns the current Pokémon that is in battle.

        :return: Pokémon instance.
        """
        return self.pokemons[self.current_pokemon_index]

    def switch_pokemon(self):
        """
        Switches to the next available Pokémon in the party.

        :return: Boolean indicating if the switch was successful.
        """
        for i in range(len(self.pokemons)):
            # Find the next Pokémon that is not fainted.
            if self.pokemons[i].hp > 0:
                self.current_pokemon_index = i
                return True
        return False  # No available Pokémon to switch to

    def all_fainted(self):
        """
        Checks if all Pokémon in the party have fainted.

        :return: Boolean indicating if all Pokémon have fainted.
        """
        return all(pokemon.remaining_hp <= 0 for pokemon in self.pokemons)

    def __str__(self):
        return f'Party({[pokemon.__dict__ for pokemon in self.pokemons]})'

## Simulation

Got the parties and helper functions defined. Now, crack on.

In [22]:
import random
def generate_random_pokemon():
    stats = ['HP', 'Attack', 'Defense', 'Special Attack', 'Special Defense', 'Speed']
    pokemon = {stat: {'Base': random.randint(40, 180), 'EV': 0} for stat in stats}
    
    total_evs = 510
    while total_evs > 0:
        stat = random.choice(stats)
        ev_increase = min(random.randint(1, 252), total_evs, 252 - pokemon[stat]['EV'])
        pokemon[stat]['EV'] += ev_increase
        total_evs -= ev_increase

    return pokemon

type_effectiveness = {
    'Normal': {'Rock': 0.5, 'Ghost': 0, 'Steel': 0.5},
    'Fire': {'Fire': 0.5, 'Water': 0.5, 'Grass': 2, 'Ice': 2, 'Bug': 2, 'Rock': 0.5, 'Dragon': 0.5, 'Steel': 2},
    'Water': {'Fire': 2, 'Water': 0.5, 'Grass': 0.5, 'Ground': 2, 'Rock': 2, 'Dragon': 0.5},
    'Electric': {'Water': 2, 'Electric': 0.5, 'Grass': 0.5, 'Ground': 0, 'Flying': 2, 'Dragon': 0.5},
    'Grass': {'Fire': 0.5, 'Water': 2, 'Grass': 0.5, 'Poison': 0.5, 'Ground': 2, 'Flying': 0.5, 'Bug': 0.5, 
              'Rock': 2, 'Dragon': 0.5, 'Steel': 0.5},
    'Ice': {'Fire': 0.5, 'Water': 0.5, 'Grass': 2, 'Ice': 0.5, 'Ground': 2, 'Flying': 2, 'Dragon': 2, 'Steel': 0.5},
    'Fighting': {'Normal': 2, 'Ice': 2, 'Poison': 0.5, 'Flying': 0.5, 'Psychic': 0.5, 'Bug': 0.5, 'Rock': 2, 
                 'Ghost': 0, 'Dark': 2, 'Steel': 2},
    'Poison': {'Grass': 2, 'Poison': 0.5, 'Ground': 0.5, 'Rock': 0.5, 'Ghost': 0.5, 'Steel': 0},
    'Ground': {'Fire': 2, 'Electric': 2, 'Grass': 0.5, 'Poison': 2, 'Flying': 0, 'Bug': 0.5, 'Rock': 2, 'Steel': 2},
    'Flying': {'Electric': 0.5, 'Grass': 2, 'Fighting': 2, 'Bug': 2, 'Rock': 0.5, 'Steel': 0.5},
    'Psychic': {'Fighting': 2, 'Poison': 2, 'Psychic': 0.5, 'Dark': 0, 'Steel': 0.5},
    'Bug': {'Fire': 0.5, 'Grass': 2, 'Fighting': 0.5, 'Poison': 0.5, 'Flying': 0.5, 'Psychic': 2, 'Ghost': 0.5, 
            'Dark': 2, 'Steel': 0.5},
    'Rock': {'Fire': 2, 'Ice': 2, 'Fighting': 0.5, 'Ground': 0.5, 'Flying': 2, 'Bug': 2, 'Steel': 0.5},
    'Ghost': {'Normal': 0, 'Psychic': 2, 'Ghost': 2, 'Dark': 0.5, 'Steel': 0.5},
    'Dragon': {'Dragon': 2, 'Steel': 0.5},
    'Dark': {'Fighting': 0.5, 'Psychic': 2, 'Ghost': 2, 'Dark': 0.5, 'Steel': 0.5},
    'Steel': {'Fire': 0.5, 'Water': 0.5, 'Electric': 0.5, 'Ice': 2, 'Rock': 2, 'Steel': 0.5}
}

def calculate_damage(move_power, attacker_attack, defender_defense, attack_type, defender_types, level, 
                     type_effectiveness):
    # Base Damage:
    base_damage = (((((2 * level) / 5) + 2) * move_power * (attacker_attack / defender_defense)) / 50) + 2

    effectiveness = 1
    for defender_type in defender_types:
        # Factoring in typing here.
        effectiveness *= type_effectiveness.get(attack_type, {}).get(defender_type, 1)

    total_damage = base_damage * effectiveness

    return total_damage

In [23]:
types = list(type_effectiveness.keys())
def generate_random_move():
    move_type = random.choice([0, 1, 2, 3]) # 0 = attack, 1 = pure stat change

    if move_type: 
        # Generate a damaging move with random power and possible self stat change.
        power = random.choice(range(20, 181, 5))  # Power between 10 and 100, in multiples of 5
        move_type = random.choice(types)
        if random.random() < 0.3:  # 30% chance of having self-affecting stat changes
            changed_att = random.choice(['Attack', 'Defence', 'Special Attack', 'Special Defense', 'Speed'])
            stat_change = random.randint(-3, 3)
            phys = random.choice([0, 1])
            acc = random.choice(range(55, 101, 5))
            
            if changed_att == "Attack":
                return Move(move_type, phys = phys, power=power, acc=acc, att_raise = stat_change)
            elif changed_att == "Defence":
                return Move(move_type, phys = phys, power=power, acc=acc, def_raise = stat_change)
            elif changed_att == "Special Attack":
                return Move(move_type, phys = phys, power=power, acc=acc, sp_att_raise = stat_change)
            elif changed_att == "Special Defense":
                return Move(move_type, phys = phys, power=power, acc=acc, sp_def_raise = stat_change)
            else:
                return Move(move_type, phys = phys, power=power, acc=acc, speed_raise = stat_change)
        else:
            acc = random.choice(range(55, 101, 5))
            phys = random.choice([0, 1])
            return Move(move_type, phys = phys, power=power, acc=acc)

    else:
        changed_att = random.choice(['Attack', 'Defence', 'Special Attack', 'Special Defense', 'Speed'])
        move_type = random.choice(types)
        stat_change = random.randint(1, 3)
        acc = 100
        
        if changed_att == "Attack":
            return Move(move_type, acc=acc, att_raise = stat_change)
        elif changed_att == "Defence":
            return Move(move_type, acc=acc, def_raise = stat_change)
        elif changed_att == "Special Attack":
            return Move(move_type, acc=acc, sp_att_raise = stat_change)
        elif changed_att == "Special Defense":
            return Move(move_type, acc=acc, sp_def_raise = stat_change)
        else:
            return Move(move_type, acc=acc, speed_raise = stat_change)

def generate_random_moves(n=4):
    return [generate_random_move() for _ in range(n)]

In [24]:
import random

def pick_pokemon_types(types):
    if not types or len(types) == 0:
        return None, None

    number_of_types = random.choice([1, 2])
    if number_of_types == 1:
        type_1 = random.choice(types)
        return type_1, None
    else:
        type_1, type_2 = random.sample(types, 2)
        return type_1, type_2

def generate_2_teams():
    party1_pokemon, party2_pokemon = [], []
    # (hp, att, defence, sp_att, sp_defence, speed, move_1, move_2, move_3, move_4, type_1, type_2)
    for i in range(6):
        p_dict = generate_random_pokemon()
        type_1, type_2 = pick_pokemon_types(types)

        rand_poke = Pokemon(
            hp = p_dict["HP"],
            att = p_dict["Attack"],
            defence = p_dict["Defense"],
            sp_att = p_dict["Special Attack"],
            sp_defence = p_dict["Special Defense"],
            speed = p_dict["Speed"],
            move_1 = generate_random_move(),
            move_2 = generate_random_move(),
            move_3 = generate_random_move(),
            move_4 = generate_random_move(),
            type_1 = type_1,
            type_2 = type_2
        )
        party1_pokemon.append(rand_poke)
    
    for i in range(6):
        p_dict = generate_random_pokemon()
        type_1, type_2 = pick_pokemon_types(types)

        rand_poke = Pokemon(
            hp = p_dict["HP"],
            att = p_dict["Attack"],
            defence = p_dict["Defense"],
            sp_att = p_dict["Special Attack"],
            sp_defence = p_dict["Special Defense"],
            speed = p_dict["Speed"],
            move_1 = generate_random_move(),
            move_2 = generate_random_move(),
            move_3 = generate_random_move(),
            move_4 = generate_random_move(),
            type_1 = type_1,
            type_2 = type_2
        )
        party2_pokemon.append(rand_poke)
        
    return party1_pokemon, party2_pokemon

## Actual Simulation (I Think)

In [25]:
def calculate_stat(hp, base_stats, evs): # hp = 0 or 1
    if hp:
        return (2 * base_stats + (evs // 4)) + 110
    else:
        # Not entirely accuate because no nature, but good enough.
        return ((2 * base_stats + (evs // 4)) + 5)

In [26]:
# 0: calculate pokemon
p_dict = generate_random_pokemon()
type_1, type_2 = pick_pokemon_types(types)
pokemon1 = Pokemon(
    hp = p_dict["HP"],
    att = p_dict["Attack"],
    defence = p_dict["Defense"],
    sp_att = p_dict["Special Attack"],
    sp_defence = p_dict["Special Defense"],
    speed = p_dict["Speed"],
    move_1 = generate_random_move(),
    move_2 = generate_random_move(),
    move_3 = generate_random_move(),
    move_4 = generate_random_move(),
    type_1 = type_1,
    type_2 = type_2
)

p_dict = generate_random_pokemon()
type_1, type_2 = pick_pokemon_types(types)
pokemon2 = Pokemon(
    hp = p_dict["HP"],
    att = p_dict["Attack"],
    defence = p_dict["Defense"],
    sp_att = p_dict["Special Attack"],
    sp_defence = p_dict["Special Defense"],
    speed = p_dict["Speed"],
    move_1 = generate_random_move(),
    move_2 = generate_random_move(),
    move_3 = generate_random_move(),
    move_4 = generate_random_move(),
    type_1 = type_1,
    type_2 = type_2
)

# 2. pick a random move for each
p1_move_chosen = random.choice([pokemon1.move_1, pokemon1.move_2, pokemon1.move_3, pokemon1.move_4])
p2_move_chosen = random.choice([pokemon2.move_1, pokemon2.move_2, pokemon2.move_3, pokemon2.move_4])

# TODO: speed tier (later)
# TODO: STAB (later)
# TODO: boosts (later)
# 3. calculate damage done to each
# 3a. get the move type
p1_move_type = p1_move_chosen.move_type
p1_damage_dealt = calculate_damage(p1_move_chosen.power, 
                                    calculate_stat(False, pokemon1.att["Base"], pokemon1.att["EV"]), 
                                    calculate_stat(False, pokemon2.defence["Base"], pokemon2.defence["EV"]), 
                                    p1_move_type, 
                                    [pokemon2.type_1, pokemon2.type_2],
                                    100,
                                    type_effectiveness)

p2_move_type = p2_move_chosen.move_type
p2_damage_dealt = calculate_damage(p2_move_chosen.power, 
                                    calculate_stat(False, pokemon2.att["Base"], pokemon2.att["EV"]), 
                                    calculate_stat(False, pokemon1.defence["Base"], pokemon1.defence["EV"]), 
                                    p2_move_type, 
                                    [pokemon1.type_1, pokemon1.type_2],
                                    100,
                                    type_effectiveness)

# 3b. convert pokemon health into actual raw stats
p1_hp = calculate_stat(True, pokemon1.hp["Base"], pokemon1.hp["EV"])
p2_hp = calculate_stat(True, pokemon2.hp["Base"], pokemon2.hp["EV"])
pokemon1.remaining_hp = p1_hp
pokemon2.remaining_hp = p2_hp

# TODO: check accuarcy (later)
# TODO: check death after damage dealt
# 4. proceed with damage, checking if hits (e.g. accuarcy) and if pokes die along the way.
if pokemon1.move_1.power:
    pokemon2.remaining_hp = max(0, pokemon2.remaining_hp - p1_damage_dealt)
    if pokemon2.remaining_hp == 0:
        print("Pokemon 2 died, ending prematurely lad")

if pokemon2.move_2.power:
    pokemon1.remaining_hp = max(0, pokemon1.remaining_hp - p2_damage_dealt)
    if pokemon1.remaining_hp == 0:
        print("Pokemon 1 died, ending prematurely lad")

# 5. check for end condition (match done)
# TODO

#### Things Remaining:

1. Have the battles go on until one party runs out of Pokemon.

2. Have a way to keep track of remaining Pokemon in a party.

In [27]:
t1, t2 = generate_2_teams()
a1, a2 = Party(t1), Party(t2)

In [28]:
for pokemon in a1.pokemons:
    p_hp = calculate_stat(True, pokemon.hp["Base"], pokemon.hp["EV"])
    pokemon.remaining_hp = p_hp
    
for pokemon in a2.pokemons:
    p_hp = calculate_stat(True, pokemon.hp["Base"], pokemon.hp["EV"])
    pokemon.remaining_hp = p_hp

i = 0
while True:
    pokemon1 = a1.pokemons[a1.current_pokemon_index]
    pokemon2 = a2.pokemons[a2.current_pokemon_index]
    
    # 2. pick a random move for each
    p1_move_chosen = random.choice([pokemon1.move_1, pokemon1.move_2, pokemon1.move_3, pokemon1.move_4])
    p2_move_chosen = random.choice([pokemon2.move_1, pokemon2.move_2, pokemon2.move_3, pokemon2.move_4])

    # TODO: speed tier (later)
    # TODO: STAB (later)
    # TODO: boosts (later)
    # 3. calculate damage done to each
    # 3a. get the move type
    p1_move_type = p1_move_chosen.move_type
    p1_damage_dealt = calculate_damage(p1_move_chosen.power, 
                                       calculate_stat(False, pokemon1.att["Base"], pokemon1.att["EV"]), 
                                       calculate_stat(False, pokemon2.defence["Base"], pokemon2.defence["EV"]), 
                                       p1_move_type, 
                                       [pokemon2.type_1, pokemon2.type_2],
                                       100,
                                       type_effectiveness)

    p2_move_type = p2_move_chosen.move_type
    p2_damage_dealt = calculate_damage(p2_move_chosen.power, 
                                       calculate_stat(False, pokemon2.att["Base"], pokemon2.att["EV"]), 
                                       calculate_stat(False, pokemon1.defence["Base"], pokemon1.defence["EV"]), 
                                       p2_move_type, 
                                       [pokemon1.type_1, pokemon1.type_2],
                                       100,
                                       type_effectiveness)

    # TODO: check accuarcy (later)
    # TODO: check death after damage dealt
    # 4. proceed with damage, checking if hits (e.g. accuarcy) and if pokes die along the way.
    if pokemon1.move_1.power:
        pokemon2.remaining_hp = max(0, pokemon2.remaining_hp - p1_damage_dealt)
        if pokemon2.remaining_hp == 0:
            a2.dead.add(a2.current_pokemon_index)
            
            # 5. check for end condition (match done)
            if (len(a2.dead) == 6):
                print("Team 1 deaths:", a1.dead, "; team 2 deaths:", a2.dead)
                break
                
            numbers = list(range(6))
            available_numbers = [num for num in numbers if num not in a2.dead]
            random_number = random.choice(available_numbers)
            a2.current_pokemon_index = random_number
            print("Team 1 deaths:", a1.dead, "; team 2 deaths:", a2.dead)
            
    if pokemon2.move_2.power:
        pokemon1.remaining_hp = max(0, pokemon1.remaining_hp - p2_damage_dealt)
        if pokemon1.remaining_hp == 0:
            a1.dead.add(a1.current_pokemon_index)
            
            if (len(a1.dead) == 6):
                print("Team 1 deaths:", a1.dead, "; team 2 deaths:", a2.dead)
                break
                
            numbers = list(range(6))
            available_numbers = [num for num in numbers if num not in a1.dead]
            random_number = random.choice(available_numbers)
            a1.current_pokemon_index = random_number
            print("Team 1 deaths:", a1.dead, "; team 2 deaths:", a2.dead)
                
    print(pokemon1.remaining_hp, pokemon2.remaining_hp)
    i += 1
    if i > 500:
        break

323.2 296.79685039370077
195.2 252.407874015748
123.99999999999999 245.9842519685039
105.19999999999999 232.7811023622047
86.39999999999999 226.3574803149606
67.6 181.96850393700785
67.6 175.54488188976376
Team 1 deaths: {0} ; team 2 deaths: set()
0 162.69763779527557
Team 1 deaths: {0, 2} ; team 2 deaths: set()
0 58.72913385826769
231.46526315789473 56.72913385826769
170.93052631578945 5.949606299212562
Team 1 deaths: {0, 2} ; team 2 deaths: {0}
110.39578947368418 0
108.39578947368418 309.0467289719626
Team 1 deaths: {0, 2, 5} ; team 2 deaths: {0}
0 168.0934579439252
77.46835443037972 168.0934579439252
Team 1 deaths: {0, 2, 3, 5} ; team 2 deaths: {0}
0 168.0934579439252
161.69644670050764 48.87289719626163
Team 1 deaths: {0, 2, 3, 5} ; team 2 deaths: {0, 3}
76.27005076142134 0
76.27005076142134 210.70852017937221
76.27005076142134 143.3919282511211
76.27005076142134 85.23991031390138
Team 1 deaths: {0, 2, 3, 5} ; team 2 deaths: {0, 3, 4}
76.27005076142134 0
63.70406091370561 364.97947

## Q-Learning

In [29]:
def reward_function(my_hp, opp_hp, my_hp_change, opp_hp_change):
    reward = 0
    reward += opp_hp_change
    reward -= my_hp_change

    if opp_hp == 0:  # Win? Good.
        reward += 5000
    elif my_hp == 0:  # Lose? Bad.
        reward -= 5000

    return reward

## Simulate

In [47]:
import torch
import torch.nn as nn
import torch.optim as optim
import random
import numpy as np

class DQN(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(DQN, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, output_dim)
    
    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x

gamma = 0.2
class DeepQLearningAgent:
    def __init__(self, input_dim, hidden_dim, output_dim, learning_rate):
        self.model = DQN(input_dim, hidden_dim, output_dim)
        self.optimizer = optim.Adam(self.model.parameters(), lr=learning_rate)
        self.loss_fn = nn.MSELoss()

    def choose_action(self, state):
        with torch.no_grad():
            q_values = self.model(state)
        action = torch.argmax(q_values).item()
        return action

    def train(self, state, action, reward, next_state, done):
        state = torch.tensor(state, dtype=torch.float32)
        next_state = torch.tensor(next_state, dtype=torch.float32)
        action = torch.tensor(action, dtype=torch.int64)
        reward = torch.tensor(reward, dtype=torch.float32)

        q_value = self.model(state)[0][action]
        with torch.no_grad():
            target_q_value = reward + gamma * torch.max(self.model(next_state)) * (1 - done)

        # Compute loss.
        loss = self.loss_fn(q_value, target_q_value)

        # Backpropagation
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

# Example usage.
input_dim = 14
hidden_dim = 128
output_dim = 4
learning_rate = 0.001
agent = DeepQLearningAgent(input_dim, hidden_dim, output_dim, learning_rate)

type_to_bit = {typ: i for i, typ in enumerate(types)}
type_to_bit[None] = 100 # need to account for pokemon of a single type

In [61]:
for episode in range(50000):
    t1, t2 = generate_2_teams()
    a1, a2 = Party(t1), Party(t2)
    
    # Initialize health:
    for pokemon in a1.pokemons + a2.pokemons:
        p_hp = calculate_stat(True, pokemon.hp["Base"], pokemon.hp["EV"])
        pokemon.remaining_hp = p_hp

    i = 0
    while True:
        pokemon1 = a1.pokemons[a1.current_pokemon_index]
        pokemon2 = a2.pokemons[a2.current_pokemon_index]
        
        # Define the current state
        current_state = ([move for move in
                          sorted([pokemon1.move_1.power, pokemon1.move_2.power, pokemon1.move_3.power, pokemon1.move_4.power])]+  
                            [pokemon1.remaining_hp, pokemon2.remaining_hp] + 
                            [type_to_bit[pokemon1.type_1], type_to_bit[pokemon1.type_2]] + 
                            [type_to_bit[pokemon2.type_1], type_to_bit[pokemon2.type_2]] +
                            [type_to_bit[pokemon1.move_1.move_type], type_to_bit[pokemon1.move_2.move_type],
                             type_to_bit[pokemon1.move_3.move_type], type_to_bit[pokemon1.move_4.move_type]]
                         )
        
        # current_state = ([move for move in
        #                   sorted([pokemon1.move_1.power, pokemon1.move_2.power, pokemon1.move_3.power, pokemon1.move_4.power])])

        current_state_tensor = torch.tensor(current_state, dtype=torch.float32).unsqueeze(0)
        
        # Pick a move:
        p1_action_index = agent.choose_action(current_state_tensor)
        p1_move_chosen = [pokemon1.move_1, pokemon1.move_2, pokemon1.move_3, pokemon1.move_4][p1_action_index]

        # Random move for opponent (who cares):
        p2_move_chosen = random.choice([pokemon2.move_1, pokemon2.move_2, pokemon2.move_3, pokemon2.move_4])

        # Damage:
        p1_move_type = p1_move_chosen.move_type
        p1_damage_dealt = calculate_damage(p1_move_chosen.power, 
                                           calculate_stat(False, pokemon1.att["Base"], pokemon1.att["EV"]), 
                                           calculate_stat(False, pokemon2.defence["Base"], pokemon2.defence["EV"]), 
                                           p1_move_type, 
                                           [pokemon2.type_1, pokemon2.type_2],
                                           100,
                                           type_effectiveness)

        p2_move_type = p2_move_chosen.move_type
        p2_damage_dealt = calculate_damage(p2_move_chosen.power, 
                                           calculate_stat(False, pokemon2.att["Base"], pokemon2.att["EV"]), 
                                           calculate_stat(False, pokemon1.defence["Base"], pokemon1.defence["EV"]), 
                                           p2_move_type, 
                                           [pokemon1.type_1, pokemon1.type_2],
                                           100,
                                           type_effectiveness)

        # After applying damage, define the new state and reward
        if pokemon1.move_1.power:
            pokemon2.remaining_hp = max(0, pokemon2.remaining_hp - p1_damage_dealt)
            if pokemon2.remaining_hp == 0:
                a2.dead.add(a2.current_pokemon_index)
            
                # 5. check for end condition (match done)
                if (len(a2.dead) == 6):
                    # print("Team 1 deaths:", a1.dead, "; team 2 deaths:", a2.dead)
                    break
                
                numbers = list(range(6))
                available_numbers = [num for num in numbers if num not in a2.dead]
                random_number = random.choice(available_numbers)
                a2.current_pokemon_index = random_number
                # print("Team 1 deaths:", a1.dead, "; team 2 deaths:", a2.dead)
                
        # if i > 450:
            # print("Power of Move:", p1_move_chosen.power, sorted([pokemon1.move_1.power, 
            # pokemon1.move_2.power, pokemon1.move_3.power, pokemon1.move_4.power]))
            
        if pokemon2.move_2.power:
            pokemon1.remaining_hp = max(0, pokemon1.remaining_hp - p2_damage_dealt)
            if pokemon1.remaining_hp == 0:
                a1.dead.add(a1.current_pokemon_index)
            
                if (len(a1.dead) == 6):
                    # print("Team 1 deaths:", a1.dead, "; team 2 deaths:", a2.dead)
                    break
                
                numbers = list(range(6))
                available_numbers = [num for num in numbers if num not in a1.dead]
                random_number = random.choice(available_numbers)
                a1.current_pokemon_index = random_number
                # print("Team 1 deaths:", a1.dead, "; team 2 deaths:", a2.dead)
                
        # print(pokemon1.remaining_hp, pokemon2.remaining_hp)
        i += 1
        if i > 500:
            break
        
        new_state = (
                    [pokemon1.move_1.power, pokemon1.move_2.power, 
                     pokemon1.move_3.power, pokemon1.move_4.power, pokemon1.remaining_hp, pokemon2.remaining_hp] +
                    [type_to_bit[pokemon1.type_1], type_to_bit[pokemon1.type_2]] +
                    [type_to_bit[pokemon2.type_1], type_to_bit[pokemon2.type_2]] +
                    [type_to_bit[pokemon1.move_1.move_type], type_to_bit[pokemon1.move_2.move_type],
                     type_to_bit[pokemon1.move_3.move_type], type_to_bit[pokemon1.move_4.move_type]])
        
        
        # new_state = [pokemon1.move_1.power, pokemon1.move_2.power, 
        #              pokemon1.move_3.power, pokemon1.move_4.power]
        new_state_tensor = torch.tensor(new_state, dtype=torch.float32).unsqueeze(0)
        
        # Define a simple reward function:
        reward = 0
        if p1_damage_dealt > 80:
            reward += 2000
        elif p1_damage_dealt > 70:
            reward += 1500
        elif p1_damage_dealt > 60:
            reward += 1250
        elif p1_damage_dealt > 50:
            reward += 1000
        elif p1_damage_dealt > 40:
            reward += 500
        elif p1_damage_dealt > 20:
            reward += 150
        elif p1_damage_dealt > 10:
            reward += 50
        elif p1_damage_dealt > 2:
            reward += 15

        # Agent learns from the outcome
        agent.train(current_state_tensor, p1_action_index, reward, new_state_tensor, False)

        # print(pokemon1.remaining_hp, pokemon2.remaining_hp)
        i += 1
        if i > 500:
            break
            
    if episode % 1000 == 0:
        print(episode, "mate.")

  state = torch.tensor(state, dtype=torch.float32)
  next_state = torch.tensor(next_state, dtype=torch.float32)


0 mate.
1000 mate.
2000 mate.
3000 mate.
4000 mate.
5000 mate.
6000 mate.
7000 mate.
8000 mate.
9000 mate.
10000 mate.
11000 mate.
12000 mate.
13000 mate.
14000 mate.
15000 mate.
16000 mate.
17000 mate.
18000 mate.
19000 mate.
20000 mate.
21000 mate.
22000 mate.
23000 mate.
24000 mate.
25000 mate.
26000 mate.
27000 mate.
28000 mate.
29000 mate.
30000 mate.
31000 mate.
32000 mate.
33000 mate.
34000 mate.
35000 mate.
36000 mate.
37000 mate.
38000 mate.
39000 mate.
40000 mate.
41000 mate.
42000 mate.
43000 mate.
44000 mate.
45000 mate.
46000 mate.
47000 mate.
48000 mate.
49000 mate.


In [75]:
# move 1 power
# move 2 power 
# move 3 power 
# move 4 power
# pokemon1 remaining hp
# pokemon2 remaining hp
# pokemon1 type 1 
# pokemon1 type 2
# pokemon2 type 1
# pokemon2 type 2
# pokemon1 move_1 type 
# pokemon1 move_2 type 
# pokemon1 move 3 type
# pokemon1 move 4 type
def get_move(moves):
    current_state_tensor = torch.tensor(moves, dtype=torch.float32).unsqueeze(0)
    action_idx = agent.choose_action(current_state_tensor)
    print(f"Move {action_idx}, base power", moves[action_idx])

In [79]:
print(type_to_bit)

{'Normal': 0, 'Fire': 1, 'Water': 2, 'Electric': 3, 'Grass': 4, 'Ice': 5, 'Fighting': 6, 'Poison': 7, 'Ground': 8, 'Flying': 9, 'Psychic': 10, 'Bug': 11, 'Rock': 12, 'Ghost': 13, 'Dragon': 14, 'Dark': 15, 'Steel': 16, None: 100}


In [84]:
# (Psychic) vs. (Poison + Fighting)
get_move([50, 150, 180, 160] + [60, 60] + [10, 100] + [7, 6] + [10, 7, 5, 16]) 

Move 0, base power 50


In [78]:
# 
get_move([40, 90, 100, 200] + [100, 60] + [1, 5] + [4, 11] + [1, 7, 5, 16]) 

Move 0, base power 40
