# Install libraries

In [1]:
!pip install pyspark
!pip install matplotlib
!pip3 install pygad



# Import libraries

In [2]:
from pyspark.sql import SparkSession, functions as F, SQLContext
from pyspark.sql.functions import when, lit, col, lower
from IPython.display import display, Image
from pygad import GA
import numpy as np
import random

# Setup Spark

In [3]:
spark = SparkSession.builder.master("local[*]").getOrCreate()
spark

# Read data

In [4]:
data_path = "/home/jovyan/work/BCU/ModernOptimisation/Assignment/Data/"
df = spark.read.csv(data_path+"pokemons.csv", header=True, inferSchema=True)
TypeData_df = spark.read.csv(data_path+"TypeData.csv", header=True, inferSchema=True)

# Get 3 random pokemon

### Set seed

In [5]:
def set_seed(seed = 42):
    return random.seed(seed)

set_seed()

### Get 3 random Pokemon IDs

In [6]:
# Get IDs
pokemon_ids = df.select("id").distinct().rdd.map(lambda x: x[0]).collect()

# Randomly select 3 unique pokemon_ids
random_pokemon_ids = random.sample(pokemon_ids, 3)

# Filter the DataFrame based on the randomly selected pokemon_ids
random_pokemon = df.filter(col("id").isin(random_pokemon_ids))
#random_pokemon.show()

### Display Pokemon information

In [7]:
print("Opponents pokemons: ")
print()
for id in random_pokemon_ids:

    # Filter the random_pokemon DataFrame to get the details of the current pokemon_id
    pokemon_details = random_pokemon.filter(col("id") == id)\
        .select("id", "name", "rank", "generation", "evolves_from", "type1", "type2", "hp", "atk", "def", "spatk", "speed", "abilities", "desc")\
        .first()
    
    # Print the details in the specified format
    print("id:", pokemon_details["id"])
    print("name:", pokemon_details["name"])
    print("rank:", pokemon_details["rank"])
    print("generation:", pokemon_details["generation"])
    print("evolves_from:", pokemon_details["evolves_from"])
    print("type1:", pokemon_details["type1"])
    print("type2:", pokemon_details["type2"])
    print("hp:", pokemon_details["hp"])
    print("atk:", pokemon_details["atk"])
    print("def:", pokemon_details["def"])
    print("spatk:", pokemon_details["spatk"])
    print("speed:", pokemon_details["speed"])
    print("abilities:", pokemon_details["abilities"])
    # print("desc:", pokemon_details["desc"])
    desc = pokemon_details["desc"]
    print("desc:", end=" ")
    char_count = 0
    for word in desc.split():
        if char_count + len(word) > 64:
            print("\n\t", end="")
            char_count = 0
        print(word, end=" ")
        char_count += len(word) + 1  # Add 1 for the space between words
    print()

    
    # Convert the ID to a string with leading zeros if necessary
    pokemon_id_str = str(id).zfill(3)

    # Generate the URL for the Pokémon image
    url = f"https://assets.pokemon.com/assets/cms2/img/pokedex/full/{pokemon_id_str}.png"
    
    # Display the image
    display(Image(url=url))


Opponents pokemons: 

id: 896
name: glastrier
rank: legendary
generation: generation-viii
evolves_from: nothing
type1: ice
type2: None
hp: 100
atk: 145
def: 130
spatk: 65
speed: 30
abilities: chilling-neigh 
desc: Glastrier emits intense cold from its hooves. It’s also a 
	belligerent Pokémon—anything it wants, it takes by force. 


id: 375
name: metang
rank: ordinary
generation: generation-iii
evolves_from: beldum
type1: steel
type2: psychic
hp: 60
atk: 75
def: 100
spatk: 55
speed: 50
abilities: clear-body light-metal 
desc: When two BELDUM fuse together, METANG is formed. The brains of 
	the BELDUM are joined by a magnetic nervous system. By linking 
	its brains magnetically, this POKéMON generates strong 
	psychokinetic power. 


id: 314
name: illumise
rank: ordinary
generation: generation-iii
evolves_from: nothing
type1: bug
type2: None
hp: 65
atk: 47
def: 75
spatk: 73
speed: 85
abilities: oblivious tinted-lens prankster 
desc: With its sweet aroma, it guides Volbeat to draw signs with light 
	in the night sky. 


### Create df of enemy pokemon and list of opponents

In [8]:
def get_n_row(df, number_in_df):
    num_rows = df.count()
    nth_row_df = df.limit(num_rows).collect()[num_rows - number_in_df]
    return spark.createDataFrame([nth_row_df], schema=df.schema)

def create(primary_type):
    # 1) Locate the type1 in TypeData based on primary_type
    TypeData_df_2 = TypeData_df.filter(TypeData_df.Type == primary_type.first()["type1"])
    # 2) Shorten df
    df_updated = df.select("id", "name", "type1", "hp", "atk").withColumn("bns", lit(None))
    # 3) Iterate over dataframe
    for column in TypeData_df_2.columns:
        df_updated = df_updated.withColumn("bns", when(col("type1") == column, TypeData_df_2.first()[column]).otherwise(df_updated["bns"]))
    # 4) Multiply value of column type with HP of orignal dataset
    return df_updated.withColumn("hp", col("hp") * col("bns")).drop("bns")


In [9]:
pokemon_1 = get_n_row(df=random_pokemon, number_in_df=1) # returns pyspark dataframe
pokemon_2 = get_n_row(df=random_pokemon, number_in_df=2) # returns pyspark dataframe
pokemon_3 = get_n_row(df=random_pokemon, number_in_df=3) # returns pyspark dataframe

all_enemies_of_pk1 = create(pokemon_1) # returns pyspark dataframe
#all_enemies_of_pk1.show()
all_enemies_of_pk2 = create(pokemon_2) # returns pyspark dataframe
#all_enemies_of_pk2.show()
all_enemies_of_pk3 = create(pokemon_3) # returns pyspark dataframe
#all_enemies_of_pk3.show()

# Genetic algorithm

In [10]:
def regular_ga(df1, df2, seed =42):
    #np.random.seed(42)
    # Define fitness function
    target_stats = df1.select(F.sum("hp") + F.sum("atk")).collect()[0][0]
    
    def fitness_function(ga_instance, solution, solution_idx):
        """
        Fitness function to evaluate the strength of a Pokemon.
        Here, we calculate the total stats (hp + atk) and the closer it is to the target Pokemon, the better.
        """
        total_stats = solution[0] + solution[1]  # hp + atk
        return 1 / (abs(total_stats - target_stats) + 0.0001)  # Avoid division by zero

    # Create initial population with seed
    def create_population():
        """
        Create initial population by randomly selecting Pokemon from df2.
        """
        population = df2.select("hp", "atk").collect()
        return [[float(p.hp), float(p.atk)] for p in population]

    # Initialize genetic algorithm without mutation
    ga = GA(num_generations=100,
            num_parents_mating=2,
            fitness_func=fitness_function,
            sol_per_pop=10,
            num_genes=2,
            gene_type=float,
            initial_population=create_population(),
            parent_selection_type="sss",
            crossover_type="single_point",
            mutation_type=None)  # Disable mutation

    # Set seed for the genetic algorithm instance
    set_seed(seed)
    ga.seed = seed

    # Run genetic algorithm
    ga.run()

    # Get the final best solution
    best_solution = ga.best_solution()

    # Find the Pokemon closest to the best solution
    target_stats = np.array(best_solution[0])
    df2_stats = np.array(df2.select("hp", "atk").collect())
    distances = np.linalg.norm(df2_stats - target_stats, axis=1)
    closest_pokemon_idx = np.argmin(distances)
    closest_pokemon = df2.select("name").collect()[closest_pokemon_idx][0]

    print("Best Solution:", best_solution)
    print("Closest Pokemon to Solution:", closest_pokemon)


### Pokemon set 1

In [11]:
regular_ga(pokemon_1.selectExpr("id","name","type1", "CAST(hp AS INT) AS hp", "CAST(atk AS INT) AS atk"), 
      all_enemies_of_pk1.selectExpr("id","name","type1", "CAST(hp AS INT) AS hp", "CAST(atk AS INT) AS atk"),
      seed=69
)



Best Solution: (array([140., 105.]), 10000.0, 0)
Closest Pokemon to Solution: lurantis


### Pokemon set 2

In [12]:
regular_ga(pokemon_2.selectExpr("id","name","type1", "CAST(hp AS INT) AS hp", "CAST(atk AS INT) AS atk"), 
      all_enemies_of_pk2.selectExpr("id","name","type1", "CAST(hp AS INT) AS hp", "CAST(atk AS INT) AS atk"),
      seed=69
)

Best Solution: (array([62., 73.]), 10000.0, 0)
Closest Pokemon to Solution: raging-bolt


### Pokemon set 3

In [13]:
regular_ga(pokemon_3.selectExpr("id","name","type1", "CAST(hp AS INT) AS hp", "CAST(atk AS INT) AS atk"), 
      all_enemies_of_pk3.selectExpr("id","name","type1", "CAST(hp AS INT) AS hp", "CAST(atk AS INT) AS atk"),
      seed=69
)

Best Solution: (array([42., 70.]), 10000.0, 0)
Closest Pokemon to Solution: noivern


# Genetic algorithm with Monte Carlo

In [14]:
def ga_with_mc(df1, df2, seed=42, num_simulations=100):
    # Define target stats from df1
    target_stats = df1.select(F.sum("hp") + F.sum("atk")).collect()[0][0]
    
    def fitness_function(ga_instance, solution, solution_idx):
        """
        Fitness function to evaluate the strength of a Pokemon.
        Here, we use Monte Carlo simulations to estimate the fitness.
        """
        total_stats = solution[0] + solution[1]  # hp + atk
        # Perform Monte Carlo simulations to estimate fitness
        return monte_carlo_simulation(total_stats, target_stats, num_simulations)

    def create_population():
        """
        Create initial population by randomly selecting Pokemon from df2.
        """
        population = df2.select("hp", "atk").collect()
        return [[float(p.hp), float(p.atk)] for p in population]

    # Initialize genetic algorithm without mutation
    ga = GA(num_generations=100,
            num_parents_mating=2,
            fitness_func=fitness_function,
            sol_per_pop=10,
            num_genes=2,
            gene_type=float,
            initial_population=create_population(),
            parent_selection_type="sss",
            crossover_type="single_point",
            mutation_type=None)  # Disable mutation

    # Set seed for the genetic algorithm instance
    set_seed(seed)
    ga.seed = seed

    # Run genetic algorithm
    ga.run()

    # Get the final best solution
    best_solution = ga.best_solution()

    # Find the Pokemon closest to the best solution
    target_stats = np.array(best_solution[0])
    df2_stats = np.array(df2.select("hp", "atk").collect())
    distances = np.linalg.norm(df2_stats - target_stats, axis=1)
    closest_pokemon_idx = np.argmin(distances)
    closest_pokemon = df2.select("name").collect()[closest_pokemon_idx][0]

    print("Best Solution:", best_solution)
    print("Closest Pokemon to Solution:", closest_pokemon)

def monte_carlo_simulation(total_stats, target_stats, num_simulations):
    """
    Perform Monte Carlo simulations to estimate the fitness.
    """
    total_fitness = 0
    for _ in range(num_simulations):
        # Generate random perturbation around total_stats
        perturbation = np.random.normal(loc=0, scale=0.1, size=1)[0]
        simulated_stats = total_stats + perturbation
        # Evaluate fitness based on the difference between simulated_stats and target_stats
        fitness = 1 / (abs(simulated_stats - target_stats) + 0.0001)  # Avoid division by zero
        total_fitness += fitness
    # Average fitness over simulations
    return total_fitness / num_simulations

### Pokemon set 1

In [15]:
ga_with_mc(pokemon_1.selectExpr("id","name","type1", "CAST(hp AS INT) AS hp", "CAST(atk AS INT) AS atk"), 
      all_enemies_of_pk1.selectExpr("id","name","type1", "CAST(hp AS INT) AS hp", "CAST(atk AS INT) AS atk"),
      seed=69
)

Best Solution: (array([160.,  85.]), 113.98506935386752, 0)
Closest Pokemon to Solution: rhyhorn


### Pokemon set 2

In [16]:
ga_with_mc(pokemon_2.selectExpr("id","name","type1", "CAST(hp AS INT) AS hp", "CAST(atk AS INT) AS atk"), 
      all_enemies_of_pk2.selectExpr("id","name","type1", "CAST(hp AS INT) AS hp", "CAST(atk AS INT) AS atk"),
      seed=69
)

Best Solution: (array([45., 90.]), 112.10207952561393, 0)
Closest Pokemon to Solution: zapdos


### Pokemon set 3

In [17]:
ga_with_mc(pokemon_3.selectExpr("id","name","type1", "CAST(hp AS INT) AS hp", "CAST(atk AS INT) AS atk"), 
      all_enemies_of_pk3.selectExpr("id","name","type1", "CAST(hp AS INT) AS hp", "CAST(atk AS INT) AS atk"),
      seed=69
)

Best Solution: (array([47., 65.]), 97.30532798874296, 0)
Closest Pokemon to Solution: sylveon
