In [7]:
# Importing libraries

import math
import hashlib
import string
import random
import numpy as np
import pandas as pd


from genetic_algorithm.password_fitness import get_password, get_normalised_fitness


# Section 3: Genetic Algorithm

This notebook demonstrates the implementation of a genetic algorithm to find the correct password. The attached report refers to the following code.

## Section 3.1 & 3.2

The Operators and the main implementation is organised as functions below. Functions for getting the password and computing fitness score are the same as the ones provided in the coursework description. They can be found at `genetic_algorithm/password_fitness.py`.

In [141]:
# The following functions are used to implement the genetic algorithm

def mutate(chromosome, mutation_rate, gene_mutation_rate=0.1):
    '''
    Mutation Operator

    Parameters
    ----------
    chromosome : str
        The chromosome to be mutated
    mutation_rate : float
        The mutation rate
    gene_mutation_rate : float
        The gene mutation rate
    
    Returns
    -------
    str
        The mutated chromosome
    '''
    if random.uniform(0, 1) > mutation_rate:
        return chromosome
    
    chromosome = list(chromosome)
    options = string.digits + string.ascii_uppercase  + "_"
    # Perform global mutation
    for i in range(len(chromosome)):
        if random.normalvariate(0, 1) < gene_mutation_rate:
            chromosome[i] = random.choice(options)

    return ''.join(chromosome)


def crossover(parent1, parent2, r=0.5, crossover_rate=0.8,type='uniform'):
    '''
    Crossover (recombination) Operator
    
    Parameters
    ----------
    parent1 : str
        The first parent
    parent2 : str
        The second parent
    r : float
        Recombination factor for uniform crossover
    type : str
        The type of crossover to be performed. Can be 'uniform' or 'one-point'.

    Returns
    -------
    str
        The first offspring
    str
        The second offspring
    '''
    parent1= list(parent1)
    parent2 = list(parent2)

    if type == 'uniform':
        # Perform uniform crossover
        offspring1 = []
        offspring2 = []
        for i in range(len(parent1)):
            if random.uniform(0, 1) < r:
                offspring1.append(parent1[i])
                offspring2.append(parent2[i])
            else:
                offspring1.append(parent2[i])
                offspring2.append(parent1[i])
    
    elif type == 'one-point':
        # Perform one-point crossover
        point = random.randint(0, len(parent1))
        offspring1 = parent1[:point] + parent2[point:]
        offspring2 = parent2[:point] + parent1[point:]

    if random.uniform(0, 1) < crossover_rate:
        return ''.join(offspring1), ''.join(offspring2)
    else:
        return ''.join(parent1), ''.join(parent2)
    

def genetic_algorithm(target, options, population_size, generations, mutation_rate, selection_ratio, crossover_rate, crossover_type, verbose=False):
    '''
    Genetic Algorithm implementation
    
    Parameters
    ----------
    target : str
        The target password
    options : str
        The string representing the possible characters in the password (gene pool)
    population_size : int
        The size of the population
    generations : int
        The number of generations to run the algorithm for
    mutation_rate : float
        The mutation rate
    selection_ratio : float
        The ratio of the population to be selected for the mating pool
    crossover_type : str
        The type of crossover to be performed. Can be 'uniform' or 'one-point'
    verbose : bool
        True if the algorithm should print the best candidate at each generation

    Returns
    -------
    str
        The best candidate found by the algorithm
    int
        The number of generations the algorithm ran for (returns max generations if no solution is found) 

    '''

    # Create a population of random passwords
    population = []
    for i in range(population_size):
        population.append(''.join(random.choices(options, k=len(target))))
    # Iterate through the generations
    for gen in range(generations):
        # Sort the population by fitness
        fitness = get_normalised_fitness(population, target)
        population = sorted(population, key=lambda x: fitness[x], reverse=True)
        if gen % 5 == 0 and verbose:
            print(f"Generation {gen}: Best fitness - {fitness[population[0]]}, Best candidate - {population[0]}")
        if fitness[population[0]] == 1.0:
            if verbose:
                print(f"Generation {gen}: Password found - {population[0]}")
            return population[0], gen

        # Add the best candidates form the mating pool: Truncation Selection
        new_population = population[:int(selection_ratio * population_size)]
        mating_pool = new_population.copy()

        # While mating pool is not empty, perform crossover
        while len(mating_pool) > 0:
            # Select two parents
            parent1 = random.choice(mating_pool)
            mating_pool.remove(parent1)
            parent2 = random.choice(mating_pool)
            mating_pool.remove(parent2)
            # Perform crossover
            offspring1, offspring2 = crossover(parent1, parent2, crossover_rate, type=crossover_type)
            # Perform mutation
            offspring1 = mutate(offspring1, mutation_rate)
            offspring2 = mutate(offspring2, mutation_rate)
            # Add the offspring to the new population
            new_population.append(offspring1)
            new_population.append(offspring2)

        # Replace the old population with the new population
        population = new_population

    # Failure: Return the best candidate and max generations
    return population[0], generations

        


The code cell below uses the above function to demonstrate the working of the algorithm. 

In [147]:
Hyperparameters = {
    "population_size": 800,
    "generations": 10000,
    "mutation_rate": 0.2,
    "selection_ratio": 0.5,
    "crossover_type": 'uniform',
    "crossover_rate": 0.9,
    "verbose": True
}

# Run the genetic algorithm
student_password = "acw723"
target = get_password(student_password)
print(f"True password: {target}")
options = string.digits + string.ascii_uppercase  + "_"
print("###### Running Genetic Algorithm ######")
best_candidate, gen = genetic_algorithm(target, options, **Hyperparameters)

print(f"Found password: {best_candidate}. Number of reproductions: {gen}")


True password: POV_VCKYXT
###### Running Genetic Algorithm ######
Generation 0: Best fitness - 0.7125095818727778, Best candidate - GPTZC7LYSU
Generation 5: Best fitness - 0.7648682505043704, Best candidate - MVTYRBHYZW
Generation 10: Best fitness - 0.8204614621426476, Best candidate - NNVVLBHYWT
Generation 15: Best fitness - 0.883677169292898, Best candidate - PQU_TEKYUU
Generation 20: Best fitness - 0.9249340191432497, Best candidate - QOV_UCIYXW
Generation 25: Best fitness - 0.9708270017004211, Best candidate - PPV_UCKYXT
Generation 29: Password found - POV_VCKYXT
Found password: POV_VCKYXT. Number of reproductions: 29


## Section 3.3 

The following code cell calculates the mean and standard deviation of number of generations required to find the password.

In [132]:
def get_stats(target, options, hyperparameters, num_runs=10):
    num_generations = []
    for i in range(num_runs):
        # Setting off verbose to avoid printing the best candidate at each generation
        hyperparameters['verbose'] = False
        best_candidate, gen = genetic_algorithm(target, options, **hyperparameters)
        num_generations.append(gen)
    return np.mean(num_generations), np.std(num_generations)

mean, std = get_stats(target, options, Hyperparameters)
print(f"Mean number of generations: {mean}, Standard Deviation: {std}")

Mean number of generations: 24.4, Standard Deviation: 1.4966629547095767


## Section 3.4

As mentioned in the attached report, the implementation uses the Truncation Selection strategy. The code below computes the effect of changing the selection ratio on the number of generations required for the algorithm to converge. 

In [128]:
# Effect of selection ratio on the number of generations required to find the password
# Note that this code takes about 10 minutes to run

selection_ratios = [0.01, 0.05, 0.1, 0.2, 0.5, 0.8, 0.95, 0.99]
generation_stats = []
for selection_ratio in selection_ratios:
    Hyperparameters['selection_ratio'] = selection_ratio
    try:
        mean, std = get_stats(target, options, Hyperparameters, num_runs=10)
    except TypeError:
        print('Incorrect hyperparameters')
    generation_stats.append((mean, std))

# Print pandas daframe
df = pd.DataFrame(generation_stats, index=selection_ratios, columns=['mean', 'std'])
df.index.name = 'selection_ratio'
df

Unnamed: 0_level_0,mean,std
selection_ratio,Unnamed: 1_level_1,Unnamed: 2_level_1
0.01,10000.0,0.0
0.05,6719.8,3328.073851
0.1,5054.9,3489.990443
0.2,1378.7,1301.052731
0.5,26.8,2.039608
0.8,26.2,0.979796
0.95,80.1,162.308626
0.99,25.1,1.868154
