In [6]:
import math
import pickle
import time
import random
import numpy as np

import kmeans
import Cplex_CalculateFitness

import matplotlib.pyplot as plt
from matplotlib import figure

In [7]:
import concurrent.futures
import os
from functools import wraps
import requests

In [8]:
# variables
file = open("fitness_values.pickle", "rb")
fitness_values = pickle.load(file)
file.close()

solution_individuals = []
solution_costs = []

# ideas

- update mutation probability adaptively

# definitions

Gene: a city (represented as (x, y) coordinates)       
Individual (aka “chromosome”): a single route satisfying the conditions above        
Population: a collection of possible routes (i.e., collection of individuals)    
Parents: two routes that are combined to create a new route         
Mating pool: a collection of parents that are used to create our next population (thus creating the next generation of routes)   
Fitness: a function that tells us how good each route is (in our case, how short the distance is)       
Mutation: a way to introduce variation in our population by randomly swapping two cities in a route          
Elitism: a way to carry the best individuals into the next generation      

# decisions in ga

+ Chromosome representation or coding of an individual solution   
+ Fitness function   
- Population size, generation of initial population  
+ Parent selection for reproduction  
+ Crossover operator, crossover rate/probability (pc)  
+ Mutation operator, mutation probability (pm),  
+ Forming the population for next generation  
+ Stopping (convergence) condition  

key of a population dictionary: list(ind.ravel())

### k-means

size=(500, 100)
points = np.random.random(size=size)*10
clusters, centroids = kmeans.kmeans(points, k)

### generate initial population

x adet  initial population için 5x random çözüm bul ve bunların fitnesslarını ve birbiriyle olan benzemezliklerini hesapla. bu iki değere göre en iyi x taneyi initial olarak al.
sadece benzemezlik değerine göre al.

initial population alırken 5x feasible initial solution'u alıp dbscan ya da kmeans ile diversiy sonuçlar elde edebiliriz.

In [4]:
def initialize_population(pop_size):
    '''
    Initializing a population which has feasible and random individuals.
    
    Args:
        ind_size (tuple): size of each individual
        pop_size (int) : number of individuals will be created
        lower_limit (int) : the lower limit of each value can take
        upper_limit (int) : the upper limit of each value can take
    Returns:
        initial_population (array) : list of individuals formed by random integers
    '''

    initial_population = Cplex_CalculateFitness.generate_random_array(pop_size)
    
    # initial_population = np.random.randint(lower_limit, upper_limit, size=(pop_size, ind_size))
    
    return initial_population

In [43]:
def fitness(individual):
    
    key = str(individual)
    
    def cplex_cost(key, individual):
        try:
            fitness_values[key] = Cplex_CalculateFitness.calculate_fitness(individual)
        except:
            return 999999.9 # if a solution is not feasible then assign its obj func
        
    return fitness_values.get(key, cplex_cost(key, individual))

In [44]:
def make_parallel(func):
    """
        Decorator used to decorate any function which needs to be parallized.
        After the input of the function should be a list in which each element is a instance of input fot the normal function.
        You can also pass in keyword arguements seperatley.
        :param func: function
            The instance of the function that needs to be parallelized.
        :return: function
    """

    @wraps(func)
    def wrapper(lst):

        # the number of threads that can be max-spawned.
        # If the number of threads are too high, then the overhead of creating the threads will be significant.
        # Here we are choosing the number of CPUs available in the system and then multiplying it with a constant.
        # In my system, i have a total of 8 CPUs so i will be generating a maximum of 16 threads in my system.
        number_of_threads_multiple = 2 # You can change this multiple according to you requirement
        number_of_workers = int(os.cpu_count() * number_of_threads_multiple)
        
        print(number_of_workers)
        
        if len(lst) < number_of_workers:
            # If the length of the list is low, we would only require those many number of threads.
            # Here we are avoiding creating unnecessary threads
            number_of_workers = len(lst)
            print(number_of_workers)

        if number_of_workers:
            if number_of_workers == 1:
                # If the length of the list that needs to be parallelized is 1, there is no point in
                # parallelizing the function.
                # So we run it serially.
                result = [func(lst[0])]
            else:
                # Core Code, where we are creating max number of threads and running the decorated function in parallel.
                result = []
                with concurrent.futures.ThreadPoolExecutor(max_workers=number_of_workers) as executer:
                    bag = {executer.submit(func, i): i for i in lst}
                    for future in concurrent.futures.as_completed(bag):
                        result.append(future.result())
        else:
            result = []
        return result
    return wrapper

In [45]:
def calculate_fitness(population):
  
    res = make_parallel(fitness)(population)
    
    return res

In [46]:
pop = np.random.randint(0,100, size=(100,120))
start = time.time()
k1 = [fitness(i) for i in pop]
end =time.time()
print(end-start)

6.0723676681518555


In [47]:
start = time.time()
k2 = calculate_fitness(pop)
end =time.time()
print(end-start)

64
6.397441387176514


In [48]:
k1[0:5], k2[0:5]

([14.11600029991251,
  14.254000336058699,
  13.93900031635773,
  14.21200031765966,
  14.443000327754591],
 [14.11600029991251,
  14.254000336058699,
  14.377000372855369,
  14.383000332964771,
  14.683000376925733])

In [11]:
# technical settings
individual_size = 120
stop_thrs = 10

### algorithm settings

In [12]:
k_list = [5, 10, 20] 
population_size_list = [20,30,50,100]

# crossover parameters
crossover_probability_list = [0.5, 0.7, 1.0]
crossover_type_list = [1, 2, 3] 
fitness_function_list = ['cost', 'distance']

# must be even!
elitism_ratio_list = [0, 0.2, 0.4] 
max_parent_allowance_list = [0.125, 0.25, 0.5]

#mutation parameters
mutation_probability_list = [0, 0.05, 0.1]
mutation_rate_list = [0.05, 0.5] 
mutation_level_list = [0.1, 0.3, 0.5]

# algorithm settings
max_iter_list = [100, 500, 1000] 


### run with above parameters

In [None]:
stop

### tuning parameters

In [None]:
k = [5, 10, 20] 
population_size = [20,40, 100]

# crossover parameters
crossover_probability = [0.5, 0.7, 1.0]
crossover_type = [1, 2, 3] 
fitness_function = ['cost', 'distance']

# must be even!
elitism_ratio = [0, 0.2, 0.4] 
max_parent_allowance = [0.125, 0.25, 0.5]

#mutation parameters
mutation_probability = [0, 0.05, 0.1] # probability of mutation
mutation_rate = [0.05, 0.5] # rate of mutation for an individual
mutation_level = [0.1, 0.3, 0.5] # percentage of increase/decrease for a gene during mutation

# algorithm settings
max_iter = [100, 500, 1000] 
stop_thrs = 10

In [None]:
parameters = [
                individual_size, population_size, k,\
                max_iter, \
                fitness_function,  crossover_type, crossover_probability, \
                elitism_ratio, max_parent_allowance, \
                mutation_probability, mutation_rate, mutation_level, \
                max_iter, stop_thrs
             ]

In [None]:
plot_cost(costs) # costs func ile çalışır

In [None]:
results = run(parameters)

In [None]:
dataframe'in ilk kolonu run_number
her bir run'ı 10 kere al sonuçların averag'ını bul