In [10]:
from sklearn import neighbors
import numpy as np
import networkx as nx
import time
import sklearn.manifold
from scipy.spatial import distance
from sklearn.decomposition import KernelPCA
from sklearn.neighbors import NearestNeighbors
from sklearn.utils import check_array
from datetime import datetime
import random
from sklearn.utils.graph_shortest_path import graph_shortest_path
import pandas as pd
from scipy.spatial.distance import pdist, squareform
import multiprocessing as mp
import os
from sklearn.preprocessing import MinMaxScaler
from numpy.random import randint
from sklearn.metrics.pairwise import euclidean_distances

In [2]:
class SammonError:
    """Sammon stress between a data set and a reduced representation of it.

    The stress is

        E = (1 / sum_{i<j} d*_ij) * sum_{i<j} (d*_ij - d_ij)^2 / d*_ij

    where ``d*_ij`` are pairwise Euclidean distances between rows of
    ``full_matrix`` and ``d_ij`` the distances between the same rows of
    ``reduced_matrix``.  The result is computed once at construction time and
    stored in ``value_of_stress``.

    Parameters
    ----------
    full_matrix : array-like of shape (n_samples, n_features)
        Original data (NumPy array or pandas DataFrame).
    reduced_matrix : array-like of shape (n_samples, n_reduced_features)
        Reduced data; must have the same number of rows as ``full_matrix``.
    k, method :
        Stored on the instance but not used by the stress computation.
    """

    def __init__(self, full_matrix, reduced_matrix, k = 100, method = 'd'):
        self.k = k
        self.method = method
        self.full_matrix = full_matrix
        self.reduced_matrix = reduced_matrix
        self.value_of_stress = self.sammon_function()

    def sammon_function(self):
        """Return the Sammon stress of ``reduced_matrix`` w.r.t. ``full_matrix``.

        Returns
        -------
        float
            The stress value, or NaN when every original pairwise distance is
            zero (fewer than two samples, or all rows identical), in which
            case the stress is undefined.

        Raises
        ------
        ValueError
            If the two matrices do not contain the same number of rows.
        """
        # Condensed distances: each unordered pair (i < j) appears exactly once,
        # so no n-by-n matrices, identity padding or triangular masking is needed.
        original = pdist(np.asarray(self.full_matrix, dtype=float))
        reduced = pdist(np.asarray(self.reduced_matrix, dtype=float))
        if original.shape != reduced.shape:
            raise ValueError(
                "full_matrix and reduced_matrix must have the same number of rows"
            )

        total = original.sum()
        if total == 0:
            # No non-zero original distance: the normalising constant is undefined.
            return float('nan')

        # Pairs whose original distance is zero (duplicate rows) would divide by
        # zero and turn the whole stress into nan/inf; they are left out of the sum.
        nonzero = original > 0
        residual = original[nonzero] - reduced[nonzero]
        return np.sum(residual ** 2 / original[nonzero]) / total

In [3]:
class GA():
    """Genetic algorithm that picks ``n_feat`` out of ``n_dim`` columns.

    A chromosome is a vector of ``n_dim`` real keys in [0, 1).  The columns
    whose keys are the ``n_feat`` largest are the selected features, and the
    fitness of a chromosome is ``costFunc(data, data[:, selected]).value_of_stress``
    (lower is better).

    The whole search runs inside the constructor; its per-generation log is
    stored in ``result_list``.

    Parameters
    ----------
    costFunc : callable
        Called as ``costFunc(full_data, reduced_data)``; the returned object
        must expose a ``value_of_stress`` attribute.
    n_dim : int
        Chromosome length (number of candidate columns).
    n_feat : int
        Number of columns to keep (must be >= 1).
    data_set : pandas.DataFrame or 2-D array-like
        Data whose columns are being selected.
    num_population : int
        Number of chromosomes per generation (must be >= 1).
    maxiter : int
        Number of generations.
    r_cross : float
        Probability that a pair of parents is recombined.
    r_mut : float
        Per-gene mutation probability.

    Raises
    ------
    ValueError
        If ``num_population`` or ``n_feat`` is smaller than 1.
    """

    def __init__(self, costFunc, n_dim, n_feat, data_set, num_population, maxiter, r_cross = 0.6, r_mut = 0.6):
        if num_population < 1:
            raise ValueError("num_population must be at least 1")
        if n_feat < 1:
            # With n_feat == 0 the slice [-0:] would silently select every column.
            raise ValueError("n_feat must be at least 1")

        self.n_dim = n_dim
        self.n_feat = n_feat
        self.data = data_set
        self.n_pop = num_population
        self.result_list = self.genetic_algorithm(costFunc, n_dim, maxiter, num_population, r_cross, r_mut)

    @staticmethod
    def selection(pop, scores, k=3):
        """Tournament selection: return the best of ``k`` random members of ``pop``.

        "Best" means the lowest entry in ``scores``; members are drawn with
        replacement.
        """
        selection_ix = np.random.randint(len(pop))
        for ix in np.random.randint(0, len(pop), k-1):
            if scores[ix] < scores[selection_ix]:
                selection_ix = ix
        return pop[selection_ix]

    @staticmethod
    def crossover(p1, p2, r_cross):
        """One-point crossover of two parents.

        With probability ``r_cross`` the parents swap tails at a random cut
        point; otherwise the children are copies of the parents.  Parents
        shorter than two genes have no interior cut point and are always
        copied.

        Returns
        -------
        list of two numpy.ndarray
            The children; the parents are never modified.
        """
        c1, c2 = p1.copy(), p2.copy()
        n_genes = len(p1)
        if n_genes >= 2 and np.random.rand() < r_cross:
            # randint's upper bound is exclusive, so this yields 1 .. n_genes-1:
            # every cut that leaves at least one gene on each side.
            pt = np.random.randint(1, n_genes)
            c1 = list(p1[:pt]) + list(p2[pt:])
            c2 = list(p2[:pt]) + list(p1[pt:])
        return [np.array(c1), np.array(c2)]

    @staticmethod
    def mutation(chromosome, r_mut=0.5):
        """Mutate ``chromosome`` in place and return it.

        Each gene is, with probability ``r_mut``, shifted by a uniform random
        offset and wrapped back into [0, 1).
        """
        for i in range(len(chromosome)):
            if np.random.rand() < r_mut:
                chromosome[i] = (chromosome[i] + np.random.random()) % 1
        return chromosome

    def evaluate(self, costFunc, chromosome):
        """Return the cost of the feature subset encoded by ``chromosome``."""
        # Columns holding the n_feat largest keys are the selected features.
        indices = np.argsort(chromosome)[-self.n_feat:]
        if hasattr(self.data, "iloc"):
            reduced_matrix = self.data.iloc[:, indices]
        else:
            reduced_matrix = np.asarray(self.data)[:, indices]
        return costFunc(self.data, reduced_matrix).value_of_stress

    def genetic_algorithm(self, costFunc, n_bits, n_iter, n_pop, r_cross, r_mut):
        """Run the search and return one log entry per generation.

        Parameters
        ----------
        costFunc : callable
            See the class docstring.
        n_bits : int
            Chromosome length.
        n_iter : int
            Number of generations.
        n_pop : int
            Population size (odd sizes are supported).
        r_cross, r_mut : float
            Crossover and per-gene mutation probabilities.

        Returns
        -------
        list of tuple
            ``(n_feat, generation, best_chromosome, best_error, elapsed)`` for
            every generation, where ``elapsed`` is a ``datetime.timedelta``.
        """
        results = list()
        population = [
            np.array([random.uniform(0, 1) for _ in range(n_bits)])
            for _ in range(n_pop)
        ]

        # Start from a real chromosome so the reported best is always a valid
        # solution, even when no later candidate is strictly better.
        population_best = population[0]
        error_best = self.evaluate(costFunc, population_best)

        for gen in range(n_iter):
            t1 = datetime.now()
            print(t1)

            scores = [self.evaluate(costFunc, c) for c in population]
            for candidate, score in zip(population, scores):
                if score < error_best:
                    population_best, error_best = candidate, score

            selected = [self.selection(population, scores) for _ in range(n_pop)]

            children = list()
            for i in range(0, n_pop, 2):
                # Wrap around so an odd-sized population still forms a final pair.
                p1, p2 = selected[i], selected[(i + 1) % n_pop]
                for c in self.crossover(p1, p2, r_cross):
                    children.append(self.mutation(c, r_mut))
            print(population_best, error_best)

            # Pairs always yield two children; trim the surplus for odd n_pop.
            population = children[:n_pop]
            t2 = datetime.now()
            print(t2)
            delta_t = t2-t1
            print(delta_t)
            results.append((self.n_feat, gen, population_best, error_best, delta_t))
        return results

In [None]:
# Run configuration: replace every None below before executing this cell.
n_iterations = None        # number of iterations (GA generations)
n_particles = None         # number of population (chromosomes per generation)
number_of_features = None  # initial number of features (chromosome length)
n_features = None          # desired number of features to keep
repo = None                # matrix representing the repository (pandas DataFrame)

_unset = [
    name
    for name, value in (
        ("n_iterations", n_iterations),
        ("n_particles", n_particles),
        ("number_of_features", number_of_features),
        ("n_features", n_features),
        ("repo", repo),
    )
    if value is None
]
if _unset:
    raise ValueError("Set these values before running the search: " + ", ".join(_unset))

# GA is constructed directly: no `pool` is defined in the visible cells, and
# Pool.apply blocks until its single task has finished, so routing one run
# through a worker process would add pickling overhead without any parallelism.
results = [GA(*row) for row in [(SammonError, number_of_features, n_features,
                                 repo, n_particles, n_iterations)]]