In [39]:
%cd E:\DM\sample data
import numpy as np
import pandas as pd
import pyodbc
import datetime
from time import gmtime, strftime

E:\DM\sample data


In [2]:
# -*- coding: utf-8 -*-
"""
    pyeasyga module
"""

import random
import copy
from operator import attrgetter
import numpy as np
from time import gmtime, strftime
import matplotlib.pyplot as plt

from six.moves import range


class GeneticAlgorithm(object):
    """Genetic Algorithm class.
    This is the main class that controls the functionality of the Genetic
    Algorithm.
    A simple example of usage:
    >>> # Select only two items from the list and maximise profit
    >>> from pyeasyga.pyeasyga import GeneticAlgorithm
    >>> input_data = [('pear', 50), ('apple', 35), ('banana', 40)]
    >>> easyga = GeneticAlgorithm(input_data)
    >>> def fitness (member, data):
    >>>     return sum([profit for (selected, (fruit, profit)) in
    >>>                 zip(member, data) if selected and
    >>>                 member.count(1) == 2])
    >>> easyga.fitness_function = fitness
    >>> easyga.run()
    >>> print easyga.best_individual()
    """

    def __init__(self,
                 seed_data,
                 meta_data,
                 population_size=50,
                 generations=100,
                 crossover_probability=0.8,
                 mutation_probability=0.2,
                 elitism=True,
                 maximise_fitness=True):
        """Instantiate the Genetic Algorithm.
        :param seed_data: input data to the Genetic Algorithm
        :type seed_data: list of objects
        :param int population_size: size of population
        :param int generations: number of generations to evolve
        :param float crossover_probability: probability of crossover operation
        :param float mutation_probability: probability of mutation operation
        """

        self.seed_data = seed_data
        self.meta_data = meta_data
        self.population_size = population_size
        self.generations = generations
        self.crossover_probability = crossover_probability
        self.mutation_probability = mutation_probability
        self.elitism = elitism
        self.maximise_fitness = maximise_fitness

        self.current_generation = []

        def create_individual(data,meta_data):  
            individual = data[:]
            for col in individual.columns :                  
                individual[col] = np.random.choice(meta_data.index.values.tolist(),
                                                   size=len(individual))
            return individual
        
        def create_individual_elitism(data,meta_data, count):  
            individual = data[:]
            
            row, col = individual.shape
            if (count==0 and self.elitism):
                individual = data[:]                                                          
            else:
                for r in range(row):
                    crossover_index = (random.randrange(1, col - 1))
                    colt = crossover_index
                    individual.iloc[r] = np.append(individual.iloc[r, colt:],
                                                   individual.iloc[r, :colt]) 
#                print('else: ')
#                print(individual.loc[101,1])
            return individual



        def crossover(parent_1, parent_2):                       
            child_1, child_2 = parent_1, parent_2
            row, col = parent_1.shape
            for r in range(row):
                crossover_index = (random.randrange(1, col - 1))
                colt = crossover_index
                child_1.iloc[r] = np.append(parent_1.iloc[r, :colt],parent_2.iloc[r, colt:])    
                child_2.iloc[r] = np.append(parent_1.iloc[r , colt:],parent_2.iloc[r, :colt])            
            return child_1, child_2

        def mutate(individual):
            parent = individual
            row , col = parent.shape
            shift_list = np.flip(meta_data.index.values.tolist())
            for r in range(row):
                mutate_index1 = random.randrange(1, col)
                mutate_index2 = random.randrange(1, col)                
                parent.iloc[r][mutate_index1] = np.random.choice(shift_list,
                                                 p=[0.0,0.0,0.0,
                                                    0.05,0.15,0.35,0.45],
                                                 size=1)
                parent.iloc[r][mutate_index2] = np.random.choice(shift_list,
                                                 p=[0.0,0.0,0.0,
                                                    0.05,0.15,0.35,0.45],
                                                 size=1)
        

        def random_selection(population):
            """Select and return a random member of the population."""
            return random.choice(population)

        def tournament_selection(population):
            """Select a random number of individuals from the population and
            return the fittest member of them all.
            """
            if self.tournament_size == 0:
                self.tournament_size = 2
            members = random.sample(population, self.tournament_size)
            members.sort(
                key=attrgetter('fitness'), reverse=self.maximise_fitness)
            return members[0]

        self.fitness_function = None
        self.tournament_selection = tournament_selection
        self.tournament_size = self.population_size // 10
        self.random_selection = random_selection
        self.create_individual = create_individual_elitism
        self.crossover_function = crossover
        self.mutate_function = mutate
        self.selection_function = self.tournament_selection

    def create_initial_population(self):
        """Create members of the first population randomly.
        """
        initial_population = []
        for i in range(self.population_size):
            genes = self.create_individual(self.seed_data,self.meta_data, i)            
            individual = Chromosome(genes)
            initial_population.append(individual)
        self.current_generation = initial_population

    def calculate_population_fitness(self):
        """Calculate the fitness of every member of the given population using
        the supplied fitness_function.
        """
        for individual in self.current_generation:
            individual.fitness = self.fitness_function(
                individual.genes, self.meta_data)

    def rank_population(self):
        """Sort the population by fitness according to the order defined by
        maximise_fitness.
        """
        self.current_generation.sort(
            key=attrgetter('fitness'), reverse=self.maximise_fitness)
        print('best cost: ' + str(self.current_generation[0].fitness))
        

    def create_new_population(self):
        """Create a new population using the genetic operators (selection,
        crossover, and mutation) supplied.
        """
        new_population = []
        elite = copy.deepcopy(self.current_generation[0])
        selection = self.selection_function

        while len(new_population) < self.population_size:
            parent_1 = copy.deepcopy(selection(self.current_generation))
            parent_2 = copy.deepcopy(selection(self.current_generation))

            child_1, child_2 = parent_1, parent_2
            child_1.fitness, child_2.fitness = 0, 0

            can_crossover = random.random() < self.crossover_probability
            can_mutate = random.random() < self.mutation_probability

            if can_crossover:
                child_1.genes, child_2.genes = self.crossover_function(
                    parent_1.genes, parent_2.genes)

            if can_mutate:
                self.mutate_function(child_1.genes)
                self.mutate_function(child_2.genes)

            new_population.append(child_1)
            if len(new_population) < self.population_size:
                new_population.append(child_2)

        if self.elitism:
            new_population[0] = elite

        self.current_generation = new_population

    def create_first_generation(self):
        """Create the first population, calculate the population's fitness and
        rank the population by fitness according to the order specified.
        """
        self.create_initial_population()
        self.calculate_population_fitness()
        self.rank_population()

    def create_next_generation(self):
        """Create subsequent populations, calculate the population fitness and
        rank the population by fitness in the order specified.
        """
        self.create_new_population()
        self.calculate_population_fitness()
        self.rank_population()

    def run(self):
        """Run (solve) the Genetic Algorithm."""
        print('start: '+ strftime("%Y-%m-%d %H:%M:%S:%SS", gmtime()))
        self.create_first_generation()
        lagr_t = 0.0001
        for g in range(1, self.generations):
            print('---------- Start ---------------')            
            print('generation-' +str(g) + ' -> start: '+ strftime("%Y-%m-%d %H:%M:%S:%SS", gmtime()))
            if (g>100):
                self.crossover_probability = (g/self.generations)
                self.mutation_probability =  1.0 - (g/self.generations)
            self.create_next_generation()
            if (g/100 - g//100 == 0):
                csv_name = './output/out_GA_' + str(g/100) + '.csv'
                self.current_generation[0].genes.to_csv(csv_name)
            print('----------- End ----------------')

    def best_individual(self):
        """Return the individual with the best fitness in the current
        generation.
        """
        best = self.current_generation[0]        
        return (best.fitness, best.genes)

    def last_generation(self):
        """Return members of the last generation as a generator function."""
        return ((member.fitness, member.genes) for member
                in self.current_generation)


class Chromosome(object):
    """ Chromosome class that encapsulates an individual's fitness and solution
    representation.
    """
    def __init__(self, genes):
        """Initialise the Chromosome."""
        self.genes = genes
        self.fitness = 0

    def __repr__(self):
        """Return initialised Chromosome representation in human readable form.
        """
        return repr((self.fitness, self.genes))

In [32]:
chromosom_df = pd.read_csv('last.csv')
personnel_df = pd.read_csv('personnel.csv')
shift_df = pd.read_csv('shifts.csv')
day_req_df = pd.read_csv('WorkSectionRequirements.csv')
is_new = 0

In [33]:
chromosom_df = chromosom_df.merge(personnel_df, 
                                  left_on='PersonnelBaseId', 
                                  right_on='PersonnelBaseId', 
                                  how='inner')

 
chromosom_df = pd.pivot_table(chromosom_df, values='ShiftCode', 
                              index=['PersonnelBaseId',
                                      'prs_typ_id',
                                      'EfficiencyRolePoint',
                                      'RequirementWorkMins_esti'                                                                           
                                    ],
                              columns=['Day'], aggfunc=np.max)

In [35]:
# ----------------------- set personnel_df -----------------------------------#
personnel_df = personnel_df.set_index('PersonnelBaseId')
personnel_df['DiffNorm'] = 0
# ----------------------- set shift_df ---------------------------------------#
shift_df = shift_df.set_index('ShiftCode')
# ----------------------- set day_req_df -------------------------------------#
day_req_df = day_req_df.set_index(['Day','prs_typ_id','ShiftTypeID'])
day_req_df['day_diff_typ'] = 0
day_count =len(day_req_df.groupby(axis=0, level=0, as_index=True).count())
# -----------------------Randomize gene---------------------------------------#
if (is_new):      
    shift_list = np.flip(shift_df.index.values.tolist())   
    for prs in chromosom_df.index :       
        chromosom_df.loc[prs] = np.random.choice(shift_list,
                                                 p=[1/14,1/14,1/14,
                                                    1/14,2/14,3/14,5/14],
                                                 size=len(chromosom_df.columns))
     
# ---------------------- calcute typid_req_day---------------------------------------#
req_day = day_req_df.reset_index()
typid_req_day = req_day.groupby(['Day','prs_typ_id','ShiftTypeID']).agg(
                ReqMinCount = pd.NamedAgg(column='ReqMinCount', 
                                          aggfunc='sum'),
                ReqMaxCount = pd.NamedAgg(column='ReqMaxCount', 
                                          aggfunc='sum')
                )
typid_req_day['ReqMean'] = (typid_req_day['ReqMaxCount'] + 
                            typid_req_day['ReqMinCount'])/2   
# ---------------------- Calcute diff require and resource--------------------# 
                    #---------------sum_typid_req---------------#
sum_typid_req = typid_req_day.reset_index()          
sum_typid_req = sum_typid_req.groupby('prs_typ_id').agg(
            req_min  = pd.NamedAgg(column='ReqMinCount', 
                                          aggfunc='sum'), 
            req_max = pd.NamedAgg(column='ReqMaxCount', 
                                          aggfunc='sum'),
            req_mean= pd.NamedAgg(column='ReqMean', 
                                          aggfunc='sum'),            
        )
sum_typid_req = sum_typid_req[:]*480
                     #--------------sum_typid_prs----------------#
sum_typid_prs = personnel_df.groupby('prs_typ_id').agg(
            all_rec  = pd.NamedAgg(column='RequirementWorkMins_esti', 
                                          aggfunc='sum'), 
            count_prs = pd.NamedAgg(column='RequirementWorkMins_esti', 
                                          aggfunc='count'),
        )
                     #--------------sum_typid_prs----------------#
diff_req_rec = sum_typid_req.join(sum_typid_prs,how='inner')                   
diff_req_rec['diff_min'] = (diff_req_rec['req_min'] - 
                            diff_req_rec['all_rec'] )/diff_req_rec['count_prs'] 
diff_req_rec['diff_max'] = (diff_req_rec['req_max'] - 
                            diff_req_rec['all_rec'] )/diff_req_rec['count_prs'] 
diff_req_rec['diff_mean'] = (diff_req_rec['req_mean'] - 
                            diff_req_rec['all_rec'] )/diff_req_rec['count_prs']
#diff_req_rec = diff_req_rec.reset_index()

In [36]:
#------------------------fitness_day_const function for day-------------------# 
def calc_day_const (individual,meta_data):   
    df = individual           
    df = df[df['Length']>0].groupby(['Day',
                                     'prs_typ_id',
                                     'ShiftTypeID']).agg(
                        prs_count = pd.NamedAgg(column='Length', 
                                          aggfunc='count'), 
                        prs_points = pd.NamedAgg(column='EfficiencyRolePoint', 
                                          aggfunc='sum'),
                        )
    df = df.merge(meta_data, left_on=['Day','prs_typ_id','ShiftTypeID'], 
                  right_on=['Day','prs_typ_id','ShiftTypeID'], how='right') 
    df.fillna(0,inplace=True)
    df['diff_max'] = abs(df['prs_count'] - df['ReqMaxCount'])
    df['diff_min'] = abs(df['prs_count'] - df['ReqMinCount'])  
    df['diff'] = df[['diff_max','diff_min']].apply(np.min, axis=1)
    df['diff_norm'] = df['diff']/df['ReqMaxCount']
#    cost = np.mean(df['diff_norm'])
    df['diff_norm'] = df['diff_norm']**2
    cost = np.sum(df['diff_norm']) / len(df)
#    print('cost: ' + str(cost))
    return cost

#------------------------fitness_prs_const function---------------------------# 
def calc_prs_const (individual, meta_data):
    df = individual    
    df = df.groupby(['PersonnelBaseId',
                      'prs_typ_id',
                      'EfficiencyRolePoint',
                      'RequirementWorkMins_esti',                      
                     ]).sum().drop(columns=['ShiftCode', 'StartTime', 
                                            'EndTime', 'ShiftTypeID'])    
    df = df.reset_index()
    meta_data = meta_data.reset_index()
    df = df.merge(meta_data, left_on='prs_typ_id', right_on='prs_typ_id'
                  ,how='inner')
    
    df['diff'] = abs(df['RequirementWorkMins_esti'] + 
                     df['diff_min'] - df['Length'])         
    df['diff_norm'] = df['diff']/df['RequirementWorkMins_esti']
#    cost = np.mean(df['diff_norm'])    
    df['diff_norm'] = df['diff_norm']**2
    cost = np.sum(df['diff_norm']) / len(df)
#    print('cost: ' + str(cost))
    return cost 
# ----------------------- fitness all ----------------------------------------#
def fitness (individual, meta_data):
    sht = shift_df.reset_index()
    sht_2 = sht[sht['ShiftCode']>10]
    sht_2['Length'] = sht_2['Length'] // 2
    sht_2['ShiftTypeID'] = sht_2['ShiftTypeID'] // 10
    sht_2.index = [7,8,9]
    sht['Length'] = sht['Length'] // 2
    sht['ShiftTypeID'] = sht['ShiftTypeID'] % 10
    sht = sht.append(sht_2)
    #sht[sht['ShiftCode']>10]
    df = pd.melt(individual.reset_index(), 
                 id_vars=['PersonnelBaseId',
                          'prs_typ_id',
                          'EfficiencyRolePoint',
                          'RequirementWorkMins_esti',
                          
                         ],
                 var_name='Day', 
                 value_name='ShiftCode')
    df = df.merge(sht, left_on='ShiftCode', right_on='ShiftCode', how='inner')
    day_const = 0.8*calc_day_const(df, typid_req_day)
    prs_const = 0.2*calc_prs_const(df, diff_req_rec)
    cost = day_const + prs_const
    return cost

In [40]:
# -----------------------Define GA--------------------------------------------#   
ga = GeneticAlgorithm( seed_data=chromosom_df,
                          meta_data=shift_df,
                          population_size=50,
                          generations=20,
                          crossover_probability=0.8,
                          mutation_probability=0.2,
                          elitism=True,
                          maximise_fitness=False)
 
 # -----------------------run ga----------------------------------------------# 
ga.fitness_function = fitness         # set the GA's fitness function
start_time = datetime.datetime.now()
ga.run()                                    # run the GA
start_time = datetime.datetime.now()
sol_fitness, sol_df = ga.best_individual()

#########################################################
sht = shift_df.reset_index()
sht_2 = sht[sht['ShiftCode']>10]
sht_2['Length'] = sht_2['Length'] // 2
sht_2['ShiftTypeID'] = sht_2['ShiftTypeID'] // 10
sht_2.index = [7,8,9]
sht['Length'] = sht['Length'] // 2
sht['ShiftTypeID'] = sht['ShiftTypeID'] % 10
sht = sht.append(sht_2)
df = pd.melt(sol_df.reset_index(), 
             id_vars=['PersonnelBaseId',
                      'prs_typ_id',
                      'EfficiencyRolePoint',
                      'RequirementWorkMins_esti',
                     
                     ],
             var_name='Day', 
             value_name='ShiftCode')
df = df.merge(sht, left_on='ShiftCode', right_on='ShiftCode', how='inner')
#######################################################
prs_cons = df.groupby(['PersonnelBaseId',
                      'prs_typ_id',
                      'EfficiencyRolePoint',
                      'RequirementWorkMins_esti',
                      
                     ]).sum().drop(columns=['ShiftCode', 'StartTime', 
                                            'EndTime', 'ShiftTypeID'])
prs_cons = prs_cons.reset_index(level=3)
prs_cons['diff'] = (prs_cons['RequirementWorkMins_esti'] - prs_cons['Length'])
#########################################################3

day_cons = df[df['Length']>0].groupby(['Day',
                                       'prs_typ_id',
                                       'ShiftTypeID']).agg(
                              prs_count = pd.NamedAgg(column='Length', 
                                          aggfunc='count'), 
                              prs_points = pd.NamedAgg(column='EfficiencyRolePoint', 
                                          aggfunc='sum'),
                            )
                              
day_cons = day_cons.merge(typid_req_day, 
                          left_on=['Day','prs_typ_id','ShiftTypeID'], 
                          right_on=['Day','prs_typ_id','ShiftTypeID'], 
                          how='right') 
day_cons.fillna(0,inplace=True)            
day_cons['diff_max'] = abs(day_cons['prs_count'] - day_cons['ReqMaxCount'])
day_cons['diff_min'] = abs(day_cons['prs_count'] - day_cons['ReqMinCount'])  
day_cons['diff'] = day_cons[['diff_max','diff_min']].apply(np.min, axis=1) 
day_cons.sort_index(axis=0, level=[0,1,2], ascending=True, inplace=True)

start: 2019-11-05 15:58:21:21S


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


best cost: 0.15117392241528005
---------- Start ---------------
generation-1 -> start: 2019-11-05 15:58:27:27S
best cost: 0.1080436954140853
----------- End ----------------
---------- Start ---------------
generation-2 -> start: 2019-11-05 15:58:33:33S
best cost: 0.10663982731595369
----------- End ----------------
---------- Start ---------------
generation-3 -> start: 2019-11-05 15:58:38:38S
best cost: 0.0945543047192709
----------- End ----------------
---------- Start ---------------
generation-4 -> start: 2019-11-05 15:58:44:44S
best cost: 0.0945543047192709
----------- End ----------------
---------- Start ---------------
generation-5 -> start: 2019-11-05 15:58:52:52S
best cost: 0.08760398062270679
----------- End ----------------
---------- Start ---------------
generation-6 -> start: 2019-11-05 15:58:59:59S
best cost: 0.08231314481143201
----------- End ----------------
---------- Start ---------------
generation-7 -> start: 2019-11-05 15:59:08:08S
best cost: 0.081538167650734

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [44]:
day_cons.to_csv('./output/day_cons.csv')
prs_cons.to_csv('./output/prs_cons.csv')
sol_df.to_csv('./output/sol_df.csv')

In [43]:
diff_req_rec

Unnamed: 0_level_0,req_min,req_max,req_mean,all_rec,count_prs,diff_min,diff_max,diff_mean
prs_typ_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,119040,133920,126480.0,75710,7,6190.0,8315.714286,7252.857143
2,29760,59520,44640.0,21580,2,4090.0,18970.0,11530.0
