# In [None]:

# Import all the required packages
import sys
import time
import warnings
from random import choices

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Silence all warnings unless the interpreter was started with explicit -W options
if not sys.warnoptions:
    warnings.simplefilter("ignore")

#####                                                    #####
##### ===== Task 1: Data Reading and Preprocessing ===== #####
#####                                                    #####

# Load the raw file and flip it so that every row is a job and every column a machine
url = "dataset.csv"
data = pd.read_csv(url).T

# Label the columns M1, M2, ... so it is obvious they are machine numbers
# (the transposed column labels are expected to be integers)
cols = [F'M{i+1}' for i in data.columns]
data.columns = cols

# Inspecting the data types
data.head()
data.info()

# Reporting how many values are missing and how many are negative
print(F"\nThere are {data.isna().sum().sum()} missing values")
print(F"\nThere are {data[data < 0].count().sum()} negative values")

# Total processing time of each job (sum over all machine columns)
jobTotals = data.apply(np.sum, axis = 1)
data['totalTime'] = jobTotals

# Finding outliers by using the Q1 - 1.5*IQR and Q3 + 1.5*IQR rule
# Calculating outliers threshold
def calOutliers(dataCol):
    """Return the (lower, upper) outlier thresholds of ``dataCol``.

    The thresholds follow the 1.5 * IQR rule: ``Q1 - 1.5 * IQR`` and
    ``Q3 + 1.5 * IQR``, where both quartiles are computed with the
    'midpoint' interpolation of ``quantile``.

    Parameters:
        dataCol: object exposing ``quantile(q, interpolation=...)``;
            the script passes a pandas Series.

    Returns:
        Tuple ``(lowerThreshold, upperThreshold)`` of Python floats.
    """
    def quartile(q):
        # Same interpolation for both quartiles
        return dataCol.quantile(q, interpolation='midpoint')

    firstQuartile, thirdQuartile = quartile(.25), quartile(.75)
    fence = 1.5 * (thirdQuartile - firstQuartile)

    return float(firstQuartile - fence), float(thirdQuartile + fence)

# Outlier thresholds of the per-job total time; `lower` and `upper` are read
# later by plot() and by the outlier filter
totalTimeThresholds = calOutliers(data['totalTime'])
lower, upper = totalTimeThresholds
print(F"\nOutliers lie below {lower} and above {upper}")


# Function to draw a boxplot and histogram
def plot(data, bins=None, title=None, stat=None):
    """Draw a boxplot stacked above a histogram (with KDE) of ``data``.

    Both panels share the x-axis. The histogram panel additionally shows the
    mean of ``data`` (dashed red line) and the module-level ``lower`` /
    ``upper`` outlier thresholds (solid red lines).

    Parameters:
        data: one-dimensional numeric data (the script passes a pandas Series).
        bins: histogram bin specification forwarded to ``sns.histplot``;
            ``None`` falls back to seaborn's default ``'auto'``.
        title: figure title, or ``None`` for no title text.
        stat: histogram statistic forwarded to ``sns.histplot``;
            ``None`` falls back to seaborn's default ``'count'``.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # seaborn does not accept None for these arguments, so map the signature
    # defaults onto seaborn's own defaults instead of failing
    if bins is None:
        bins = 'auto'
    if stat is None:
        stat = 'count'

    # setting the style
    sns.set(style='ticks')

    # drawing two subplots: a thin boxplot panel on top of the histogram panel
    fig, (ax_boxplot, ax_hist) = plt.subplots(
        2, sharex=True, gridspec_kw={"height_ratios": (.15, .85)}, figsize=(9, 8))

    # Passing the values as `x` keeps the box horizontal (so it lines up with
    # the shared x-axis) regardless of how a given seaborn release interprets
    # the first positional argument.
    # NOTE(review): whis=1.55 differs slightly from the 1.5 factor used by
    # calOutliers -- confirm this is intentional.
    sns.boxplot(x=data, ax=ax_boxplot, whis=1.55)
    sns.histplot(data, ax=ax_hist, kde=True, stat=stat, bins=bins)

    # Drawing the mean (computed once, used for both the line and its label)
    meanValue = np.mean(data)
    ax_hist.axvline(meanValue, linestyle='dashed', color='r')
    ax_hist.text(meanValue, 20, 'mean', color='r', rotation=270)

    # `lower` and `upper` are module-level thresholds; they are only read
    # here, so no `global` declaration is required.
    # NOTE(review): the lines are drawn 0.5 to the right of the thresholds and
    # the labels 10 to the right -- presumably for readability; confirm.
    ax_hist.axvline(lower + 0.5, color='r')
    ax_hist.text(lower + 10, 20, 'lower boundary', color='r', rotation=270)

    ax_hist.axvline(upper + 0.5, color='r')
    ax_hist.text(upper + 10, 20, 'upper boundary', color='r', rotation=270)
    fig.suptitle(title, fontsize=16)

    plt.show()

# Box plot and histogram before removing the outliers
plot(data['totalTime'], bins=50, title = 'Outliered Jobs based on Total Time', stat='count')

# Flag the jobs whose total time lies outside the [lower, upper] thresholds
isOutliers = (data['totalTime'] < lower) | (data['totalTime'] > upper)
outliers = data[isOutliers]
# Explicit copy: the kept jobs are modified below, and writing into a slice of
# the original frame is not guaranteed to behave consistently
data = data[~isOutliers].copy()

print(F"\nThere are {len(outliers.index)} outliers.")
print(F"\nThere are {len(data.index)} jobs left after removing outliers.")

# Plotting the result after the removal of the outliers
plot(data['totalTime'], bins=50,  stat='count', title = 'After Removal of Outliered Jobs')

# Printing out the outlier jobs and their total processing time
print('\nThe jobs removed as outliers and their total time:\n',
      pd.DataFrame(outliers['totalTime']))

# Setting all the negative values to NaN
data[data < 0] = np.nan

# Calculating the average processing time of each job
# (the mean of a pandas row skips the NaN entries)
data['averageTime'] = data.drop(columns='totalTime').apply(
    lambda x: np.round(np.mean(x), 2), axis=1)
data.head()

# Filling negative and missing values with the average processing time of the
# same job. The result is assigned back to `data`: an in-place fillna on the
# temporary `data.T` would modify the transposed object rather than `data`.
data = data.apply(lambda job: job.fillna(job['averageTime']), axis=1)

# Checking the preprocessed data
data.head()
data.info()

#####                                                                     #####
##### ===== Task 2 : create 3 samples of jobs with n=10, 50 and 100 ===== #####
#####                                                                     #####

# Removing 'totalTime', 'averageTime' columns for further processing
data.drop(columns=['totalTime', 'averageTime'], inplace=True)
data.head()

# Creating 3 sample sets randomly: sample_1 (10 jobs), sample_2 (50 jobs), sample_3 (100 jobs)
sample_1 = data.sample(10, random_state=42)
sample_2 = data.sample(50, random_state=42)
sample_3 = data.sample(100, random_state=42)

#####                                   #####
##### ==== Task 3 : Random Search ===== #####
#####                                   #####
  
# Function to calculate the makespan (from evaluation.py)
def calMakespan(s, P):
    """Return the makespan of processing the jobs of ``P`` in the order ``s``.

    Parameters:
        s: sequence of row positions into ``P`` (the job order).
        P: 2-D array of processing times, indexed as ``P[job, machine]``.

    Returns:
        The completion time of the last job of the sequence on the last
        machine (the bottom-right entry of the completion-time matrix).
    """
    # Rows of P rearranged into the requested order; this matrix is then
    # overwritten in place with completion times.
    # NOTE(review): relies on `s` being an index array so that the indexing
    # yields a copy and the caller's P stays untouched -- confirm for new callers.
    completion = P[s, :]
    numJobs, numMachines = completion.shape

    # Eq 1: the first job runs through the machines without waiting
    completion[0, :] = np.cumsum(completion[0, :])

    # Eq 2: on the first machine the jobs simply run back to back
    completion[:, 0] = np.cumsum(completion[:, 0])

    # Eq 3: every other operation starts once both the previous job on the
    # same machine and the same job on the previous machine are finished;
    # completion[job, machine] still holds the raw processing time here
    for job in range(1, numJobs):
        for machine in range(1, numMachines):
            readyTime = np.maximum(completion[job - 1, machine],
                                   completion[job, machine - 1])
            completion[job, machine] += readyTime

    # makespan: completion of the last job on the last machine
    return completion[-1, -1]

# Random Search function with parameters n: N(sample size)*1000,
# df: dataset that should be optimized, and seed for initializing random variables
def RandomSearch(n, df, seed=None):
    """Random search for a job order with a small makespan.

    One random permutation is evaluated as the starting solution, then ``n``
    further random permutations are evaluated and the best one is kept.

    Parameters:
        n: number of additional random sequences to evaluate.
        df: DataFrame of processing times, one row per job; the row labels
            are used to name the jobs in the returned sequence.
        seed: value passed to ``np.random.seed`` before any random draw.

    Returns:
        Tuple ``(jobs, makespanBest)``: the list of job labels (taken from
        ``df.index``) in the best order found, and the makespan of that order.
    """
    # Converting the DataFrame into a NumPy array, so it can be used by the makespan function
    dataArray = np.array(df)

    # Number of jobs
    N = dataArray.shape[0]

    # Starting solution: one random permutation of the job positions
    np.random.seed(seed=seed)
    sBest = np.random.permutation(N)
    makespanBest = calMakespan(sBest, dataArray)

    for _ in range(n):
        # Randomly choosing a new sequence and calculating its makespan
        s = np.random.permutation(N)
        makespanThis = calMakespan(s, dataArray)

        # Keep the candidate only when it is strictly better (lower makespan)
        if makespanThis < makespanBest:
            makespanBest = makespanThis
            sBest = s

    # Translate row positions back to job labels directly through the index.
    # This works whether or not the index is named, keeps the labels' own
    # type, and avoids rebuilding the frame once per job.
    jobs = list(df.index[sBest])

    return jobs, makespanBest


# Function to run Random Search for each dataset 30 times by default with different random seeds
def RunRandomSearch(dataset, numRun=30):
    """Run ``RandomSearch`` ``numRun`` times on ``dataset`` with seeds 0..numRun-1.

    Every run evaluates ``1000 * n`` random solutions, where ``n`` is the
    number of rows (jobs) of ``dataset``.

    Parameters:
        dataset: DataFrame of processing times, one row per job.
        numRun: number of independent runs (30 by default).

    Returns:
        DataFrame with one row per run and the columns 'jobSequence',
        'makespan', 'sampleSize' and 'timeTaken' (seconds).
    """
    # The sample size does not change between runs; computing it up front also
    # keeps it defined when numRun is 0
    n = len(dataset.index)

    timeT = []
    jobs = []
    makespans = []
    for i in range(numRun):
        start = time.time()
        # Calling RandomSearch function to test with n*1000 solutions
        job, makespan = RandomSearch(1000 * n, dataset, seed=i)
        # Measure once so the printed and the stored duration are the same value
        elapsed = time.time() - start

        jobs.append(job)
        makespans.append(makespan)
        timeT.append(elapsed)
        # Keep track of the runs and their time
        print(F"Run {i+1} completed in: {elapsed:.2f} seconds.")

    # Storing the result of each run
    result = pd.DataFrame({'jobSequence': jobs, 'makespan': makespans, 'sampleSize':n, 'timeTaken':timeT})

    return result

# Random Search results for the three samples (30 runs each), in sample order
RSresult_sample1, RSresult_sample2, RSresult_sample3 = [
    RunRandomSearch(sample, numRun=30)
    for sample in (sample_1, sample_2, sample_3)
]

# Function to return statistics for every result
def ReturnStats(dataset, name=None):
    """Summarise the 'makespan' column of ``dataset`` (mean, std, min, max).

    Parameters:
        dataset: object whose ``['makespan'].describe()`` yields the summary;
            the script passes either a result DataFrame (together with
            ``name``) or a groupby object (without ``name``).
        name: row label for the single-row table of a plain DataFrame; when
            omitted, the ``describe()`` output is kept in its own layout.

    Returns:
        DataFrame of the statistics rounded to 2 decimals, without the
        'count', '25%', '50%' and '75%' columns.
    """
    summary = np.round((dataset['makespan'].describe()), 2)
    table = pd.DataFrame(summary)

    if name is not None:
        # Initial results: turn the summary into one row labelled `name`
        table = table.T.rename(index={'makespan': name}).sort_values(by='mean')

    # Only mean / std / min / max are reported
    return table.drop(columns=['count', '50%', '25%', '75%'])

# One statistics row per Random Search result
table1, table2, table3 = [
    ReturnStats(result, name=label)
    for result, label in (
        (RSresult_sample1, 'RS_Sample_10'),
        (RSresult_sample2, 'RS_Sample_50'),
        (RSresult_sample3, 'RS_Sample_100'),
    )
]

# Stack the three rows and order them by mean makespan
RandSearchAll = pd.concat([table1, table2, table3], axis=0)
RandSearchAll = RandSearchAll.sort_values(by='mean')
print('\nStatistical Table for Random Search algorithm results for each size problem:\n', RandSearchAll)


#####                                       ##### 
##### ==== Task 4 : Genetic Algorithm ===== #####
#####                                       ##### 

## The Genetic Algorithm function uses the first algorithm described in the coursework
## (1-point crossover with exchange mutation) and takes the following parameters:
## df: dataset (DataFrame) that should be optimized
## P: population size (default value = 30)
## Pc: probability of crossover (default value = 1)
## Pm_init: initial probability of mutation (default value = 0.8)
## D: threshold (default value = 0.95)
## seed: the seed used to initialize the random variables
## seval: multiplier used to set the number of solution evaluations (N * seval)

# Genetic Algorithm Function with default parameters P (population size), Pc, Pm_init
# and D; seed and seval(from solution evaluations calculus)
def GeneticAlgorithm(df, P=30, Pc=1, Pm_init=0.8, D=0.95, seed=None, seval=1000):
    """Search for a job order with a small makespan using a genetic algorithm.

    Each of the ``N * seval`` iterations creates two offspring (1-point
    crossover followed by exchange mutation), evaluates them and lets them
    replace two members taken from the lower-fitness half of the population.

    Parameters:
        df: DataFrame of processing times, one row per job; the row labels
            name the jobs in the returned sequence.
        P: population size; must be at least 3 so that two different members
            can be replaced in every iteration.
        Pc: probability of applying crossover in an iteration.
        Pm_init: initial (and reset) probability of mutation.
        D: threshold on ``min(makespan) / mean(makespan)`` of the population;
            above it the mutation probability is reset to ``Pm_init``.
        seed: seed for both NumPy's and Python's global random generators.
        seval: multiplier of the number of jobs giving the iteration count.

    Returns:
        Tuple ``(jobs, makespanBest)``: the job labels (from ``df.index``) of
        the best sequence in the final population and its makespan.

    Raises:
        ValueError: if ``P < 3`` or ``df`` has fewer than 2 jobs.
    """
    import random  # local import: only needed to seed the generator behind `choices`

    # Decay factor applied to the mutation probability after every iteration
    theta = 0.99

    # `choices` draws from Python's `random` module while everything else uses
    # NumPy, so both generators must be seeded for a run to be reproducible
    np.random.seed(seed=seed)
    random.seed(seed)

    # Converting the DataFrame into a NumPy array, so it can be used by the makespan function
    dataArray = np.array(df)
    # Counting number of jobs
    N = dataArray.shape[0]

    if N < 2:
        raise ValueError("GeneticAlgorithm needs at least 2 jobs")
    if P < 3:
        # With fewer than 3 members the lower-fitness half never contains two
        # different positions, so the replacement step could not terminate
        raise ValueError("GeneticAlgorithm needs a population size P of at least 3")

    # Creating a dataframe with P sequences (permutations) of N jobs
    sequence = pd.DataFrame(np.array([np.random.permutation(N) for _ in range(P)]))
    # Calculating the makespans for all the sequences
    sequence['makespan'] = sequence.apply(lambda x: calMakespan(x, dataArray), axis=1)
    # Sorting by makespan in descending order: the best sequence is the last row
    sequence = sequence.sort_values(by="makespan", ascending=False, ignore_index=True)

    # Rank-based fitness: row position k (0 = worst) gets 2*(k+1) / (P*(P+1))
    sequence['fitness'] = (sequence.index+1) *2 / (P * (P + 1))

    # Fitness depends only on the row position and the population is re-sorted
    # after every iteration, so these selection weights stay valid throughout
    population = list(sequence.index)
    weights = sequence['fitness'].tolist()
    medianFitness = np.median(weights)

    # Setting the probability of mutation to the initial value
    Pm = Pm_init
    # The number of evaluations is N*seval (N*1000 by default)
    for _ in range(N*seval):
        # 1-point crossover section:
        if np.random.random() < Pc:
            # First parent by fitness rank
            chosen_fit = choices(population, weights)[0]
            parent_1 = np.array(sequence.iloc[chosen_fit, 0:N])

            # Second parent uniformly among the members that differ from the
            # first one; if the whole population has collapsed onto a single
            # sequence there is no such member, so reuse the first parent
            # instead of searching forever
            genomes = np.asarray(sequence.iloc[:, 0:N])
            differing = np.flatnonzero((genomes != parent_1).any(axis=1))
            if differing.size:
                parent_2 = genomes[np.random.choice(differing)].copy()
            else:
                parent_2 = parent_1.copy()

            # Choose one crossover point randomly
            crossover_point = np.random.randint(1,N)

            # Each offspring keeps the head of one parent (absolute positions)
            # and is completed with the jobs it is still missing, in the
            # order in which they appear in the other parent
            head_1 = parent_1[0:crossover_point]
            head_2 = parent_2[0:crossover_point]
            offspring_1 = np.concatenate((head_1, parent_2[~np.isin(parent_2, head_1)]))
            offspring_2 = np.concatenate((head_2, parent_1[~np.isin(parent_1, head_2)]))

        # No crossover (Pc < 1): the offspring are plain copies of two members
        else:
            chosen_fit = choices(population, weights)[0]
            offspring_1 = np.array(sequence.iloc[chosen_fit, 0:N])
            offspring_2 = np.array(sequence.iloc[np.random.randint(0,P), 0:N])

        # Exchange mutation section:
        if np.random.random() < Pm:
            # Choosing two different random positions
            pos_1 = np.random.randint(0,N)
            pos_2 = np.random.randint(0,N)
            while pos_2 == pos_1:
                pos_2 = np.random.randint(0,N)

            # Swapping the jobs at these positions in both offspring
            offspring_1[[pos_1, pos_2]] = offspring_1[[pos_2, pos_1]]
            offspring_2[[pos_1, pos_2]] = offspring_2[[pos_2, pos_1]]

        # Rows read from the mixed-type frame may come back as floats, so the
        # job positions are converted to integers before they are used as indices
        offspring_1 = offspring_1.astype(int)
        offspring_2 = offspring_2.astype(int)

        # Evaluating the sequences and calculating the makespans for each offspring
        makespan_1 = calMakespan(offspring_1, dataArray)
        makespan_2 = calMakespan(offspring_2, dataArray)

        # Choosing 2 different positions whose fitness is not above the median
        # (the worse makespans); the offspring will take their place
        old_pos_1 = choices(population,weights)[0]
        while weights[old_pos_1] > medianFitness:
            old_pos_1 = choices(population,weights)[0]

        old_pos_2 = choices(population,weights)[0]
        while (weights[old_pos_2] > medianFitness) or (old_pos_1 == old_pos_2):
            old_pos_2 = choices(population,weights)[0]

        # Replacing the two selected sequences with the offspring and their makespans
        sequence.iloc[old_pos_1,0:N] = offspring_1
        sequence.iloc[old_pos_1,N] = makespan_1
        sequence.iloc[old_pos_2,0:N] = offspring_2
        sequence.iloc[old_pos_2,N] = makespan_2

        # Re-sorting the population and recomputing the rank fitness for the next iteration
        sequence = sequence.sort_values(by = "makespan", ascending = False, ignore_index = True)
        sequence['fitness'] = (sequence.index+1) *2 / (P * (P + 1))

        # Decaying the probability of mutation
        Pm = theta*Pm

        # When best/mean makespan exceeds D, the mutation probability is reset
        if sequence['makespan'].min()/sequence['makespan'].mean() > D:
            Pm = Pm_init

    # The frame is sorted by descending makespan, so the best sequence and its
    # makespan are in the last row
    sBest = np.asarray(sequence.iloc[P-1,0:N]).astype(int)
    makespanBest = sequence.iloc[P-1,N]

    # Translate row positions back to job labels directly through the index
    jobs = list(df.index[sBest])

    return jobs, makespanBest

# Function to run Genetic Algorithm 30 times with default parameters
def RunGeneticAlgorithm(dataset, numRun=30, P=30, Pc=1, Pm_init=0.8, D=0.95, parameter=None, seval=1000):
    """Run ``GeneticAlgorithm`` ``numRun`` times on ``dataset`` with seeds 0..numRun-1.

    Parameters:
        dataset: DataFrame of processing times, one row per job.
        numRun: number of independent runs (30 by default).
        P, Pc, Pm_init, D, seval: forwarded unchanged to ``GeneticAlgorithm``.
        parameter: optional label stored in a 'parameter' column of the
            result, used to tell parameter settings apart.

    Returns:
        DataFrame with one row per run and the columns 'jobSequence',
        'makespan', 'sampleSize', ('parameter' when given) and 'timeTaken'
        (seconds).
    """
    # The sample size does not change between runs; computing it up front also
    # keeps it defined when numRun is 0
    n = len(dataset.index)

    timeT = []
    jobs = []
    makespans = []
    for i in range(numRun):
        start = time.time()
        # Calling Genetic Algorithm
        job, makespan = GeneticAlgorithm(dataset, P=P, Pc=Pc, Pm_init=Pm_init, D=D, seed=i, seval=seval)
        # Measure once so the printed and the stored duration are the same value
        elapsed = time.time() - start

        jobs.append(job)
        makespans.append(makespan)
        timeT.append(elapsed)
        # Helping to track the time it takes to process each run
        print(F"Run {i+1} completed in: {elapsed:.2f} seconds.")

    # Storing the result of each run; the 'parameter' column is only present
    # when a label was supplied and sits before 'timeTaken'
    columns = {'jobSequence': jobs, 'makespan': makespans, 'sampleSize': n}
    if parameter is not None:
        columns['parameter'] = parameter
    columns['timeTaken'] = timeT

    return pd.DataFrame(columns)

# Genetic Algorithm with the initial parameters, 30 runs per sample, in sample order
GAresult_sample1, GAresult_sample2, GAresult_sample3 = [
    RunGeneticAlgorithm(sample, numRun = 30)
    for sample in (sample_1, sample_2, sample_3)
]

# One statistics row per problem size
table11, table22, table33 = [
    ReturnStats(result, name=label)
    for result, label in (
        (GAresult_sample1, 'GA_Sample_10'),
        (GAresult_sample2, 'GA_Sample_50'),
        (GAresult_sample3, 'GA_Sample_100'),
    )
]

# Stacking the three rows, ordered by mean makespan
GA_initial = pd.concat([table11, table22, table33], axis=0)
GA_initial = GA_initial.sort_values(by='mean')
print('\nStatistical Table for Genetic Algorithm results for each size problem:\n', GA_initial)

# Random Search against Genetic Algorithm, one table per problem size
GA_RndSearch_sample1, GA_RndSearch_sample2, GA_RndSearch_sample3 = [
    pd.concat([randomSearchRow, geneticRow], axis=0).sort_values(by='mean')
    for randomSearchRow, geneticRow in (
        (table1, table11),
        (table2, table22),
        (table3, table33),
    )
]

# Printing the tables
print('\nStatistical Comparison of results between Random Search and Genetic Algorithms for problem size 10:\n', GA_RndSearch_sample1)
print('\nStatistical Comparison of results between Random Search and Genetic Algorithms for problem size 50:\n', GA_RndSearch_sample2)
print('\nStatistical Comparison of results between Random Search and Genetic Algorithms for problem size 100:\n', GA_RndSearch_sample3)



#####                                                                 ######  
##### ==== Task 5 : Genetic Algorithm with different parameters ===== ######
#####                                                                 ######  

#### Boxplot function to visualise the effect of changing parameters on the datasets
def result_plt(df, x=None, y=None, title = None):
    """Show a seaborn boxplot of column ``y`` of ``df`` grouped by column ``x``.

    Parameters:
        df: data passed to ``sns.boxplot`` as ``data``.
        x, y: column names for the category axis and the value axis.
        title: figure title.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # Same 'ticks' theme as the other figures of the script
    sns.set(style = 'ticks')

    figure, axes = plt.subplots(figsize = (15, 10))

    # Narrow boxes, one per category
    sns.boxplot(x=x, y=y, data=df, width=.3, ax=axes)
    figure.suptitle(title, fontsize=16)

    plt.show()
    
# Tag the Random Search results so they form their own category in the boxplots
RSresult_sample1['parameter']='RS'
RSresult_sample2['parameter']='RS'
RSresult_sample3['parameter']='RS'

# Creating lists with parameters to change
PList=[5,10,20,50,100]
PcList = [0, 0.5, 0.7, 0.9]
Pm_initList = [0, 0.2, 0.4, 0.6, 1]
DList = [0, 0.5, 1]


def _sweep_parameter(sample, arg_name, values, prefix):
    """Run the GA once per value of one parameter and stack the run tables.

    ``arg_name`` is the keyword of ``RunGeneticAlgorithm`` being varied; all
    other parameters keep their defaults. Every run table is labelled
    ``prefix + str(value)`` in its 'parameter' column.
    """
    # pd.concat replaces the row-wise DataFrame.append, which newer pandas
    # releases no longer provide
    runs = [
        RunGeneticAlgorithm(sample, parameter=prefix + str(value), **{arg_name: value})
        for value in values
    ]
    return pd.concat(runs, axis=0)


def _stats_with_baseline(sweep_stats, baseline_table, labels):
    """Append the initial-parameter statistics row, relabelled with ``labels``,
    to ``sweep_stats`` and sort the combined table by mean makespan."""
    baseline = baseline_table.copy()
    baseline.index = labels
    return pd.concat([sweep_stats, baseline], axis=0).sort_values(by = 'mean')


#### Changing Parameters for Sample 1 ####
# changing one parameter, others are fixed to initial #

### P ### Changing the Population size (P)
sample1_P = _sweep_parameter(sample_1, 'P', PList, 'P')

# Returning statistical table to compare the results to the initial parameter
initial=['P30_init']
STsample1_P = _stats_with_baseline(ReturnStats(sample1_P.groupby('parameter')), table11, initial)
print('\nStatistical comparison of results for different population size parameters (sample 1):\n', STsample1_P)

# Concatenating the result with GA initial parameter and Random Search
GAresult_sample1['parameter']= 'P30_init'
sample1_Pall = pd.concat([RSresult_sample1,GAresult_sample1,sample1_P], axis = 0)

# Calling the Boxplot to do visual comparison
result_plt(sample1_Pall, x='parameter', y='makespan', title='Population Parameter effect on Sample 1')


### Pc ### Changing the Crossover rate (Pc)
sample1_Pc = _sweep_parameter(sample_1, 'Pc', PcList, 'Pc')

# Returning statistical table to compare the results to the initial parameter
initial=['Pc1_init']
STsample1_Pc = _stats_with_baseline(ReturnStats(sample1_Pc.groupby('parameter')), table11, initial)
print('\nStatistical comparison of results for different crossover probability parameters (sample 1):\n', STsample1_Pc)

# Concatenating the result with GA initial parameter and Random Search
GAresult_sample1['parameter']= 'Pc1_init'
sample1_Pcall = pd.concat([RSresult_sample1, GAresult_sample1, sample1_Pc], axis = 0)

# Call the Boxplot to compare
result_plt(sample1_Pcall, x='parameter', y='makespan', title='Crossover Probability Parameter effect on Sample 1')


### Pm_init ### Changing the Initial Mutation rate (Pm_init)
sample1_Pm_init = _sweep_parameter(sample_1, 'Pm_init', Pm_initList, 'Pm')

# Returning statistical table to compare the results to the initial parameter
initial=['Pm0.8_init']
STsample1_Pm_init = _stats_with_baseline(ReturnStats(sample1_Pm_init.groupby('parameter')), table11, initial)
print('\nStatistical comparison of results for different mutation propability (sample 1):\n', STsample1_Pm_init)

# Concatenating the result with GA initial parameter and Random Search
GAresult_sample1['parameter']= 'Pm0.8_init'
sample1_Pmall = pd.concat([RSresult_sample1, GAresult_sample1, sample1_Pm_init], axis = 0)

# Call the Boxplot to compare
result_plt(sample1_Pmall, x='parameter', y='makespan', title='Mutation Probability Parameter effect on Sample 1')


### D ### Changing the Threshold value (D)
sample1_D = _sweep_parameter(sample_1, 'D', DList, 'D')

# Returning statistical table to compare the results to the initial parameter
initial=['D0.95_init']
STsample1_D = _stats_with_baseline(ReturnStats(sample1_D.groupby('parameter')), table11, initial)
print('\nStatistical comparison of results for different threshold (sample 1):\n', STsample1_D)

# Concatenating the result with GA initial parameter and Random Search
GAresult_sample1['parameter']= 'D0.95_init'
sample1_Dall = pd.concat([RSresult_sample1, GAresult_sample1, sample1_D], axis = 0)

# Calling the Boxplot to compare
result_plt(sample1_Dall, x='parameter', y='makespan', title='Threshold Parameter effect on Sample 1')


#### Changing Parameters for Sample 2 ####

### P = {50, 100} ### Changing the Population size P
sample2_P = _sweep_parameter(sample_2, 'P', PList[-2:], 'P')

# Returning statistical table to compare the results to the initial parameter
initial=['P30_init']
STsample2_P = _stats_with_baseline(ReturnStats(sample2_P.groupby('parameter')), table22, initial)
print('\nStatistical comparison of results for different population size parameters (sample 2):\n', STsample2_P)

# Concatenating the result with GA initial parameter and Random Search
GAresult_sample2['parameter']= 'P30_init'
sample2_Pall = pd.concat([RSresult_sample2,GAresult_sample2,sample2_P], axis = 0)

# Calling the Boxplot to compare
result_plt(sample2_Pall, x='parameter', y='makespan', title='Population Parameter effect on Sample 2')


### Pm_init = {0.6, 1.0} ### Changing the Initial Mutation rate Pm_init
sample2_Pm_init = _sweep_parameter(sample_2, 'Pm_init', Pm_initList[-2:], 'Pm')

# Returning statistical table to compare the results to the initial parameter
initial=['Pm0.8_init']
STsample2_Pm_init = _stats_with_baseline(ReturnStats(sample2_Pm_init.groupby('parameter')), table22, initial)
print('\nStatistical comparison of results for different mutation propability(sample 2):\n', STsample2_Pm_init)

# Concatenating the result with GA initial parameter and Random Search
GAresult_sample2['parameter']= 'Pm0.8_init'
sample2_Pmall = pd.concat([RSresult_sample2, GAresult_sample2, sample2_Pm_init], axis = 0)

# Calling the Boxplot to compare
result_plt(sample2_Pmall, x='parameter', y='makespan', title='Mutation Probability Parameter effect on Sample 2')


### D = {0, 0.5} ### Changing the Threshold value D
sample2_D = _sweep_parameter(sample_2, 'D', DList[:2], 'D')

# Returning statistical table to compare the results to the initial parameter
initial=['D0.95_init']
STsample2_D = _stats_with_baseline(ReturnStats(sample2_D.groupby('parameter')), table22, initial)
print('\nStatistical comparison of results for different threshold (sample 2):\n', STsample2_D)

# Concatenating the result with GA initial parameter and Random Search
GAresult_sample2['parameter']= 'D0.95_init'
sample2_Dall = pd.concat([RSresult_sample2, GAresult_sample2, sample2_D], axis = 0)

# Calling the Boxplot to compare
result_plt(sample2_Dall, x='parameter', y='makespan', title='Threshold Parameter effect on Sample 2')


#### Changing Parameters for Sample 3 ####

### P = {100} ### Changing the Population size P
sample3_P = RunGeneticAlgorithm(sample_3, numRun=3, P=100, parameter='P100')

# Returning statistical table to compare the results to the initial parameter
initial = ['P30_init']
STsample3_P = _stats_with_baseline(ReturnStats(sample3_P, name='P100'), table33, initial)
# Print the sample 3 table (the sample 2 table was printed here by mistake)
print('\nStatistical comparison of results for different population size parameters (sample 3):\n', STsample3_P)

# Concatenating the result with GA initial parameter and Random Search
GAresult_sample3['parameter']= 'P30_init'
sample3_Pall = pd.concat([RSresult_sample3,GAresult_sample3,sample3_P], axis = 0)

# Calling the Boxplot to compare
result_plt(sample3_Pall, x='parameter', y='makespan', title='Population Parameter effect on Sample 3')


### Pm_init = {1.0} ### Changing the Initial Mutation rate Pm_init
sample3_Pm_init = RunGeneticAlgorithm(sample_3,numRun=3, Pm_init=1, parameter='Pm1')

# Returning statistical table to compare the results to the initial parameter
initial=['Pm0.8_init']
STsample3_Pm_init = _stats_with_baseline(ReturnStats(sample3_Pm_init, name='Pm1'), table33, initial)
print('\nStatistical comparison of results for different mutation propability(sample 3):\n', STsample3_Pm_init)

# Concatenating the result with GA initial parameter and Random Search
GAresult_sample3['parameter']= 'Pm0.8_init'
sample3_Pmall = pd.concat([RSresult_sample3, GAresult_sample3, sample3_Pm_init], axis = 0)

# Calling the Boxplot to compare
result_plt(sample3_Pmall, x='parameter', y='makespan', title='Mutation Probability Parameter effect on Sample 3')