In [7]:
import glob
from pathlib import Path
import pandas as pd
import numpy as np
import csv

# Algorithm ranking

Build ranking files:
1. for each problem in the data_set finds the rank based on makespan average value for the repeated algorithms runs (alg_ranks_per_problem_mean)
2. for each problem in the data_set finds the rank based on rank averange value for the repeted algorithms runs (alg_ranks_per_problem)


In [56]:
def _build_instances_name(input_path, filter):
    """
    Constructs a list with the files name from the input path that matches the filter
    :param: input_path - the path to the dataset files
    :param: filter - file filter
    """
    test_instances = sorted(glob.glob(f'{input_path}/{filter}'))
    result = []
    for instance in test_instances:
        result.append(Path(instance).stem)
    return result

def get_instances_name(root_dir, dataset_name):
    """
    Constructs a list with dataset instances file names
    :param root_dir - the root directory to the data  sets
    :param  dataset_name - one of the datasets (DEEP, WIDE, MIXED, 2ASP, DYUTHI, FJSSP)
    """
    input_path = None
    instances_name = []
    if dataset_name == 'deep':
        input_path = f'{root_dir}/ASP-DEEP/'
        filter="bom_deep*.json"
        instances_name = _build_instances_name(input_path, filter)
        
    elif dataset_name == 'wide':
        input_path = f'{root_dir}/ASP-WIDE/'
        filter="bom_wide*.json"
        instances_name = _build_instances_name(input_path, filter)
        
    elif dataset_name == 'dyuthi':
        input_path = f'{root_dir}/dyuthi/'
        filter="P*.json"
        instances_name = _build_instances_name(input_path, filter)
        
    elif dataset_name == '2asp':
        input_path = f'{root_dir}/2ASP/'
        filter="*.json"
        instances_name = _build_instances_name(input_path, filter)
        
    elif dataset_name == 'fjssp':
        input_path = f'{root_dir}/FJSSP-Hurink-vdata/'
        filter="*.fjs.json"
        instances_name = _build_instances_name(input_path, filter)
        
        input_path = f'{root_dir}/FJSSP/set1'
        filter="bom_fjssp_*.json"
        instances_name.extend(_build_instances_name(input_path, filter))

        input_path = f'{root_dir}/FJSSP/set2'
        filter="bom_fjssp_*.json"
        instances_name.extend(_build_instances_name(input_path, filter))

        
    elif dataset_name == 'mixed':
        input_path = f'{root_dir}/mixed_boms/set1/'
        filter="bom_mix_*.json"
        instances_name = _build_instances_name(input_path, filter)
        
        input_path = f'{root_dir}/mixed_boms/set2/'
        filter="bom_mix_*.json"
        instances_name.extend(_build_instances_name(input_path, filter))

        input_path = f'{root_dir}/mixed_boms/set3/'
        filter="bom_mix_*.json"
        instances_name.extend(_build_instances_name(input_path, filter))

        input_path = f'{root_dir}/mixed_boms/set4/'
        filter="bom_mix_*.json"
        instances_name.extend(_build_instances_name(input_path, filter))

        input_path = f'{root_dir}/mixed_boms/set5/'
        filter="bom_mix_*.json"
        instances_name.extend(_build_instances_name(input_path, filter))

        input_path = f'{root_dir}/mixed_boms/set6/'
        filter="bom_mix_*.json"
        instances_name.extend(_build_instances_name(input_path, filter))

        input_path = f'{root_dir}/mixed_boms/set7/'
        filter="bom_mix_*.json"
        instances_name.extend(_build_instances_name(input_path, filter))
        
    elif dataset_name == 'dafjs':
        input_path = f'{root_dir}/dafjs/'
        filter="DAFJS*.json"
        instances_name = _build_instances_name(input_path, filter)

    elif dataset_name == 'yfjs':
        input_path = f'{root_dir}/yfjs/'
        filter="YFJS*.json"
        instances_name = _build_instances_name(input_path, filter)

    else:
        printf('Wrong dataset name')
    return instances_name

In [57]:
root_dir = "../datasets/"
print(get_instances_name(root_dir, "yfjs"))

['YFJS01', 'YFJS02', 'YFJS03', 'YFJS04', 'YFJS05', 'YFJS06', 'YFJS07', 'YFJS08', 'YFJS09', 'YFJS10', 'YFJS11', 'YFJS12', 'YFJS13', 'YFJS14', 'YFJS15', 'YFJS16', 'YFJS17', 'YFJS18', 'YFJS19', 'YFJS20']


In [15]:
def alg_ranks_per_problem_mean(data_set_name, data_set, names_alg, output_path, input_file_name=None,input_files_path=None):
    """
    for each problem in the data_set finds the rank based on makespan average value for the repeated
    algorithms runs
    :param input_file_name: the file name with the raw data
    :param data_set: the test instances names
    :param names_alg: algorithms name
    :param output_path: output file to save the  results
    :return:
    """
    if input_files_path:
        input_files_name=[
            f'{input_files_path}/makespan-1minTime/ei-lm/results_bom_{data_set_name}_1min.csv',
            f'{input_files_path}/makespan-1minTime/ei-lm-ea-popSize/ea-time-1min-10el-{data_set_name}.csv',
            f'{input_files_path}/makespan-1minTime/ei-lm-ea-popSize/ea-time-1min-50el-{data_set_name}.csv',
            f'{input_files_path}/makespan-1minTime/si-lm/results_bom_si_lm_{data_set_name}_1min.csv',
        ]

    print(input_files_name)
    if input_file_name:
        input_data = pd.read_csv(input_file_name)
    if input_files_name:
        list = []
        for file in input_files_name:
            list.append(pd.read_csv(file))
        input_data = pd.concat(list)
    df = input_data[input_data["Bom"].isin(data_set)]

    header_line = ['Problem']
    header_line.extend(names_alg)
    with open(output_path, 'w', newline='') as csvfile:
        out_file = csv.writer(csvfile, delimiter=',')
        out_file.writerow(header_line)
        for test_instance in data_set:
            df1 = df[df["Bom"].isin([test_instance])]
            group = []
            for alg in names_alg:
                group.append(df1[df1["Algorithm"] == alg]["Makespan"].mean())

            s = pd.Series(group)
            r = s.rank()
            ranks = [r[alg] for alg in range(len(names_alg))]
            line = [test_instance]
            line.extend(ranks)
            out_file.writerow(line)

In [46]:
def alg_ranks_per_problem( data_set_name, data_set, names_alg, output_path, input_file_name=None,input_files_path=None, alg_repetions_no=10):
    """
        for each problem in the data_set finds the rank based on rank averange value for the repeted
        algorithms runs
        :param input_file_name: the file name with the raw data
        :param data_set: the test instances names
        :param names_alg: algorithms name
        :param output_path: output file to save the  results
        :pparam alg_repetions_no: number of repetions of an algorithm for an instance
        :return:
        """
    if input_files_path:
        input_files_name=[
            f'{input_files_path}/makespan-1minTime/ei-lm/results_bom_{data_set_name}_1min.csv',
            f'{input_files_path}/makespan-1minTime/ei-lm-ea-popSize/ea-time-1min-10el-{data_set_name}.csv',
            f'{input_files_path}/makespan-1minTime/ei-lm-ea-popSize/ea-time-1min-50el-{data_set_name}.csv',
            f'{input_files_path}/makespan-1minTime/si-lm/results_bom_si_lm_{data_set_name}_1min.csv',
            ]

    print(input_files_name)
    if input_file_name:
        input_data = pd.read_csv(input_file_name)
    if input_files_name:
        list =[]
        for file in input_files_name:
            list.append(pd.read_csv(file))
        input_data = pd.concat(list)
    df = input_data[input_data["Bom"].isin(data_set)]

    header_line = ['Problem']
    header_line.extend(names_alg)
    with open(output_path, 'w', newline='') as csvfile:
        out_file = csv.writer(csvfile, delimiter=',')
        out_file.writerow(header_line)

        for test_instance in data_set:
            df1 = df[df["Bom"].isin([test_instance])]
            group = []
            for alg in names_alg:
                group.append(df1[df1["Algorithm"] == alg]["Makespan"].values)
                #print('-',test_instance, alg,len(group))
            alg_ranking = [[] for alg in range(len(names_alg))]
            for index in range(alg_repetions_no):
                l = []
                #print(index, group[0])
                for index_alg in range(len(names_alg)):
                    # print(test_instance, index_alg, names_alg[index_alg], group[index_alg], end=', ')
                    # print(group[index_alg][index])
                    l.append(group[index_alg][index])
                s = pd.Series(l)
                r = s.rank()
                for alg in range(len(names_alg)):
                    alg_ranking[alg].append(r[alg])
            line = [test_instance]
            for alg in range(len(names_alg)):
                aux = np.array(alg_ranking[alg])
                #print(aux)
                line.append(aux.mean())
            out_file.writerow(line)

# Construct ranking files for each dataset

In [47]:
algorithms = ["EA-Ei-LM", "TS-Ei-LM", "SA-Ei-LM", "EA-Ei-LM-10", "EA-Ei-LM-50",
          "EAL-Ei-LM", "TSL-Ei-LM", "SAL-Ei-LM", "EAL-Ei-LM-10", "EAL-Ei-LM-50",
          "EA-Si-LM", "TS-Si-LM", "SA-Si-LM",
          "EAL-Si-LM", "TSL-Si-LM", "SAL-Si-LM",
          ]
algorithms = ["TS-Ei-LM", "SA-Ei-LM", 
           "TSL-Ei-LM", "SAL-Ei-LM", 
           "TS-Si-LM", "SA-Si-LM",
           "TSL-Si-LM", "SAL-Si-LM",
          ]

In [4]:
RESULTS_PATH='../datasets/results/ranks/runTime_1min_sa_ts'

In [49]:
datasets  = ['2asp', 
             'deep', 
             'wide', 
             'mixed', 
             'fjssp', 
             'dyuthi', 'dafjs','yfjs'
            ]
#datasets  = ['yfjs']


In [None]:
for dataset in datasets:
    alg_ranks_per_problem(dataset, input_files_path="../datasets/results",
                          data_set=get_instances_name(root_dir, dataset), 
                          names_alg=algorithms,
                          output_path=f'{RESULTS_PATH}/rank_mean_{dataset}_1min_8alg.csv')

In [50]:
for dataset in datasets:
    alg_ranks_per_problem(dataset, input_files_path="../datasets/results",
                          data_set=get_instances_name(root_dir, dataset), 
                          names_alg=algorithms,
                          output_path=f'{RESULTS_PATH}/rank_{dataset}_1min_8alg.csv')

['../datasets/results/makespan-1minTime/ei-lm/results_bom_deep_1min.csv', '../datasets/results/makespan-1minTime/ei-lm-ea-popSize/ea-time-1min-10el-deep.csv', '../datasets/results/makespan-1minTime/ei-lm-ea-popSize/ea-time-1min-50el-deep.csv', '../datasets/results/makespan-1minTime/si-lm/results_bom_si_lm_deep_1min.csv']
['../datasets/results/makespan-1minTime/ei-lm/results_bom_wide_1min.csv', '../datasets/results/makespan-1minTime/ei-lm-ea-popSize/ea-time-1min-10el-wide.csv', '../datasets/results/makespan-1minTime/ei-lm-ea-popSize/ea-time-1min-50el-wide.csv', '../datasets/results/makespan-1minTime/si-lm/results_bom_si_lm_wide_1min.csv']
['../datasets/results/makespan-1minTime/ei-lm/results_bom_fjssp_1min.csv', '../datasets/results/makespan-1minTime/ei-lm-ea-popSize/ea-time-1min-10el-fjssp.csv', '../datasets/results/makespan-1minTime/ei-lm-ea-popSize/ea-time-1min-50el-fjssp.csv', '../datasets/results/makespan-1minTime/si-lm/results_bom_si_lm_fjssp_1min.csv']


 # Information regarding ranking files

In [111]:
from matplotlib import pyplot as plt
def all_algs_equivalent(input_directory, filter="*"):
    """
    identifies problems where all allgorithms are equivalent
    """
    ranks_files = sorted(glob.glob(f'{input_directory}/{filter}'))

    for rank_file in ranks_files:
        print(rank_file)
        data = pd.read_csv(rank_file)
        data['stdev'] = data.std(axis=1, numeric_only=True)
        #data['min'] = data.min(axis=1, numeric_only=True)

        print("All algorithms have the same performance\n",data.query('stdev==0')['Problem'])

        data = data.assign(Min_val = data[algorithms].min(axis=1), Min_col=data[algorithms].idxmin(axis=1))

        df1 = data['Min_col'].value_counts().reset_index(name='Frequency')

        print(df1)
        #print(data)
    pass

In [112]:
all_algs_equivalent(RESULTS_PATH)

../datasets/results/ranks/runTime_1min/rank_2asp_1min_16alg.csv
All algorithms have the same performance
 Series([], Name: Problem, dtype: object)
     Min_col  Frequency
0  SAL-Ei-LM         13
1   TS-Si-LM          9
2  TSL-Si-LM          8
3   SA-Si-LM          7
4   SA-Ei-LM          7
5  SAL-Si-LM          5
6   TS-Ei-LM          1
../datasets/results/ranks/runTime_1min/rank_dafjs_1min_16alg.csv
All algorithms have the same performance
 2    DAFJS03
Name: Problem, dtype: object
     Min_col  Frequency
0   SA-Ei-LM         11
1  SAL-Ei-LM          9
2   EA-Ei-LM          3
3  EAL-Si-LM          3
4  TSL-Si-LM          2
5   TS-Si-LM          1
6   TS-Ei-LM          1
../datasets/results/ranks/runTime_1min/rank_deep_1min_16alg.csv
All algorithms have the same performance
 Series([], Name: Problem, dtype: object)
        Min_col  Frequency
0     SAL-Ei-LM         15
1      SA-Ei-LM          8
2     TSL-Ei-LM          8
3      SA-Si-LM          4
4  EAL-Ei-LM-10          3
5     TSL-S

In [33]:
def encode_lehmer(sigma):
    n = len(sigma)
    c = []
    c.append(0)
    for x in range(1, n):
        sigma_x = sigma[x]
        c_x = 0
        for y in range(0, x):
            sigma_y = sigma[y]
            if sigma_y >= sigma_x:
                c_x += 1
        c.append(c_x)
    return c


In [54]:
from matplotlib import pyplot as plt
from scipy import stats
def replace(row):
    # for i, item in enumerate(row):
 
    #     # updating the value of the row
    #     row[i] = generate_range(item)
    #print("input", row)
    ranks= stats.rankdata(row[1:])
    #print("ranks", ranks)
    encode = encode_lehmer(ranks)
    #print("encode", encode)
    for i, item in enumerate(row):
        if i ==0: continue
        # updating the value of the row
        row[i] = encode[i-1]+1
    return  row
    
def transform_to_lehmer_codes(input_directory, out_directory, filter="*"):
    """
    identifies problems where all allgorithms are equivalent
    """
    ranks_files = sorted(glob.glob(f'{input_directory}/{filter}'))

    for rank_file in ranks_files:
        print(rank_file)
        data = pd.read_csv(rank_file)
        data = data.apply(lambda row: replace(row), axis=1)
        name = Path(rank_file).stem
        data.to_csv(out_directory+"/"+name+".csv", index=False)

        #print(data)
        #print(data)
    pass

In [55]:
IN_PATH='../datasets/results/ranks/runTime_1min_sa_ts'
OUT_PATH='../datasets/results/ranks/runTime_1min_sa_ts_lehmer'
transform_to_lehmer_codes(IN_PATH, OUT_PATH)

../datasets/results/ranks/runTime_1min_sa_ts\rank_2asp_1min_8alg.csv
../datasets/results/ranks/runTime_1min_sa_ts\rank_dafjs_1min_8alg.csv
../datasets/results/ranks/runTime_1min_sa_ts\rank_deep_1min_8alg.csv
../datasets/results/ranks/runTime_1min_sa_ts\rank_dyuthi_1min_8alg.csv
../datasets/results/ranks/runTime_1min_sa_ts\rank_fjssp_1min_8alg.csv


  row[i] = encode[i-1]+1
  row[i] = encode[i-1]+1
  row[i] = encode[i-1]+1
  row[i] = encode[i-1]+1
  row[i] = encode[i-1]+1


../datasets/results/ranks/runTime_1min_sa_ts\rank_mixed_1min_8alg.csv
../datasets/results/ranks/runTime_1min_sa_ts\rank_wide_1min_8alg.csv
../datasets/results/ranks/runTime_1min_sa_ts\rank_yfjs_1min_8alg.csv


  row[i] = encode[i-1]+1
  row[i] = encode[i-1]+1
  row[i] = encode[i-1]+1
