In [1]:
# Data Processing
import pandas as pd
import numpy as np
from enum import Enum

In [1]:
# Display name of each dataset, in the order the per-dataset CSV files are concatenated.
data_names = ['asp-deep', 'asp-wide', 'fjssp', 'Dyuthi', '2asp', 'mixed', 'dafjs', 'yfjs']
# Integer class id of each dataset (its position in data_names).
data_classes = list(range(len(data_names)))


class InstanceClass(Enum):
    """
    Enumeration that stores the instance class: instances are labeled either by the
    dataset they come from (PROBLEM) or by the best performing algorithm (ALGORITHM).
    """
    PROBLEM = 1
    ALGORITHM = 2
    # Historical misspellings, kept as aliases of ALGORITHM so existing references still resolve.
    ALGORTHM = 2
    ALGORITM = 2


def read_info_graph(graph_type, root_path, instance_class=InstanceClass.PROBLEM,
                    class_path="../datasets/results/ranks/class_alg/"):
    """
    Load the graph-attribute table of every dataset together with one label per instance.

    :param graph_type: name of the representation sub-directory under root_path
        (e.g. statistics, operations, disjunctive, heterogeneous)
    :param root_path: directory for the representations
    :param instance_class: InstanceClass.PROBLEM labels each instance with the name of its
        dataset; InstanceClass.ALGORITHM labels it with the 'BestAlg' column of the class files
    :param class_path: directory holding the class_<dataset>.csv files (read only for ALGORITHM)
    :return: tuple (data, data_labels): the concatenated attribute DataFrame and a Series of labels
    :raises ValueError: if instance_class is neither PROBLEM nor ALGORITHM
    """
    # File-name suffix of each dataset, aligned position by position with data_names.
    suffixes = ['deep', 'wide', 'fjssp', 'dyuthi', '2asp', 'mixed', 'dafjs', 'yfjs']

    frames = [pd.read_csv(f'{root_path}/{graph_type}/bom_info_graph_{suffix}.csv')
              for suffix in suffixes]

    if instance_class == InstanceClass.PROBLEM:
        # Repeat each dataset name once per row of its table (single allocation,
        # instead of growing an array with np.append row by row).
        rows_per_dataset = [frame.shape[0] for frame in frames]
        # name=0 keeps the Series name the previous DataFrame-column extraction produced.
        data_labels = pd.Series(np.repeat(data_names, rows_per_dataset), name=0)
    elif instance_class == InstanceClass.ALGORITHM:
        class_frames = [pd.read_csv(f'{class_path}/class_{suffix}.csv') for suffix in suffixes]
        data_labels = pd.concat(class_frames, ignore_index=True)['BestAlg']
    else:
        raise ValueError(f'Unknown class type: {instance_class!r}')

    data = pd.concat(frames, ignore_index=True)

    print(data_labels)
    return data, data_labels

In [2]:
def add_info_from_statistics_to_graph(statistics, graph):
    """
    Copy the depth and assembly/manufacturing entries of `statistics` into `graph`.

    Each value is stored in `graph` under an "st_"-prefixed key. `graph` is
    modified in place and also returned.

    :param statistics: mapping/table that provides the source entries
    :param graph: mapping/table that receives the "st_" entries
    :return: the same `graph` object, after the entries were added
    """
    # (key written into graph, key read from statistics), in insertion order
    transfers = (
        ("st_depth-max", "depth_max"),
        ("st_depth-avg", "depth_mean"),
        ("st_depth-std", "depth_stdev"),
        ("st_depth-q25", "depth_q25"),
        ("st_depth-q75", "depth_q75"),
        ("st_assembly-nodes-per-manufacturing-nodes", "assembly-nodes-per-manufacturing-nodes"),
    )
    for target_key, source_key in transfers:
        graph[target_key] = statistics[source_key]
    return graph

In [5]:
def read_output(prefix='rank', sufix='_1min_16alg',
                dir_path='../datasets/results/ranks/run_1min'):
    """
    Read the per-dataset result files and stack them into one table.

    One file named `<prefix>_<dataset><sufix>.csv` is read from `dir_path` for each
    dataset, in the fixed order deep, wide, fjssp, dyuthi, 2asp, mixed, dafjs, yfjs.

    :param prefix: leading part of every file name
    :param sufix: trailing part of every file name (before the .csv extension)
    :param dir_path: directory that contains the files
    :return: DataFrame with the rows of all files, re-indexed from 0
    """
    dataset_tags = ("deep", "wide", "fjssp", "dyuthi", "2asp", "mixed", "dafjs", "yfjs")
    tables = []
    for tag in dataset_tags:
        tables.append(pd.read_csv(f'{dir_path}/{prefix}_{tag}{sufix}.csv'))
    return pd.concat(tables, ignore_index=True)

In [None]:
def find_column_name(row, value, first):
    """
    Return the names of the columns of `row` whose cell equals `value`.

    :param row: dataframe row (a Series indexed by column name)
    :param value: filter value for cell
    :param first: stop after first match
    :return: the matching column names joined with " & " (a single name when there is
        one match or `first` is true); an empty string when no cell equals `value`
    """
    matches = []
    for col in row.index:
        if row[col] == value:
            matches.append(str(col))
            if first:
                break
    # join copes with zero matches, where indexing the first element would raise IndexError
    return " & ".join(matches)
    
def build_algorithm_class_label(data):
    """
    Add a 'Label' column to the results dataframe and return that column.

    For every row, the label is what find_column_name yields for the cells equal
    to 1, with all matches kept (first=False). `data` is modified in place.

    :param data: the results dataframe
    :return: the 'Label' column of `data`
    """
    def label_of(result_row):
        return find_column_name(result_row, value=1, first=False)

    row_labels = data.apply(label_of, axis=1)
    data['Label'] = row_labels
    return data['Label']

In [6]:
def validate_instances_synchronization(type, root_path=None):
    """
    Validate that the order of the test instances is the same in the graph
    representation files and in the ranking file.

    The first column of each table is taken as the instance name. Every position is
    printed, differing names are printed side by side, and a final verdict is printed.

    :param type: graph representation name, forwarded to read_info_graph
    :param root_path: directory for the representations, forwarded to read_info_graph
    :raises ValueError: if root_path is not given (read_info_graph cannot be called without it)
    """
    if root_path is None:
        raise ValueError('root_path is required: it is the directory read_info_graph loads from')

    graphs, _ = read_info_graph(type, root_path)
    ranks = read_output()
    graphs_names = graphs[graphs.columns[0]]
    ranks_names = ranks[ranks.columns[0]]
    print('graphs_names', graphs_names.shape)
    print('ranks_names', ranks_names.shape)

    # zip stops at the shorter table, so a length mismatch cannot raise IndexError here
    for position, (graph_name, rank_name) in enumerate(zip(graphs_names, ranks_names)):
        print(position, graph_name)
        if graph_name != rank_name:
            print(f'-{graph_name}-', f'-{rank_name}-')

    if graphs_names.equals(ranks_names):
        print(f'Graph representation {type} matches with ranks')
    else:
        print(f'Graph representation {type} DOES NOT match with ranks', False)
        if len(graphs_names) == len(ranks_names):
            print(graphs_names.compare(ranks_names))
        else:
            # Series.compare only accepts identically-labeled Series, so report the sizes instead
            print(f'Different number of instances: {len(graphs_names)} graphs vs {len(ranks_names)} ranks')

    

In [7]:
#validate_instances_synchronization("statistics")

In [8]:
def get_table_with_selected_atribute(graph_rep, scale,
                                     feature_path = '../datasets/results/features_files/ff_septembrie/'):
    """
    Print a "; "-separated presence table of the features kept by each selection method.

    The header row lists the selection methods. Each following row starts with a column
    name of features_all_<scale>.csv, followed by 1 or 0 per method depending on whether
    that column also appears in features_<method>_<scale>.csv. Before the table, one
    line per method reports how many columns its file has.

    :param graph_rep: sub-directory of feature_path holding the feature files
    :param scale: scaling tag used in the file names
    :param feature_path: directory that contains the graph_rep sub-directories
    :return: None (the table is printed)
    """
    all_features = pd.read_csv(f'{feature_path}/{graph_rep}/features_all_{scale}.csv').columns
    methods = ['FS_K', 'FS_P', 'FS_S', 'LR', 'LR10', 'LR20', 'LR30', 'LR40','RF']

    # First position of every feature name, so each selected column is located in one lookup.
    first_position = {}
    for position, feature in enumerate(all_features):
        first_position.setdefault(feature, position)

    flags_by_method = {}
    for method in methods:
        kept = pd.read_csv(f'{feature_path}/{graph_rep}/features_{method}_{scale}.csv').columns
        print(method, len(kept))
        flags = [0] * len(all_features)
        for feature in kept:
            position = first_position.get(feature)
            if position is not None:
                flags[position] = 1
        flags_by_method[method] = flags

    # Header row: an empty leading cell, then one cell per method.
    print("; ", end="; ")
    for method in methods:
        print(method, end="; ")
    print()

    # One row per feature of the full table.
    for position, feature in enumerate(all_features):
        print(feature, end='; ')
        for method in methods:
            print(flags_by_method[method][position], end='; ')
        print()
    

In [9]:
#get_table_with_selected_atribute("HG", "minMaxIn01")

In [1]:
# Hard-coded train/test split, stored as two lists of integer indices.
# NOTE(review): presumably these are row positions into the concatenated instance
# table and were produced once by a random split - confirm the origin (and seed)
# so the split can be regenerated instead of being maintained by hand.
idx_X_train= [184, 171, 142,  12, 117, 217,   3, 139, 229,  31,  66, 248, 254, 228, 187, 151,  49,  14,
                  47,  95, 244,   8,  78, 247,  42,  57, 246, 120,  39,  68, 167,  46, 190, 148, 216,  44,
                 158, 192,  76, 153,  15, 157, 241, 196,  82, 127, 122,   9, 181,  40,  69, 258,  38, 249,
                  71, 237, 163, 168, 116, 175,  36,  33, 129, 234,  97, 179,   4, 222, 104,  87, 195,  63,
                   7, 256, 178, 215,  45, 199, 159,  19, 188, 137,  85, 100, 259, 134, 102, 252,  29,  13,
                  52,   1, 180, 155, 113, 267, 223,  58, 160, 218, 150,  53, 119,  86,  26, 118, 110, 238,
                 269, 200, 250, 169, 277, 174, 162, 227, 275, 260, 182,  84,  72, 232, 135, 211, 166, 197,
                 138, 109, 202, 204,  65, 272,  62,  93,  70, 124, 145,  21, 276,  77, 101, 152, 154, 133,
                 136, 209, 183, 266,  20, 268,  24, 203, 265, 251,  94, 236,  89, 233,  96, 132, 239,  54,
                  90, 206, 235, 224,  67, 219, 107, 173, 103,   2,  51, 255,  30, 221,  74, 231, 126,  28,
                 144,   5, 214, 115, 253, 176,  43, 170,  17,  60,  59,  18, 261, 146,  83,  23,  75, 114,
                 274, 164,  81, 185,  48,  88,  79, 193,  25, 147,  56, 172,  37, 198,  35, 212,  10, 230,
                 186,  41, 123,  50, 106, 271]
# Indices held out for testing; none of them appears in idx_X_train - TODO confirm
# the two lists together cover every instance exactly once.
idx_X_test= [ 22,  32, 143, 105, 270,  99, 207,  16, 201, 156, 125, 264, 263,   6, 213, 111, 257,  73,
             141, 121, 177, 225, 226, 243, 210,  55,  98, 112, 131,  91,   0, 161, 205,  34, 189,  11,
             140, 242, 245,  61, 240, 273,  80, 165,  92, 149, 194, 128, 220, 208, 191,  64, 108, 130,
             262,  27]

def build_labels_train_test(labels, index_test_instances, test_label=6):
    """
    Overwrite the label of every test instance with a dedicated marker value.

    `labels` is modified in place and also returned.

    :param labels: indexable container of labels (list, array, Series, ...)
    :param index_test_instances: indices (as understood by `labels[...]`) of the test instances
    :param test_label: value written at each test index; defaults to 6, the value that
        was previously hard-coded (NOTE(review): the meaning of 6 is not visible here - confirm)
    :return: the same `labels` object, after the test positions were overwritten
    """
    for index in index_test_instances:
        labels[index] = test_label
    return labels
    