# EXPERIMENTS WITH GREEDY ALGORITHMS

## PREPARING THE ENVIRONMENT

Importing the relevant libraries:

In [1]:
import re

import numpy as np
import pandas as pd

Declaring the auxiliary functions:

In [2]:
def extract_features(file):
    return [int(n)
            for n in re.findall(r'\d+(?=_)', file)]

Defining the constants:

In [3]:
COLS = [
    'instance',
    'obj_lnsa',
]

FEATURES = [
    'number_items' ,
    'items_sizes'  ,
    'graph_density',
]

NUMBER_ITEMS = {
    1 : 100,
    2 : 200,
}

SIZES = {
    1 :  1,
    2 : 20,
    3 : 50,
}

CONFLICTS = {
    0  :  0.0,
    1  :  0.1,
    2  :  0.2,
    3  :  0.3,
    4  :  0.4,
    5  :  0.5,
    6  :  0.6,
    7  :  0.7,
    8  :  0.8,
    9  :  0.9,
    10 : 0.95,
    11 : 0.99,
}

## PRE-PROCESSING

Loading the data:

In [4]:
data_lb = pd.read_csv('../out/lower_bounds.txt', delimiter=' ')

data_lb.head()

Unnamed: 0,instance,lower_bound
0,instances/train/Correia_Random_2_3_1_7.txt,15070
1,instances/train/Correia_Random_2_1_1_8.txt,10180
2,instances/train/Correia_Random_2_2_9_3.txt,12200
3,instances/train/Correia_Random_1_2_4_9.txt,6060
4,instances/train/Correia_Random_1_2_0_9.txt,6060


In [5]:
data_legado_train = pd.read_csv('../out/results_train_legado.txt', delimiter=' ', usecols=COLS)
data_legado_test  = pd.read_csv('../out/results_test_legado.txt' , delimiter=' ', usecols=COLS)

data_legado = pd.concat([data_legado_train,
                         data_legado_test], ignore_index=True)
data_legado.rename(columns={'obj_lnsa': 'HC'}, inplace=True)

data_legado.head()

Unnamed: 0,instance,HC
0,instances/train/Correia_Random_2_3_1_7.txt,15350
1,instances/train/Correia_Random_2_1_1_8.txt,10180
2,instances/train/Correia_Random_2_2_9_3.txt,12540
3,instances/train/Correia_Random_1_2_4_9.txt,6080
4,instances/train/Correia_Random_1_2_0_9.txt,6070


In [6]:
data_greedy1_train = pd.read_csv('../out/results_train_greedy1.txt', delimiter=' ', usecols=COLS)
data_greedy1_test  = pd.read_csv('../out/results_test_greedy1.txt' , delimiter=' ', usecols=COLS)

data_greedy1 = pd.concat([data_greedy1_train, data_greedy1_test], ignore_index=True)
data_greedy1.rename(columns={'obj_lnsa': 'HC1'},
                    inplace=True)

data_greedy1.head()

Unnamed: 0,instance,HC1
0,instances/train/Correia_Random_2_3_1_7.txt,15350
1,instances/train/Correia_Random_2_1_1_8.txt,10180
2,instances/train/Correia_Random_2_2_9_3.txt,12500
3,instances/train/Correia_Random_1_2_4_9.txt,6090
4,instances/train/Correia_Random_1_2_0_9.txt,6070


In [7]:
data_greedy2_train = pd.read_csv('../out/results_train_greedy2.txt', delimiter=' ', usecols=COLS)
data_greedy2_test  = pd.read_csv('../out/results_test_greedy2.txt' , delimiter=' ', usecols=COLS)

data_greedy2 = pd.concat([data_greedy2_train, data_greedy2_test], ignore_index=True)
data_greedy2.rename(columns={'obj_lnsa': 'HC2'},
                    inplace=True)
data_greedy2.head()

Unnamed: 0,instance,HC2
0,instances/train/Correia_Random_2_3_1_7.txt,15350
1,instances/train/Correia_Random_2_1_1_8.txt,10180
2,instances/train/Correia_Random_2_2_9_3.txt,12520
3,instances/train/Correia_Random_1_2_4_9.txt,6090
4,instances/train/Correia_Random_1_2_0_9.txt,6070


Preprocessing the data:

In [8]:
data = (
    data_lb
    .merge(data_legado , on='instance', how='inner')
    .merge(data_greedy1, on='instance', how='inner')
    .merge(data_greedy2, on='instance', how='inner')
)

data[FEATURES] = data['instance'].apply(lambda x: pd.Series(extract_features(x)))
data['number_items' ] = data['number_items' ].map(NUMBER_ITEMS)
data['items_sizes'  ] = data['items_sizes'  ].map(SIZES)
data['graph_density'] = data['graph_density'].map(CONFLICTS)

data.drop(columns=['instance'], inplace=True)

data.head()

Unnamed: 0,lower_bound,HC,HC1,HC2,number_items,items_sizes,graph_density
0,15070,15350,15350,15350,200,50,0.1
1,10180,10180,10180,10180,200,1,0.1
2,12200,12540,12500,12520,200,20,0.9
3,6060,6080,6090,6090,100,20,0.4
4,6060,6070,6070,6070,100,20,0.0


Obtaining gaps relative to the lower bound

$$
\text{Percentage Gap} = \frac{\text{[HC, HC1, HC2]} - \text{LB}}{\text{[HC, HC1, HC2]}} \times 100\%
$$

and obtaining the percentage of improvement with respect to the original greedy criterion

$$
\text{Percentage Gap} = \frac{\text{HC} - \text{[HC1, HC2]}}{\text{[HC1, HC2]}} \times 100\%
$$

In [9]:
data['gap-hc'       ] = ((data.HC  - data.lower_bound) / data.HC  * 100).round(2)
data['gap-hc1'      ] = ((data.HC1 - data.lower_bound) / data.HC1 * 100).round(2)
data['improving-hc1'] = ((data.HC  - data.HC1        ) / data.HC1 * 100).round(2)
data['gap-hc2'      ] = ((data.HC2 - data.lower_bound) / data.HC2 * 100).round(2)
data['improving-hc2'] = ((data.HC  - data.HC2        ) / data.HC2 * 100).round(2)

data.head()

Unnamed: 0,lower_bound,HC,HC1,HC2,number_items,items_sizes,graph_density,gap-hc,gap-hc1,improving-hc1,gap-hc2,improving-hc2
0,15070,15350,15350,15350,200,50,0.1,1.82,1.82,0.0,1.82,0.0
1,10180,10180,10180,10180,200,1,0.1,0.0,0.0,0.0,0.0,0.0
2,12200,12540,12500,12520,200,20,0.9,2.71,2.4,0.32,2.56,0.16
3,6060,6080,6090,6090,100,20,0.4,0.33,0.49,-0.16,0.49,-0.16
4,6060,6070,6070,6070,100,20,0.0,0.16,0.16,0.0,0.16,0.0


## EXPERIMENTS

Experiment on

- Number of items
- Items sizes
- Conflict graph density

In [10]:
experiments_cols = [
    'gap-hc'       ,
    'gap-hc1'      ,
    'improving-hc1',
    'gap-hc2'      ,
    'improving-hc2',
]


experiment_ni = (
    data
    .filter(items=['number_items'] + experiments_cols)
    .groupby('number_items')
    .mean()
    .reset_index()
    .round(2)
)

avg = experiment_ni[experiments_cols].mean().round(2)
avg['number_items'] = 'avg'

experiment_ni = pd.concat([experiment_ni,
                           avg.to_frame().T],
                          ignore_index=True)

experiment_ni.columns = pd.MultiIndex.from_tuples([
    ('', 'number_items'),
    ('', 'gap-hc'),
    ('HC_1', 'gap-hc1'),
    ('', 'improving-hc1'),
    ('HC_2', 'gap-hc2'),
    ('', 'improving-hc2'),
])

experiment_ni

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,HC_1,Unnamed: 4_level_0,HC_2,Unnamed: 6_level_0
Unnamed: 0_level_1,number_items,gap-hc,gap-hc1,improving-hc1,gap-hc2,improving-hc2
0,100,4.4,4.36,0.04,4.37,0.03
1,200,2.99,2.89,0.1,2.92,0.07
2,avg,3.7,3.62,0.07,3.64,0.05


In [11]:
experiment_is = (
    data
    .filter(items=['items_sizes'] + experiments_cols)
    .groupby('items_sizes')
    .mean()
    .reset_index()
    .round(2)
)

avg = experiment_is[experiments_cols].mean().round(2)
avg['items_sizes'] = 'avg'

experiment_is = pd.concat([experiment_is,
                           avg.to_frame().T],
                          ignore_index=True)

experiment_is.columns = pd.MultiIndex.from_tuples([
    ('', 'items_sizes'),
    ('', 'gap-hc'),
    ('HC_1', 'gap-hc1'),
    ('', 'improving-hc1'),
    ('HC_2', 'gap-hc2'),
    ('', 'improving-hc2'),
])

experiment_is

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,HC_1,Unnamed: 4_level_0,HC_2,Unnamed: 6_level_0
Unnamed: 0_level_1,items_sizes,gap-hc,gap-hc1,improving-hc1,gap-hc2,improving-hc2
0,1,4.41,4.28,0.14,4.31,0.1
1,20,3.2,3.12,0.08,3.14,0.06
2,50,3.47,3.48,-0.01,3.49,-0.02
3,avg,3.69,3.63,0.07,3.65,0.05


In [12]:
experiment_cg = (
    data
    .filter(items=['graph_density'] + experiments_cols)
    .groupby('graph_density')
    .mean()
    .reset_index()
    .round(2)
)

avg = experiment_cg[experiments_cols].mean().round(2)
avg['graph_density'] = 'avg'

experiment_cg = pd.concat([experiment_cg,
                           avg.to_frame().T],
                          ignore_index=True)

experiment_cg.columns = pd.MultiIndex.from_tuples([
    ('', 'graph_density'),
    ('', 'gap-hc'),
    ('HC_1', 'gap-hc1'),
    ('', 'improving-hc1'),
    ('HC_2', 'gap-hc2'),
    ('', 'improving-hc2'),
])

experiment_cg

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,HC_1,Unnamed: 4_level_0,HC_2,Unnamed: 6_level_0
Unnamed: 0_level_1,graph_density,gap-hc,gap-hc1,improving-hc1,gap-hc2,improving-hc2
0,0.0,0.58,0.58,0.01,0.57,0.01
1,0.1,0.57,0.56,0.01,0.55,0.02
2,0.2,0.6,0.58,0.02,0.57,0.04
3,0.3,0.61,0.6,0.01,0.59,0.02
4,0.4,0.68,0.65,0.03,0.65,0.03
5,0.5,0.76,0.72,0.05,0.71,0.06
6,0.6,0.98,0.92,0.06,0.91,0.07
7,0.7,1.34,1.19,0.15,1.2,0.14
8,0.8,2.1,1.93,0.17,1.97,0.13
9,0.9,4.29,4.12,0.18,4.22,0.08


Getting latex from tables:

In [13]:
print(experiment_ni.to_latex(index=False))

\begin{tabular}{llllll}
\toprule
\multicolumn{2}{r}{} & HC_1 &  & HC_2 &  \\
number_items & gap-hc & gap-hc1 & improving-hc1 & gap-hc2 & improving-hc2 \\
\midrule
100 & 4.400000 & 4.360000 & 0.040000 & 4.370000 & 0.030000 \\
200 & 2.990000 & 2.890000 & 0.100000 & 2.920000 & 0.070000 \\
avg & 3.700000 & 3.620000 & 0.070000 & 3.640000 & 0.050000 \\
\bottomrule
\end{tabular}



In [14]:
print(experiment_is.to_latex(index=False))

\begin{tabular}{llllll}
\toprule
\multicolumn{2}{r}{} & HC_1 &  & HC_2 &  \\
items_sizes & gap-hc & gap-hc1 & improving-hc1 & gap-hc2 & improving-hc2 \\
\midrule
1 & 4.410000 & 4.280000 & 0.140000 & 4.310000 & 0.100000 \\
20 & 3.200000 & 3.120000 & 0.080000 & 3.140000 & 0.060000 \\
50 & 3.470000 & 3.480000 & -0.010000 & 3.490000 & -0.020000 \\
avg & 3.690000 & 3.630000 & 0.070000 & 3.650000 & 0.050000 \\
\bottomrule
\end{tabular}



In [15]:
print(experiment_cg.to_latex(index=False))

\begin{tabular}{llllll}
\toprule
\multicolumn{2}{r}{} & HC_1 &  & HC_2 &  \\
graph_density & gap-hc & gap-hc1 & improving-hc1 & gap-hc2 & improving-hc2 \\
\midrule
0.000000 & 0.580000 & 0.580000 & 0.010000 & 0.570000 & 0.010000 \\
0.100000 & 0.570000 & 0.560000 & 0.010000 & 0.550000 & 0.020000 \\
0.200000 & 0.600000 & 0.580000 & 0.020000 & 0.570000 & 0.040000 \\
0.300000 & 0.610000 & 0.600000 & 0.010000 & 0.590000 & 0.020000 \\
0.400000 & 0.680000 & 0.650000 & 0.030000 & 0.650000 & 0.030000 \\
0.500000 & 0.760000 & 0.720000 & 0.050000 & 0.710000 & 0.060000 \\
0.600000 & 0.980000 & 0.920000 & 0.060000 & 0.910000 & 0.070000 \\
0.700000 & 1.340000 & 1.190000 & 0.150000 & 1.200000 & 0.140000 \\
0.800000 & 2.100000 & 1.930000 & 0.170000 & 1.970000 & 0.130000 \\
0.900000 & 4.290000 & 4.120000 & 0.180000 & 4.220000 & 0.080000 \\
0.950000 & 8.150000 & 8.080000 & 0.070000 & 8.200000 & -0.050000 \\
0.990000 & 23.640000 & 23.570000 & 0.100000 & 23.610000 & 0.050000 \\
avg & 3.690000 & 3.620000 & 