# Ejecución de los algoritmos y obtención de tablas

## 1.- Ejecución de los algoritmos

Importamos las funciones y algoritmos implementados.

In [1]:
from leer_datos import *
from funciones_auxiliares_y_estadisticos import *
from algoritmos import *
import time
import os
import pandas as pd

Preparamos listas con los nombres de los archivos que contienen los conjuntos de datos que vamos a utilizar, el número de clusters a utilizar y el nombre de los algoritmos (que ya han sido importados desde `algoritmos.py`).

In [2]:
data_folder = "./data"
dataset_names = ["iris_set", "ecoli_set", "rand_set"]
dat_files = [data_folder + "/" + dataset + ".dat" for dataset in dataset_names]
n_clusters = {dat_files[0]:3, dat_files[1]:8, dat_files[2]:3} #Los parámetros k
const_files = [[data_folder + "/" + dataset + "_const_" + percentage + ".const" for dataset in dataset_names] \
               for percentage in ["10","20"]]
algorithms = [copkm_algorithm_with_ini, local_search]
algo_names = ["COPKM", "BL"]
results_folder = os.pardir + "/Results/"

In [3]:
print(dat_files)
print(n_clusters)
print(const_files)

['./data/iris_set.dat', './data/ecoli_set.dat', './data/rand_set.dat']
{'./data/iris_set.dat': 3, './data/ecoli_set.dat': 8, './data/rand_set.dat': 3}
[['./data/iris_set_const_10.const', './data/ecoli_set_const_10.const', './data/rand_set_const_10.const'], ['./data/iris_set_const_20.const', './data/ecoli_set_const_20.const', './data/rand_set_const_20.const']]


In [4]:
def execute_algorithm(algo, dat_file, const_file, k, seed):
    np.random.seed(seed)
    #Leemos los datos
    X = read_dat(dat_file)
    const_matrix = read_constraints_matrix(const_file)
    const_list = constraints_matrix_to_list(const_matrix)
    #Ejecutamos el algoritmo
    t0 = time.perf_counter()
    partition_sol = algo(X, const_matrix, const_list, k)
    t1 = time.perf_counter()
    tiempo = t1 - t0
    tasa_C = general_deviation(X, partition_sol)
    tasa_inf = infeasibility(partition_sol, const_list)
    agr =  objective_func(X, partition_sol, const_list, lambda_ = None) #lambda_ = max_dist(X) / len(const_list)
    return tasa_C, tasa_inf, agr, tiempo

In [5]:
def execute_algorithms(algorithms, dat_files, const_files, seeds):
    results_data = {}
    for seed in seeds:
        print("Seed: ", seed)
        for i,algo in enumerate(algorithms):
            print("Algorithm: ", str(algo))
            for j,dat in enumerate(dat_files):
                print("dat file: ", dat)
                for k,const in enumerate(const_files):
                    print("const file: ", const[j])
                    if (i,j,k) not in results_data:
                        results_data[(i,j,k)] = [] 
                    results_data[(i,j,k)].append(list(execute_algorithm(algo, dat, const[j], n_clusters[dat], seed)))
    return results_data

In [6]:
results_data = execute_algorithms(algorithms, dat_files, const_files, seeds = [0, 14, 17, 25, 31])

Seed:  0
Algorithm:  <function copkm_algorithm_with_ini at 0x0000026EC6F68B70>
dat file:  ./data/iris_set.dat
const file:  ./data/iris_set_const_10.const
const file:  ./data/iris_set_const_20.const
dat file:  ./data/ecoli_set.dat
const file:  ./data/ecoli_set_const_10.const
const file:  ./data/ecoli_set_const_20.const
dat file:  ./data/rand_set.dat
const file:  ./data/rand_set_const_10.const
const file:  ./data/rand_set_const_20.const
Algorithm:  <function local_search at 0x0000026EC6F68E18>
dat file:  ./data/iris_set.dat
const file:  ./data/iris_set_const_10.const
const file:  ./data/iris_set_const_20.const
dat file:  ./data/ecoli_set.dat
const file:  ./data/ecoli_set_const_10.const
const file:  ./data/ecoli_set_const_20.const
dat file:  ./data/rand_set.dat
const file:  ./data/rand_set_const_10.const
const file:  ./data/rand_set_const_20.const
Seed:  14
Algorithm:  <function copkm_algorithm_with_ini at 0x0000026EC6F68B70>
dat file:  ./data/iris_set.dat
const file:  ./data/iris_set_con

In [7]:
results_data

{(0, 0, 0): [[0.1734559124719209, 35, 0.3954629439623927, 0.18069370000000617],
  [0.14882900000000002, 0, 0.14882900000000002, 0.23141910000003918],
  [0.24339159747423122, 83, 0.7698654150087786, 0.16785010000012335],
  [0.14882900000000004, 0, 0.14882900000000004, 0.16758660000004966],
  [0.14882900000000004, 0, 0.14882900000000004, 0.1635945999996693]],
 (0,
  0,
  1): [[0.14402196043176507,
   21,
   0.21059427027736427,
   0.17487090000000194], [0.14882900000000002, 0, 0.14882900000000002, 0.18995669999992515], [0.18649892969511542,
   60,
   0.3767055292539702,
   0.2974619999999959], [0.14882900000000002,
   0,
   0.14882900000000002,
   0.17552419999992708], [0.14882900000000004,
   0,
   0.14882900000000004,
   0.1713472000001275]],
 (0, 1, 0): [[201.29698114689023, 124, 204.62384493499817, 3.5819474000000042],
  [222.75235783916352, 126, 226.13288072062804, 10.429566300000033],
  [232.05898152627566, 115, 235.14437939427899, 14.684767400000055],
  [240.09156079327093, 301, 2

In [8]:
# Save the results
#np.save(results_folder + 'results_P1.npy', results_data) 

## 2.- Obtención de tablas (resultados)

In [3]:
# Load the results
results_data = np.load(results_folder + 'results_P1.npy',allow_pickle='TRUE').item()

In [4]:
def results_data_to_dataframes(results_data):
    dataframes = {}
    index = ["Ejecución " + str(i+1) for i in range(len(results_data[(0,0,0)]))]
    for i,algo in enumerate(algo_names):
        for k,const in enumerate(const_files):
            dfs_to_concat = []
            for j,dat in enumerate(dat_files):
                dfs_to_concat.append(pd.DataFrame(results_data[(i,j,k)], columns=['Tasa_C', 'Tasa_inf', "Agr.", "T"],
                                     index = index))
            concatenated = pd.concat(dfs_to_concat, axis=1)
            micolumns = pd.MultiIndex.from_tuples([(dataset_name, column) for dataset_name in ["Iris", "Ecoli", "Rand"] \
                                                  for column in ['Tasa_C', 'Tasa_inf', "Agr.", "T"]])
            concatenated.columns = micolumns
            concatenated.loc['Media'] = concatenated.mean()
            concatenated.loc['Desviación típica'] = concatenated.std()
            dataframes[(str(algo), (k+1)*10)] = concatenated
    return dataframes

In [5]:
dataframes = results_data_to_dataframes(results_data)
np.save(results_folder + 'dataframes_algorithms.npy', dataframes)

Guardamos los distintos dataframes como archivos excel.

In [6]:
#10% de restricciones - Greedy
with pd.ExcelWriter(results_folder + 'results.xlsx', engine="openpyxl") as writer:  
    dataframes[("COPKM", 10)].to_excel(writer, sheet_name='Greedy10') 
dataframes[("COPKM", 10)]

Unnamed: 0_level_0,Iris,Iris,Iris,Iris,Ecoli,Ecoli,Ecoli,Ecoli,Rand,Rand,Rand,Rand
Unnamed: 0_level_1,Tasa_C,Tasa_inf,Agr.,T,Tasa_C,Tasa_inf,Agr.,T,Tasa_C,Tasa_inf,Agr.,T
Ejecución 1,0.173456,35.0,0.395463,0.180694,201.296981,124.0,204.623845,3.581947,0.426085,0.0,0.426085,0.167952
Ejecución 2,0.148829,0.0,0.148829,0.231419,222.752358,126.0,226.132881,10.429566,0.426085,0.0,0.426085,0.1787
Ejecución 3,0.243392,83.0,0.769865,0.16785,232.058982,115.0,235.144379,14.684767,0.593006,31.0,0.816521,0.167218
Ejecución 4,0.148829,0.0,0.148829,0.167587,240.091561,301.0,248.167254,8.954754,0.426085,0.0,0.426085,0.167369
Ejecución 5,0.148829,0.0,0.148829,0.163595,234.233805,365.0,244.02659,9.654126,0.426085,0.0,0.426085,0.16617
Media,0.172667,23.6,0.322363,0.182229,226.086737,206.2,231.61899,9.461032,0.459469,6.2,0.504172,0.169482
Desviación típica,0.036626,32.647205,0.243288,0.02526,13.592848,105.55643,15.485846,3.551977,0.066768,12.4,0.156175,0.004645


In [7]:
#20% de restricciones - Greedy
with pd.ExcelWriter(results_folder + 'results.xlsx', engine="openpyxl", mode='a') as writer:  
    dataframes[("COPKM", 20)].to_excel(writer, sheet_name='Greedy20') 
dataframes[("COPKM", 20)]

Unnamed: 0_level_0,Iris,Iris,Iris,Iris,Ecoli,Ecoli,Ecoli,Ecoli,Rand,Rand,Rand,Rand
Unnamed: 0_level_1,Tasa_C,Tasa_inf,Agr.,T,Tasa_C,Tasa_inf,Agr.,T,Tasa_C,Tasa_inf,Agr.,T
Ejecución 1,0.144022,21.0,0.210594,0.174871,240.088037,90.0,241.295366,3.699347,0.426085,0.0,0.426085,0.172562
Ejecución 2,0.148829,0.0,0.148829,0.189957,235.489955,250.0,238.843648,9.898984,0.426085,0.0,0.426085,0.123466
Ejecución 3,0.186499,60.0,0.376706,0.297462,177.463791,59.0,178.255263,2.910767,0.420902,18.0,0.485765,0.116006
Ejecución 4,0.148829,0.0,0.148829,0.175524,249.287102,422.0,254.948136,15.851435,0.426085,0.0,0.426085,0.172
Ejecución 5,0.148829,0.0,0.148829,0.171347,197.459443,78.0,198.505795,3.559698,0.426085,0.0,0.426085,0.172146
Media,0.155402,16.2,0.206757,0.201832,219.957665,179.8,222.369642,7.184046,0.425048,3.6,0.438021,0.151236
Desviación típica,0.01566,23.361507,0.088277,0.048238,27.635481,139.002734,29.005158,5.020647,0.002073,7.2,0.023872,0.025828


In [8]:
#10% de restricciones - Búsqueda local
with pd.ExcelWriter(results_folder + 'results.xlsx', mode='a', engine="openpyxl") as writer:  
    dataframes[("BL", 10)].to_excel(writer, sheet_name='BusquedaLocal10') 
dataframes[("BL", 10)]

Unnamed: 0_level_0,Iris,Iris,Iris,Iris,Ecoli,Ecoli,Ecoli,Ecoli,Rand,Rand,Rand,Rand
Unnamed: 0_level_1,Tasa_C,Tasa_inf,Agr.,T,Tasa_C,Tasa_inf,Agr.,T,Tasa_C,Tasa_inf,Agr.,T
Ejecución 1,0.173064,11.0,0.242838,3.549502,90.488749,1076.0,119.357342,164.824874,0.567493,17.0,0.690066,5.421015
Ejecución 2,0.147961,10.0,0.211392,5.472892,88.731481,870.0,112.073186,246.646611,0.471133,9.0,0.536025,4.206658
Ejecución 3,0.164955,14.0,0.253758,4.246338,82.376373,882.0,106.040033,185.752572,0.488334,5.0,0.524385,4.489495
Ejecución 4,0.148829,0.0,0.148829,4.815099,88.213647,653.0,105.733341,194.677624,0.426085,0.0,0.426085,4.098372
Ejecución 5,0.148829,0.0,0.148829,6.975041,73.927553,1154.0,104.88885,231.527741,0.481247,7.0,0.531718,4.364523
Media,0.156728,7.0,0.201129,5.011774,84.747561,927.0,109.61855,204.685884,0.486858,7.6,0.541656,4.516013
Desviación típica,0.010356,5.865151,0.044912,1.168748,6.058741,175.544866,5.498,30.095593,0.045774,5.571355,0.08463,0.471764


In [9]:
#20% de restricciones - Búsqueda local
with pd.ExcelWriter(results_folder +'results.xlsx', mode='a', engine="openpyxl") as writer:  
    dataframes[("BL", 20)].to_excel(writer, sheet_name='BusquedaLocal20') 
dataframes[("BL", 20)]

Unnamed: 0_level_0,Iris,Iris,Iris,Iris,Ecoli,Ecoli,Ecoli,Ecoli,Rand,Rand,Rand,Rand
Unnamed: 0_level_1,Tasa_C,Tasa_inf,Agr.,T,Tasa_C,Tasa_inf,Agr.,T,Tasa_C,Tasa_inf,Agr.,T
Ejecución 1,0.164327,13.0,0.205538,7.089431,85.653598,1799.0,109.786775,493.439849,0.476127,44.0,0.63468,10.438419
Ejecución 2,0.164935,19.0,0.225167,7.804216,93.670943,1459.0,113.243097,344.433477,0.546218,34.0,0.668736,7.594651
Ejecución 3,0.172664,58.0,0.356531,6.142935,87.998345,1733.0,111.246147,328.455965,0.426085,0.0,0.426085,8.187196
Ejecución 4,0.148829,0.0,0.148829,7.71222,81.396033,2436.0,114.07442,352.515751,0.536014,45.0,0.698171,6.599174
Ejecución 5,0.181723,75.0,0.419481,7.238607,93.999634,1456.0,113.531544,467.763391,0.61971,54.0,0.814298,7.056082
Media,0.166496,33.0,0.271109,7.197482,88.54371,1776.6,112.376397,397.321687,0.520831,35.4,0.648394,7.975105
Desviación típica,0.010855,28.544702,0.100672,0.593046,4.812382,358.012625,1.609498,68.917279,0.065768,18.8,0.126511,1.3413


In [10]:
#Función para construir la tabla que nos permite comparar los algoritmos:
def global_results(results_dataframes, algo_names, percentages = [10,20]):
    global_results_dfs = {}
    for perc in percentages:
        df = results_dataframes[(str(algo_names[0]), perc)].loc[["Media"]]
        for algo in algo_names[1:]:
            df = df.append(results_dataframes[(str(algo), perc)].loc[["Media"]])
        df.index = algo_names
        global_results_dfs[perc] = df
    return global_results_dfs        

In [11]:
global_results_dfs = global_results(dataframes, algo_names)
np.save(results_folder + 'dataframes_global_comparison.npy', global_results_dfs)

In [12]:
with pd.ExcelWriter(results_folder + 'results.xlsx', mode='a', engine="openpyxl") as writer:  
    global_results_dfs[10].to_excel(writer, sheet_name='ComparacionAlgoritmos10') 
global_results_dfs[10]

Unnamed: 0_level_0,Iris,Iris,Iris,Iris,Ecoli,Ecoli,Ecoli,Ecoli,Rand,Rand,Rand,Rand
Unnamed: 0_level_1,Tasa_C,Tasa_inf,Agr.,T,Tasa_C,Tasa_inf,Agr.,T,Tasa_C,Tasa_inf,Agr.,T
COPKM,0.172667,23.6,0.322363,0.182229,226.086737,206.2,231.61899,9.461032,0.459469,6.2,0.504172,0.169482
BL,0.156728,7.0,0.201129,5.011774,84.747561,927.0,109.61855,204.685884,0.486858,7.6,0.541656,4.516013


In [13]:
with pd.ExcelWriter(results_folder + 'results.xlsx', mode='a', engine="openpyxl") as writer:  
    global_results_dfs[20].to_excel(writer, sheet_name='ComparacionAlgoritmos20') 
global_results_dfs[20]

Unnamed: 0_level_0,Iris,Iris,Iris,Iris,Ecoli,Ecoli,Ecoli,Ecoli,Rand,Rand,Rand,Rand
Unnamed: 0_level_1,Tasa_C,Tasa_inf,Agr.,T,Tasa_C,Tasa_inf,Agr.,T,Tasa_C,Tasa_inf,Agr.,T
COPKM,0.155402,16.2,0.206757,0.201832,219.957665,179.8,222.369642,7.184046,0.425048,3.6,0.438021,0.151236
BL,0.166496,33.0,0.271109,7.197482,88.54371,1776.6,112.376397,397.321687,0.520831,35.4,0.648394,7.975105
