In [1]:
from ga import Genetic
from IO import DataWarehouse, Itemset, convert_str_to_number
from pathlib import Path
import os
import pandas as pd

# Helper function

In [2]:
def write_output(solution: list[dict], file_name: str, data) -> None:
    """Write every itemset of ``solution`` to ``file_name``, one per line.

    Each line holds the itemset's elements in ascending order, separated by
    single spaces, followed by `` #UTIL: <value>`` where the value is whatever
    ``Genetic.evaluation(itemset, data)`` returns. An existing file is
    overwritten.

    Args:
        solution: Iterable of itemsets to serialise.
        file_name: Path of the text file to create.
        data: Passed through unchanged to ``Genetic.evaluation``.
    """
    with open(file_name, "w") as out:
        for itemset in solution:
            elements = " ".join(str(element) for element in sorted(itemset))
            utility = Genetic.evaluation(itemset, data)
            out.write(f"{elements} #UTIL: {utility}\n")

# Run through all data

In [None]:
from pathlib import Path
from typing import Generator


# Genetic-algorithm settings, shared by every dataset and every run.
# NOTE(review): no random seed is set here, and the recorded results differ
# between runs for some datasets; whether Genetic accepts a seed is not
# visible from this notebook.
ga = Genetic(
    number_of_population=25,
    m=10,
    quantity_of_elite=15,
    k_tournament=5,
    number_population_s=5,
    stop_criteria_loop=100,
    time_limit=100,
)

# Number of independent GA runs per dataset; run i is stored as sol_<i>.txt.
NUM_RUNS = 5

folder_path = Path("data")
# Path.glob("*") also yields sub-directories (for example Jupyter's
# ".ipynb_checkpoints"), which cannot be opened as a dataset, so keep regular
# files only. Sorting makes the processing order reproducible.
all_files: list[Path] = sorted(
    path for path in folder_path.glob("*") if path.is_file()
)

for file_path in all_files:
    # Dataset name = file name without its last extension.
    data_name = os.path.splitext(os.path.basename(file_path))[0]
    dir_output = os.path.join("outputs", data_name)
    # exist_ok=True replaces the separate "exists?" check.
    os.makedirs(dir_output, exist_ok=True)

    # Read the whole dataset up front so the file handle is closed before the
    # solver runs start.
    with file_path.open("r", encoding="utf-8") as file:
        data = DataWarehouse(file.read())

    for i in range(NUM_RUNS):
        solution = ga.solve(data)
        write_output(solution, f"{dir_output}/sol_{i}.txt", data)

# Read output data

In [3]:
# Maps dataset name -> {solution file stem -> list of Itemset objects parsed
# from that file}.
all_solution = {}

# Only regular files in data/ are datasets; sorted() gives a reproducible
# dataset order (glob itself returns entries in arbitrary order).
all_files = sorted(path for path in Path("data").glob("*") if path.is_file())
for file_path in all_files:
    # Dataset name = file name without its last extension.
    data_name = os.path.splitext(os.path.basename(file_path))[0]
    dir_output = os.path.join("outputs", data_name)

    # Sort the solution files so that every dataset lists them in the same
    # order; any consumer that relies on dict order then sees the same
    # sequence of solution names for every dataset. The sort is lexicographic,
    # so "sol_10" would come before "sol_2".
    all_solution_name = sorted(Path(dir_output).glob("*.txt"))
    solution_store = {}
    for solution_name in all_solution_name:
        solution = []
        with open(solution_name, 'r') as file:
            for line in file:
                # Lines without the "#UTIL:" marker are ignored.
                if '#UTIL:' in line:
                    # Left of the marker: whitespace-separated items;
                    # right of the marker: the utility written for that line.
                    items, total_utility = line.split('#UTIL:')
                    itemset = set(map(convert_str_to_number, items.split()))
                    total_utility = convert_str_to_number(total_utility)
                    solution.append(Itemset(itemset, total_utility))

        solution_store[os.path.splitext(os.path.basename(solution_name))[0]] = solution
    all_solution[data_name] = solution_store

In [4]:
# Rounded average of `utility_values` over the itemsets of each solution file,
# keyed as {dataset name -> {solution name -> average}}. Building the frame
# from this nested dict aligns values by solution name instead of by position
# and does not depend on any particular dataset being present.
utility_by_data = {}
for name_data, solutions in all_solution.items():
    utility_by_solution = {}
    for name_sol, solution in solutions.items():
        if solution:
            utility_by_solution[name_sol] = round(
                sum(itemset.utility_values for itemset in solution) / len(solution)
            )
        else:
            # A solution file without any itemset has no average; record NaN
            # instead of dividing by zero.
            utility_by_solution[name_sol] = float("nan")
    utility_by_data[name_data] = utility_by_solution

# Outer keys become columns and inner keys the index; transpose so that each
# row is a dataset and each column a solution file.
df = pd.DataFrame(utility_by_data).T
mean = df.mean(axis=1)
df["Mean"] = mean
# Thousands separators for display only. Series.map is applied per column
# because DataFrame.applymap is deprecated in recent pandas releases.
df_formatted = df.apply(
    lambda column: column.map(
        lambda x: f"{x:,}" if isinstance(x, (int, float)) else x
    )
)
df_formatted

Unnamed: 0,sol_0,sol_1,sol_2,sol_3,sol_4,Mean
accidents_negative,7700224,5097162,5033845,5113954,5113954,5611827.8
chainstore,17530567,17530567,17530567,17530567,17530567,17530567.0
chess_negative,86958,85752,84977,85375,84783,85569.0
foodmart,21883,21883,21883,21883,21883,21883.0
kosarak_negative,2104057,2104057,2104057,2104057,2104057,2104057.0
kosarak_utility_spmf,2944726,2944726,2944726,2944726,2944726,2944726.0
liquor_11,399794,399794,399794,399794,399794,399794.0
mushroom_negative,194067,191235,192084,191528,194067,192596.2
retail_negative,153174,153174,153174,153174,153174,153174.0
