In [3]:
import pandas as pd
import os
import json
import re

def read_results_to_dataframe(results_dir='results_init'):
    # Initialize an empty list to store the results
    results = []

    # Regex pattern to extract cores from the filename
    pattern = re.compile(r'result_cores_(\d+)_')

    # Iterate over all files in the results directory
    for filename in os.listdir(results_dir):
        if filename.endswith('.json'):
            # Extract cores count from the filename
            match = pattern.search(filename)
            if match:
                cores = int(match.group(1))
            else:
                cores = None

            file_path = os.path.join(results_dir, filename)
            # Read the JSON file
            with open(file_path, 'r') as file:
                for line in file:
                    result = json.loads(line)
                    result['cores'] = cores
                    results.append(result)

    # Convert the list of results to a DataFrame
    df = pd.DataFrame(results)

    return df

# Usage
df = read_results_to_dataframe()
print(df)


     population_size  total_generations  mutation_rate  tournament_size  \
0                500                 50           0.10               10   
1                100                200           0.05               10   
2                100                 48           0.05                5   
3                250                200           0.10               50   
4                100                 48           0.01               10   
..               ...                ...            ...              ...   
571              100                200           0.10               50   
572              500                200           0.05                5   
573              250                 50           0.05               10   
574              250                 48           0.10                5   
575              250                 50           0.01               10   

     initial_cost  best_route_length  elapsed_time  cores  
0      229.804736         204.313856   

In [4]:
def find_best_parameters_by_cores(df):
    # Group by 'cores' and find the index of the minimum 'best_route_length' for each group
    idx = df.groupby('cores')['best_route_length'].idxmin()
    # Use the indices to get the rows with the best parameters for each cores group
    best_parameters_df = df.loc[idx]
    # Sort the resulting DataFrame by 'elapsed_time'
    best_parameters_df = best_parameters_df.sort_values(by='elapsed_time')
    return best_parameters_df

best_params_by_cores = find_best_parameters_by_cores(df)
print("\nBest parameters by cores group:")
print(best_params_by_cores)


Best parameters by cores group:
     population_size  total_generations  mutation_rate  tournament_size  \
503              500                200            0.1               50   
172              500                200            0.2               50   
209              500                200            0.2               50   
391              500                200            0.2               50   

     initial_cost  best_route_length  elapsed_time  cores  
503    227.969922         203.489909     49.038350      8  
172    228.610461         189.265608     66.625455      4  
209    227.373230         168.935239     89.276963      2  
391    228.138936         142.842416    164.121701      1  


In [9]:
def find_best_parameters_by_cores_and_cities(df):
    # Calculate a combined score: Normalize 'best_route_length' and 'elapsed_time', then sum
    df['normalized_best_route_length'] = (df['best_route_length'] - df['best_route_length'].min()) / (df['best_route_length'].max() - df['best_route_length'].min())
    df['normalized_elapsed_time'] = (df['elapsed_time'] - df['elapsed_time'].min()) / (df['elapsed_time'].max() - df['elapsed_time'].min())
    df['combined_score'] = df['normalized_best_route_length'] + df['normalized_elapsed_time']

    # Group by 'cores' and 'num_cities' and find the index of the minimum 'combined_score' for each group
    idx = df.groupby(['cores', 'num_cities'])['combined_score'].idxmin()
    
    # Use the indices to get the rows with the best parameters for each group
    best_parameters_df = df.loc[idx]
    
    # Sort the resulting DataFrame by 'combined_score' for better readability
    best_parameters_df = best_parameters_df.sort_values(by='combined_score')
    
    return best_parameters_df

# Usage
df = read_results_to_dataframe('results_005_25')
best_params_by_cores_and_cities = find_best_parameters_by_cores_and_cities(df)
print("\nBest parameters by cores and cities group:")
print(best_params_by_cores_and_cities)



Best parameters by cores and cities group:
     num_cities  population_size  total_generations  mutation_rate  \
69          100              100                300           0.05   
178         100              100                800           0.05   
115         100              250                400           0.05   
99          100              100               1600           0.05   
144         200              100                200           0.05   
171         200              100                800           0.05   
133         200               50               1200           0.05   
108         200              100               1600           0.05   
216         300              100                800           0.05   
45          300              100                500           0.05   
101         300              100               1200           0.05   
22          300              100               1600           0.05   
202         400              100              

In [10]:
def find_best_parameters_by_cores_and_cities(df):
    # Calculate a combined score: Normalize 'best_route_length' and 'elapsed_time', then sum
    df['normalized_best_route_length'] = (df['best_route_length'] - df['best_route_length'].min()) / (df['best_route_length'].max() - df['best_route_length'].min())
    df['normalized_elapsed_time'] = (df['elapsed_time'] - df['elapsed_time'].min()) / (df['elapsed_time'].max() - df['elapsed_time'].min())
    df['combined_score'] = df['normalized_best_route_length'] + df['normalized_elapsed_time']

    # Group by 'cores' and 'num_cities' and find the index of the minimum 'combined_score' for each group
    idx = df.groupby(['cores', 'num_cities'])['combined_score'].idxmin()
    
    # Use the indices to get the rows with the best parameters for each group
    best_parameters_df = df.loc[idx]
    
    # Sort the resulting DataFrame by 'combined_score' for better readability
    best_parameters_df = best_parameters_df.sort_values(by='combined_score')
    
    return best_parameters_df

# Usage
df = read_results_to_dataframe('results_02_50')
best_params_by_cores_and_cities = find_best_parameters_by_cores_and_cities(df)
print("\nBest parameters by cores and cities group:")
print(best_params_by_cores_and_cities)



Best parameters by cores and cities group:
     num_cities  population_size  total_generations  mutation_rate  \
203         100              100                400            0.2   
39          100              100               1200            0.2   
69          100              100                300            0.2   
99          100              100               1600            0.2   
84          200              100                500            0.2   
111         200              100                600            0.2   
29          200              100               1200            0.2   
108         200              100               1600            0.2   
45          300              100                500            0.2   
23          300              100                600            0.2   
101         300              100               1200            0.2   
22          300              100               1600            0.2   
68          400              100              