In [1]:
import json
from collections import OrderedDict
from functools import cache

# Helpers for reading one parameter's observation timeline from a JSON file.
@cache
def _load_parameter_observations(file_path, parameter):
    """Parse ``file_path`` and return ``parameter``'s observations as a tuple.

    The result is memoised per ``(file_path, parameter)`` pair. It is returned
    as an immutable tuple so the cached value cannot be modified by a caller.
    """
    # JSON is UTF-8 by specification; do not depend on the platform's locale
    # default encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        # Plain dicts preserve insertion order, so the file's entry order is
        # kept without an OrderedDict hook.
        data = json.load(file)

    return tuple(entry[parameter]['observation'] for entry in data.values())


def extract_parameters_timeline(file_path, parameter):
    """Return the observation timeline of one parameter from a JSON file.

    The file must contain a JSON object at top level. For every value of that
    object, ``value[parameter]['observation']`` is collected, in the order the
    entries appear in the file.

    Args:
        file_path: Path of the JSON file to read. Must be hashable (e.g. a
            ``str``), because results are cached per ``(file_path, parameter)``.
        parameter: Key to look up inside every top-level entry.

    Returns:
        A new ``list`` of the observations. Each call returns its own list, so
        mutating the result does not affect what later calls return. (The
        copy is shallow; observations are presumably plain numbers.)

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If an entry lacks ``parameter`` or its ``'observation'`` key.

    Note:
        Because parsing is cached, changes to the file on disk are not picked
        up until ``extract_parameters_timeline.cache_clear()`` is called.
    """
    return list(_load_parameter_observations(file_path, parameter))


# Keep the cache-management handles that the directly decorated function
# used to expose.
extract_parameters_timeline.cache_clear = _load_parameter_observations.cache_clear
extract_parameters_timeline.cache_info = _load_parameter_observations.cache_info
    

In [2]:
# Spot check: display the 'ChangeParameterProbability' timeline stored for one run.
extract_parameters_timeline(
    '../single_parameter/combined_data/codetiming._timer/codetiming._timer#01/parameters_timeline.json',
    'ChangeParameterProbability',
)

[0.1,
 0.1443394184112549,
 0.2212823271751404,
 0.2365305304527283,
 0.15909852981567385,
 0.08932956457138064,
 0.1734938740730286,
 0.23435615301132207,
 0.251189410686493,
 0.21217057704925543,
 0.14793717265129094,
 0.10830913186073307,
 0.03385197520256046,
 0.10178481936454777,
 0.06512374877929691,
 0.08903006315231327,
 0.10829019546508792,
 0.04360390305519107,
 0.06540833115577702,
 0.14916575551033023,
 0.05176494121551517,
 0.00414954423904422,
 0.07992192506790165,
 0.07855555415153508,
 0.03615291714668278,
 0.09077530503273015,
 0.046053129434585616,
 0.12009726166725163,
 0.19137552380561834,
 0.27627542614936834,
 0.191125100851059,
 0.21194228529930118,
 0.11961867213249208,
 0.12369853854179384,
 0.17849218249320986,
 0.0855368196964264,
 0.11172662377357484,
 0.21013438105583193,
 0.22303851246833803,
 0.2940428555011749,
 0.28247087597846987,
 0.3074266254901886,
 0.3601428210735321,
 0.26630370616912846,
 0.1822683691978455,
 0.13011568188667302,
 0.1671282947063

In [3]:
import pandas as pd

# Load the statistics table from disk.
df = pd.read_csv('../single_parameter/combined_data/statistics.csv')

# Keep only the rows whose 'TuningParameters' entry is something other than 'NONE'.
filtered_df = df.loc[df['TuningParameters'].ne('NONE')]

# One group per distinct 'TuningParameters' value.
grouped = filtered_df.groupby(by='TuningParameters')

In [4]:
import numpy as np
base_path = "../single_parameter/combined_data"

# Maps each tuning-parameter name to a 2-D array holding one row per run of
# that group; every row is the run's observation timeline for that parameter.
parameter_dict = {}
for name, group in grouped:
    print(f'Processing group: {name}')
    # A run's timeline file is located by its 'TargetModule' and 'RunId' columns.
    matrix = [
        extract_parameters_timeline(
            f"{base_path}/{target_module}/{run_id}/parameters_timeline.json",
            name,
        )
        for target_module, run_id in zip(group['TargetModule'], group['RunId'])
    ]
    # Going through a DataFrame pads shorter timelines with NaN, so runs of
    # different lengths still form a rectangular array.
    df_matrix = np.array(pd.DataFrame(matrix))
    parameter_dict[str(name)] = df_matrix


Processing group: ChangeParameterProbability
Processing group: ChromosomeLength
Processing group: CrossoverRate
Processing group: Elite
Processing group: Population
Processing group: RandomPerturbation
Processing group: StatementInsertionProbability
Processing group: TestChangeProbability
Processing group: TestDeleteProbability
Processing group: TestInsertProbability
Processing group: TestInsertionProbability
Processing group: TournamentSize


In [7]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from utils.prior_check_helper import HistogramPlot, plot_histograms

# Bin count used for every parameter plotted over the range (0, 1).
prob_bins = 50
# One colour per parameter; "Paired" was the alternative palette tried here.
pal = sns.color_palette("bright", 12)

# Keyword arguments shared by all parameters plotted over (0, 1).
probability_axis = dict(bin_range=(0, 1), bins=prob_bins)

# One entry per histogram: (key in parameter_dict, label, HistogramPlot keywords).
# The `np.arange(a, b) - 0.5` bin arrays give half-integer edges, presumably so
# each integer value gets its own bin (depends on how HistogramPlot uses `bins`).
histogram_specs = [
    ('ChangeParameterProbability', "Change Parameter Probability", dict(probability_axis, default_value=0.1)),
    ('ChromosomeLength', "Chromosome Length", dict(bins=np.arange(20, 82) - 0.5, default_value=40)),
    ('CrossoverRate', "Crossover Rate", dict(probability_axis, default_value=0.75)),
    ('Elite', "Elite", dict(bins=np.arange(0,12) - 0.5, default_value=1)),
    ('Population', "Population", dict(bins=np.arange(25,102) - 0.5, default_value=50)),
    ('RandomPerturbation', "Random Perturbation", dict(probability_axis, default_value=0.2)),
    ('StatementInsertionProbability', "Statement Insertion Probability", dict(probability_axis, default_value=0.5)),
    ('TestChangeProbability', "Test Change Probability", dict(probability_axis, default_value=0.333)),
    ('TestDeleteProbability', "Test Delete Probability", dict(probability_axis, default_value=0.333)),
    ('TestInsertProbability', "Test Insert Probability", dict(probability_axis, default_value=0.333)),
    ('TestInsertionProbability', "Test Insertion Probability", dict(probability_axis, default_value=0.1)),
    ('TournamentSize', "Tournament Size", dict(bins=np.arange(2,12) - 0.5, default_value=5)),
]

# Each histogram pools the observations of all runs (the array is flattened)
# and takes the palette colour at its own position in the spec list.
plots = [
    HistogramPlot(parameter_dict[key].flatten(), "", label, pal[position], **options)
    for position, (key, label, options) in enumerate(histogram_specs)
]

# NOTE(review): the positional arguments are passed through unchanged; 4, 3 and
# (20,20) look like grid shape and figure size for the 12 plots - confirm
# against utils.prior_check_helper.plot_histograms.
plot_histograms(plots, 4, 3, (20,20), False, "Parameter","parameter_histograms/")