# Deduplicate Experimental Results

If you had to restart the experiments, the `.txt` result files may contain duplicate entries, which can cause the JSON decoder in the analysis notebook to throw an error. Use this Jupyter notebook to deduplicate the data. The script always keeps the data from the first run and overwrites the result files in place.

In [None]:
# Imports
import os
import json

In [None]:
# Settings
# Root directory of the results; the deduplication loop looks for files in
# <base_path_mcts>/<experiment>/<model>_<seed>/.
base_path_mcts = './tensor_eqs_mcts'

# Experiment sub-directory names to process.
experiments = [
    'egg_greedy_egg_greedy',
    'egg_greedy_tensat_ilp',
    'new_greedy_new_greedy',
    'new_greedy_tensat_ilp',
    'tensat_ilp_tensat_ilp',
]

# Model names; combined with the seed to form the run directory name.
models = [
    'bert',
    'inceptionv3',
    'resnext50',
    'nasneta',
    'vgg',
    'mobilenetv2',
    'resnet50',
    'squeezenet',
    'nasrnn',
]

# Seeds 0 .. num_seeds - 1 are processed for every experiment/model pair.
num_seeds = 5

In [None]:
# Delete duplicate data


def _keep_first_line(filename):
    """Rewrite ``filename`` in place so that only its first line remains."""
    # Read first: reopening the file in 'w' mode truncates it immediately.
    with open(filename, 'r') as file:
        first_line = file.readline()

    with open(filename, 'w') as file:
        file.write(first_line)


def _first_run_lines(lines):
    """Return the leading lines that belong to the first run.

    Each line is parsed as JSON and its 'total_planning_time' is compared
    with that of the previously kept line (a missing key is read as -1).
    As soon as the value decreases, that line and everything after it is
    dropped. Lines that are not valid JSON are reported and skipped.
    """
    kept_lines = []
    previous_total_planning_time = -1
    for line in lines:
        try:
            data = json.loads(line)
        except json.JSONDecodeError:
            print(f'Error decoding JSON in line: {line}')
            continue

        current_total_planning_time = data.get('total_planning_time', -1)
        if current_total_planning_time >= previous_total_planning_time:
            kept_lines.append(line)
            previous_total_planning_time = current_total_planning_time
        else:
            # The planning time dropped: stop and discard the rest.
            break
    return kept_lines


for experiment in experiments:
    for model in models:
        for seed in range(num_seeds):
            output_dir = os.path.join(base_path_mcts, experiment, f'{model}_{seed}')

            try:
                ### Settings: only keep the first line ###
                _keep_first_line(os.path.join(output_dir, 'settings.txt'))

                ### RMCTS stats: only keep the first line ###
                _keep_first_line(os.path.join(output_dir, 'rmcts_stats.txt'))

                ### RMCTS iteration data: delete everything after the total planning time decreases ###
                filename = os.path.join(output_dir, 'rmcts_iteration_data.txt')

                with open(filename, 'r') as infile:
                    lines = infile.readlines()

                # Filter in memory BEFORE reopening for writing. If filtering
                # fails unexpectedly (e.g. a line that is valid JSON but not an
                # object), the error is reported below and the file on disk is
                # left untouched instead of being truncated.
                kept_lines = _first_run_lines(lines)

                with open(filename, 'w') as outfile:
                    outfile.writelines(kept_lines)

            except Exception as e:
                # Best effort: report the failing run and continue with the next one.
                print('Exception! Experiment: ', experiment, ', model: ', model, ', seed: ', seed, '. Error message: ', e)