In [None]:
import pickle
from pathlib import Path
import numpy as np
import re
from collections import Counter



## Label the actions with separate explore

In [None]:
def records_func(state_action):
    """Collect the simulation state captured at every 'record' action.

    Parameters
    ----------
    state_action : dict
        Must expose ``['new_seq']['actions']['action']`` (list of action names)
        and ``['new_seq']['state']`` with the parallel lists ``'width'``,
        ``'concentration'``, ``'wavelength'`` and ``'solution'``.

    Returns
    -------
    list of dict
        One dict per action equal to ``'record'``, in sequence order, with the
        keys ``'Rec No'`` (0-based counter), ``'Width'``, ``'Concentration'``,
        ``'Wavelength'`` and ``'Solution'`` read at the record's own index.
    """
    sequence = state_action['new_seq']
    action_names = sequence['actions']['action']

    # Positions of the plain 'record' actions; the state lists are only
    # indexed at these positions.
    record_positions = [
        pos for pos in range(len(action_names)) if action_names[pos] == 'record'
    ]

    return [
        {
            'Rec No': number,
            'Width': sequence['state']['width'][pos],
            'Concentration': sequence['state']['concentration'][pos],
            'Wavelength': sequence['state']['wavelength'][pos],
            'Solution': sequence['state']['solution'][pos],
        }
        for number, pos in enumerate(record_positions)
    ]

In [None]:
def categorise_record_based_on_previous_record_v1(state_action):
    """Rename each 'record' action after the variables changed since the last record.

    The action list in ``state_action['new_seq']['actions']['action']`` is
    modified in place; nothing is returned. The first record keeps the plain
    name ``'record'``. Every later record becomes ``'record'`` followed by
    ``'_width'``, ``'_concentration'``, ``'_wavelength'`` and/or ``'_solution'``
    (in that order) for each value that differs from the previous record's.
    A record with no differences stays ``'record'``.
    """
    # Snapshot the recorded states before any action name is rewritten.
    records = records_func(state_action)
    action_names = state_action['new_seq']['actions']['action']

    record_positions = [
        idx for idx, name in enumerate(action_names) if name == 'record'
    ]
    compared_fields = (
        ('Width', '_width'),
        ('Concentration', '_concentration'),
        ('Wavelength', '_wavelength'),
        ('Solution', '_solution'),
    )

    for ordinal, idx in enumerate(record_positions):
        if ordinal == 0:
            # Nothing to compare the very first record against.
            continue
        current = records[ordinal]
        previous = records[ordinal - 1]
        suffixes = [
            suffix for field, suffix in compared_fields
            if current[field] != previous[field]
        ]
        action_names[idx] = 'record' + ''.join(suffixes)

In [None]:
def graph_func(sim_data):
    """Build the history of graph-axis selections from the raw simulation log.

    Only log entries whose ``'action'`` is ``'xaxis'`` or ``'yaxis'`` are used.

    Returns
    -------
    dict
        ``'action'`` and ``'timestamp'``: one entry per axis change.
        ``'xaxis_value'`` and ``'yaxis_value'``: start with ``None`` and gain
        one entry per axis change, so index ``k`` holds the selection in
        effect before change ``k`` and index ``k + 1`` the selection after it.
        ``'cuvetteWidth'`` is stored as ``'width'``, ``'trialNumber'`` as
        ``'other'``; any other value is stored unchanged.
    """
    graph = {'action': [], 'timestamp': [], 'xaxis_value': [None], 'yaxis_value': [None]}

    for entry in sim_data['logs']:
        axis = entry['action']
        if axis not in ('xaxis', 'yaxis'):
            continue

        graph['action'].append(axis)
        graph['timestamp'].append(entry['timestamp'])

        raw_value = entry['new_value']
        if raw_value == 'cuvetteWidth':
            shown_value = 'width'
        elif raw_value == 'trialNumber':
            shown_value = 'other'
        else:
            shown_value = raw_value

        # The changed axis takes the new value; the other axis carries its
        # latest value forward so both histories stay the same length.
        changed_key = axis + '_value'
        carried_key = 'yaxis_value' if axis == 'xaxis' else 'xaxis_value'
        graph[changed_key].append(shown_value)
        graph[carried_key].append(graph[carried_key][-1])

    return graph

In [None]:
def categorise_analysis_action(state_action, graph):
    """Rewrite analysis-related action names into ``analysis_*`` categories.

    ``state_action['new_seq']['actions']['action']`` is replaced by a new list
    of the same length; nothing is returned.

    Mapping
    -------
    * ``'plot'`` -> ``'analysis_plot_<variable>'`` when one axis shows
      ``'absorbance'`` and the other shows a different, known variable at the
      time of the plot; otherwise ``'analysis_plot_other'``.
    * ``'delete_point_from_table'``, ``'move_down_point'``, ``'move_up_point'``
      -> ``'analysis_table'``.
    * ``'xaxis'``, ``'yaxis'``, ``'xaxis_scale'``, ``'yaxis_scale'``
      -> ``'analysis_axis'``.
    * ``'remove_from_graph'`` -> ``'analysis_remove'``.
    * Anything else is kept unchanged.

    Parameters
    ----------
    state_action : dict
        Needs ``['new_seq']['actions']`` with parallel ``'action'`` and
        ``'timestamp'`` lists.
    graph : dict
        Axis history with ``'timestamp'`` (one entry per axis change) and
        ``'xaxis_value'`` / ``'yaxis_value'`` (one entry longer: index ``k`` is
        the selection before change ``k``, the last entry the latest selection).

    Notes
    -----
    A log without any axis change no longer raises ``IndexError``: the axis
    values are then the initial entries of the histories. A plot stamped
    exactly at the last axis change now uses the latest values, consistent
    with ties against earlier changes.
    """
    sequence = state_action['new_seq']['actions']
    axis_times = graph['timestamp']

    table_actions = ('delete_point_from_table', 'move_down_point', 'move_up_point')
    axis_actions = ('xaxis', 'yaxis', 'xaxis_scale', 'yaxis_scale')

    new_actions = []
    for position, action in enumerate(sequence['action']):
        if action == 'plot':
            plot_time = sequence['timestamp'][position]

            if len(axis_times) == 0 or plot_time >= axis_times[-1]:
                # No later axis change exists: the latest selection applies.
                slot = -1
            else:
                # First axis change strictly after the plot; the value stored
                # at that index is the selection in effect before the change.
                # The guard above guarantees such a change exists.
                slot = next(
                    k for k, axis_time in enumerate(axis_times)
                    if axis_time > plot_time
                )
            xaxis_value = graph['xaxis_value'][slot]
            yaxis_value = graph['yaxis_value'][slot]

            if xaxis_value is not None and yaxis_value == 'absorbance' and xaxis_value != 'absorbance':
                new_actions.append('analysis_plot_{0}'.format(xaxis_value))
            elif yaxis_value is not None and xaxis_value == 'absorbance' and yaxis_value != 'absorbance':
                new_actions.append('analysis_plot_{0}'.format(yaxis_value))
            else:
                new_actions.append('analysis_plot_other')
        elif action in table_actions:
            new_actions.append('analysis_table')
        elif action in axis_actions:
            new_actions.append('analysis_axis')
        elif action == 'remove_from_graph':
            new_actions.append('analysis_remove')
        else:
            new_actions.append(action)

    sequence['action'] = new_actions

In [None]:
def labelling_exploring_segment_v1(segment):
    """Name an explore segment after the kinds of action it contains.

    Starts from ``'explore'`` and appends, in this fixed order, ``'_width'``,
    ``'_concentration'``, ``'_wavelength'`` and ``'_solution'`` for each of
    those names present in ``segment``. ``'_other'`` is appended last when the
    segment contains any of ``'open_instruction'``, ``'close_instruction'``,
    ``'ruler'``, ``'restore_sim'`` or ``'open_solution_menu'``.
    """
    variable_names = ('width', 'concentration', 'wavelength', 'solution')
    other_names = (
        'open_instruction',
        'close_instruction',
        'ruler',
        'restore_sim',
        'open_solution_menu',
    )

    parts = ['explore']
    parts.extend(name for name in variable_names if name in segment)
    if any(name in segment for name in other_names):
        parts.append('other')

    return '_'.join(parts)

In [None]:
def labelling_exploring_variable_v1(state_action, threshold = 3000):
    """Collapse the action sequence into labelled ``[label, begin, end]`` rows.

    Actions whose name contains ``'analysis'`` or ``'record'`` each become
    their own row with ``begin == end == timestamp``. All other actions are
    grouped into explore segments: an action extends the open segment when it
    has the same name as the segment and its timestamp is at most
    ``threshold`` after the segment's current end. Otherwise the open segment
    is closed and a new one starts. Closed segments are named by
    ``labelling_exploring_segment_v1``.

    The last action of the sequence is never visited (the loop stops at
    ``len(actions) - 1``).
    NOTE(review): presumably the final entry is a terminator; confirm.

    Returns
    -------
    list of [label, begin, end]
    """
    sequence = state_action['new_seq']['actions']
    actions = sequence['action']
    timestamp = sequence['timestamp']

    rows = []       # finished [label, begin, end] rows, in sequence order
    pending = []    # action names of the explore segment still being built
    span = None     # [begin, end] of that open segment

    for idx in range(len(actions) - 1):
        name = actions[idx]
        stamp = timestamp[idx]
        is_marker = 'analysis' in name or 'record' in name

        # Same kind of action, close enough in time: grow the open segment.
        if pending and not is_marker and stamp - span[1] <= threshold and name == pending[0]:
            pending.append(name)
            span[1] = stamp
            continue

        # Anything else closes the open segment first.
        if pending:
            rows.append([labelling_exploring_segment_v1(pending), span[0], span[1]])
            pending = []

        if is_marker:
            rows.append([name, stamp, stamp])
        else:
            pending = [name]
            span = [stamp, stamp]

    if pending:
        rows.append([labelling_exploring_segment_v1(pending), span[0], span[1]])

    return rows

In [None]:
def insert_breaks(result, break_threshold=20000):
    """Insert ``'break'`` rows into idle gaps between consecutive rows.

    Parameters
    ----------
    result : sequence of [label, begin, end]
        Rows in chronological order.
    break_threshold : number, default 20000
        A gap (next row's begin minus previous row's end) of at least this
        size is filled with ``gap // break_threshold`` rows
        ``['break', start, start + break_threshold]`` laid back to back from
        the previous row's end. Must be positive.

    Returns
    -------
    list
        A new list with the original rows and the inserted break rows. An
        empty ``result`` yields an empty list instead of raising ``IndexError``.
    """
    if len(result) == 0:
        return []

    with_breaks = [result[0]]
    for previous, current in zip(result, result[1:]):
        gap = current[1] - previous[2]
        if gap >= break_threshold:
            break_begin = previous[2]
            for _ in range(int(gap // break_threshold)):
                with_breaks.append(['break', break_begin, break_begin + break_threshold])
                break_begin += break_threshold
        with_breaks.append(current)

    return with_breaks

In [None]:
def variables_sequencer_v1(state_action, sim_data, threshold = 3000, break_threshold = 20000):
    """Run the full labelling pipeline for one session.

    Steps, in order: label records by what changed since the previous record,
    build the graph-axis history from ``sim_data``, categorise analysis
    actions, group the remaining actions into explore segments, and insert
    break rows into long idle gaps.

    ``state_action`` is modified in place by the first and third steps (its
    action names are rewritten), so calling this twice on the same object
    operates on already-relabelled actions the second time.

    Parameters
    ----------
    state_action : dict
        Session record; besides ``'new_seq'`` it must carry ``'group'``,
        ``'subgroup'``, ``'session_code'`` and ``'task'``.
    sim_data : dict
        Raw simulation data with a ``'logs'`` list.
    threshold : number, default 3000
        Maximum gap between same-kind actions merged into one explore segment.
    break_threshold : number, default 20000
        Minimum idle gap that is filled with break rows. Previously
        hard-coded; the default keeps the old behaviour.

    Returns
    -------
    dict
        ``'group'``, ``'subgroup'``, ``'session_code'``, ``'task'`` copied
        from ``state_action`` plus ``'var_seq_v1'``, the labelled rows.
    """
    categorise_record_based_on_previous_record_v1(state_action)
    graph = graph_func(sim_data)
    categorise_analysis_action(state_action, graph)
    result = labelling_exploring_variable_v1(state_action, threshold = threshold)
    final_result = insert_breaks(result, break_threshold=break_threshold)

    fin_result = {
        'group': state_action['group'],
        'subgroup': state_action['subgroup'],
        'session_code': state_action['session_code'],
        'task': state_action['task'],
        'var_seq_v1': final_result,
    }

    return fin_result

# Labeling

## CVS labeling

In [None]:
def cvs_labeling(separate_actions_check, st_act_check):    
    """Relabel a segmented action sequence with ``CVS_`` / ``Non_CVS_`` prefixes.

    Works on a copy of ``separate_actions_check['actions']`` and returns that
    copy; the inputs are only read.

    Parameters
    ----------
    separate_actions_check : dict
        Parallel lists ``'actions'`` (labels such as ``'explore_solution'``,
        ``'explore_width'``, record labels, ...), ``'begins'`` and ``'ends'``.
    st_act_check : dict
        Needs ``['new_seq']['actions']['timestamp']`` and the parallel
        ``['new_seq']['state']['concentration']`` / ``['wavelength']`` lists.

    Returns
    -------
    list
        The relabelled copy of the action list, same length as the input.

    The function makes five passes over the list:
    1. windows from one ``'explore_solution'`` up to the next one;
    2. every remaining ``'explore_solution'`` becomes Non_CVS;
    3. ``explore_concentration`` / ``explore_wavelength`` / ``explore_width``
       entries, judged by the entry that follows them;
    4. record entries, judged by their neighbours;
    5. ``'explore_other'`` becomes ``'other'``.
    """
    cvs_try = separate_actions_check['actions'].copy()

    # Pass 1: solution windows. Conditions read the ORIGINAL labels in
    # separate_actions_check, results are written to cvs_try.
    for i in range(len(cvs_try)):
        if separate_actions_check['actions'][i] == 'explore_solution':
            solution_experiment = []
            # Find the next 'explore_solution'; only that first one is used (see the break below).
            for j in range(i+1, len(cvs_try)):
                if separate_actions_check['actions'][j] == 'explore_solution':
                    # Window = labels at positions i .. j-1.
                    for k in range(i, j):
                        solution_experiment.append(separate_actions_check['actions'][k])

                    # More than one label containing 'explore_concentration' or
                    # 'explore_wavelength', or any label equal to 'explore_width':
                    # the window's 'explore_solution' is Non_CVS.
                    if (sum('explore_concentration' in k for k in solution_experiment) > 1) or (sum('explore_wavelength' in k for k in solution_experiment) > 1) or 'explore_width' in solution_experiment:
                        for m in range(len(solution_experiment)):
                            if solution_experiment[m] == 'explore_solution':
                                cvs_try[i+m] = 'Non_CVS_explore_solution'
                        solution_experiment = []

                    else:
                        # Look up the state at the window's first begin and at its last end
                        # by exact timestamp match; the last matching index wins.
                        # NOTE(review): if no timestamp matches, these four variables keep the
                        # values from an earlier window (or are unbound on the first one) — verify.
                        for m in range(len(st_act_check['new_seq']['actions']['timestamp'])):
                            if separate_actions_check['begins'][i] == st_act_check['new_seq']['actions']['timestamp'][m]:
                                concentration_sol = st_act_check['new_seq']['state']['concentration'][m]
                                wavelength_sol = st_act_check['new_seq']['state']['wavelength'][m]
                            if separate_actions_check['ends'][i+len(solution_experiment)-1] == st_act_check['new_seq']['actions']['timestamp'][m]:
                                concentration_final = st_act_check['new_seq']['state']['concentration'][m]
                                wavelength_final = st_act_check['new_seq']['state']['wavelength'][m]

                        try:    
                            # CVS when the starting concentration is non-zero and an exact multiple
                            # of the final one, and the wavelength is unchanged.
                            if concentration_sol != 0 and (concentration_sol % concentration_final == 0) and wavelength_sol == wavelength_final:
                                for m in range(len(solution_experiment)):
                                    if solution_experiment[m] == 'explore_solution':
                                        cvs_try[i+m] = 'CVS_explore_solution'
                                    elif 'record' in solution_experiment[m]:
                                        cvs_try[i+m] = 'CVS_record_solution'
                                    elif solution_experiment[m] in ['explore_concentration', 'explore_wavelength', 'explore_other']:
                                        cvs_try[i+m] = 'other'
                            else:
                                for m in range(len(solution_experiment)):
                                    if solution_experiment[m] == 'explore_solution':
                                        cvs_try[i+m] = 'Non_CVS_explore_solution'
                                    elif 'record' in solution_experiment[m]:
                                        cvs_try[i+m] = 'Non_CVS_record_solution'
                                    elif solution_experiment[m] in ['explore_concentration', 'explore_wavelength', 'explore_other']:
                                        cvs_try[i+m] = 'other'
                        except ZeroDivisionError:
                            # Reached when the modulo above divides by a zero final concentration.
                            # NOTE(review): the modulo is only evaluated after concentration_sol != 0
                            # held, so the equality below appears unable to be true here and this
                            # handler would always take the Non_CVS branch — confirm.
                            if concentration_sol == concentration_final and wavelength_sol == wavelength_final:
                                for m in range(len(solution_experiment)):
                                    if solution_experiment[m] == 'explore_solution':
                                        cvs_try[i+m] = 'CVS_explore_solution'
                                    elif 'record' in solution_experiment[m]:
                                        cvs_try[i+m] = 'CVS_record_solution'
                                    elif solution_experiment[m] in ['explore_concentration', 'explore_wavelength', 'explore_other']:
                                        cvs_try[i+m] = 'other'
                            else:
                                for m in range(len(solution_experiment)):
                                    if solution_experiment[m] == 'explore_solution':
                                        cvs_try[i+m] = 'Non_CVS_explore_solution'
                                    elif 'record' in solution_experiment[m]:
                                        cvs_try[i+m] = 'Non_CVS_record_solution'
                                    elif solution_experiment[m] in ['explore_concentration', 'explore_wavelength', 'explore_other']:
                                        cvs_try[i+m] = 'other'
                                        
                        solution_experiment = [] 

                    # Stop at the first following 'explore_solution'.
                    break   
   
    # Pass 2: any 'explore_solution' not relabelled above (e.g. the last one,
    # which has no following 'explore_solution') is Non_CVS.
    for i in range(len(cvs_try)):
        if cvs_try[i] == 'explore_solution':
            cvs_try[i] = 'Non_CVS_explore_solution'

            
    # Pass 3: single-variable explores that are still unprefixed.
    for i in range(len(cvs_try)):
        if cvs_try[i] in ['explore_concentration', 'explore_wavelength', 'explore_width']:
            try:
                if cvs_try[i+1] in ['explore_concentration', 'explore_wavelength', 'explore_width']:
                    # Two variable explores in a row: Non_CVS for both when they are at most
                    # 3000 timestamp units apart, otherwise the current one is CVS.
                    if separate_actions_check['begins'][i+1] - separate_actions_check['ends'][i] <= 3000:
                        cvs_try[i] = 'Non_CVS_' + cvs_try[i]
                        # The next entry is prefixed here, so the outer `if` skips it on its own turn.
                        cvs_try[i+1] = 'Non_CVS_' + cvs_try[i+1]
                    else:
                        cvs_try[i] = 'CVS_' + cvs_try[i]
                else:
                    # cvs_try[i] was just matched against the unprefixed names, so this
                    # check is always False and the entry always becomes CVS.
                    if 'CVS' in cvs_try[i]:
                        pass
                    else:
                        cvs_try[i] = 'CVS_' + cvs_try[i]
            except IndexError:
                # Last entry of the list: there is no follower to compare with.
                cvs_try[i] = 'CVS_' + cvs_try[i]

    # Pass 4: records outside solution windows inherit CVS / Non_CVS and the
    # variable name from the explore entry just before them.
    for i in range(len(cvs_try)):
        if 'record' in cvs_try[i] and cvs_try[i] not in ['CVS_record_solution', 'Non_CVS_record_solution']:
            if i-1 >= 0 and i+1 < len(cvs_try):
                # Non_CVS explore before and CVS explore after: CVS record only when both
                # explores concern the same variable.
                if cvs_try[i-1] in ['Non_CVS_explore_width', 'Non_CVS_explore_concentration', 'Non_CVS_explore_wavelength'] and cvs_try[i+1] in ['CVS_explore_width', 'CVS_explore_concentration', 'CVS_explore_wavelength']:
                    if cvs_try[i-1].replace("Non_CVS_", "") == cvs_try[i+1].replace("CVS_", ""):
                        cvs_try[i] = 'CVS_record_' + cvs_try[i-1].replace("Non_CVS_explore_", "")
                    else:
                        cvs_try[i] = 'Non_CVS_record_' + cvs_try[i-1].replace("Non_CVS_explore_", "")
                elif cvs_try[i-1] in ['CVS_explore_width', 'CVS_explore_concentration', 'CVS_explore_wavelength']:
                    cvs_try[i] = 'CVS_record_' + cvs_try[i-1].replace("CVS_explore_", "")
                
                elif cvs_try[i-1] in ['Non_CVS_explore_width', 'Non_CVS_explore_concentration', 'Non_CVS_explore_wavelength']:
                    cvs_try[i] = 'Non_CVS_record_' + cvs_try[i-1].replace("Non_CVS_explore_", "")
                else:        
                    cvs_try[i] = 'Non_CVS_record'
            
            elif i-1 >= 0:
                # Record is the last entry: only the preceding entry is available.
                if cvs_try[i-1] in ['CVS_explore_width', 'CVS_explore_concentration', 'CVS_explore_wavelength']:
                    cvs_try[i] = 'CVS_record_' + cvs_try[i-1].replace("CVS_explore_", "")
                
                elif cvs_try[i-1] in ['Non_CVS_explore_width', 'Non_CVS_explore_concentration', 'Non_CVS_explore_wavelength']:
                    cvs_try[i] = 'Non_CVS_record_' + cvs_try[i-1].replace("Non_CVS_explore_", "")

                else:        
                    cvs_try[i] = 'Non_CVS_record'
                    
            else:        
                # Record is the first entry (no predecessor).
                cvs_try[i] = 'Non_CVS_record'

    # Pass 5: leftover 'explore_other' entries are flattened to 'other'.
    for i in range(len(cvs_try)):
        if cvs_try[i] == 'explore_other':
            cvs_try[i] = 'other'  

    return cvs_try      



## Optimal labeling

In [None]:
# Wavelength band [low, high) associated with each solution in the optimality
# check: a width/concentration step counts as optimal only when the wavelength
# lies OUTSIDE the band listed for the current solution.
_OPTIMAL_EXCLUDED_BANDS = {
    'drink Mix (red)': (620, 780),
    'Cobalt nitrate (red)': (620, 780),
    'Cobalt chloride (red)': (620, 780),
    'Copper sulfate (blue)': (450, 495),
    'Nickel chloride (green)': (495, 570),
    'Potassium chromate (yellow)': (570, 590),
    'Potassium dichromate (orange)': (590, 620),
    'Potassium permanganate (purple)': (620, 700),
}


def _optimal_split_label(label):
    """Return ``(kind, variable)`` for a CVS/Non_CVS explore/record label, else ``None``."""
    for variable in ('width', 'concentration', 'wavelength'):
        for kind in ('explore', 'record'):
            suffix = '{0}_{1}'.format(kind, variable)
            if label == 'CVS_' + suffix or label == 'Non_CVS_' + suffix:
                return kind, variable
    return None


def _optimal_wavelength_in_band(wavelength, band):
    """True when ``low <= wavelength < high``; non-numeric values count as outside."""
    low, high = band
    try:
        return low <= wavelength < high
    except TypeError:
        return False


def optimal_labeling(cvs_try, separate_actions_check, st_act_check):
    """Translate CVS labels into Optimal / Non_Optimal labels, one per input entry.

    For every width, concentration or wavelength explore/record label, the
    simulation state is read at the first timestamp equal to the entry's
    begin time, and the entry becomes ``'Optimal_<kind>_<variable>'`` or
    ``'Non_Optimal_<kind>_<variable>'``:

    * width: optimal when the wavelength is outside the band listed for the
      current solution and the concentration is not 0;
    * concentration: optimal when the wavelength is outside that band;
    * wavelength: optimal when the concentration is not 0.

    Other labels: ``'Non_CVS_record'``, ``'Non_CVS_record_solution'`` and
    ``'CVS_record_solution'`` lose their CVS prefix; the two
    ``*_explore_solution`` labels become ``'solution'``; everything else is
    copied unchanged.

    Parameters
    ----------
    cvs_try : list
        Labels to translate.
    separate_actions_check : dict
        Needs ``'begins'``, parallel to ``cvs_try``.
    st_act_check : dict
        Needs ``['new_seq']['actions']['timestamp']`` and the parallel state
        lists ``'concentration'``, ``'wavelength'`` and ``'solution'``.

    Returns
    -------
    list
        Exactly ``len(cvs_try)`` labels, index-aligned with the input.

    Notes
    -----
    Changes from the earlier version of this function:

    * exactly one label is emitted per entry: a begin time that occurs
      several times in the timestamps no longer yields several labels (the
      first occurrence is used), and an entry with no matching timestamp or
      an unlisted solution keeps its incoming label instead of vanishing;
    * the band test is a numeric comparison, so a fractional wavelength inside
      a band is treated as inside (``x in range(...)`` is false for it).
    """
    timestamps = st_act_check['new_seq']['actions']['timestamp']
    state = st_act_check['new_seq']['state']

    optimal_try = []
    for i, label in enumerate(cvs_try):
        parsed = _optimal_split_label(label)

        if parsed is None:
            if label in ['Non_CVS_record', 'Non_CVS_record_solution', 'CVS_record_solution']:
                optimal_try.append(re.sub(r'^(CVS_|Non_CVS_)', '', label))
            elif label in ['CVS_explore_solution', 'Non_CVS_explore_solution']:
                optimal_try.append('solution')
            else:
                optimal_try.append(label)
            continue

        kind, variable = parsed
        begin = separate_actions_check['begins'][i]

        # First state snapshot taken at the entry's begin time.
        match = next(
            (j for j in range(len(timestamps)) if timestamps[j] == begin), None
        )
        if match is None:
            # No state to judge by: keep the entry, unclassified.
            optimal_try.append(label)
            continue

        if variable == 'wavelength':
            is_optimal = state['concentration'][match] != 0
        else:
            band = _OPTIMAL_EXCLUDED_BANDS.get(state['solution'][match])
            if band is None:
                # Solution not in the table: keep the entry, unclassified.
                optimal_try.append(label)
                continue
            is_optimal = not _optimal_wavelength_in_band(state['wavelength'][match], band)
            if variable == 'width':
                is_optimal = is_optimal and state['concentration'][match] != 0

        prefix = 'Optimal_' if is_optimal else 'Non_Optimal_'
        optimal_try.append('{0}{1}_{2}'.format(prefix, kind, variable))

    return optimal_try

## Range labeling

In [None]:
def _ranger_count_stops(explore_times, min_gap=20):
    """Count consecutive timestamp pairs that lie more than ``min_gap`` apart."""
    return sum(
        1
        for earlier, later in zip(explore_times, explore_times[1:])
        if later - earlier > min_gap
    )


def ranger_stop(st_act, experiment_begin, experiment_end, current_variable):
    """Measure how much of a variable's range an experiment covered, and its stops.

    Parameters
    ----------
    st_act : dict
        Needs ``['new_seq']['actions']`` with parallel ``'timestamp'`` and
        ``'action'`` lists, and ``['new_seq']['state']`` with a list for
        ``current_variable`` and for ``'solution'``.
        NOTE(review): the state value is read at index ``j + 1`` for action
        ``j``, so the state lists are presumably one entry longer than the
        action lists; confirm against the producer.
    experiment_begin, experiment_end : number
        Inclusive timestamp window of the experiment.
    current_variable : str
        ``'width'``, ``'concentration'`` or ``'wavelength'``.

    Returns
    -------
    (float, int)
        * ``max - min`` of the variable over the window, as a percentage
          rounded to 2 decimals. It is scaled by 1.5 for width, by 400 for
          wavelength and by a per-solution constant for concentration (using
          the solution at the last sampled timestamp). Any other variable
          name is left unscaled.
        * the number of stops: gaps of more than 20 timestamp units between
          consecutive non-record actions, counted within each run of actions
          between records.

    Raises
    ------
    ValueError
        When no action falls inside the window.
    TypeError
        For ``'concentration'`` with a solution missing from the table (the
        divisor lookup yields ``None``).

    Notes
    -----
    Stop counting previously compared every timestamp with the stale
    ``previous_timestamp`` left over from the value pass, which silently
    dropped the actions at the last timestamp of the window and with them the
    final gap. All actions in the window are now considered. Repeated
    timestamps are harmless here because a zero gap never counts as a stop.
    """
    state_data = st_act['new_seq']['state']
    timestamp = st_act['new_seq']['actions']['timestamp']

    in_window = [
        j for j in range(len(timestamp))
        if experiment_begin <= timestamp[j] <= experiment_end
    ]

    # Pass 1: one sampled value per distinct consecutive timestamp.
    experiment_values = []
    previous_timestamp = None
    solution = None
    for j in in_window:
        if timestamp[j] == previous_timestamp:
            continue
        experiment_values.append(state_data[current_variable][j + 1])
        previous_timestamp = timestamp[j]
        solution = state_data['solution'][j]

    if not experiment_values:
        raise ValueError(
            'no actions between {0} and {1}'.format(experiment_begin, experiment_end)
        )

    # Pass 2: stops inside each run of non-record actions; a record closes the run.
    stop_counter = 0
    explore_times = []
    for j in in_window:
        if 'record' not in st_act['new_seq']['actions']['action'][j]:
            explore_times.append(timestamp[j])
        else:
            stop_counter += _ranger_count_stops(explore_times)
            explore_times = []
    stop_counter += _ranger_count_stops(explore_times)

    range_perc = np.max(experiment_values) - np.min(experiment_values)

    if current_variable == 'width':
        range_perc /= 1.5

    elif current_variable == 'concentration':
        concentration_division = {
            'drink Mix (red)': 0.4,
            'Cobalt nitrate (red)': 0.4,
            'Cobalt chloride (red)': 0.25,
            'Copper sulfate (blue)': 0.2,
            'Nickel chloride (green)': 0.35,
            'Potassium chromate (yellow)': 0.0004,
            'Potassium dichromate (orange)': 0.0005,
            'Potassium permanganate (purple)': 0.0008
        }
        range_perc /= concentration_division.get(solution)

    elif current_variable == 'wavelength':
        range_perc /= 400

    return round(range_perc*100, 2), stop_counter

In [None]:
def range_labeling_stop(cvs_try, separate_actions_check, st_act_check):
    """Build the range labels that go alongside the CVS labels.

    Consecutive explore actions on one variable (width, concentration or
    wavelength), together with the record actions met while the run is open,
    form a run. A run is closed by an explore on a different variable or by
    any label that is neither one of those explores nor a CVS/Non_CVS record.

    For a closed run, every non-record entry becomes
    ``[range_perc, step_counter, stops]`` where ``range_perc`` and ``stops``
    are what ``ranger_stop`` returns for the run's begin/end times and
    ``step_counter`` is the number of labels in the run containing 'record'.
    Record labels lose their ``CVS_`` / ``Non_CVS_`` prefix. Solution explores
    become ``[len(unique solutions) / 0.08, 0, 0]``, the unique solutions being
    gathered over the whole sequence and seeded with 'drink Mix (red)'.
    All other labels are passed through untouched.

    Parameters:
        cvs_try: list of CVS labels, one per action (not modified).
        separate_actions_check: dict with 'begins' and 'ends' lists aligned
            with ``cvs_try``.
        st_act_check: state/action record; its
            ``['new_seq']['actions']['timestamp']`` and
            ``['new_seq']['state']['solution']`` lists are read here, and the
            whole object is handed to ``ranger_stop``.

    Returns:
        A new list aligned with ``cvs_try`` holding the range labels.
    """
    explore_labels = [
        'CVS_explore_width', 'Non_CVS_explore_width',
        'CVS_explore_concentration', 'Non_CVS_explore_concentration',
        'CVS_explore_wavelength', 'Non_CVS_explore_wavelength',
    ]
    range_try = cvs_try.copy()
    seen_solutions = ['drink Mix (red)']

    def variable_of(label):
        # Drop the CVS_explore_ / Non_CVS_explore_ prefix, leaving the variable name.
        return re.sub(r'^(CVS_explore_|Non_CVS_explore_)', '', label)

    def open_run(pos):
        # A run remembers its member labels, its time span and its index span.
        return {
            'labels': [cvs_try[pos]],
            'begin': separate_actions_check['begins'][pos],
            'end': separate_actions_check['ends'][pos],
            'first': pos,
            'last': pos,
            'variable': variable_of(cvs_try[pos]),
        }

    def extend_run(run, pos):
        run['labels'].append(cvs_try[pos])
        run['end'] = separate_actions_check['ends'][pos]
        run['last'] = pos

    def close_run(run):
        # Write the run's summary into every position it covers.
        step_counter = sum('record' in member for member in run['labels'])
        range_perc, stops = ranger_stop(st_act_check, run['begin'], run['end'], run['variable'])
        for pos in range(run['first'], run['last'] + 1):
            if 'record' in range_try[pos]:
                range_try[pos] = re.sub(r'^(CVS_|Non_CVS_)', '', range_try[pos])
            else:
                range_try[pos] = [range_perc, step_counter, stops]

    run = None
    for pos, label in enumerate(cvs_try):
        is_explore = label in explore_labels

        if run is None:
            # Only an explore on width/concentration/wavelength can start a run.
            if is_explore:
                run = open_run(pos)
            continue

        if is_explore:
            if run['variable'] == variable_of(label):
                extend_run(run, pos)
            else:
                # Explore on another variable: finish this run, start the next.
                close_run(run)
                run = open_run(pos)
        elif 'CVS_record' in label or 'Non_CVS_record' in label:
            extend_run(run, pos)
        else:
            close_run(run)
            run = None

    # A run still open at the end of the sequence is closed here.
    if run is not None:
        close_run(run)

    solution_positions = []
    for pos, label in enumerate(cvs_try):
        if 'CVS_explore_solution' in label or 'Non_CVS_explore_solution' in label:
            solution_positions.append(pos)
            switched_at = separate_actions_check['begins'][pos]
            timestamps = st_act_check['new_seq']['actions']['timestamp']
            for j in range(len(timestamps)):
                if switched_at == timestamps[j]:
                    # The state one step after the action holds the newly chosen solution.
                    seen_solutions.append(st_act_check['new_seq']['state']['solution'][j + 1])
        elif 'record' in label:
            range_try[pos] = re.sub(r'^(CVS_|Non_CVS_)', '', range_try[pos])

    percentage_range_solutions = len(set(seen_solutions)) / 0.08
    for pos in solution_positions:
        range_try[pos] = [percentage_range_solutions, 0, 0]

    return range_try
                        

## Insert back breaks and analysis_table

In [None]:
def insert_breaks_and_table(separate_actions_check, breaks_and_table, cvs_try, optimal_try, range_try):
    """Merge the labelled actions with the break / analysis_table entries by begin time.

    Both inputs are walked front to back and the entry with the smaller
    'begins' value is emitted first; on a tie the break/table entry goes first.
    # NOTE(review): this is a two-way merge, so both 'begins' lists are
    # presumably already sorted - confirm against the caller.

    Parameters:
        separate_actions_check: dict with aligned 'actions', 'begins', 'ends' lists.
        breaks_and_table: dict with aligned 'actions', 'begins', 'ends' lists.
        cvs_try, optimal_try, range_try: label lists aligned with
            ``separate_actions_check``.

    Returns:
        A dict with the keys 'actions', 'begins', 'ends', 'CVS', 'Optimal' and
        'Range: percentage, steps, stops'. For break/table entries the three
        label columns simply repeat the action name.
    """
    range_key = 'Range: percentage, steps, stops'
    new_labels = {'actions': [], 'begins': [], 'ends': [], 'CVS': [], 'Optimal': [], range_key: []}

    def emit_action(k):
        # Copy one labelled action, with its three labels, into the output.
        new_labels['actions'].append(separate_actions_check['actions'][k])
        new_labels['begins'].append(separate_actions_check['begins'][k])
        new_labels['ends'].append(separate_actions_check['ends'][k])
        new_labels['CVS'].append(cvs_try[k])
        new_labels['Optimal'].append(optimal_try[k])
        new_labels[range_key].append(range_try[k])

    def emit_pause(k):
        # Copy one break/table entry; its name doubles as every label.
        new_labels['actions'].append(breaks_and_table['actions'][k])
        new_labels['begins'].append(breaks_and_table['begins'][k])
        new_labels['ends'].append(breaks_and_table['ends'][k])
        new_labels['CVS'].append(breaks_and_table['actions'][k])
        new_labels['Optimal'].append(breaks_and_table['actions'][k])
        new_labels[range_key].append(breaks_and_table['actions'][k])

    action_pos = 0
    pause_pos = 0
    action_count = len(separate_actions_check['begins'])
    pause_count = len(breaks_and_table['begins'])

    while action_pos < action_count and pause_pos < pause_count:
        if separate_actions_check['begins'][action_pos] < breaks_and_table['begins'][pause_pos]:
            emit_action(action_pos)
            action_pos += 1
        else:
            emit_pause(pause_pos)
            pause_pos += 1

    # Whichever input still has entries left is appended wholesale.
    action_tails = {
        'actions': separate_actions_check['actions'],
        'begins': separate_actions_check['begins'],
        'ends': separate_actions_check['ends'],
        'CVS': cvs_try,
        'Optimal': optimal_try,
        range_key: range_try,
    }
    for key, values in action_tails.items():
        new_labels[key].extend(values[action_pos:])

    for key in ('actions', 'begins', 'ends'):
        new_labels[key].extend(breaks_and_table[key][pause_pos:])
    for key in ('CVS', 'Optimal', range_key):
        new_labels[key].extend(breaks_and_table['actions'][pause_pos:])

    return new_labels


## Labeling analysis actions

In [None]:
def find_odd_occurrences(numbers):
    """Return the distinct items of ``numbers`` that occur an odd number of times.

    Items come back in the order in which they first appear in ``numbers``.
    """
    tally = Counter(numbers)
    odd_items = []
    for item in tally:
        if tally[item] % 2 != 0:
            odd_items.append(item)
    return odd_items

In [None]:
# Per-solution divisor used to turn a concentration spread into a percentage.
_ANALYSIS_CONCENTRATION_DIVISOR = {
    'drink Mix (red)': 0.4,
    'Cobalt nitrate (red)': 0.4,
    'Cobalt chloride (red)': 0.25,
    'Copper sulfate (blue)': 0.2,
    'Nickel chloride (green)': 0.35,
    'Potassium chromate (yellow)': 0.0004,
    'Potassium dichromate (orange)': 0.0005,
    'Potassium permanganate (purple)': 0.0008
}

# Per-solution wavelength band [low, high). An analysis whose wavelength lies
# inside the band of its solution is never labelled optimal.
_ANALYSIS_NON_OPTIMAL_BAND = {
    'drink Mix (red)': (620, 780),
    'Cobalt nitrate (red)': (620, 780),
    'Cobalt chloride (red)': (620, 780),
    'Copper sulfate (blue)': (450, 495),
    'Nickel chloride (green)': (495, 570),
    'Potassium chromate (yellow)': (570, 590),
    'Potassium dichromate (orange)': (590, 620),
    'Potassium permanganate (purple)': (620, 700)
}


def _analysis_all_equal(values):
    """Return True when every item equals the first one (True for an empty list)."""
    return all(item == values[0] for item in values)


def _analysis_percent_spread(values, divisor):
    """Return (max - min) / divisor expressed as a percentage, rounded to 2 decimals."""
    return round((max(values) - min(values)) / divisor * 100, 2)


def _analysis_cvs_label(concentration, width, wavelength, solution):
    """Name the single variable that differs between the plotted points, if any."""
    if len(concentration) == 1:
        return 'Non_CVS_analysis'

    same_concentration = _analysis_all_equal(concentration)
    same_width = _analysis_all_equal(width)
    same_wavelength = _analysis_all_equal(wavelength)
    same_solution = _analysis_all_equal(solution)

    if same_concentration and same_width and same_wavelength and same_solution:
        return 'repeat'
    if same_concentration and same_width and same_wavelength:
        return 'CVS_analysis_solution'
    if same_concentration and same_width and same_solution:
        return 'CVS_analysis_wavelength'
    if same_concentration and same_wavelength and same_solution:
        return 'CVS_analysis_width'
    if same_width and same_wavelength and same_solution:
        return 'CVS_analysis_concentration'
    return 'Non_CVS_analysis'


def _analysis_optimal_label(cvs_label, concentration, wavelength, solution, analysis_experiment):
    """Derive the Optimal / Non_Optimal label that goes with ``cvs_label``."""
    if cvs_label == 'repeat':
        return 'Non_Optimal_analysis'

    if cvs_label in ('CVS_analysis_width', 'CVS_analysis_concentration'):
        variable = cvs_label[len('CVS_analysis_'):]
        band = _ANALYSIS_NON_OPTIMAL_BAND.get(solution[0])
        # Numeric half-open comparison rather than `in range(...)`, which only
        # matches whole numbers and would let e.g. 650.5 slip through as
        # "outside" the band. A solution without a known band is non-optimal.
        outside_band = band is not None and not (band[0] <= wavelength[0] < band[1])
        plotted_on_variable = ('analysis_plot_' + variable) in analysis_experiment
        # A width analysis additionally requires a non-zero concentration.
        usable_concentration = variable != 'width' or concentration[0] != 0
        if outside_band and plotted_on_variable and usable_concentration:
            return 'Optimal_analysis_' + variable
        return 'Non_Optimal_analysis_' + variable

    if cvs_label == 'CVS_analysis_wavelength':
        return 'Non_Optimal_analysis_wavelength'
    if len(concentration) == 1:
        return 'Non_Optimal_analysis'
    return 'analysis'


def _analysis_range_label(cvs_label, concentration, width, wavelength, solution, analysis_experiment):
    """Compute the range entry (a percentage, 0, or the string 'analysis')."""
    if cvs_label == 'repeat':
        return 0
    if cvs_label == 'CVS_analysis_width':
        return _analysis_percent_spread(width, 1.5)
    if cvs_label == 'CVS_analysis_concentration':
        return _analysis_percent_spread(concentration, _ANALYSIS_CONCENTRATION_DIVISOR.get(solution[0]))
    if cvs_label == 'CVS_analysis_wavelength':
        return _analysis_percent_spread(wavelength, 400)
    if cvs_label == 'CVS_analysis_solution':
        return len(set(solution)) / 0.08
    if len(concentration) == 1:
        return 0

    # Non-CVS analysis of several points: fall back on the plotted axis.
    if 'analysis_plot_width' in analysis_experiment:
        return _analysis_percent_spread(width, 1.5)
    if 'analysis_plot_concentration' in analysis_experiment and _analysis_all_equal(solution):
        return _analysis_percent_spread(concentration, _ANALYSIS_CONCENTRATION_DIVISOR.get(solution[0]))
    return 'analysis'


def _analysis_label_experiment(new_labels, sim_data_check, table_points,
                               analysis_experiment, analysis_timestamps, point_entries):
    """Label one analysis experiment inside ``new_labels`` (modified in place).

    ``point_entries`` is how many leading entries of the experiment are used
    to look up table points; later entries only receive the 'other' label.
    """
    # Table point of each considered entry: first log with the same timestamp.
    # NOTE(review): an entry without a matching log adds no point, which would
    # shift the position-based pairing used further down - this presumably
    # never happens in practice; confirm.
    analysis_points = []
    for timestamp in analysis_timestamps[:point_entries]:
        for log in sim_data_check['logs']:
            if timestamp == log['timestamp']:
                analysis_points.append(log['table_point'])
                break

    # A point seen an odd number of times is kept as a result point.
    result_points = find_odd_occurrences(analysis_points)

    concentration, width, wavelength, solution = [], [], [], []
    known_points = range(1, len(table_points) + 1)
    for point in analysis_points:
        if point in result_points and point in known_points:
            row = table_points[point]
            concentration.append(row['concentration'])
            width.append(row['width'])
            wavelength.append(row['wavelength'])
            solution.append(row['solution'])

    cvs_label = _analysis_cvs_label(concentration, width, wavelength, solution)
    optimal_label = _analysis_optimal_label(cvs_label, concentration, wavelength, solution, analysis_experiment)
    range_label = _analysis_range_label(cvs_label, concentration, width, wavelength, solution, analysis_experiment)

    range_key = 'Range: percentage, steps, stops'
    for j, action in enumerate(analysis_experiment):
        # Only the first row of new_labels beginning at this timestamp is labelled.
        row_index = next(
            (m for m, begin in enumerate(new_labels['begins']) if analysis_timestamps[j] == begin),
            None,
        )
        if row_index is None:
            continue
        if 'analysis_plot' in action and analysis_points[j] in result_points:
            new_labels['CVS'][row_index] = cvs_label
            new_labels['Optimal'][row_index] = optimal_label
            new_labels[range_key][row_index] = [range_label, len(result_points)]
        else:
            # Removals, and plots of points that are not result points.
            new_labels['CVS'][row_index] = 'other'
            new_labels['Optimal'][row_index] = 'other'
            new_labels[range_key][row_index] = 'other'


def analysis_labeling(new_labels, sim_data_check, table_points, separate_actions_check):
    """Fill in the CVS / Optimal / Range labels of the analysis actions.

    Actions containing 'analysis_plot' or 'analysis_remove' are collected into
    an experiment while a running counter adds one per plot and subtracts one
    per removal. When a removal brings the counter back to zero the experiment
    is labelled using its entries up to and including the last plot, then a
    new experiment starts. Whatever is still collected at the end is labelled
    using all of its entries.

    Within an experiment, plot rows whose point is a result point receive the
    experiment's CVS label, Optimal label and ``[range, number of result
    points]``; every other row of the experiment receives 'other'.

    Parameters:
        new_labels: dict with 'begins', 'CVS', 'Optimal' and
            'Range: percentage, steps, stops' lists; modified in place.
        sim_data_check: dict whose 'logs' entries provide 'timestamp' and
            'table_point'.
        table_points: mapping from point number (starting at 1) to a dict with
            'concentration', 'width', 'wavelength' and 'solution'.
        separate_actions_check: dict with aligned 'actions' and 'begins' lists.

    Returns:
        The same ``new_labels`` object.
    """
    counter_plot = 0
    analysis_experiment = []
    analysis_timestamps = []

    for i, action in enumerate(separate_actions_check['actions']):
        if 'analysis_plot' in action:
            analysis_experiment.append(action)
            analysis_timestamps.append(separate_actions_check['begins'][i])
            counter_plot += 1
        elif 'analysis_remove' in action:
            analysis_experiment.append(action)
            analysis_timestamps.append(separate_actions_check['begins'][i])
            counter_plot -= 1

            if counter_plot == 0:
                # Entries after the last plot are removals only and are not
                # used when collecting the points.
                last_plot = max(
                    k for k, name in enumerate(analysis_experiment) if 'analysis_plot' in name
                )
                _analysis_label_experiment(new_labels, sim_data_check, table_points,
                                           analysis_experiment, analysis_timestamps,
                                           last_plot + 1)
                analysis_experiment = []
                analysis_timestamps = []

    # The experiment still open at the end of the sequence.
    if analysis_experiment != []:
        _analysis_label_experiment(new_labels, sim_data_check, table_points,
                                   analysis_experiment, analysis_timestamps,
                                   len(analysis_experiment))
    return new_labels

## Define total function for new labeling

In [None]:
def new_labeling(st_act_check, sim_data_check):
    """Run the full relabeling pipeline for one session.

    Steps, in order:
      1. Collect every action whose label contains 'record' into a table
         keyed 1, 2, 3, ... holding its timestamp and the state values
         (width, concentration, wavelength, solution, absorbance) stored
         at the same index.
      2. Sequence the session with ``variables_sequencer_v1``
         (``threshold = 3000``) and keep the entries that are neither
         'break' nor 'analysis_table'.
      3. Label those entries with ``cvs_labeling``, ``optimal_labeling``
         and ``range_labeling_stop``.
      4. Collect the 'break' / 'analysis_table' entries and merge
         everything with ``insert_breaks_and_table``.
      5. Finish with ``analysis_labeling``.

    Parameters
    ----------
    st_act_check : dict
        Sequence data. Must provide 'new_seq' (with 'actions' and 'state'),
        'group', 'subgroup', 'session_code' and 'task'.
    sim_data_check
        Parsed data of the same session; only forwarded to
        ``variables_sequencer_v1`` and ``analysis_labeling``.

    Returns
    -------
    dict
        'group', 'subgroup', 'session_code' and 'task' copied from
        ``st_act_check``, plus 'single_exp' holding the final labels.
    """
    sequence = st_act_check['new_seq']

    # Table of recorded points, numbered from 1 in order of appearance.
    table_points = {}
    for idx, logged_action in enumerate(sequence['actions']['action']):
        if 'record' not in logged_action:
            continue
        point = {'timestamp': sequence['actions']['timestamp'][idx]}
        for field in ('width', 'concentration', 'wavelength', 'solution', 'absorbance'):
            point[field] = sequence['state'][field][idx]
        table_points[len(table_points) + 1] = point

    sequenced = variables_sequencer_v1(st_act_check, sim_data_check, threshold = 3000)

    pause_labels = ['break', 'analysis_table']

    def _collect(entries, keep_pauses):
        # Gather the (action, begin, end) triples that are (or are not) pauses.
        picked = {'actions': [], 'begins': [], 'ends': []}
        for label, start, stop in entries:
            if (label in pause_labels) == keep_pauses:
                picked['actions'].append(label)
                picked['begins'].append(start)
                picked['ends'].append(stop)
        return picked

    experiment_entries = _collect(sequenced['var_seq_v1'], False)

    cvs_labels = cvs_labeling(experiment_entries, st_act_check)
    optimal_labels = optimal_labeling(cvs_labels, experiment_entries, st_act_check)
    range_labels = range_labeling_stop(cvs_labels, experiment_entries, st_act_check)

    # Pauses are gathered only after the labeling calls, as in the original flow.
    pause_entries = _collect(sequenced['var_seq_v1'], True)

    merged_labels = insert_breaks_and_table(experiment_entries, pause_entries, cvs_labels, optimal_labels, range_labels)
    labeled = analysis_labeling(merged_labels, sim_data_check, table_points, experiment_entries)

    result = {key: st_act_check[key] for key in ('group', 'subgroup', 'session_code', 'task')}
    result['single_exp'] = labeled
    return result

### Apply to all files

In [None]:
base_directory1 = '///' # Directory with the Sequences
base_directory2 = '///' # Directory with the Parsed files

# For every sequence pickle, find the parsed pickle of the same session,
# relabel the pair with new_labeling() and pickle the result.
# NOTE(review): pickle.load can execute arbitrary code - only point these
# directories at files you trust.
for file1 in Path(base_directory1).rglob('*.pkl'):
    # Load the sequence file once: it provides the 'session_code' used for
    # matching and is also the first input of new_labeling().
    with open(file1, 'rb') as fp1:
        st_act_check = pickle.load(fp1)
    session_code = st_act_check.get('session_code')

    # Without a session code there is nothing to match against; skip the file.
    if not session_code:
        continue

    # First parsed file whose name contains the session code (None if absent).
    file2 = next(Path(base_directory2).rglob(f'**/*{session_code}*.pkl'), None)
    if file2 is None:
        continue

    with open(file2, 'rb') as fp2:
        sim_data_check = pickle.load(fp2)

    print(session_code)

    # Compute the labels before opening the output file, so a failure in
    # new_labeling() cannot leave an empty or truncated pickle behind.
    labeled_session = new_labeling(st_act_check, sim_data_check)

    # One output file per session: the '{}' placeholder is required, otherwise
    # str.format() ignores session_code and every session overwrites the same file.
    path = "///{}.pkl".format(session_code)
    with open(path, 'wb') as fp:
        pickle.dump(labeled_session, fp)