In [1]:
# # Highlight with red the exact covers
# def highlight_correct_ticks(ax, states, EXACT_COVERS):
        
#     MEC = [state for state in EXACT_COVERS if state.count("1") == min([x.count("1")  for x in EXACT_COVERS])][0]

#     ax.set_xticks(ax.get_xticks())
#     ax.set_xticklabels(labels=states, rotation=90)
#     for (state, ticklbl) in zip(states, ax.xaxis.get_ticklabels()):
#         ticklbl.set_color('cyan' if state==MEC
#                           else'red' if state in EXACT_COVERS
#                           else 'black')
# Colour the x tick labels: minimum exact cover, other exact covers and single-"1" states

def highlight_correct_ticks(ax, EXACT_COVERS):
    """
    Colour the x tick labels of ``ax`` according to the state each one shows.

    The text of every tick label is read and the label is coloured:
        'limegreen'  if the text equals the minimum exact cover (MEC), i.e. the
                     first element of EXACT_COVERS with the fewest "1" characters;
        'crimson'    if the text is any other element of EXACT_COVERS;
        'lightgrey'  if the text is a string of "0"s containing exactly one "1"
                     (and has the same length as the MEC, when there is one);
        'black'      otherwise.

    Parameters
    ----------
        ax: object exposing ``ax.xaxis.get_ticklabels()``; each returned label
            must provide ``get_text()`` and ``set_color()``.
        EXACT_COVERS (iterable of str): states to highlight. May be empty, in
            which case no label is coloured green or crimson.
    """
    covers = list(EXACT_COVERS)

    if covers:
        # min() returns the first minimal element, which is the same state the
        # "filter on the minimum count, then take [0]" formulation selects.
        MEC = min(covers, key=lambda state: state.count("1"))
        n = len(MEC)
    else:
        # No exact cover: nothing can match the MEC and no reference length exists.
        MEC = None
        n = None

    def has_single_one(state):
        # True for strings made of "0"s and exactly one "1".
        if n is not None and len(state) != n:
            return False
        return state.count("1") == 1 and state.count("0") == len(state) - 1

    for ticklbl in ax.xaxis.get_ticklabels():
        state = ticklbl.get_text()
        if state == MEC:
            color = 'limegreen'
        elif state in covers:
            color = 'crimson'
        elif has_single_one(state):
            color = 'lightgrey'
        else:
            color = 'black'
        ticklbl.set_color(color)


def underline_states(ax, states_to_underline):
    """
    Underline the x tick labels whose text is one of ``states_to_underline``.

    The current label texts are read from ``ax``, re-applied as tick positions,
    and then replaced by a copy in which every selected state is wrapped in a
    LaTeX underline command. All other labels keep their text.

    Parameters
    ----------
        ax: axes whose x tick labels are rewritten.
        states_to_underline (container of str): label texts to underline.
    """
    current_texts = []
    decorated_texts = []
    for tick_label in ax.xaxis.get_ticklabels():
        text = tick_label.get_text()
        current_texts.append(text)
        if text in states_to_underline:
            # NOTE(review): \underline presumably needs LaTeX text rendering
            # to be enabled in matplotlib - confirm in the notebook setup.
            decorated_texts.append(r'\underline{' + text + '}')
        else:
            decorated_texts.append(text)

    ax.xaxis.set_ticks(current_texts)
    ax.set_xticklabels(decorated_texts)

In [None]:
def plot_histogram_of_df_column(df, column_to_plot, EXACT_COVERS, states_to_underline, title=''):
    '''
    Draw a bar plot of one column of ``df``, expressed in percent.

    (Taken from the parameter-fixing file.)

    Parameters
    ----------
        df (pandas.DataFrame): must contain a 'states' column, used as index;
            every other column is converted to float, with NaN replaced by 0.
        column_to_plot (str): name of the column to draw.
        EXACT_COVERS: forwarded to highlight_correct_ticks.
        states_to_underline: forwarded to underline_states.
        title (str): title of the plot.

    Return
    ------
        ax: the axes returned by seaborn's barplot.
    '''
    df = df.set_index('states')
    df = df.astype(float).fillna(0.0)

    ##### COMPUTE PERCENTAGES
    # Each column is normalised by its own sum, so each column adds up to 100.
    percentage = (df / df.sum()) * 100
    percentage = percentage[[column_to_plot]]
    percentage = percentage.sort_values(column_to_plot, ascending=False)

    ##### FIGURE
    plt.figure(figsize=(7,5))
    N = 10  # base font size
    ax = sns.barplot(x="states", y=column_to_plot, data=percentage,
                     width=0.7, color='red', alpha=0.5)

    ### Make labels with percentages.
    labels = percentage[column_to_plot].round(1).astype('str') + '%'
    for container in ax.containers:
        ax.bar_label(container, labels=labels, fontsize=N-2)

    ### Underline the requested states, then colour the tick labels.
    # NOTE(review): highlight_correct_ticks matches on the label text, which
    # underline_states has just rewritten for the underlined states - confirm
    # that underlined exact covers are meant to stay uncoloured.
    underline_states(ax, states_to_underline)
    highlight_correct_ticks(ax, EXACT_COVERS)

    ### Refinements.
    plt.xlabel("states", fontsize=N)
    plt.ylabel("", fontsize=N)
    plt.xticks(fontsize=N-2, rotation="vertical")
    plt.yticks(fontsize=N)
    plt.xlim(xmin=-1)
    plt.ylim(ymin=0, ymax=106)
    plt.minorticks_on()
    plt.grid(alpha=0.2)
    plt.title(title, fontsize=N)

    return ax
    
def plot_histogram_of_best_column(df, best_column, EXACT_COVERS, states_to_underline, title=''):
    """
    Plot the histogram of ``best_column`` and overlay the row-wise average.

    The bars are drawn by plot_histogram_of_df_column. On top of each bar a
    marker shows the mean of that state over all columns (in percent), with
    the standard deviation over the same columns as error bar. The figure is
    then shown.

    Parameters
    ----------
        df (pandas.DataFrame): must contain a 'states' column; every other
            column is converted to float, with NaN replaced by 0.
        best_column (str): name of the column drawn as bars.
        EXACT_COVERS, states_to_underline, title: forwarded to
            plot_histogram_of_df_column.
    """
    ax = plot_histogram_of_df_column(df, best_column, EXACT_COVERS, states_to_underline, title=title)

    ##### PERCENTAGES, COLUMN BY COLUMN
    counts = df.set_index('states').astype(float).fillna(0.0)
    percentage = (counts / counts.sum()) * 100

    ##### ROW-WISE STATISTICS
    percentage['average'] = percentage.mean(numeric_only=True, axis=1)
    # Every column except the last one, i.e. leaving out 'average' just added.
    columns_for_std = percentage.columns[:-1]
    percentage['std'] = percentage[columns_for_std].std(numeric_only=True, axis=1)

    ##### KEEP THE BEST AND THE AVERAGE RESULT, IN THE SAME ORDER AS THE BARS
    percentage = percentage[[best_column, "average", "std"]].sort_values(best_column, ascending=False)

    ### Display errors, but just for the "average" markers.
    bar_centres = []
    for patch in ax.patches:
        bar_centres.append(patch.get_x() + 0.5 * patch.get_width())

    ax.errorbar(x=bar_centres, y=percentage["average"], yerr=percentage["std"],
                linestyle="", markerfacecolor='none', linewidth=1,
                marker='o', color='k', ecolor='k', elinewidth=0.7, capsize=3.5,
                barsabove=True, alpha=0.5)

    plt.show()


def _find_best_attempt_index(data_filename):
    """
    Return the index of the best attempt recorded in a data file.

    The file is scanned for lines containing
    'Attempt that reached the best result with'. The index is the text between
    the first '#' of such a line and the next space. If several lines match,
    the last one wins.

    Raises
    ------
        ValueError: if no line of the file contains the marker.
    """
    i_best = None
    with open(data_filename, 'r') as data_file:
        for line in data_file:
            if 'Attempt that reached the best result with' in line:
                i_best = line.split('#')[1].split(' ')[0]

    if i_best is None:
        # Fail loudly instead of reusing an index read from another file.
        raise ValueError(f"no best-attempt line found in {data_filename!r}")
    return i_best


def _build_subplot_title(FILENAME, n, instance, init_name, p, random_attempts, k, removefromtitles):
    """
    Build the two-line title of a subplot.

    The first line lists the run parameters, skipping the keys found in
    ``removefromtitles``. The second line is the part of FILENAME between
    'pars0' and '.csv', typeset as math.
    """
    dictstring = {"n":f"$n={n}$", "i":f"$i={instance}$", "init":f"init={init_name}",
                  "p":f"$p={p}$", "ra":f"$ra={random_attempts}$", "k":f"$k={k}$"}

    # Every kept entry is followed by ", " (the trailing one included).
    s = "".join(value + ", " for key, value in dictstring.items()
                if key not in removefromtitles)

    bounds_and_pars0 = FILENAME.split('pars0')[1].split('.csv')[0]
    bounds_and_pars0 = bounds_and_pars0.replace("pi", "\\pi")
    bounds_and_pars0 = bounds_and_pars0.replace("x", "\\times")
    return s + "\n$" + bounds_and_pars0 + "$"


def plot_list_of_files(FILENAME_list, DATA_FILENAME_list, title=None, removefromtitles=()):
    """
    Plot, on a two-row grid, one histogram per (csv file, data file) pair.

    For every pair, the counts in the csv file are turned into percentages
    (column by column). The column of the best attempt is drawn as bars,
    sorted in decreasing order. The row-wise average over all columns is
    overlaid as markers, with the standard deviation as error bar. The
    figure is then shown.

    Parameters
    ----------
        FILENAME_list (list of str): csv files with a 'states' column and one
            'counts_p{p}_{i}of{ra}' column per attempt.
        DATA_FILENAME_list (list of str): matching data files. Their names are
            parsed by define_parameters_from_filename and their content must
            contain the 'Attempt that reached the best result with' line.
        title (str or None): overall title of the figure, if any.
        removefromtitles (iterable of str): keys among "n", "i", "init", "p",
            "ra", "k" to leave out of the subplot titles.

    Raises
    ------
        ValueError: if a data file does not contain the best-attempt line.
    """
    fig = plt.figure(figsize=(20,7))
    N = 13 # fontsize
    if title is not None:
        fig.suptitle(title, fontsize=N+3)

    num_cols = math.ceil(len(FILENAME_list)/2)
    num_rows = 2
    for num_subplot, (FILENAME, DATA_FILENAME) in enumerate(zip(FILENAME_list, DATA_FILENAME_list)):

        ##### EXTRAPOLATE INFORMATION FROM THE FILENAME
        n, instance, init_name, p, random_attempts, k = define_parameters_from_filename(DATA_FILENAME, verbose=False)

        ##### WITH THIS INFORMATION, GET THE INSTANCE SETS
        U, subsets_dict = define_instance(n, instance, verbose=False)

        ##### FIND THE EXACT COVERS (the other returned values are not needed here)
        _, _, _, _, EXACT_COVERS = find_spectrum(U, subsets_dict, n, k)

        ##### UPLOAD THE DATAFRAME FROM FILE
        df = pd.read_csv(FILENAME, dtype = str).set_index('states')
        df = df.astype(float).fillna(0.0)

        ##### COMPUTE PERCENTAGES AND ADD AVERAGE COLUMN
        percentage = (df / df.sum()) * 100
        percentage['average'] = percentage.mean(numeric_only=True, axis=1)
        # columns[:-1] leaves out the 'average' column just added.
        percentage['std'] = percentage[percentage.columns[:-1]].std(numeric_only=True, axis=1)

        ##### THE COLUMN CORRESPONDING TO THE BEST RESULT
        i_best = _find_best_attempt_index(DATA_FILENAME)
        column_best = f'counts_p{p}_{i_best}of{random_attempts}'

        ##### KEEP THE BEST AND THE AVERAGE RESULT
        percentage = percentage[[column_best, "average", "std"]]
        percentage = percentage.sort_values(column_best, ascending=False)

        ##### FIGURE
        fig.add_subplot(num_rows, num_cols, num_subplot+1)
        ax = sns.barplot(x="states", y=column_best, data=percentage, width=0.7, color='red', alpha=0.5)

        ### Make labels with percentages.
        labels = percentage[column_best].round(1).astype('str') + '%'
        for container in ax.containers:
            ax.bar_label(container, labels=labels, fontsize=N-2)

        ### Display errors, but just for the "average" markers.
        x_coords = [patch.get_x() + 0.5 * patch.get_width() for patch in ax.patches]
        ax.errorbar(x=x_coords, y=percentage["average"], yerr=percentage["std"], linestyle="",
                    markerfacecolor='none', linewidth=1,
                    marker='o', color='k', ecolor='k', elinewidth=0.7, capsize=3.5,
                    barsabove=True, alpha=0.5)

        ### Colour the tick labels of the exact covers.
        highlight_correct_ticks(ax, EXACT_COVERS)

        ### Set title.
        ax.set_title(_build_subplot_title(FILENAME, n, instance, init_name, p,
                                          random_attempts, k, removefromtitles),
                     fontsize=N)

        ### Refinements.
        plt.xlabel("states", fontsize=N)
        plt.ylabel("", fontsize=N)
        plt.xticks(fontsize=N-2, rotation="vertical")
        plt.yticks(fontsize=N)
        plt.xlim(xmin=-1)
        plt.ylim(ymin=0, ymax=106)
        plt.minorticks_on()
        plt.grid(alpha=0.2)

    plt.subplots_adjust(hspace=0.8)
    plt.show()

# Reading files

In [None]:
def define_parameters_from_filename(filename, verbose):
    """
    Extract the run parameters encoded in a file name.

    Parameters
    ----------
        filename (str): a string that contains ... dim6_mail5_all0_random_p3_10ra_k0.085_ ...
            An optional directory part (separated by '/') is ignored.
        verbose (bool): if True, prints output.

    Return
    ------
        n (int): instance dimension
        instance (int): number of the instance
        init_name (str): a string describing the kind of initialization
            ('customized' when the file name does not contain "all")
        p (int): the maximum layer, the depth of the circuit
        ra (int): number of random attempts
        k (float): parameter k of the problem (l1/l2=k*n)

    Raises
    ------
        ValueError: if one of the fields cannot be found in the file name.

    Example
    -------
        Input: "15-09@11h54m_dim6_mail3_all0_random_p3_10ra_k0.067_BOUNDS[0,2pi]xNone_pars0[0,2pi]xNone_data.txt"
        Output: 6 3 all0 3 10 0.067
    """
    import re

    # Remove the path; [-1] also works when the name has no directory part.
    filename = filename.rsplit('/', 1)[-1]

    def first_group(pattern, field):
        # Return the first captured group of `pattern`, or fail with a clear message.
        match = re.search(pattern, filename)
        if match is None:
            raise ValueError(f"cannot find {field} in file name {filename!r}")
        return match.group(1)

    # \d+ reads the whole number, so multi-digit values (dim12, p10) are supported.
    n = int(first_group(r'dim(\d+)', "the dimension ('dim<n>')"))
    instance = int(first_group(r'mail(\d+)', "the instance ('mail<i>')"))

    if "all" in filename:
        # The initialization name is the token right after 'mail<i>_'.
        init_name = first_group(r'mail\d+_([^_]*)', "the initialization name")
    else:
        init_name = 'customized'

    p = int(first_group(r'_p(\d+)', "the depth ('_p<p>')"))
    ra = int(first_group(r'_p\d+_(\d+)ra', "the random attempts ('<ra>ra')"))
    k = float(first_group(r'ra_k([^_]+)', "the parameter k ('ra_k<k>')"))

    if verbose:
        print("n={}, i={}, init={}, p={}, ra={}, k={}".format(n, instance, init_name, p, ra, k))

    return n, instance, init_name, p, ra, k

In [None]:
def find_files_containing_string(strings, path=None, verbose=False):
    """
    Find the files in ``path`` whose name contains all the strings in ``strings``,
    ordered by number of instance.

    Files whose name contains '_data' are collected as data files (expected to
    be the .txt files); all the others as result files (expected to be the
    .csv files). The file extension itself is not checked.

    Parameters
    ----------
        strings (list): list of strings that the file names must contain.
        path (str): directory to search. The default is './'.
        verbose (bool): if True, print is activated.

    Return
    ------
        FILENAME_list (list of str): a list containing the .csv files.
        DATA_FILENAME_list (list of str): a list containing the .txt files.
    """
    if path is None:
        path = './'

    FILENAME_list = []
    DATA_FILENAME_list = []
    for obj in os.listdir(path):
        # Build the path once, so the returned name is the one that was checked
        # (also correct when `path` has no trailing slash).
        full_path = os.path.join(path, obj)
        if os.path.isfile(full_path) and all(s in obj for s in strings):
            if '_data' in obj:
                DATA_FILENAME_list.append(full_path)
            else:
                FILENAME_list.append(full_path)

    def find_datetime(x):
        # Last '_'-separated token before '_dim' in the file name; [-1] also
        # covers names that start directly with the time stamp.
        return os.path.basename(x).split("_dim")[0].rsplit('_', 1)[-1]

    def remove_datetime(x):
        return x.replace(find_datetime(x), "")

    def find_number_of_instance(x):
        # Parsed from the file name only, so that a 'mail' in the directory
        # name cannot interfere.
        return int(os.path.basename(x).split('mail')[1].split('_')[0])

    ### SORT BY NUMBER OF INSTANCE
    FILENAME_list = sorted(FILENAME_list, key=find_number_of_instance)
    DATA_FILENAME_list = sorted(DATA_FILENAME_list, key=find_number_of_instance)

    ### Check that, time stamp aside, every csv file is paired with its data file.
    for f, d in zip(FILENAME_list, DATA_FILENAME_list):
        ff = remove_datetime(f)
        dd = remove_datetime(d)
        if ff.replace(".csv", "_data.txt") != dd:
            print(" ERROR !!! ")
        if verbose:
            print(f"{f}\n{d}\n\n")

    return FILENAME_list, DATA_FILENAME_list