# Conversion of data obtained from the QED-C framework
https://github.com/SRI-International/QC-App-Oriented-Benchmarks

The data from the QED-C repository should be placed in a folder named `__results`.

Note that the `__results` folder, the `conversion.ipynb` and `conversion.py` files should be placed in the `maxcut/qiskit` folder in the QED-C package. 

In [2]:
import os
import conversion as ws
from itertools import product as iterprod

ModuleNotFoundError: No module named 'maxcut_benchmark'

For this example, the data is assumed to be in folders of the type `__results/instance=0/approx_ratio/rounds-2_shots-100`.

Each such folder contains `json` files corresponding to random initializations or restarts.

If the data is organized differently, the function `get_folder_names` defined below should be modified appropriately.


In [1]:
#%% Parameters and functions for loading data
# Values of `rounds` for which raw-data folders are looked up.
rounds_list = [2]
# Shot counts for which raw-data folders are looked up.
num_shots_list = [50,70,100]
# Not referenced by the code shown in this notebook; the trailing comment lists
# what are presumably the other supported objective function names -- TODO confirm.
obj_func_list = ['approx_ratio'] # ['cvar_ratio', 'approx_ratio', 'gibbs_ratio', 'bestcut_ratio']

# Objective function whose data is converted; it is also used verbatim as a
# sub-folder name when the raw-data paths are built.
objective_function_type = 'approx_ratio'

def get_folder_names(instance_number, objective_function_type, rounds_values=None, shots_values=None):
    """Get the folders with raw data corresponding to given instance and objective function

    Every path has the form
    `__results/instance=<instance_number>/<objective_function_type>/rounds-<rounds>_shots-<num_shots>`.
    Paths are only constructed here; nothing checks that the folders exist.

    Args:
        instance_number (int): value substituted into the `instance=<n>` folder name
        objective_function_type (str): name of the objective function sub-folder
        rounds_values (iterable of int, optional): rounds values to include.
            Defaults to the module-level `rounds_list`.
        shots_values (iterable of int, optional): shot counts to include.
            Defaults to the module-level `num_shots_list`.

    Returns:
        nested dictionary: key sequence [rounds][num_shots] contains location of folder with raw data corresponding to those values.
    """
    # Fall back to the notebook-level parameters so existing two-argument calls keep working
    if rounds_values is None:
        rounds_values = rounds_list
    if shots_values is None:
        shots_values = num_shots_list
    # Materialise the shot counts: they are iterated once per rounds value,
    # which would silently exhaust a one-shot iterator
    shots_values = list(shots_values)

    # The part of the path shared by every (rounds, num_shots) combination
    instance_folder = os.path.join('__results', 'instance={}'.format(instance_number), objective_function_type)

    return {
        rounds: {
            num_shots: os.path.join(instance_folder, 'rounds-{}_shots-{}'.format(rounds, num_shots))
            for num_shots in shots_values
        }
        for rounds in rounds_values
    }

Now convert the raw data to `.pkl` format

In [None]:
def raw_to_metric_data(instance_number, objective_function_type, rounds_list, num_shots_list):
    """
    For given values of instance and objective function type, convert all requested raw qed-c data.

    For every combination of rounds and shots, the raw-data folder is handed to `ws.json_to_pkl`
    together with the target folder `__results_pkl/instance=<instance_number>` and the target file
    name `<objective_function_type>_rounds-<rounds>_shots-<num_shots>.pkl`.
    The intent is to retain only the quantities of interest (i.e. only the values of the objective
    function as a function of iteration number and restart index), so that each stored file
    corresponds to one combination of shots and rounds and contains a dataframe with indices as
    iteration number and columns as metric values corresponding to various restarts. The conversion
    itself is performed by `ws.json_to_pkl`.

    Args:
        instance_number (int)
        objective_function_type (str)
        rounds_list (iterable of int): rounds values to convert
        num_shots_list (iterable of int): shot counts to convert

    Raises:
        KeyError: if a requested rounds or shots value is missing from the mapping returned by
            `get_folder_names`. That function is called without explicit lists and therefore
            builds its mapping from the module-level `rounds_list` and `num_shots_list`, so the
            lists passed here must be drawn from those.
    """
    # First, get a list of folders with raw data corresponding to this instance
    all_folders = get_folder_names(instance_number, objective_function_type)

    # Every combination for this instance is written to the same folder
    target_folder = os.path.join('__results_pkl', 'instance={}'.format(instance_number))

    for rounds, num_shots in iterprod(rounds_list, num_shots_list):
        raw_folder = all_folders[rounds][num_shots]
        target_file_name = '{}_rounds-{}_shots-{}.pkl'.format(objective_function_type, rounds, num_shots)
        # The return value is not needed here; the call is made for the file it stores
        ws.json_to_pkl(raw_folder, target_folder = target_folder, target_file_name = target_file_name)

# Convert the raw data of instances 0 and 1 using the notebook-level parameters.
for instance_number in range(2):
    raw_to_metric_data(instance_number, objective_function_type, rounds_list, num_shots_list)

Do bootstrapping for restarts. Store bootstrapped data in `checkpoints` folder. 

In [None]:
# Restart counts handed to the bootstrapping routine.
bs_restarts_list = [1,2,5,10,20,30,40,50,60,70,80,90,100]
# Number of bootstrap iterations handed to the bootstrapping routine.
bootstrap_iterations = 1000
# NOTE(review): presumably a percentage; a one-sigma interval would be ~68,
# so confirm that 64 is the intended value.
confidence_level = 64
def metric_data_to_bootstrapped(instance_number, bs_restarts_list, bootstrap_iterations, confidence_level):
    """Bootstrap the converted data of one instance via `ws.pkl_to_sb_pkl_bootstrapped`.

    The `.pkl` files are read from `__results_pkl/instance=<instance_number>`; the target
    passed on for the bootstrapped output is the file
    `bootstrapped_results_inst=<instance_number>.pkl` in the folder `checkpoints`.

    Args:
        instance_number (int)
        bs_restarts_list (list of int): restart counts, forwarded unchanged
        bootstrap_iterations (int): forwarded unchanged
        confidence_level: forwarded unchanged

    Returns:
        Whatever `ws.pkl_to_sb_pkl_bootstrapped` returns (presumably a dataframe -- TODO confirm).
    """
    source_folder = os.path.join('__results_pkl', 'instance={}'.format(instance_number))
    output_file = 'bootstrapped_results_inst={}.pkl'.format(instance_number)

    return ws.pkl_to_sb_pkl_bootstrapped(
        source_folder,
        bs_restarts_list,
        bootstrap_iterations,
        instance_number,
        confidence_level,
        sb_target_folder='checkpoints',
        sb_target_file=output_file,
    )

# Bootstrap instances 0 and 1. `df` is rebound on every pass, so after the loop
# it refers only to the result for the last instance.
for instance_number in range(2):
    df = metric_data_to_bootstrapped(instance_number, bs_restarts_list, bootstrap_iterations, confidence_level)
