# Creating test cases
This notebook takes a folder of notebooks and turns them into a jsonl file in the format human_eval expects.

It also verifies that every test case is categorised in the categories YAML file (`../data/human-eval-bia-categories.yaml`).

In [1]:
import json
import os
import warnings

In [2]:
# Folder containing the test-case notebooks. It must end with "/" because
# file names are appended to it by plain string concatenation.
source_notebook_directory = '../test_cases/'

# Destination of the generated .jsonl file
target_jsonl_filename = '../data/human-eval-bia.jsonl'

# YAML file that assigns categories to the test cases
categories_file = '../data/human-eval-bia-categories.yaml'

In [3]:
list_of_cases = []

# List all entries in the source directory
files = os.listdir(source_notebook_directory)

# os.listdir() returns entries in arbitrary order; sort them so that the
# generated files are reproducible from run to run and across machines.
for notebook_name in sorted(files):
    if not notebook_name.endswith('.ipynb'):
        continue

    notebook_filename = source_notebook_directory + notebook_name

    # Load and parse the notebook; the encoding is given explicitly so the
    # result does not depend on the platform's default encoding.
    with open(notebook_filename, 'r', encoding='utf-8') as notebook_handle:
        notebook = json.load(notebook_handle)

    task_id = notebook_filename
    prompt = None
    canonical_solution = None
    entry_point = None
    test = None

    # Classify every code cell by its content
    for cell in notebook['cells']:
        if cell['cell_type'] != 'code':
            continue

        # Join the source lines of the cell into a single string
        code = ''.join(cell['source'])

        if code.startswith('check('):
            # Cell calling check(<name>): what remains after removing
            # "check(" and ")" is taken as the entry point
            entry_point = code.strip().replace("check(","").replace(")","").strip()
        elif '"""' in code:
            # Everything up to and including the last triple quote becomes
            # the prompt; the text after it becomes the canonical solution
            temp = code.split('"""')
            canonical_solution = temp[-1]
            temp[-1] = ""
            prompt = '"""'.join(temp)
        elif 'def check(' in code:
            # Cell defining check(...) holds the test code
            test = code
        elif len(code.strip()) == 0:
            # Empty cells are ignored
            pass
        else:
            # Unrecognised cell: show how it starts so it can be located
            sample = code[:20]
            warnings.warn(f"I had issues reading a cell in {task_id} starting with {sample!r}")

    test_case = {
        'task_id':task_id,
        'prompt':prompt,
        'canonical_solution':canonical_solution,
        'entry_point':entry_point,
        'test':test
    }

    # Report every field that could not be extracted, not only the first one
    for field, value in test_case.items():
        if value is None:
            warnings.warn(f"Couldn't extract {field} from {task_id}.")

    list_of_cases.append(test_case)



In [4]:
# Verify that every test case has an entry in the categories YAML file.
# Test cases without an entry are reported via print(); no exception is raised.

import yaml

# Read YAML file where each test case has a category
with open(categories_file, 'r', encoding='utf-8') as stream:
    # safe_load() returns None for an empty document; use an empty mapping then
    categorise_functions = yaml.safe_load(stream) or {}

all_categories = []

# For each test case, check whether its name is a key in categorise_functions
for test_case in list_of_cases:
    # The test case name is the notebook file name without its extension
    test_case_name = os.path.splitext(os.path.basename(test_case['task_id']))[0]

    if test_case_name not in categorise_functions:
        print(f"{test_case_name} not found in categorise_functions")
        continue

    categories = categorise_functions[test_case_name]
    if categories is None:
        # Key exists but has no value; report it instead of failing on iteration
        print(f"{test_case_name} has no categories in categorise_functions")
        continue
    if isinstance(categories, str):
        # A single scalar category would otherwise be split into characters
        categories = [categories]

    all_categories.extend(categories)

Here we can check whether some categories appear multiple times under slightly different spellings, e.g. because of typos.

In [5]:
# Unique category names in alphabetical order
sorted(set(all_categories))

['data_wrangling',
 'feature_extraction',
 'file_i_o',
 'hello_world',
 'image_filtering',
 'image_preprocessing',
 'image_transformation',
 'measurement',
 'morphological_operations',
 'segmentation',
 'segmentation_post_processing',
 'statistical_analysis',
 'workflow_automation']

In [6]:
# Open the file in write mode and save the dictionaries
with open(target_jsonl_filename, 'w') as file:
    for dictionary in list_of_cases:
        # Convert dictionary to a JSON formatted string and write it
        json_str = json.dumps(dictionary)
        file.write(json_str + '\n')

print(f'Data successfully saved to {target_jsonl_filename}.')

Data successfully saved to ../data/human-eval-bia.jsonl.


In [7]:
filename = source_notebook_directory + "readme.md"

# Build all list entries before opening the file, so an incomplete test case
# cannot leave a half-written readme behind.
entries = []
for test_case in list_of_cases:

    name = test_case["entry_point"]
    prompt = test_case["prompt"]
    if name is None or prompt is None:
        # Incomplete test cases cannot be listed; report and skip them
        warnings.warn(f"Skipping {test_case['task_id']} in readme: entry_point or prompt is missing.")
        continue

    # Link target is the notebook file name, independent of the source directory
    task = os.path.basename(test_case["task_id"])

    print(name)
    # The prompt ends with the closing triple quote, so the second-to-last
    # piece of the split is the text in front of that closing quote
    description = prompt.split('"""')[-2]

    # The trailing whitespace is kept so the generated file stays unchanged
    entries.append(f"\n* [{name}]({task}): {description}\n        ")

# Number of use-cases that are actually listed
n = len(entries)
header = f"""
# List of use-cases
This list of {n} use-cases is auto-generated. Do not modify this file.

"""

# Descriptions may contain non-ASCII characters, so fix the encoding explicitly
with open(filename, 'w', encoding='utf-8') as file:
    file.write(header)
    file.writelines(entries)

apply_otsu_threshold_and_count_positive_pixels
binary_closing
binary_skeleton
bland_altman
combine_columns_of_tables
create_polygon_from_coordinates
convex_hull_measure_area
convolve_images
count_number_of_touching_neighbors
count_objects_over_time
count_overlapping_regions
create_umap
crop_quarter_image
dataframe_colum_rename
deconvolve_image
detect_edges
detect_ellipse
distance_between_maxima
expand_labels_without_overlap
extract_surface_measure_area
fft_spectrum
find_closest_neighbors
fit_circle
fit_gaussian_to_spot
flow_field_deformation
generate_image_histogram
identify_centroids
interpolate_stack
label_binary_image_and_count_labels
label_sequentially
linear_intensity_profile
list_image_files_in_folder
load_tif_and_output_rgb
local_maxima_from_distance_transform
map_pixel_count_of_labels
mask_image
maximum_intensity_projection
mean_squared_error
mean_std_column
measure_aspect_ratio_of_regions
measure_intensity_of_labels
measure_intensity_over_time
measure_mean_image_intensity
meas

In [8]:
# Report the total number of collected test cases
total_cases = len(list_of_cases)
print(total_cases)

81
