# In [2]:

import nbformat
from nbconvert.preprocessors import ExecutePreprocessor
import concurrent.futures
import os
import re

def extract_specific_output(cell):
    """Pick the integer-only lines out of a code cell's stdout.

    Every stdout stream output of ``cell`` is split on newlines, and each
    line that consists solely of digits (after stripping whitespace) is
    considered a value.

    Args:
        cell: A notebook cell exposing ``cell_type`` and, for code cells,
            ``outputs``.

    Returns:
        A ``(yy_value, fitness_value)`` tuple of strings. ``yy_value`` is the
        first all-digit line found; ``fitness_value`` is the last all-digit
        line found after it. Either is ``None`` when no such line exists, and
        both are ``None`` for non-code cells.
    """
    first_number = None
    latest_number = None

    # Only code cells carry outputs worth scanning.
    if cell.cell_type != 'code':
        return first_number, latest_number

    for out in cell.outputs:
        is_stdout = out.output_type == 'stream' and 'name' in out and out.name == 'stdout'
        if not is_stdout:
            continue
        for raw_line in out.text.split('\n'):
            token = raw_line.strip()
            if not token.isdigit():
                continue
            if first_number is None:
                first_number = token
            else:
                # Later matches keep overwriting, so the last one wins.
                latest_number = token

    return first_number, latest_number

def process_notebook(input_folder, output_folder, file_name, *, timeout=30, kernel_name='python3'):
    """Execute one notebook, save the executed copy, and extract its numbers.

    Args:
        input_folder: Directory containing the notebook to run.
        output_folder: Directory that receives the executed copy. It is not
            created here, so it must already exist.
        file_name: Notebook file name; must contain
            ``DropGNP_seed_<digits>.ipynb``.
        timeout: Timeout in seconds passed to ``ExecutePreprocessor``.
        kernel_name: Jupyter kernel name passed to ``ExecutePreprocessor``.

    Returns:
        ``(seed_index, yy, fitness)``. ``seed_index`` is the integer parsed
        from ``file_name``; ``yy`` and ``fitness`` come from the first cell
        for which ``extract_specific_output`` finds a value, or are both
        ``None`` when no cell yields one.

    Raises:
        ValueError: If ``file_name`` does not contain a seed index in the
            expected ``DropGNP_seed_<digits>.ipynb`` form.
    """
    # Extract seed index from the filename. The dot is escaped so that only a
    # real ".ipynb" suffix matches.
    match = re.search(r'DropGNP_seed_(\d+)\.ipynb', file_name)
    if match is None:
        raise ValueError(
            f"Cannot extract seed index from notebook file name: {file_name!r}"
        )
    seed_index = int(match.group(1))

    # Notebook files are UTF-8 JSON; do not rely on the platform's default
    # encoding when reading them.
    file_path = os.path.join(input_folder, file_name)
    with open(file_path, encoding='utf-8') as f:
        nb = nbformat.read(f, as_version=4)

    # Runs every cell in place; execution errors propagate to the caller.
    ep = ExecutePreprocessor(timeout=timeout, kernel_name=kernel_name)
    ep.preprocess(nb)

    output_file_path = os.path.join(output_folder, file_name.replace('.ipynb', '_executed.ipynb'))
    with open(output_file_path, 'w', encoding='utf-8') as f:
        nbformat.write(nb, f)

    # Report the first cell that printed at least one all-digit line.
    for cell in nb.cells:
        yy, fitness = extract_specific_output(cell)
        if yy is not None or fitness is not None:
            return seed_index, yy, fitness
    return seed_index, None, None

def run_notebooks_in_parallel(input_folder, output_folder, notebook_files, max_workers=None):
    """Run ``process_notebook`` on several notebooks using a thread pool.

    Args:
        input_folder: Directory containing the notebooks.
        output_folder: Directory for the executed copies; created (including
            parents) if it does not exist.
        notebook_files: Iterable of notebook file names inside
            ``input_folder``.
        max_workers: Maximum number of worker threads. ``None`` uses the
            ``ThreadPoolExecutor`` default.

    Returns:
        A list of the non-``None`` results of ``process_notebook``, in the
        order the notebooks finished (not the order they were submitted).

    Raises:
        Exception: Any exception raised while processing a notebook is
            re-raised here by ``future.result()``.
    """
    # exist_ok avoids the check-then-create race of testing for the folder first.
    os.makedirs(output_folder, exist_ok=True)

    extracted_outputs = []
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [
            executor.submit(process_notebook, input_folder, output_folder, file_name)
            for file_name in notebook_files
        ]
        for future in concurrent.futures.as_completed(futures):
            output = future.result()
            if output is not None:
                extracted_outputs.append(output)

    return extracted_outputs

# Example usage: execute the notebooks for seeds 480-499 and collect the
# numbers each one printed.
input_folder = 'GenD480-500'
output_folder = 'Executed_Notebooks480-500'
notebook_files = [f'DropGNP_seed_{j}.ipynb' for j in range(480, 500)]
extracted_outputs = run_notebooks_in_parallel(input_folder, output_folder, notebook_files)

# Results arrive in thread-completion order, which varies between runs; sort
# by seed index so the report is reproducible.
extracted_outputs.sort(key=lambda result: result[0])

# Write extracted outputs to a file, one line per notebook.
with open('Drop_outputs480-500.txt', 'w', encoding='utf-8') as f:
    for index, yy, fitness in extracted_outputs:
        f.write(f'Index: {index}, yy: {yy}, fitness[0]: {fitness}\n')
