# In [None]:
import os
import csv
import numpy as np
import time
import multiprocessing as mp
from concurrent.futures import ThreadPoolExecutor, as_completed

# Case directories to scan. They are written as absolute paths (leading
# '/'), so every realization path below is rooted at the filesystem root.
cdir = [
    '/2D_ics_lc2_6400r/', '/2D_ics_lc3_6400r/', '/2D_ics_lc4_6400r/',
    '/2D_hcs_lc2_6400r/', '/2D_hcs_lc3_6400r/', '/2D_hcs_lc4_6400r/',
    '/2D_lcs_lc2_6400r/', '/2D_lcs_lc3_6400r/', '/2D_lcs_lc4_6400r/',
]

# Number of numbered sub-directories (0 .. realizations - 1) per case.
# NOTE(review): the directory names end in "6400r" while this is 128000 --
# confirm the intended realization count.
realizations = 128000

# One entry per (case directory, realization index), case-major order.
paths = [
    os.path.join(case_dir, str(index))
    for case_dir in cdir
    for index in range(realizations)
]

# The indicator column names are the keys of the dict stored in the first
# realization's metrics file; the file holds a pickled dict, hence
# allow_pickle=True.
ind_keys = np.load(
    f"{paths[0]}/ConnectivityMetrics/512.npy", allow_pickle=True
).item()
ind_labels = list(ind_keys)

# Column names for the eight text fields appended after the indicators.
prop_labels = ['L', 'con', 'lc', 'p', 'lcG', 'lcNst', 'lcBin', 'keff']
labels = [*ind_labels, *prop_labels]

# Process each path
def process_path(path):
    """Collect one output row from a single realization directory.

    Reads four files located under ``path``:

    * ``ConnectivityMetrics/512.npy`` -- a pickled dict; every value is
      converted to a plain Python scalar with ``.item()``.
    * ``GenParams.txt`` -- lines 3, 5, 6 and 7 (0-based), stripped.
    * ``lc.txt`` -- lines 1, 2 and 3 (0-based), stripped.
    * ``SolverRes.txt`` -- line 1 (0-based), stripped.

    Args:
        path: Directory holding the files of one realization.

    Returns:
        A list with the indicator values followed by the eight text
        fields (as strings), or ``None`` if anything goes wrong; in that
        case the error is printed and the realization is skipped.
    """
    try:
        # The joined components must be relative: os.path.join throws away
        # everything before an absolute component, so a leading '/' here
        # would make every call read the same file at the filesystem root.
        metrics_file = os.path.join(path, 'ConnectivityMetrics', '512.npy')

        # allow_pickle=True is needed because the file stores a dict.
        # Unpickling can execute arbitrary code -- only use on trusted files.
        indicator_dict = np.load(metrics_file, allow_pickle=True).item()
        indicator = [v.item() for v in indicator_dict.values()]

        with open(os.path.join(path, 'GenParams.txt'), 'r') as gen_file:
            lines = gen_file.readlines()
        gen = [lines[i].strip() for i in (3, 5, 6, 7)]

        with open(os.path.join(path, 'lc.txt'), 'r') as lc_file:
            lc_lines = lc_file.readlines()
        lc = [lc_lines[i].strip() for i in (1, 2, 3)]

        with open(os.path.join(path, 'SolverRes.txt'), 'r') as keff_file:
            keff_lines = keff_file.readlines()
        keff = [keff_lines[1].strip()]

        return indicator + gen + lc + keff

    except Exception as e:
        # Deliberate best-effort: a broken or missing realization must not
        # abort the whole batch, so report it and let the caller drop it.
        print(f"Error processing {path}: {e}")
        return None

# Function to measure ThreadPoolExecutor performance
def process_with_threads(paths):
    """Run ``process_path`` over ``paths`` in a thread pool and time it.

    The elapsed wall-clock time (including pool shutdown) is printed.

    Args:
        paths: Iterable of realization directories.

    Returns:
        The rows returned by ``process_path``, with ``None`` results
        (failed realizations) dropped, in the same order as ``paths``.
    """
    start_time = time.time()
    with ThreadPoolExecutor() as executor:
        # Executor.map yields results in input order, so the rows are
        # reproducible between runs and line up with the rows produced by
        # process_with_multiprocessing (as_completed would return them in
        # whatever order the threads happened to finish).
        results = [
            row
            for row in executor.map(process_path, paths)
            if row is not None
        ]
    end_time = time.time()

    print(f"ThreadPoolExecutor Time: {end_time - start_time:.2f} seconds")
    return results

# Function to measure multiprocessing.Pool performance
def process_with_multiprocessing(paths, num_workers):
    """Run ``process_path`` over ``paths`` in a process pool and time it.

    The elapsed wall-clock time, covering both the pool run and the
    filtering of failed rows, is printed.

    Args:
        paths: Realization directories to process.
        num_workers: Number of worker processes given to ``mp.Pool``.

    Returns:
        The rows produced by ``pool.map``, with ``None`` entries removed.
    """
    t0 = time.time()

    with mp.Pool(processes=num_workers) as pool:
        rows = pool.map(process_path, paths)

    # Drop the realizations that process_path reported as failed.
    kept = []
    for row in rows:
        if row is not None:
            kept.append(row)

    elapsed = time.time() - t0
    print(f"Multiprocessing.Pool Time: {elapsed:.2f} seconds")
    return kept

# Main execution and comparison
if __name__ == "__main__":
    # Use as many worker processes as multiprocessing reports CPUs.
    num_workers = mp.cpu_count()

    # Time both strategies on the same list of paths, threads first.
    print("Running ThreadPoolExecutor...")
    thread_results = process_with_threads(paths)

    print("\nRunning multiprocessing.Pool...")
    process_results = process_with_multiprocessing(paths, num_workers)

    print("\nComparison complete.")

    # Write the header row followed by one row per realization.
    # process_results is written here; thread_results could be used instead.
    csv_file = 'ind_output_2D.csv'
    with open(csv_file, mode='w', newline='') as csv_handle:
        csv.writer(csv_handle).writerows([labels, *process_results])
    print(f'{csv_file} plain text file was generated')