In [4]:
import os
import pandas as pd

# Set the main directory where subfolders contain .xlsx files
main_folder = r"C:\Users\abels\Documents\MyR\RSCA_NoDB\output\SoCal_STD"  # Change this to your actual folder path

# Define the worksheet names from "Summary_Site_Data"
summary_sheet_names = [
    "Module Summary",
    "LOA Summary",
    "Reference Condition Comparison",
    "Stressor Response Summary",
    "Spatial Co-Occurrence Summary",
    "RSCA Comparator Site Data"
]

# Create dictionaries to store data for merging
merged_summary_data = {sheet: [] for sheet in summary_sheet_names}  # Store Summary_Site_Data
monitoring_list = []  # Store Monitoring_Recommendations

# Walk through all subdirectories to find .xlsx files
for root, _, files in os.walk(main_folder):
    for file in files:
        if file.endswith('.xlsx'):
            file_path = os.path.join(root, file)
            try:
                # Process "Monitoring_Recommendations" files
                if "Monitoring_Recommendations" in file:
                    df = pd.read_excel(file_path, engine="openpyxl")  # Read single-sheet Excel file
                    df['Source_File'] = file  # Add filename as a new column
                    monitoring_list.append(df)

                # Process "Summary_Site_Data" files
                elif "Summary_Site_Data" in file:
                    sheets = pd.read_excel(file_path, sheet_name=None, engine="openpyxl")  # Read all sheets

                    for sheet in summary_sheet_names:
                        if sheet in sheets:  # Ensure sheet exists
                            df = sheets[sheet]
                            #df['Source_File'] = file  # Add filename as a column
                            #df['Sheet_Name'] = sheet  # Add sheet name as a column
                            merged_summary_data[sheet].append(df)

            except Exception as e:
                print(f"❌ Error reading {file_path}: {e}")

# ✅ Save "Monitoring_Recommendations" merged CSV
if monitoring_list:
    merged_monitoring = pd.concat(monitoring_list, ignore_index=True)
    output_monitoring = os.path.join(main_folder, "Merged_Monitoring_Recommendations.csv")
    merged_monitoring.to_csv(output_monitoring, index=False)
    print(f"✅ Merged Monitoring CSV saved to: {output_monitoring}")
else:
    print("❌ No 'Monitoring_Recommendations' files found.")

# ✅ Save each "Summary_Site_Data" sheet separately
for sheet, df_list in merged_summary_data.items():
    if df_list:
        merged_df = pd.concat(df_list, ignore_index=True)
        output_csv = os.path.join(main_folder, f"Merged_{sheet.replace(' ', '_')}.csv")
        merged_df.to_csv(output_csv, index=False)
        print(f"✅ Merged CSV saved for '{sheet}': {output_csv}")
    else:
        print(f"❌ No data found for '{sheet}'")

✅ Merged Monitoring CSV saved to: C:\Users\abels\Documents\MyR\RSCA_NoDB\output\SoCal_STD\Merged_Monitoring_Recommendations.csv
✅ Merged CSV saved for 'Module Summary': C:\Users\abels\Documents\MyR\RSCA_NoDB\output\SoCal_STD\Merged_Module_Summary.csv
✅ Merged CSV saved for 'LOA Summary': C:\Users\abels\Documents\MyR\RSCA_NoDB\output\SoCal_STD\Merged_LOA_Summary.csv
✅ Merged CSV saved for 'Reference Condition Comparison': C:\Users\abels\Documents\MyR\RSCA_NoDB\output\SoCal_STD\Merged_Reference_Condition_Comparison.csv
✅ Merged CSV saved for 'Stressor Response Summary': C:\Users\abels\Documents\MyR\RSCA_NoDB\output\SoCal_STD\Merged_Stressor_Response_Summary.csv
✅ Merged CSV saved for 'Spatial Co-Occurrence Summary': C:\Users\abels\Documents\MyR\RSCA_NoDB\output\SoCal_STD\Merged_Spatial_Co-Occurrence_Summary.csv
✅ Merged CSV saved for 'RSCA Comparator Site Data': C:\Users\abels\Documents\MyR\RSCA_NoDB\output\SoCal_STD\Merged_RSCA_Comparator_Site_Data.csv


In [5]:
pip install tqdm

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


claudes re build


In [None]:
import os
import sys
import pandas as pd
import time
from tqdm import tqdm
import concurrent.futures
import traceback
import logging

# Configure logging
logging.basicConfig(level=logging.INFO, 
                    format='%(asctime)s - %(levelname)s: %(message)s',
                    handlers=[
                        logging.FileHandler('excel_merger_log.txt'),
                        logging.StreamHandler(sys.stdout)
                    ])

# Track timing
start_time = time.time()

# Set the main directory where subfolders contain .xlsx files
main_folder = r"C:\Users\abels\Documents\MyR\RSCA_NoDB\output\SoCal_Mod"

# Define the worksheet names from "Summary_Site_Data"
summary_sheet_names = [
    "Module Summary",
    "LOA Summary",
    "Reference Condition Comparison",
    "Stressor Response Summary",
    "Spatial Co-Occurrence Summary",
    "RSCA Comparator Site Data"
]

# Function to process a single file with detailed error handling
def process_file(file_path):
    result = {
        'monitoring': None,
        'summary_sheets': {sheet: None for sheet in summary_sheet_names},
        'error': None
    }
    
    try:
        file = os.path.basename(file_path)
        
        # Process "Monitoring_Recommendations" files
        if "Monitoring_Recommendations" in file:
            try:
                df = pd.read_excel(file_path, engine="openpyxl")
                df['Source_File'] = file
                result['monitoring'] = df
            except Exception as e:
                result['error'] = f"Monitoring file error: {str(e)}"
                logging.error(f"Error processing {file_path}: {traceback.format_exc()}")
        
        # Process "Summary_Site_Data" files
        elif "Summary_Site_Data" in file:
            try:
                # Try reading specific sheets first
                try:
                    sheets = pd.read_excel(file_path, sheet_name=summary_sheet_names, engine="openpyxl")
                    for sheet in summary_sheet_names:
                        if sheet in sheets:
                            result['summary_sheets'][sheet] = sheets[sheet]
                except ValueError:
                    # If specific sheets fail, try reading all sheets
                    sheets = pd.read_excel(file_path, sheet_name=None, engine="openpyxl")
                    for sheet in summary_sheet_names:
                        if sheet in sheets:
                            result['summary_sheets'][sheet] = sheets[sheet]
            except Exception as e:
                result['error'] = f"Summary file error: {str(e)}"
                logging.error(f"Error processing {file_path}: {traceback.format_exc()}")
    
    except Exception as e:
        result['error'] = f"Unexpected error: {str(e)}"
        logging.error(f"Unexpected error processing {file_path}: {traceback.format_exc()}")
    
    return result

# Find all Excel files
excel_files = []
for root, _, files in os.walk(main_folder):
    for file in files:
        if file.endswith('.xlsx'):
            excel_files.append(os.path.join(root, file))

logging.info(f"Found {len(excel_files)} Excel files to process")

# Process files with progress bar
monitoring_list = []
merged_summary_data = {sheet: [] for sheet in summary_sheet_names}
error_files = []

# Process files in batches with more robust error handling
batch_size = 100
num_batches = (len(excel_files) + batch_size - 1) // batch_size

for batch_num in range(num_batches):
    start_idx = batch_num * batch_size
    batch = excel_files[start_idx:start_idx+batch_size]
    
    logging.info(f"Processing batch {batch_num + 1}/{num_batches}")
    
    # Use a lower number of workers to reduce memory pressure
    with concurrent.futures.ProcessPoolExecutor(max_workers=max(1, os.cpu_count() - 1)) as executor:
        try:
            # Process the batch
            results = list(tqdm(executor.map(process_file, batch), total=len(batch), 
                                desc=f"Batch {batch_num + 1}"))
            
            # Collect results from this batch
            for result in results:
                # Track files with errors
                if result['error']:
                    error_files.append({
                        'file': batch[results.index(result)],
                        'error': result['error']
                    })
                
                # Collect monitoring data
                if result['monitoring'] is not None:
                    monitoring_list.append(result['monitoring'])
                
                # Collect summary sheet data
                for sheet in summary_sheet_names:
                    if result['summary_sheets'][sheet] is not None:
                        merged_summary_data[sheet].append(result['summary_sheets'][sheet])
        
        except Exception as e:
            logging.error(f"Batch {batch_num + 1} processing error: {traceback.format_exc()}")
    
    # Force garbage collection
    import gc
    gc.collect()

# Log processing summary
logging.info(f"Finished processing all files in {time.time() - start_time:.1f} seconds")

# Log error files if any
if error_files:
    logging.warning(f"Encountered errors in {len(error_files)} files")
    with open(os.path.join(main_folder, 'error_log.txt'), 'w') as f:
        for error_file in error_files:
            f.write(f"File: {error_file['file']}\nError: {error_file['error']}\n\n")

# Save "Monitoring_Recommendations" merged CSV
if monitoring_list:
    logging.info("Merging monitoring data...")
    merged_monitoring = pd.concat(monitoring_list, ignore_index=True)
    output_monitoring = os.path.join(main_folder, "Merged_Monitoring_Recommendations.csv")
    merged_monitoring.to_csv(output_monitoring, index=False)
    logging.info(f"✅ Merged Monitoring CSV saved to: {output_monitoring}")
else:
    logging.warning("❌ No 'Monitoring_Recommendations' files found.")

# Save each "Summary_Site_Data" sheet separately
for sheet, df_list in merged_summary_data.items():
    if df_list:
        logging.info(f"Merging '{sheet}' data...")
        merged_df = pd.concat(df_list, ignore_index=True)
        output_csv = os.path.join(main_folder, f"Merged_{sheet.replace(' ', '_')}.csv")
        merged_df.to_csv(output_csv, index=False)
        logging.info(f"✅ Merged CSV saved for '{sheet}': {output_csv}")
    else:
        logging.warning(f"❌ No data found for '{sheet}'")

logging.info(f"Total execution time: {time.time() - start_time:.1f} seconds")

GPT rebuild of Claude

In [6]:
import os
import sys
import pandas as pd
import time
from tqdm import tqdm
import concurrent.futures
import traceback
import logging

# Configure logging with UTF-8 encoding for both file and stream handlers
stream_handler = logging.StreamHandler(sys.stdout)
stream_handler.setFormatter(logging.Formatter("%(asctime)s - %(levelname)s: %(message)s"))
file_handler = logging.FileHandler('excel_merger_log.txt', encoding='utf-8')
file_handler.setFormatter(logging.Formatter("%(asctime)s - %(levelname)s: %(message)s"))
logging.getLogger().handlers = [file_handler, stream_handler]
logging.getLogger().setLevel(logging.INFO)

# Track timing
start_time = time.time()

# Set the main directory where subfolders contain .xlsx files
main_folder = r"C:\Users\abels\Documents\MyR\RSCA_NoDB\output\SoCal_Mod"

# Define the worksheet names from "Summary_Site_Data"
summary_sheet_names = [
    "Module Summary",
    "LOA Summary",
    "Reference Condition Comparison",
    "Stressor Response Summary",
    "Spatial Co-Occurrence Summary",
    "RSCA Comparator Site Data"
]

# Function to process a single file with detailed error handling
def process_file(file_path):
    result = {
        'monitoring': None,
        'summary_sheets': {sheet: None for sheet in summary_sheet_names},
        'error': None
    }
    
    try:
        file = os.path.basename(file_path)
        
        # Process "Monitoring_Recommendations" files
        if "Monitoring_Recommendations" in file:
            try:
                df = pd.read_excel(file_path, engine="openpyxl")
                df['Source_File'] = file
                result['monitoring'] = df
            except Exception as e:
                result['error'] = f"Monitoring file error: {str(e)}"
                logging.error(f"Error processing {file_path}:\n{traceback.format_exc()}")
        
        # Process "Summary_Site_Data" files
        elif "Summary_Site_Data" in file:
            try:
                # Try reading specific sheets first
                try:
                    sheets = pd.read_excel(file_path, sheet_name=summary_sheet_names, engine="openpyxl")
                    for sheet in summary_sheet_names:
                        if sheet in sheets:
                            result['summary_sheets'][sheet] = sheets[sheet]
                except ValueError:
                    # If specific sheets fail, try reading all sheets
                    sheets = pd.read_excel(file_path, sheet_name=None, engine="openpyxl")
                    for sheet in summary_sheet_names:
                        if sheet in sheets:
                            result['summary_sheets'][sheet] = sheets[sheet]
            except Exception as e:
                result['error'] = f"Summary file error: {str(e)}"
                logging.error(f"Error processing {file_path}:\n{traceback.format_exc()}")
    
    except Exception as e:
        result['error'] = f"Unexpected error: {str(e)}"
        logging.error(f"Unexpected error processing {file_path}:\n{traceback.format_exc()}")
    
    return result

# Find all Excel files
excel_files = []
for root, _, files in os.walk(main_folder):
    for file in files:
        if file.endswith('.xlsx'):
            excel_files.append(os.path.join(root, file))

logging.info(f"Found {len(excel_files)} Excel files to process")

# Prepare lists for collected data
monitoring_list = []
merged_summary_data = {sheet: [] for sheet in summary_sheet_names}
error_files = []

# Process files in batches
batch_size = 100
num_batches = (len(excel_files) + batch_size - 1) // batch_size

for batch_num in range(num_batches):
    start_idx = batch_num * batch_size
    batch = excel_files[start_idx:start_idx+batch_size]
    
    logging.info(f"Processing batch {batch_num + 1}/{num_batches}")
    
    # Use a lower number of workers to reduce memory pressure
    with concurrent.futures.ProcessPoolExecutor(max_workers=max(1, os.cpu_count() - 1)) as executor:
        try:
            # Process the batch with a progress bar
            results = list(tqdm(executor.map(process_file, batch), total=len(batch),
                                desc=f"Batch {batch_num + 1}"))
            
            # Collect results from this batch using enumerate to avoid index lookup issues
            for i, result in enumerate(results):
                file_processed = batch[i]
                if result['error']:
                    error_files.append({'file': file_processed, 'error': result['error']})
                
                if result['monitoring'] is not None:
                    monitoring_list.append(result['monitoring'])
                
                for sheet in summary_sheet_names:
                    if result['summary_sheets'][sheet] is not None:
                        merged_summary_data[sheet].append(result['summary_sheets'][sheet])
        
        except Exception as e:
            logging.error(f"Batch {batch_num + 1} processing error:\n{traceback.format_exc()}")
    
    # Force garbage collection
    import gc
    gc.collect()

logging.info(f"Finished processing all files in {time.time() - start_time:.1f} seconds")

# Log error files if any
if error_files:
    logging.warning(f"Encountered errors in {len(error_files)} files")
    with open(os.path.join(main_folder, 'error_log.txt'), 'w', encoding='utf-8') as f:
        for error_file in error_files:
            f.write(f"File: {error_file['file']}\nError: {error_file['error']}\n\n")

# Save "Monitoring_Recommendations" merged CSV
if monitoring_list:
    logging.info("Merging monitoring data...")
    merged_monitoring = pd.concat(monitoring_list, ignore_index=True)
    output_monitoring = os.path.join(main_folder, "Merged_Monitoring_Recommendations.csv")
    merged_monitoring.to_csv(output_monitoring, index=False)
    logging.info(f"✅ Merged Monitoring CSV saved to: {output_monitoring}")
else:
    logging.warning("❌ No 'Monitoring_Recommendations' files found.")

# Save each "Summary_Site_Data" sheet separately
for sheet, df_list in merged_summary_data.items():
    if df_list:
        logging.info(f"Merging '{sheet}' data...")
        merged_df = pd.concat(df_list, ignore_index=True)
        output_csv = os.path.join(main_folder, f"Merged_{sheet.replace(' ', '_')}.csv")
        merged_df.to_csv(output_csv, index=False)
        logging.info(f"✅ Merged CSV saved for '{sheet}': {output_csv}")
    else:
        logging.warning(f"❌ No data found for '{sheet}'")

logging.info(f"Total execution time: {time.time() - start_time:.1f} seconds")


2025-03-04 10:12:30,693 - INFO: Found 2328 Excel files to process
2025-03-04 10:12:30,695 - INFO: Processing batch 1/24


Batch 1:   0%|                                                                                                                                                                        | 0/100 [00:00<?, ?it/s]

2025-03-04 10:12:31,201 - ERROR: Batch 1 processing error:
Traceback (most recent call last):
  File "C:\Users\abels\AppData\Local\Temp\ipykernel_32416\1271874543.py", line 108, in <module>
    results = list(tqdm(executor.map(process_file, batch), total=len(batch),
  File "C:\Users\abels\.conda\envs\myJupenv\lib\site-packages\tqdm\std.py", line 1181, in __iter__
    for obj in iterable:
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\process.py", line 575, in _chain_from_iterable_of_lists
    for element in iterable:
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 621, in result_iterator
    yield _result_or_cancel(fs.pop())
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 319, in _result_or_cancel
    return fut.result(timeout)
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 458, in result
    return self.__get_result()
  File "C:\Users\abels\.conda\envs\myJupenv




2025-03-04 10:12:31,584 - ERROR: Batch 2 processing error:
Traceback (most recent call last):
  File "C:\Users\abels\AppData\Local\Temp\ipykernel_32416\1271874543.py", line 108, in <module>
    results = list(tqdm(executor.map(process_file, batch), total=len(batch),
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\process.py", line 766, in map
    results = super().map(partial(_process_chunk, fn),
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 610, in map
    fs = [self.submit(fn, *args) for args in zip(*iterables)]
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 610, in <listcomp>
    fs = [self.submit(fn, *args) for args in zip(*iterables)]
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\process.py", line 720, in submit
    raise BrokenProcessPool(self._broken)
concurrent.futures.process.BrokenProcessPool: A child process terminated abruptly, the process pool is not usable anym

Batch 3:   0%|                                                                                                                                                                        | 0/100 [00:00<?, ?it/s]

2025-03-04 10:12:31,970 - ERROR: Batch 3 processing error:
Traceback (most recent call last):
  File "C:\Users\abels\AppData\Local\Temp\ipykernel_32416\1271874543.py", line 108, in <module>
    results = list(tqdm(executor.map(process_file, batch), total=len(batch),
  File "C:\Users\abels\.conda\envs\myJupenv\lib\site-packages\tqdm\std.py", line 1181, in __iter__
    for obj in iterable:
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\process.py", line 575, in _chain_from_iterable_of_lists
    for element in iterable:
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 621, in result_iterator
    yield _result_or_cancel(fs.pop())
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 319, in _result_or_cancel
    return fut.result(timeout)
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 458, in result
    return self.__get_result()
  File "C:\Users\abels\.conda\envs\myJupenv


Batch 4:   0%|                                                                                                                                                                        | 0/100 [00:00<?, ?it/s]

2025-03-04 10:12:32,434 - ERROR: Batch 4 processing error:
Traceback (most recent call last):
  File "C:\Users\abels\AppData\Local\Temp\ipykernel_32416\1271874543.py", line 108, in <module>
    results = list(tqdm(executor.map(process_file, batch), total=len(batch),
  File "C:\Users\abels\.conda\envs\myJupenv\lib\site-packages\tqdm\std.py", line 1181, in __iter__
    for obj in iterable:
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\process.py", line 575, in _chain_from_iterable_of_lists
    for element in iterable:
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 621, in result_iterator
    yield _result_or_cancel(fs.pop())
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 319, in _result_or_cancel
    return fut.result(timeout)
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 458, in result
    return self.__get_result()
  File "C:\Users\abels\.conda\envs\myJupenv


Batch 5:   0%|                                                                                                                                                                        | 0/100 [00:00<?, ?it/s]

2025-03-04 10:12:32,816 - ERROR: Batch 5 processing error:
Traceback (most recent call last):
  File "C:\Users\abels\AppData\Local\Temp\ipykernel_32416\1271874543.py", line 108, in <module>
    results = list(tqdm(executor.map(process_file, batch), total=len(batch),
  File "C:\Users\abels\.conda\envs\myJupenv\lib\site-packages\tqdm\std.py", line 1181, in __iter__
    for obj in iterable:
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\process.py", line 575, in _chain_from_iterable_of_lists
    for element in iterable:
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 621, in result_iterator
    yield _result_or_cancel(fs.pop())
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 319, in _result_or_cancel
    return fut.result(timeout)
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 458, in result
    return self.__get_result()
  File "C:\Users\abels\.conda\envs\myJupenv


Batch 6:   0%|                                                                                                                                                                        | 0/100 [00:00<?, ?it/s]

2025-03-04 10:12:33,217 - ERROR: Batch 6 processing error:
Traceback (most recent call last):
  File "C:\Users\abels\AppData\Local\Temp\ipykernel_32416\1271874543.py", line 108, in <module>
    results = list(tqdm(executor.map(process_file, batch), total=len(batch),
  File "C:\Users\abels\.conda\envs\myJupenv\lib\site-packages\tqdm\std.py", line 1181, in __iter__
    for obj in iterable:
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\process.py", line 575, in _chain_from_iterable_of_lists
    for element in iterable:
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 621, in result_iterator
    yield _result_or_cancel(fs.pop())
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 319, in _result_or_cancel
    return fut.result(timeout)
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 458, in result
    return self.__get_result()
  File "C:\Users\abels\.conda\envs\myJupenv




2025-03-04 10:12:33,566 - ERROR: Batch 7 processing error:
Traceback (most recent call last):
  File "C:\Users\abels\AppData\Local\Temp\ipykernel_32416\1271874543.py", line 108, in <module>
    results = list(tqdm(executor.map(process_file, batch), total=len(batch),
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\process.py", line 766, in map
    results = super().map(partial(_process_chunk, fn),
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 610, in map
    fs = [self.submit(fn, *args) for args in zip(*iterables)]
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 610, in <listcomp>
    fs = [self.submit(fn, *args) for args in zip(*iterables)]
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\process.py", line 720, in submit
    raise BrokenProcessPool(self._broken)
concurrent.futures.process.BrokenProcessPool: A child process terminated abruptly, the process pool is not usable anym

Batch 8:   0%|                                                                                                                                                                        | 0/100 [00:00<?, ?it/s]

2025-03-04 10:12:33,956 - ERROR: Batch 8 processing error:
Traceback (most recent call last):
  File "C:\Users\abels\AppData\Local\Temp\ipykernel_32416\1271874543.py", line 108, in <module>
    results = list(tqdm(executor.map(process_file, batch), total=len(batch),
  File "C:\Users\abels\.conda\envs\myJupenv\lib\site-packages\tqdm\std.py", line 1181, in __iter__
    for obj in iterable:
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\process.py", line 575, in _chain_from_iterable_of_lists
    for element in iterable:
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 621, in result_iterator
    yield _result_or_cancel(fs.pop())
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 319, in _result_or_cancel
    return fut.result(timeout)
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 458, in result
    return self.__get_result()
  File "C:\Users\abels\.conda\envs\myJupenv


Batch 9:   0%|                                                                                                                                                                        | 0/100 [00:00<?, ?it/s]

2025-03-04 10:12:34,355 - ERROR: Batch 9 processing error:
Traceback (most recent call last):
  File "C:\Users\abels\AppData\Local\Temp\ipykernel_32416\1271874543.py", line 108, in <module>
    results = list(tqdm(executor.map(process_file, batch), total=len(batch),
  File "C:\Users\abels\.conda\envs\myJupenv\lib\site-packages\tqdm\std.py", line 1181, in __iter__
    for obj in iterable:
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\process.py", line 575, in _chain_from_iterable_of_lists
    for element in iterable:
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 621, in result_iterator
    yield _result_or_cancel(fs.pop())
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 319, in _result_or_cancel
    return fut.result(timeout)
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 458, in result
    return self.__get_result()
  File "C:\Users\abels\.conda\envs\myJupenv


Batch 10:   0%|                                                                                                                                                                       | 0/100 [00:00<?, ?it/s]

2025-03-04 10:12:34,773 - ERROR: Batch 10 processing error:
Traceback (most recent call last):
  File "C:\Users\abels\AppData\Local\Temp\ipykernel_32416\1271874543.py", line 108, in <module>
    results = list(tqdm(executor.map(process_file, batch), total=len(batch),
  File "C:\Users\abels\.conda\envs\myJupenv\lib\site-packages\tqdm\std.py", line 1181, in __iter__
    for obj in iterable:
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\process.py", line 575, in _chain_from_iterable_of_lists
    for element in iterable:
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 621, in result_iterator
    yield _result_or_cancel(fs.pop())
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 319, in _result_or_cancel
    return fut.result(timeout)
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 458, in result
    return self.__get_result()
  File "C:\Users\abels\.conda\envs\myJupen


Batch 11:   0%|                                                                                                                                                                       | 0/100 [00:00<?, ?it/s]

2025-03-04 10:12:35,177 - ERROR: Batch 11 processing error:
Traceback (most recent call last):
  File "C:\Users\abels\AppData\Local\Temp\ipykernel_32416\1271874543.py", line 108, in <module>
    results = list(tqdm(executor.map(process_file, batch), total=len(batch),
  File "C:\Users\abels\.conda\envs\myJupenv\lib\site-packages\tqdm\std.py", line 1181, in __iter__
    for obj in iterable:
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\process.py", line 575, in _chain_from_iterable_of_lists
    for element in iterable:
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 621, in result_iterator
    yield _result_or_cancel(fs.pop())
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 319, in _result_or_cancel
    return fut.result(timeout)
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 458, in result
    return self.__get_result()
  File "C:\Users\abels\.conda\envs\myJupen


Batch 12:   0%|                                                                                                                                                                       | 0/100 [00:00<?, ?it/s]

2025-03-04 10:12:35,574 - ERROR: Batch 12 processing error:
Traceback (most recent call last):
  File "C:\Users\abels\AppData\Local\Temp\ipykernel_32416\1271874543.py", line 108, in <module>
    results = list(tqdm(executor.map(process_file, batch), total=len(batch),
  File "C:\Users\abels\.conda\envs\myJupenv\lib\site-packages\tqdm\std.py", line 1181, in __iter__
    for obj in iterable:
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\process.py", line 575, in _chain_from_iterable_of_lists
    for element in iterable:
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 621, in result_iterator
    yield _result_or_cancel(fs.pop())
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 319, in _result_or_cancel
    return fut.result(timeout)
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 458, in result
    return self.__get_result()
  File "C:\Users\abels\.conda\envs\myJupen


Batch 13:   0%|                                                                                                                                                                       | 0/100 [00:00<?, ?it/s]

2025-03-04 10:12:35,968 - ERROR: Batch 13 processing error:
Traceback (most recent call last):
  File "C:\Users\abels\AppData\Local\Temp\ipykernel_32416\1271874543.py", line 108, in <module>
    results = list(tqdm(executor.map(process_file, batch), total=len(batch),
  File "C:\Users\abels\.conda\envs\myJupenv\lib\site-packages\tqdm\std.py", line 1181, in __iter__
    for obj in iterable:
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\process.py", line 575, in _chain_from_iterable_of_lists
    for element in iterable:
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 621, in result_iterator
    yield _result_or_cancel(fs.pop())
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 319, in _result_or_cancel
    return fut.result(timeout)
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 458, in result
    return self.__get_result()
  File "C:\Users\abels\.conda\envs\myJupen




2025-03-04 10:12:36,346 - ERROR: Batch 14 processing error:
Traceback (most recent call last):
  File "C:\Users\abels\AppData\Local\Temp\ipykernel_32416\1271874543.py", line 108, in <module>
    results = list(tqdm(executor.map(process_file, batch), total=len(batch),
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\process.py", line 766, in map
    results = super().map(partial(_process_chunk, fn),
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 610, in map
    fs = [self.submit(fn, *args) for args in zip(*iterables)]
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 610, in <listcomp>
    fs = [self.submit(fn, *args) for args in zip(*iterables)]
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\process.py", line 720, in submit
    raise BrokenProcessPool(self._broken)
concurrent.futures.process.BrokenProcessPool: A child process terminated abruptly, the process pool is not usable any

Batch 15:   0%|                                                                                                                                                                       | 0/100 [00:00<?, ?it/s]

2025-03-04 10:12:36,722 - ERROR: Batch 15 processing error:
Traceback (most recent call last):
  File "C:\Users\abels\AppData\Local\Temp\ipykernel_32416\1271874543.py", line 108, in <module>
    results = list(tqdm(executor.map(process_file, batch), total=len(batch),
  File "C:\Users\abels\.conda\envs\myJupenv\lib\site-packages\tqdm\std.py", line 1181, in __iter__
    for obj in iterable:
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\process.py", line 575, in _chain_from_iterable_of_lists
    for element in iterable:
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 621, in result_iterator
    yield _result_or_cancel(fs.pop())
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 319, in _result_or_cancel
    return fut.result(timeout)
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 458, in result
    return self.__get_result()
  File "C:\Users\abels\.conda\envs\myJupen


Batch 16:   0%|                                                                                                                                                                       | 0/100 [00:00<?, ?it/s]

2025-03-04 10:12:37,146 - ERROR: Batch 16 processing error:
Traceback (most recent call last):
  File "C:\Users\abels\AppData\Local\Temp\ipykernel_32416\1271874543.py", line 108, in <module>
    results = list(tqdm(executor.map(process_file, batch), total=len(batch),
  File "C:\Users\abels\.conda\envs\myJupenv\lib\site-packages\tqdm\std.py", line 1181, in __iter__
    for obj in iterable:
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\process.py", line 575, in _chain_from_iterable_of_lists
    for element in iterable:
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 621, in result_iterator
    yield _result_or_cancel(fs.pop())
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 319, in _result_or_cancel
    return fut.result(timeout)
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 458, in result
    return self.__get_result()
  File "C:\Users\abels\.conda\envs\myJupen




2025-03-04 10:12:37,525 - ERROR: Batch 17 processing error:
Traceback (most recent call last):
  File "C:\Users\abels\AppData\Local\Temp\ipykernel_32416\1271874543.py", line 108, in <module>
    results = list(tqdm(executor.map(process_file, batch), total=len(batch),
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\process.py", line 766, in map
    results = super().map(partial(_process_chunk, fn),
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 610, in map
    fs = [self.submit(fn, *args) for args in zip(*iterables)]
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 610, in <listcomp>
    fs = [self.submit(fn, *args) for args in zip(*iterables)]
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\process.py", line 720, in submit
    raise BrokenProcessPool(self._broken)
concurrent.futures.process.BrokenProcessPool: A child process terminated abruptly, the process pool is not usable any

Batch 18:   0%|                                                                                                                                                                       | 0/100 [00:00<?, ?it/s]

2025-03-04 10:12:37,922 - ERROR: Batch 18 processing error:
Traceback (most recent call last):
  File "C:\Users\abels\AppData\Local\Temp\ipykernel_32416\1271874543.py", line 108, in <module>
    results = list(tqdm(executor.map(process_file, batch), total=len(batch),
  File "C:\Users\abels\.conda\envs\myJupenv\lib\site-packages\tqdm\std.py", line 1181, in __iter__
    for obj in iterable:
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\process.py", line 575, in _chain_from_iterable_of_lists
    for element in iterable:
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 621, in result_iterator
    yield _result_or_cancel(fs.pop())
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 319, in _result_or_cancel
    return fut.result(timeout)
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 458, in result
    return self.__get_result()
  File "C:\Users\abels\.conda\envs\myJupen


Batch 19:   0%|                                                                                                                                                                       | 0/100 [00:00<?, ?it/s]

2025-03-04 10:12:38,326 - ERROR: Batch 19 processing error:
Traceback (most recent call last):
  File "C:\Users\abels\AppData\Local\Temp\ipykernel_32416\1271874543.py", line 108, in <module>
    results = list(tqdm(executor.map(process_file, batch), total=len(batch),
  File "C:\Users\abels\.conda\envs\myJupenv\lib\site-packages\tqdm\std.py", line 1181, in __iter__
    for obj in iterable:
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\process.py", line 575, in _chain_from_iterable_of_lists
    for element in iterable:
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 621, in result_iterator
    yield _result_or_cancel(fs.pop())
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 319, in _result_or_cancel
    return fut.result(timeout)
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 458, in result
    return self.__get_result()
  File "C:\Users\abels\.conda\envs\myJupen


Batch 20:   0%|                                                                                                                                                                       | 0/100 [00:00<?, ?it/s]

2025-03-04 10:12:38,695 - ERROR: Batch 20 processing error:
Traceback (most recent call last):
  File "C:\Users\abels\AppData\Local\Temp\ipykernel_32416\1271874543.py", line 108, in <module>
    results = list(tqdm(executor.map(process_file, batch), total=len(batch),
  File "C:\Users\abels\.conda\envs\myJupenv\lib\site-packages\tqdm\std.py", line 1181, in __iter__
    for obj in iterable:
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\process.py", line 575, in _chain_from_iterable_of_lists
    for element in iterable:
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 621, in result_iterator
    yield _result_or_cancel(fs.pop())
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 319, in _result_or_cancel
    return fut.result(timeout)
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 458, in result
    return self.__get_result()
  File "C:\Users\abels\.conda\envs\myJupen


Batch 21:   0%|                                                                                                                                                                       | 0/100 [00:00<?, ?it/s]

2025-03-04 10:12:39,071 - ERROR: Batch 21 processing error:
Traceback (most recent call last):
  File "C:\Users\abels\AppData\Local\Temp\ipykernel_32416\1271874543.py", line 108, in <module>
    results = list(tqdm(executor.map(process_file, batch), total=len(batch),
  File "C:\Users\abels\.conda\envs\myJupenv\lib\site-packages\tqdm\std.py", line 1181, in __iter__
    for obj in iterable:
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\process.py", line 575, in _chain_from_iterable_of_lists
    for element in iterable:
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 621, in result_iterator
    yield _result_or_cancel(fs.pop())
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 319, in _result_or_cancel
    return fut.result(timeout)
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 458, in result
    return self.__get_result()
  File "C:\Users\abels\.conda\envs\myJupen


Batch 22:   0%|                                                                                                                                                                       | 0/100 [00:00<?, ?it/s]

2025-03-04 10:12:39,483 - ERROR: Batch 22 processing error:
Traceback (most recent call last):
  File "C:\Users\abels\AppData\Local\Temp\ipykernel_32416\1271874543.py", line 108, in <module>
    results = list(tqdm(executor.map(process_file, batch), total=len(batch),
  File "C:\Users\abels\.conda\envs\myJupenv\lib\site-packages\tqdm\std.py", line 1181, in __iter__
    for obj in iterable:
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\process.py", line 575, in _chain_from_iterable_of_lists
    for element in iterable:
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 621, in result_iterator
    yield _result_or_cancel(fs.pop())
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 319, in _result_or_cancel
    return fut.result(timeout)
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 458, in result
    return self.__get_result()
  File "C:\Users\abels\.conda\envs\myJupen


Batch 23:   0%|                                                                                                                                                                       | 0/100 [00:00<?, ?it/s]

2025-03-04 10:12:39,864 - ERROR: Batch 23 processing error:
Traceback (most recent call last):
  File "C:\Users\abels\AppData\Local\Temp\ipykernel_32416\1271874543.py", line 108, in <module>
    results = list(tqdm(executor.map(process_file, batch), total=len(batch),
  File "C:\Users\abels\.conda\envs\myJupenv\lib\site-packages\tqdm\std.py", line 1181, in __iter__
    for obj in iterable:
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\process.py", line 575, in _chain_from_iterable_of_lists
    for element in iterable:
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 621, in result_iterator
    yield _result_or_cancel(fs.pop())
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 319, in _result_or_cancel
    return fut.result(timeout)
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 458, in result
    return self.__get_result()
  File "C:\Users\abels\.conda\envs\myJupen


Batch 24:   0%|                                                                                                                                                                        | 0/28 [00:00<?, ?it/s]

2025-03-04 10:12:40,243 - ERROR: Batch 24 processing error:
Traceback (most recent call last):
  File "C:\Users\abels\AppData\Local\Temp\ipykernel_32416\1271874543.py", line 108, in <module>
    results = list(tqdm(executor.map(process_file, batch), total=len(batch),
  File "C:\Users\abels\.conda\envs\myJupenv\lib\site-packages\tqdm\std.py", line 1181, in __iter__
    for obj in iterable:
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\process.py", line 575, in _chain_from_iterable_of_lists
    for element in iterable:
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 621, in result_iterator
    yield _result_or_cancel(fs.pop())
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 319, in _result_or_cancel
    return fut.result(timeout)
  File "C:\Users\abels\.conda\envs\myJupenv\lib\concurrent\futures\_base.py", line 458, in result
    return self.__get_result()
  File "C:\Users\abels\.conda\envs\myJupen


