# Batch processing
When several files have to be processed, it can be useful to first list them
in a table, filter out some of them, and then process the remaining ones.

In [None]:
from pathlib import Path
import pandas as pd

# Define the path to the folder that contains the data
folder = Path('../data')

# List the nd2 files located one sub-folder below `folder`. The glob is
# evaluated only once (and sorted) so that both columns are built from the
# same files, in the same reproducible order.
nd2_files = sorted(folder.glob('*/*.nd2'))

# Save them in a pandas DataFrame:
# - 'filename' is the path relative to `folder` (no leading separator, so
#   that `folder / filename` points back to the file)
# - 'filesize' is the file size in bytes divided by 1e9 (gigabytes)
dataset = pd.DataFrame({
    'filename': [x.relative_to(folder) for x in nd2_files],
    'filesize': [x.stat().st_size / 1e9 for x in nd2_files],
    })

dataset

Let's add a column using a regular expression `.*Developmental stage ([0-9]).*`
to extract the stage from the folder name:

In [None]:
import re
# Compile the pattern once; its capture group holds the single stage digit
stage_pattern = re.compile(r'.*Developmental stage ([0-9]).*')

# Substitute the matched text by the captured digit and convert it to an integer
dataset['stage'] = [
    int(stage_pattern.sub(r'\1', str(path)))
    for path in dataset['filename']
]
dataset

Keep only the rows that pass a test (here, a file size above 0.1):

In [None]:
# Build a boolean mask and keep only the rows whose 'filesize' exceeds 0.1
is_large_enough = dataset['filesize'] > 0.1
dataset = dataset[is_large_enough]
# or dataset = dataset.query('filesize>0.1')

We can now process the files we have selected:

In [None]:
def process_row(input: dict) -> pd.DataFrame:
    """Turn one record of the dataset into a one-row pandas data frame.

    The record must provide the keys 'filename' and 'stage'. The returned
    frame has the columns 'filename' (copied from the record), 'x' (the
    stage plus one) and 'y' (the constant 7).
    """
    # Placeholder for the actual work, for example:
    #   img = tiff.imread(input['filename'])
    # followed by the image processing or any other operation.
    filename = input['filename']
    shifted_stage = input['stage'] + 1

    # Each value is wrapped in a list so that the frame has exactly one row
    columns = {'filename': [filename], 'x': [shifted_stage], 'y': [7]}
    return pd.DataFrame(columns)

# Convert the table to a list of dictionaries, one per row
records = dataset.to_dict(orient='records')

# Process every row, then stack the one-row frames into a single table
# with a fresh 0..n-1 index
frames = [process_row(record) for record in records]
result = pd.concat(frames, ignore_index=True)
result

Once the processing is done, we merge the results with the original input
table in order to keep the original information about the dataset:

In [None]:
# Inner join of the input table with the results; since no key is given,
# pandas joins on the columns that both tables share
result = pd.merge(dataset, result, how='inner')