# Process all files

In this notebook, we load the list of files and process them in parallel.
The results are saved to an HDF5 file.


In [8]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Load the list of files

In [10]:
import pandas as pd
from pathlib import Path

# Source folder with the raw data and destination folder for this analysis.
src = Path('/media/cephfs2/jparham/Joe for Jerome /')
dst = Path('/media/cephfs2/jeromeb/userdata/Baum_group/jparham/Analysis8')

# The list of files is read from 'filelist.csv' in the destination folder,
# using the first CSV column as the index.
filelist = pd.read_csv(dst.joinpath('filelist.csv'), index_col=0)
print(f"Number of files {len(filelist)}")
filelist.head()


Number of files 62


Unnamed: 0,path,name,condition
0,/media/cephfs2/jparham/Joe for Jerome /SegA Li...,01a_delSegA_CM_SyS_reg_Cell_0.tif,sega
1,/media/cephfs2/jparham/Joe for Jerome /SegA Li...,01a_delSegA_CM_SyS_reg_Cell_12.tif,sega
2,/media/cephfs2/jparham/Joe for Jerome /SegA Li...,01a_delSegA_CM_SyS_reg_Cell_13.tif,sega
3,/media/cephfs2/jparham/Joe for Jerome /SegA Li...,01a_delSegA_CM_SyS_reg_Cell_3.tif,sega
4,/media/cephfs2/jparham/Joe for Jerome /SegA Li...,01a_delSegA_CM_SyS_reg_Cell_11.tif,sega


## Processing

Files are processed in parallel.

In [None]:
from dask.distributed import LocalCluster
# Start a Dask cluster on the local machine and connect a client to it.
cluster = LocalCluster()
client = cluster.get_client()
# Request six workers for the parallel processing.
cluster.scale(6)
# Last expression of the cell: displays the cluster in the notebook.
cluster

In [None]:
import motionquant as mq
import dask
from dask.distributed import Lock
import traceback

def process(filename, results_path):
    """Process one file and save its results in the shared HDF5 file.

    Parameters
    ----------
    filename : str or Path
        Path of the file passed to ``mq.process``; its stem is used as the
        name under which the results are saved and recorded.
    results_path : Path
        File that ``mq.save_result`` writes into.

    Returns
    -------
    The value returned by ``mq.record`` for this file (presumably a
    DataFrame, since the caller concatenates the results with ``pd.concat``).
    """
    name = Path(filename).stem
    img, mask, position, speed, diff, flow, rho, div, blob = mq.process(Path(filename))
    # All tasks write into the same results file, so the save is guarded by a
    # named distributed lock shared between the workers.
    lock = Lock('process-sufo')
    lock.acquire()
    try:
        mq.save_result(results_path, name, img, mask, position, speed, diff, flow, rho, div, blob)
    finally:
        # Release even if saving fails; otherwise the remaining tasks would
        # wait on the lock forever.
        lock.release()
    df = mq.record(name, img, mask, position, speed, diff, flow, rho, div, blob)
    return df

def process_safe(filename, results_path):
    """Run ``process`` on one file, reporting any exception instead of raising.

    Parameters
    ----------
    filename : str or Path
        Path of the file to process.
    results_path : Path
        File in which the results are saved.

    Returns
    -------
    The value returned by ``process``, or ``None`` when processing failed.
    """
    try:
        return process(filename, results_path)
    except Exception as e:
        print(f"file '{filename}' could not be processed")
        print(e)
        # print_exc() writes the traceback itself and returns None, so it
        # must not be wrapped in print() (that added a stray "None" line).
        traceback.print_exc()
        return None

# Remove any existing results file so that this run starts from an empty file.
results_path = dst / 'results.h5'
if results_path.exists():
    results_path.unlink()

print(f"Saving results in file '{results_path}'")
# One delayed task per file path; iterate over the column itself rather than
# over its .iloc indexer.
tsk = [dask.delayed(process_safe)(filename, results_path) for filename in filelist['path']]
res = dask.compute(tsk)
# process_safe returns None for files that failed: drop those entries
# explicitly and report how many there were before concatenating.
frames = [r for r in res[0] if r is not None]
n_failed = len(res[0]) - len(frames)
if n_failed:
    print(f"{n_failed} file(s) could not be processed")
df = pd.concat(frames)

In [7]:
# Write the concatenated records to 'results.csv' in the destination folder.
df.to_csv(dst.joinpath('results.csv'))