# Create a list of files to process

We start by creating a list of the files to process, which are located in a source (data) folder.

The list of files will be saved as a CSV file in the destination (project) folder.

The current source and destination folders are stored locally in a `config.yml` file.

In [None]:
import yaml
from pathlib import Path
from ipyfilechooser import FileChooser

# Restore the previously used source/destination folders from config.yml so
# the file choosers can start there. Anything missing, malformed, or pointing
# to a path that no longer exists falls back to "".
srcdir, dstdir = "", ""
config_path = Path("config.yml")
if config_path.exists():
    try:
        with open(config_path, "r") as file:
            config = yaml.safe_load(file)
    except (OSError, yaml.YAMLError) as err:
        # Reading the config is best-effort: a broken file must not prevent
        # the user from picking the folders manually.
        print("Warning: could not read config.yml:", err)
        config = None

    # safe_load returns None for an empty document and may return a list or
    # scalar for a hand-edited file; only a mapping is usable here.
    if not isinstance(config, dict):
        config = {}

    source = config.get("source")
    if isinstance(source, str) and source and Path(source).exists():
        srcdir = Path(source)

    destination = config.get("destination")
    if isinstance(destination, str) and destination and Path(destination).exists():
        dstdir = Path(destination)

# One chooser per folder; the selections are read back in the next cell.
fc1 = FileChooser(srcdir, select_desc="Source")
fc2 = FileChooser(dstdir, select_desc="Destination")
display(fc1)
display(fc2)

In [None]:
# Use the folders picked in the choosers; when nothing was selected, keep the
# value carried over from the previous cell (Path("") is the current directory).
srcdir = Path(fc1.selected) if fc1.selected is not None else Path(srcdir)
dstdir = Path(fc2.selected) if fc2.selected is not None else Path(dstdir)
filelistname = dstdir / "filelist.csv"
print("Source folder:", srcdir)
print("Destination folder :", dstdir)
print("Output file list :", filelistname)

# update yaml config file
# The mapping is dumped directly (instead of being interpolated into a YAML
# string and re-parsed) so paths containing quotes or escape characters are
# serialized correctly. Saving stays best-effort, but a failure is reported
# rather than silently ignored.
try:
    with open("config.yml", "w") as file:
        yaml.dump(
            {"source": srcdir.as_posix(), "destination": dstdir.as_posix()},
            file,
        )
except (OSError, yaml.YAMLError) as err:
    print("Warning: could not update config.yml:", err)

In [None]:
import pandas as pd

# Column layout of filelist.csv. Declaring it explicitly guarantees that the
# CSV has a header row even when the source folder contains no *.ims file.
columns = ["folder", "name", "channel1", "channel2", "channel3", "channel4"]

# One row per *.ims file found directly in the source folder (no recursion).
# Every file gets the same default channel labels. The matches are sorted
# because glob order depends on the filesystem, which would otherwise make
# the row order vary between runs.
records = [
    {
        "folder": p.parent,
        "name": p.name,
        "channel1": "ch1",
        "channel2": "ch2",
        "channel3": "ch3",
        "channel4": "nuclei",
    }
    for p in sorted(srcdir.glob("*.ims"))
]
filelist = pd.DataFrame(records, columns=columns)
filelist.to_csv(filelistname, index=False)

# Last expression of the cell: displays the table in the notebook.
filelist