In [1]:
import os
import json
from datetime import datetime
import fbpyutils
from fbpyutils import file as fu
# Resolve the fbpyutils environment object and logger once, up front, so every
# later cell shares the same instances.
env = fbpyutils.get_env()
logger = fbpyutils.get_logger()

# Record which application configuration this session is running against.
logger.info(f"Running using AppConfig: {env.APP}")

2025-09-07 18:35:04,666 - fbpyutils - INFO - setup completed.
2025-09-07 18:35:04,964 - fbpyutils - INFO - Running using AppConfig: name='FBPyUtils' version='1.6.13' environment='dev' appcode='FBPYUTILS' year=2025


In [None]:
from tqdm.notebook import tqdm
from tqdm.contrib.concurrent import process_map
import multiprocessing

In [None]:
def _describe_file_entry(f):
    """Describe a single file and wrap the outcome in a report entry.

    Defined at module level (not nested inside ``process_source``) because
    ``process_map`` hands the callable to worker processes through pickle,
    and pickle cannot serialize a function defined inside another function.

    NOTE(review): with the ``spawn`` start method the workers must also be
    able to import this function; if it only exists in a notebook cell that
    may still fail -- consider moving it to an importable module. TODO confirm
    in the target environment.

    Args:
        f: Path of the file to describe, as returned by ``fu.find``.

    Returns:
        dict: Keys ``file_path``, ``status`` (``"OK"`` or ``"ERROR"``),
        ``status_message`` (``None`` or the error text) and ``details``
        (whatever ``fu.describe_file`` returned, or ``None`` on error).
    """
    file_report = {
        "file_path": f,
        "status": "OK",
        "status_message": None,
    }
    try:
        file_report["details"] = fu.describe_file(f)
        logger.info(f"File {f} successfully described.")
    except Exception as e:
        # Best effort: one unreadable file must not abort the whole source.
        file_report["details"] = None
        file_report["status"] = "ERROR"
        file_report["status_message"] = str(e)
        logger.error(f"Error trying to descrige file {f}: {str(e)}.")
    return file_report


def process_source(source_dir, source_mask, source_label, exclusion_list, report_target_path):
    """Describe every selected file under a source and write a JSON report.

    Args:
        source_dir: Root directory handed to ``fu.find`` (searched recursively).
        source_mask: File mask handed to ``fu.find``.
        source_label: Name stored in the report and used in the report file name.
        exclusion_list: Iterable of substrings; a file is skipped when its path
            contains any of them.
        report_target_path: Directory in which the report file is created.

    Returns:
        dict: The report, with keys ``source_name``, ``source_dir``,
        ``source_mask``, ``start_extraction``, ``files`` (one entry per
        processed file), ``status`` (``"OK"`` or ``"FAIL: <message>"``) and
        ``end_extraction``. Failures are captured in ``status`` rather than
        raised.
    """
    report = {
        "source_name": source_label,
        "source_dir": source_dir,
        "source_mask": source_mask,
        "start_extraction": datetime.now(),
        "files": [],
        "status": "OK"
    }
    try:
        files_on_source = fu.find(source_dir, source_mask, recurse=True, parallel=True)
        logger.info(f"Found {len(files_on_source)} files on {source_label} ({source_dir})")

        # Keep only the files whose path contains none of the excluded fragments.
        files_to_process = [
            f for f in files_on_source
            if not any(excluded in f for excluded in exclusion_list)
        ]
        logger.info(f"Selected {len(files_to_process)} files on {source_label} ({source_dir})")

        report["files"] = process_map(
            _describe_file_entry,
            files_to_process,
            max_workers=multiprocessing.cpu_count(),
            desc=f"Processing {len(files_to_process)} files on {source_dir}...",
            chunksize=250
        )

        # The end timestamp must exist before it is used in the file name and
        # before the report is serialized, so the file on disk carries it too.
        report["end_extraction"] = datetime.now()

        report_file = os.path.join(
            report_target_path,
            f"REPORT_{source_label}_{report['end_extraction'].strftime('%Y%m%d%H%M%S')}.json"
        )
        # Serialize first so a serialization error does not leave an empty file.
        # default=str renders the datetime values (and any other non-JSON type).
        payload = json.dumps(report, ensure_ascii=False, allow_nan=True, default=str, sort_keys=False, indent=4)
        with open(report_file, "w", encoding="utf-8") as fp:
            fp.write(payload)
        logger.info(f"Report for {source_label} writed at {report_file}.")
    except Exception as e:
        # Keep the best-effort contract (never raise), but make the failure
        # visible in the log as well as in the returned report.
        report["status"] = f"FAIL: {str(e)}"
        logger.error(f"Processing of {source_label} ({source_dir}) failed: {str(e)}.")

    # A failure before the extraction finished leaves the end timestamp unset.
    if "end_extraction" not in report:
        report["end_extraction"] = datetime.now()
    return report

# Path fragments that disqualify a file when they appear anywhere in its path:
# any component starting with ".", plus the AppData and CrossDevice folders.
exclusion_list = (
    f"{os.path.sep}.",
    f"{os.path.sep}AppData{os.path.sep}",
    f"{os.path.sep}CrossDevice{os.path.sep}",
)

# Directory that receives the JSON report written for each source.
report_target_path = r"C:\Users\fcjbispo\Downloads"

# One tuple per source, in process_source() argument order:
# (source_dir, source_mask, source_label, exclusion_list, report_target_path)
source_list = (
    ("I:/", "*", "YGGDRASIL_DISK_IA", exclusion_list, report_target_path),
    ("G:/Meu Drive", "*", "YGGDRASIL_GDRIVE", exclusion_list, report_target_path),
    ("C:/Users/fcjbispo", "*", "YGGDRASIL_FCBISPO_HOME", exclusion_list, report_target_path),
    ("E:/Shared", "*", "YGGDRASIL_DATA_SHARED", exclusion_list, report_target_path)
)

# Sources are handled one at a time: process_source() already fans out over
# the files of a source with process_map, so the outer loop stays sequential.
# The call lives inside the loop so that every source is processed, and the
# tuple is passed through unchanged so the loop does not rebind the
# module-level exclusion_list / report_target_path.
resultados = []
for source in tqdm(source_list, desc="Processing..."):
    resultados.append(process_source(*source))