In [2]:
import os, sys, requests, parse
from fnmatch import fnmatch
from pathlib import Path, PurePath
from pprint import pprint


# Ingest endpoint that `req` POSTs to (loopback address, port 9091).
URL = "http://127.0.0.1:9091/api/v1/ingest"
# Headers sent along with every request issued by `req`.
HEADERS = {"Content-type": "application/json"}
def req(data):
    """POST ``data`` to the ingest endpoint and return the decoded JSON reply.

    The payload is wrapped as ``{"data": data}`` and sent to ``URL`` together
    with ``HEADERS``. Whatever ``Response.json()`` decodes is returned as-is.
    """
    payload = {"data": data}
    response = requests.post(URL, json=payload, headers=HEADERS)
    return response.json()
# Locations scanned for asset files. The collection loop further down expands
# directories recursively and keeps any other path as a single entry.
# NOTE(review): these are absolute, machine-specific paths — presumably they
# should come from a configurable base directory; confirm before sharing.
FILES = [
    "/Users/george/Downloads/textures",
    "/Users/george/Downloads/textures-2",
    "/Users/george/Downloads/textures-3",
    "/Users/george/Downloads/textures-4",
    "/Users/george/Downloads/textures-5",
    "/Users/george/Downloads/hdri"
]

In [47]:
def trim_filepaths(files):
    """Strip the directory prefix shared by all ``files``.

    Paths are compared component by component (split on ``/``). Only the
    directory components take part in the comparison, so the last component
    of a path (its file name) is never trimmed away.

    The previous implementation had several problems:
    - its inner ``break`` never left the outer loop, so the "common" prefix
      was really just the grandparent directory of ``files[0]``;
    - it raised ``IndexError`` on an empty list;
    - it used ``str.replace``, which also removed the prefix text when it
      re-appeared in the middle of a path;
    - paths shallower than ``files[0]`` were left untrimmed.

    Note that when every path lives in one and the same directory, that
    directory is the common prefix and only the file names remain.

    Args:
        files: POSIX-style path strings (``/`` separated).

    Returns:
        A new list, in the same order, with the shared leading directories
        removed from every path. An empty input yields an empty list.
    """
    if not files:
        return []

    split_paths = [path.split("/") for path in files]

    # Count how many leading directory components are identical everywhere.
    # zip() stops at the shallowest path, so deeper paths cannot overrun it.
    common_len = 0
    for level in zip(*(parts[:-1] for parts in split_paths)):
        if len(set(level)) != 1:
            break
        common_len += 1

    return ["/".join(parts[common_len:]) for parts in split_paths]

# Expand every configured location: a directory contributes everything found
# beneath it (recursively), anything else is kept as a single entry.
files = []
for source in map(Path, FILES):
    if not source.is_dir():
        files.append(source)
        continue
    files.extend(source.glob("**/*"))
# Debugging aid — uncomment to work on a small sample only:
# files = files[:10]

# String forms used by the rule engine: full POSIX paths and the same paths
# after trim_filepaths() has processed them.
files_posix = [path.as_posix() for path in files]
files_trimmed = trim_filepaths(files_posix)

print("INPUT:")
pprint(files_posix)
print("\nTRIMMED:")
pprint(files_trimmed)

# Rule schema, as consumed by the rule-application loop:
#   file_target       glob fragment; the rule applies when it matches
#   file_target_type  what the fragment is matched against:
#                     "directory", "filename", anything else = whole path
#   rule_type         "extract_info", "replace_value" or "set_value"
#   rule_target       extract_info: string to parse ("filename", "directory",
#                     "entire_path"); other rule types: the key being set
#   rule_value        extract_info: parse pattern; otherwise the value stored
rules = [
    {
        "file_target": "*",
        "file_target_type": "directory",
        "rule_type": "extract_info",
        "rule_target": "filename",
        "rule_value": "{name.0}_{name.1}_{name.2}_{comp.0}_{}.{ext}",
    },
    {
        "file_target": "asphalt",
        "file_target_type": "filename",
        "rule_type": "extract_info",
        "rule_target": "filename",
        "rule_value": "{name.0}_{name.1}_{comp}_{}.{ext}",
    },
]

# Defined but deliberately not registered: the append below is disabled.
rule = {
    "file_target": "*",
    "file_target_type": "filename",
    "rule_type": "replace_value",
    "rule_target": "nor",
    "rule_value": "norm",
}
# rules.append(rule)

# One record per discovered file. The rules fill in:
#   data            fields captured by "extract_info" rules
#   replace_values  substring -> replacement, from "replace_value" rules
#   set_values      key -> fixed value, from "set_value" rules
#   pattern         (added on demand) the last pattern that parsed successfully
results = [
    {
        "trimmed": trimmed,
        "file": file,
        "data": {},
        "replace_values": {},
        "set_values": {},
    }
    for file, trimmed in zip(files, files_trimmed)
]

for result in results:
    file = result["file"]
    trimmed = result["trimmed"]
    filename = file.name
    # Strings a rule's "file_target" glob may be matched against; any other
    # "file_target_type" falls back to the whole path.
    match_targets = {"directory": str(file.parent), "filename": filename}

    for rule in rules:
        file_target = match_targets.get(rule["file_target_type"], str(file))
        if not fnmatch(file_target, f"*{rule['file_target']}*"):
            continue

        rule_type = rule["rule_type"]
        if rule_type == "extract_info":
            rule_value = rule["rule_value"]
            if rule["rule_target"] == "directory":
                # Drop the file name as a suffix. str.rstrip() must not be
                # used for this: it treats its argument as a character set.
                if trimmed.endswith(filename):
                    expr_target = trimmed[:len(trimmed) - len(filename)]
                else:
                    expr_target = trimmed
            elif rule["rule_target"] == "filename":
                expr_target = filename
            else:
                # "entire_path" (or anything unrecognised): parse the trimmed
                # path. Previously this left expr_target undefined or stale.
                expr_target = trimmed

            parsed = parse.parse(rule_value, expr_target)
            if not parsed:
                continue
            # Record the pattern only once it has matched, so a failed rule
            # cannot replace the pattern that produced the stored data.
            result["pattern"] = rule_value
            result["data"].update(parsed.named)
        elif rule_type == "replace_value":
            result["replace_values"][rule["rule_target"]] = rule["rule_value"]
        elif rule_type == "set_value":
            # The record key is "set_values"; "set_value" raised KeyError.
            result["set_values"][rule["rule_target"]] = rule["rule_value"]

# Fields every result is reduced to. A field is either captured directly
# (key "name") or in numbered pieces (keys "name.0", "name.1", ...) that are
# stitched back together using the literal text found between the
# corresponding placeholders of the pattern that was parsed.
fields = ("name", "comp")


def _separator_between(pattern, left_key, right_key):
    """Return the literal text between two placeholders of ``pattern``.

    The text is what sits between ``{left_key}`` and the next ``{right_key}``.
    An empty string is returned when either placeholder is missing from the
    pattern, e.g. because the captures came from a different rule.
    """
    _, left_found, tail = pattern.partition("{" + left_key + "}")
    literal, right_found, _ = tail.partition("{" + right_key + "}")
    return literal if left_found and right_found else ""


for result in results:
    data = result["data"]
    if not data:
        continue
    pattern = result.get("pattern", "")
    processed = {}
    for field in fields:
        if field in data:
            processed[field] = data[field]
            continue

        # Collect "<field>.<index>" keys only. A plain substring test would
        # also pick up unrelated keys and crash on non-numeric suffixes.
        prefix = field + "."
        indexed = sorted(
            (int(key[len(prefix):]), key)
            for key in data
            if key.startswith(prefix) and key[len(prefix):].isdecimal()
        )
        if not indexed:
            # Nothing was captured for this field; leave it out rather than
            # failing on an empty list.
            continue

        ordered = [key for _, key in indexed]
        value = data[ordered[0]]
        for left_key, right_key in zip(ordered, ordered[1:]):
            value += _separator_between(pattern, left_key, right_key) + data[right_key]
        processed[field] = value
    result["data"] = processed

def replace_values(value, replace_data):
    """Apply every ``old -> new`` substitution of ``replace_data`` to ``value``.

    Substitutions run one after another in the mapping's iteration order, so
    a later entry also sees text produced by an earlier one.

    Args:
        value: The string to rewrite.
        replace_data: Mapping of substring to its replacement.

    Returns:
        The rewritten string; ``value`` unchanged when nothing matches.
    """
    rewritten = value
    for old, new in replace_data.items():
        # str.replace is already a no-op when ``old`` does not occur.
        rewritten = rewritten.replace(old, new)
    return rewritten

# Group the per-file results into assets keyed by name; every file becomes
# one component ("comp") of its asset. Files that produced neither extracted
# data nor an explicit name are reported as unmatched.
assets = {}
unmatched = []
for result in results:
    data = result["data"]
    _set_values = result["set_values"]
    _replace_values = result["replace_values"]
    if not data and not _set_values.get("name"):
        unmatched.append(str(result["file"]))
        continue

    # A name fixed by a "set_value" rule wins over the extracted one.
    name = _set_values.get("name") or replace_values(
        data.get("name", ""), _replace_values
    )
    comp = {
        # "comp" may be missing, e.g. when the name came from set_values and
        # nothing was extracted; indexing data["comp"] raised KeyError then.
        "name": replace_values(data.get("comp", ""), _replace_values),
        "file": result["file"].name,
    }
    assets.setdefault(name, {"comps": []})["comps"].append(comp)

print("\nRESULT:")
pprint(assets)

print("\nUNMATCHED:")
pprint(unmatched)


INPUT:
['/Users/george/Downloads/textures/weathered_brown_planks_disp_2k.exr',
 '/Users/george/Downloads/textures/weathered_brown_planks_diff_2k.exr',
 '/Users/george/Downloads/textures/weathered_brown_planks_nor_gl_2k.exr',
 '/Users/george/Downloads/textures/weathered_brown_planks_rough_2k.exr',
 '/Users/george/Downloads/textures-2/coast_sand_01_nor_gl_2k.exr',
 '/Users/george/Downloads/textures-2/coast_sand_01_rough_2k.exr',
 '/Users/george/Downloads/textures-2/coast_sand_01_diff_2k.exr',
 '/Users/george/Downloads/textures-2/coast_sand_01_disp_2k.exr',
 '/Users/george/Downloads/textures-3/red_bricks_04_rough_2k.exr',
 '/Users/george/Downloads/textures-3/red_bricks_04_disp_2k.exr',
 '/Users/george/Downloads/textures-3/red_bricks_04_diff_2k.exr',
 '/Users/george/Downloads/textures-3/red_bricks_04_nor_gl_2k.exr',
 '/Users/george/Downloads/textures-4/forest_leaves_04_disp_2k.exr',
 '/Users/george/Downloads/textures-4/forest_leaves_04_diff_2k.exr',
 '/Users/george/Downloads/textures-4/for