In [1]:
import os
import re
import shutil
import traceback
from collections import defaultdict

# Root folder to reorganize -- edit this path before running the cell.
root_dir = r"C:\Users\galen\My Drive\社區醫學\Mededbot-多語言衛教AI\ChatbotTexts"

# Name of the folder (directly under the root) that receives every file
# that cannot be filed under a user/extension pair.
UNCLASSIFIED_DIR = "__unclassified"

# Lower-case extensions (without the dot) that are accepted for filing.
VALID_EXTS = {"txt", "mp3", "m4a", "wav"}

# Accepts "<user_id>-YYYYMMDD_HHMMSS.<ext>" and "<user_id>_YYYYMMDD_HHMMSS.<ext>".
# The user_id group is non-greedy, so it stops at the last separator that is
# directly followed by the timestamp.
FILENAME_PATTERN = re.compile(r'^(?P<user_id>.+?)[-_](?P<datetime>\d{8}_\d{6})\.(?P<ext>\w+)$')

def organize_files(base_dir: str) -> tuple:
    """Move every file under ``base_dir`` into its canonical folder.

    A file whose name matches ``FILENAME_PATTERN`` and whose extension
    (case-insensitively) is in ``VALID_EXTS`` goes to
    ``<base_dir>/<user_id>/<ext>/``.  Every other file goes to
    ``<base_dir>/<UNCLASSIFIED_DIR>/<name of the folder it was found in>/``.

    Args:
        base_dir: Root of the tree to reorganize.

    Returns:
        A tuple ``(scanned, moved, invalid, counts, errors)``:

        * ``scanned`` -- number of files examined (each file exactly once).
        * ``moved`` -- number of files actually relocated.
        * ``invalid`` -- number of files routed to the unclassified area.
        * ``counts`` -- ``defaultdict(int)`` keyed by the basename of the
          destination folder, counting files that ended up (or already
          were) in place.
        * ``errors`` -- list of ``(path relative to base_dir, message)``
          for files that could not be filed.
    """
    scanned = 0
    moved = 0
    invalid = 0
    counts = defaultdict(int)
    errors = []

    # Snapshot the tree before touching it.  os.walk is lazy, so moving files
    # into sub-folders of base_dir while iterating would make it rediscover
    # files that were just moved and count them a second time.
    snapshot = [
        (dirpath, list(filenames))
        for dirpath, _, filenames in os.walk(base_dir)
    ]

    for dirpath, filenames in snapshot:
        for name in filenames:
            scanned += 1
            src = os.path.join(dirpath, name)
            m = FILENAME_PATTERN.match(name)
            ext = m.group("ext").lower() if m else None

            if ext in VALID_EXTS:
                dest_dir = os.path.join(base_dir, m.group("user_id"), ext)
            else:
                # No match, or a matched name with an extension we do not file.
                dest_dir = os.path.join(
                    base_dir, UNCLASSIFIED_DIR, os.path.basename(dirpath)
                )
                invalid += 1

            dest_path = os.path.join(dest_dir, name)
            try:
                os.makedirs(dest_dir, exist_ok=True)
                if os.path.abspath(src) != os.path.abspath(dest_path):
                    # shutil.move may silently replace an existing file, so a
                    # name collision would destroy data.  samefile() lets a
                    # pure case-only rename on a case-insensitive filesystem
                    # through.
                    if os.path.exists(dest_path) and not os.path.samefile(src, dest_path):
                        raise FileExistsError(
                            f"destination already exists: {dest_path}"
                        )
                    shutil.move(src, dest_path)
                    moved += 1
                counts[os.path.basename(dest_dir)] += 1
            except OSError:
                # Keep going: one unmovable file must not abort the whole run.
                # Only the final traceback line ("ErrType: message") is kept.
                err = traceback.format_exc().splitlines()[-1]
                errors.append((os.path.relpath(src, base_dir), err))

    return scanned, moved, invalid, counts, errors

def remove_empty_dirs(base_dir: str) -> int:
    """Delete folders under ``base_dir`` that are empty or hold only ``.ini`` files.

    The tree is walked bottom-up and each folder is re-listed at the moment
    it is visited, so a parent that becomes empty because its children were
    just removed is removed as well.  ``base_dir`` itself is never deleted.

    Removal is best-effort: a folder that cannot be listed or deleted
    (for example because of a permission error) is skipped silently and not
    counted.

    Args:
        base_dir: Root of the tree to clean up.

    Returns:
        The number of folders that were removed.
    """
    removed = 0
    # The dirnames/filenames yielded by a bottom-up walk were captured before
    # the children were processed, so they are stale here and deliberately
    # ignored in favour of a fresh listing.
    for dirpath, _, _ in os.walk(base_dir, topdown=False):
        if dirpath == base_dir:
            continue
        try:
            with os.scandir(dirpath) as it:
                entries = list(it)
            # Keep the folder if it still has a sub-folder or any non-.ini file.
            if any(
                entry.is_dir() or not entry.name.lower().endswith('.ini')
                for entry in entries
            ):
                continue
            for entry in entries:
                os.remove(entry.path)
            os.rmdir(dirpath)
        except OSError:
            # Deliberate best-effort: leave folders we cannot clean in place.
            continue
        removed += 1
    return removed

# Run: reorganize everything under root_dir, print a summary of that pass,
# then clean up the folders the moves left empty.
scanned, moved, invalid, counts, errors = organize_files(root_dir)
print(f"Total files scanned:      {scanned}")
print(f"✅ Files moved:            {moved}")
print(f"🚫 Invalid/misfiled files: {invalid}")
print("Counts by final folder:")
# counts is keyed by the basename of each destination folder, so `ext` is an
# extension for classified files and a source-folder name for unclassified ones.
for ext, cnt in counts.items():
    print(f"  {ext}: {cnt}")
# Each error is a (path relative to root_dir, last traceback line) pair.
if errors:
    print("❌ Errors:")
    for fname, err in errors:
        print(f" - {fname}: {err}")

# Cleanup runs after the report so the counts above describe the move pass only.
removed = remove_empty_dirs(root_dir)
print(f"🗑️ Empty folders removed: {removed}")


Total files scanned:      721
✅ Files moved:            137
🚫 Invalid/misfiled files: 51
Counts by final folder:
  ChatbotTexts: 1
  txt: 558
  wav: 57
  U08cc0e5e1eedaf90b8871aec941bb6d1: 1
  20250601: 1
  U0f0bdb9fc4d6c3ad8d7a3e9139998c92: 1
  20250514: 1
  U216749c3d1686e7ab509ee299536e226: 1
  20250526: 1
  U26bea4fde7b6b43342a04675f2a92470: 1
  20250512: 1
  U54a5560f9b36e7b647d804a45a105403: 1
  20250519: 1
  20250606: 1
  U5a4654a3405dd638fc4bee134acf29a2: 1
  U61539d6d1bd2f05b77ea99c84332ce93: 1
  m4a: 58
  U87d1b33f17cb39f92e55db0c92b057ff: 1
  20250515: 1
  U9ffa74e1b3f9842abf6966122130c7d2: 1
  20250604: 1
  Ua5952aed7d4c22e7c29449f27d198401: 1
  20250530: 1
  Uae43c1ad29699f302b59be86024a058d: 1
  Ub05a858ae099002ed29942b57cb8e220: 1
  20250510: 1
  Uc721d654582e24d84713fddd7839865f: 1
  20250605: 1
  20250608: 1
  20250609: 1
  Uf5306c3757c9c6eb526e7a8e1491ca2b: 1
❌ Errors:
 - U0f0bdb9fc4d6c3ad8d7a3e9139998c92\20250601\desktop.ini: PermissionError: [Errno 13] Permission de