In [1]:
import os
import json
import shutil

# Path to your metadata file (update if needed)
metadata_path = "/Users/Aminekhelif/Documents/DeepDub/metadata.json"

# Load metadata
with open(metadata_path, "r") as f:
    meta = json.load(f)

# If your structure is meta["files"] = { <file_path>: {...} }, we'll iterate over each file
files_dict = meta.get("files", {})

def find_correct_subfolder(split_path: str) -> str:
    """
    Given a path to split_audio or vocals, e.g.:
      /Users/.../DeepDub/test3/FILM46/VF/output_directory/vocals.mp3
    we want to extract the subfolder part, e.g. "VF" or "VFF" or "VFQ" or "VO_anglais".
    
    We'll assume the subfolder is the *basename* right before 'output_directory'.
    For example:
      split_path.split('/') => [..., 'FILM46', 'VF', 'output_directory', 'vocals.mp3']
      subfolder_name = 'VF'
    
    This is robust for bigger datasets, as it doesn't require a fixed 'FILM46' in the path.
    """
    parts = split_path.split("/")
    # find index of 'output_directory'
    try:
        idx = parts.index("output_directory")
        if idx > 0:
            # The subfolder is the part just before 'output_directory'
            return parts[idx - 1]
        else:
            return None
    except ValueError:
        return None

def build_correct_diar_path(diar_path: str, subfolder: str) -> str:
    """
    If diar_path is:
      /Users/.../test3/FILM46/output_directory/diarization/diar_simple.json
    but we want:
      /Users/.../test3/FILM46/VF/output_directory/diarization/diar_simple.json
    We'll insert the subfolder right before 'output_directory'.
    
    If diar_path already has that subfolder, we won't duplicate it.
    """
    if not diar_path:
        return None
    
    parts = diar_path.split("/")
    if "output_directory" not in parts:
        # Unexpected path
        return diar_path
    
    # If subfolder is already in there, skip
    if subfolder in parts:
        # Already correct
        return diar_path
    
    try:
        idx = parts.index("output_directory")
        # Insert subfolder right before output_directory
        parts.insert(idx, subfolder)
        return "/".join(parts)
    except ValueError:
        # No output_directory found
        return diar_path

def move_folder_if_exists(old_dir: str, new_dir: str):
    """
    Physically move the entire diarization folder from old_dir to new_dir if old_dir exists
    and new_dir doesn't yet.
    """
    if not old_dir or not new_dir:
        return
    
    if os.path.exists(old_dir):
        if not os.path.exists(new_dir):
            # Make parent folder
            os.makedirs(os.path.dirname(new_dir), exist_ok=True)
            shutil.move(old_dir, new_dir)
            print(f"Moved folder:\n  FROM: {old_dir}\n  TO:   {new_dir}")
        else:
            print(f"Skipped move: new folder {new_dir} already exists.")
    else:
        print(f"Skipped move: old folder {old_dir} does not exist.")

# Optional: If you want to re-run diar for English, define a function to remove diarization_data
def remove_diar_for_english(meta, file_path):
    """
    Removes diarization_data from the English file so you can re-run diarization.
    """
    if file_path in meta["files"]:
        if "diarization_data" in meta["files"][file_path]:
            del meta["files"][file_path]["diarization_data"]
            print(f"Removed diarization_data for English: {file_path}")

# Loop over each file in metadata
for file_path, info in files_dict.items():
    diar_path = info.get("diarization_data")
    if not diar_path:
        continue  # No diar done for this file
    
    # We'll attempt to detect the correct subfolder from e.g. split_audio or vocals
    # check whichever is present
    # e.g. split_audio:
    split_aud = info.get("split_audio")
    if not split_aud:
        # maybe check vocals
        split_aud = info.get("vocals")
        if not split_aud:
            # fallback
            print(f"No splitted or vocals found for {file_path}. Skipping fix.")
            continue
    
    # subfolder might be VF, VFQ, VFF, or VO_anglais
    subfolder = find_correct_subfolder(split_aud)
    if not subfolder:
        print(f"Could not detect subfolder for file {file_path}")
        continue
    
    # Build the correct diar path
    correct_diar_path = build_correct_diar_path(diar_path, subfolder)
    
    if correct_diar_path and correct_diar_path != diar_path:
        # Move the diarization folder from old location to new
        old_dir = os.path.dirname(diar_path)  # e.g. .../output_directory/diarization
        new_dir = os.path.dirname(correct_diar_path)  # e.g. .../VF/output_directory/diarization
        
        move_folder_if_exists(old_dir, new_dir)
        
        # Update metadata
        meta["files"][file_path]["diarization_data"] = correct_diar_path
        print(f"Updated diarization_data in metadata:\n  FROM: {diar_path}\n  TO:   {correct_diar_path}")

# Example: if you specifically want to remove English diarization so it re-runs
# let’s assume your English files contain "vo_anglais" in the path or name
for file_path, info in files_dict.items():
    if "vo_anglais" in file_path.lower():
        remove_diar_for_english(meta, file_path)

# Finally, save updated metadata
with open(metadata_path, "w") as f:
    json.dump(meta, f, indent=4)

print("Done updating metadata. You can now re-run your pipeline.")

Moved folder:
  FROM: /Users/Aminekhelif/Documents/DeepDub/test3/FILM46/output_directory/diarization
  TO:   /Users/Aminekhelif/Documents/DeepDub/test3/FILM46/VF/output_directory/diarization
Updated diarization_data in metadata:
  FROM: /Users/Aminekhelif/Documents/DeepDub/test3/FILM46/output_directory/diarization/diar_simple.json
  TO:   /Users/Aminekhelif/Documents/DeepDub/test3/FILM46/VF/output_directory/diarization/diar_simple.json
Skipped move: old folder /Users/Aminekhelif/Documents/DeepDub/test3/FILM46/output_directory/diarization does not exist.
Updated diarization_data in metadata:
  FROM: /Users/Aminekhelif/Documents/DeepDub/test3/FILM46/output_directory/diarization/diar_simple.json
  TO:   /Users/Aminekhelif/Documents/DeepDub/test3/FILM46/VO_anglais/output_directory/diarization/diar_simple.json
Moved folder:
  FROM: /Users/Aminekhelif/Documents/DeepDub/test3/FILM33/output_directory/diarization
  TO:   /Users/Aminekhelif/Documents/DeepDub/test3/FILM33/VF/output_directory/diar