In [4]:
import os
import pandas as pd
import shutil
import stat

In [5]:
def handle_remove_readonly(func, path, _):
    """Clear read-only flag and retry deletion."""
    os.chmod(path, stat.S_IWRITE)
    func(path)

def clean_clone_directory(dataframe, clone_path):
    # Normalize full_name column to match folder names (replace `/` with `_`)
    valid_folders = set(dataframe['full_name'].str.replace('/', '_', regex=False))

    # List all folders in the clone directory
    all_folders = [f for f in os.listdir(clone_path) if os.path.isdir(os.path.join(clone_path, f))]

    # Identify and remove folders not in the valid set
    folders_to_delete = [f for f in all_folders if f not in valid_folders]

    for folder in folders_to_delete:
        folder_path = os.path.join(clone_path, folder)
        try:
            shutil.rmtree(folder_path, onerror=handle_remove_readonly)
            print(f"Deleted: {folder_path}")
        except Exception as e:
            print(f"Error deleting {folder_path}: {e}")

In [6]:
# Example usage
# Load your actual dataframe from file
df = pd.read_csv("pac_usage_summary_updated.csv")
clean_clone_directory(df, "C:/Users/fpatr/OneDrive/Documents/Adoption of policies as code in ML based application/clone")

Deleted: C:/Users/fpatr/OneDrive/Documents/Adoption of policies as code in ML based application/clone\094459__cedar-flask-demo
Deleted: C:/Users/fpatr/OneDrive/Documents/Adoption of policies as code in ML based application/clone\1Strategy__cloud-custodian-demo
Deleted: C:/Users/fpatr/OneDrive/Documents/Adoption of policies as code in ML based application/clone\1upD__propper-2013
Deleted: C:/Users/fpatr/OneDrive/Documents/Adoption of policies as code in ML based application/clone\3scale-labs__kiper
Deleted: C:/Users/fpatr/OneDrive/Documents/Adoption of policies as code in ML based application/clone\404954-TomasRivetta__Api-Cine
Error deleting C:/Users/fpatr/OneDrive/Documents/Adoption of policies as code in ML based application/clone\4linux__541: [WinError 3] The system cannot find the path specified: 'C:/Users/fpatr/OneDrive/Documents/Adoption of policies as code in ML based application/clone\\4linux__541\\provision\\ansible\\files\\data\\docker\\registry\\v2\\repositories\\4linux-app\