# Group shapefiles

## Purpose

Groups shapefile components into sub-folders named after each file's base name.
Example: 'roads.shp', 'roads.shx', and 'roads.dbf' are all moved into a folder named 'roads'.
   

In [2]:
# Directory whose files will be grouped. Change this path to the directory you
# want to process; a relative path is resolved against the current working directory.
# NOTE(review): "regonal" may be a typo for "regional" — confirm against the
# actual folder name on disk before changing it.
directory_path = "shp_plan_regonal_parcels_2015"

In [3]:
import os

In [10]:
def group_shapefile_components(directory):
    """
    Group shapefile components into sub-folders named after their base name.

    Every regular, non-hidden file located directly inside ``directory`` that
    has an extension is moved into ``directory/<base name>/``. Hidden files,
    files without an extension and existing sub-directories are left untouched.
    The scan is not recursive.

    '.shp.xml' metadata files are matched case-insensitively and grouped with
    the corresponding '.shp' file.

    Examples:
      - roads.shp, roads.dbf, roads.shx, roads.shp.xml → folder 'roads'
      - lakes.shp.xml → folder 'lakes'
      - PARCELS.SHP, PARCELS.SHP.XML → folder 'PARCELS'

    Args:
        directory: Path of the directory whose files should be grouped.

    Returns:
        None. Files are moved on disk; skipped items are reported via print().

    Raises:
        OSError: Propagated from os.listdir, os.makedirs or os.rename
            (e.g. the directory does not exist or is not writable).
    """
    metadata_suffix = '.shp.xml'

    def get_shapefile_basename(filename):
        """
        Return the name of the group (sub-folder) a file belongs to, or None
        if the file should be left where it is.

        - Hidden files (like .DS_Store) → None.
        - Files ending with '.shp.xml' (any letter case) → everything before
          that suffix, so they land next to 'filename.shp'.
        - Files without an extension → None.
        - Any other file → its name without the last extension.
        """
        # Ignore hidden files (e.g., .DS_Store)
        if filename.startswith('.'):
            return None

        # Compare only the tail so that the slice length always matches the
        # original string, and lower-case it so 'X.SHP.XML' is recognised too.
        if filename[-len(metadata_suffix):].lower() == metadata_suffix:
            return filename[:-len(metadata_suffix)]

        base, ext = os.path.splitext(filename)
        if not ext:
            return None
        return base

    # Map each base name to the list of filenames that share it.
    files_by_basename = {}
    for filename in os.listdir(directory):
        # Only plain files are grouped; sub-directories stay as they are.
        if os.path.isdir(os.path.join(directory, filename)):
            continue

        basename = get_shapefile_basename(filename)
        if not basename:
            continue

        files_by_basename.setdefault(basename, []).append(filename)

    # Create sub-folders and move files
    for basename, file_list in files_by_basename.items():
        folder_path = os.path.join(directory, basename)

        # A regular file already occupies the folder name: nothing can be
        # created there, so leave this group in place.
        if os.path.exists(folder_path) and not os.path.isdir(folder_path):
            print(f"Skipping creation of '{folder_path}' because a file with that name exists.")
            continue

        os.makedirs(folder_path, exist_ok=True)

        for file_item in file_list:
            src = os.path.join(directory, file_item)
            dst = os.path.join(folder_path, file_item)

            # Source may already be gone after a partial earlier run.
            if not os.path.exists(src):
                continue

            # os.rename silently replaces an existing file on POSIX and raises
            # on Windows; skip instead so a rerun never destroys grouped data.
            if os.path.exists(dst):
                print(f"Skipping '{src}' because '{dst}' already exists.")
                continue

            os.rename(src, dst)

In [11]:
# Run the grouping on the configured directory; this moves files on disk.
group_shapefile_components(directory_path)