# Fixing the paths of orthomosaics and GeoJSON polygon files in the QGZ files

## Listing the files in the source folder

In [10]:
import glob
import os
# Function to find files (optionally filtered by extension, e.g. "tif" or "qgz") in a given folder
def find_files_in_folder(folder_path, extension=None, recursive=False):
    """
    Glob a folder for paths, optionally restricted to one extension and
    optionally descending into subfolders.

    Parameters:
    - folder_path (str): Folder to search.
    - extension (str, optional): Extension without the leading dot (e.g. "tif").
                                 When omitted (or empty), every entry is matched.
    - recursive (bool, optional): When True, the pattern is prefixed with "**/" so
                                  that entries in nested subfolders are matched too.
                                  Defaults to False.

    Returns:
    - list of str: The paths returned by `glob.glob` for the built pattern. Without
                   an extension filter this can include directories, because the
                   pattern matches any entry name. When nothing matches, the list
                   contains a single empty string rather than being empty.

    Example:
    >>> find_files_in_folder("/path/to/folder", "tif")
    ['/path/to/folder/file1.tif', '/path/to/folder/file2.tif']
    """
    # Name part of the pattern: "*.ext" when filtering, otherwise a bare wildcard.
    name_part = f"*.{extension}" if extension else "*"

    # "**/" only has its recursive meaning because recursive=True is passed to glob below.
    if recursive:
        name_part = "**/" + name_part

    hits = glob.glob(os.path.join(folder_path, name_part), recursive=recursive)

    # Callers get [""] as the "nothing found" sentinel instead of an empty list.
    return hits if hits else [""]

## Replacing certain text in the QGS file inside the QGZ archive

In [19]:
import zipfile
import os
import shutil

def replace_text_in_qgz(qgz_file, search_text, replace_text):
    """Replace text in the .qgs project file inside a .qgz archive, in place.

    The archive (a ZIP file) is extracted into a fresh, uniquely named scratch
    folder, the first top-level ``.qgs`` file (in sorted name order) is edited,
    and the contents are re-zipped over the original archive. Progress and the
    number of replacements are reported with ``print``.

    Parameters:
    - qgz_file (str): Path to the .qgz archive to modify.
    - search_text (str): Text to look for in the .qgs file. Must be non-empty.
    - replace_text (str): Text that replaces every occurrence of `search_text`.

    Returns:
    - None. The archive is left untouched when it contains no top-level .qgs
      file or when `search_text` does not occur in it.

    Raises:
    - ValueError: If `search_text` is empty.
    - zipfile.BadZipFile / OSError: If the archive cannot be read or rewritten.
    """
    # Imported locally so the function does not rely on another notebook cell.
    import tempfile

    if not search_text:
        # "".count("") / .replace("", x) match between every character and
        # would corrupt the whole project file.
        raise ValueError("search_text must be a non-empty string")

    # Step 1: Extract the .qgz file (it's a ZIP file).
    # A unique folder per call means leftovers from an earlier run (e.g. when
    # deletion failed with "Access is denied") can never be mistaken for this
    # project's files or be zipped into this project's archive.
    temp_dir = tempfile.mkdtemp(prefix="temp_qgz_extracted_")
    try:
        with zipfile.ZipFile(qgz_file, 'r') as zip_ref:
            zip_ref.extractall(temp_dir)

        # Step 2: Find the .qgs file in the extracted files (sorted so the
        # choice is deterministic if there is ever more than one).
        qgs_file = None
        for file_name in sorted(os.listdir(temp_dir)):
            if file_name.endswith(".qgs"):
                qgs_file = os.path.join(temp_dir, file_name)
                break

        if not qgs_file:
            print(f"No .qgs file found in {qgz_file}")
            return

        print(f"Found .qgs file: {qgs_file}")

        # Step 3: Read the .qgs file. newline='' disables newline translation
        # so the project's original line endings survive the round trip.
        with open(qgs_file, 'r', encoding='utf-8', newline='') as file:
            xml_data = file.read()

        # Step 4: Count occurrences of the search_text
        count = xml_data.count(search_text)
        if count == 0:
            print(f"No instances of '{search_text}' found in {qgs_file}")
            return

        # Step 5: Replace the specified text
        xml_data = xml_data.replace(search_text, replace_text)
        print(f"Replaced {count} instance(s) of '{search_text}' with '{replace_text}'")

        # Step 6: Write the modified XML data back to the .qgs file
        with open(qgs_file, 'w', encoding='utf-8', newline='') as file:
            file.write(xml_data)

        # Step 7: Recompress into a staging file next to the original, then
        # swap it in. The original archive is only replaced once the new one
        # has been written completely.
        staged_qgz = f"{qgz_file}.tmp"
        try:
            with zipfile.ZipFile(staged_qgz, 'w', zipfile.ZIP_DEFLATED) as zip_ref:
                for folder_name, _subfolders, filenames in os.walk(temp_dir):
                    for filename in filenames:
                        file_path = os.path.join(folder_name, filename)
                        zip_ref.write(file_path, os.path.relpath(file_path, temp_dir))
            os.replace(staged_qgz, qgz_file)
        except BaseException:
            # Do not leave a half-written staging file behind.
            if os.path.exists(staged_qgz):
                os.remove(staged_qgz)
            raise
    finally:
        # Step 8: Clean up the scratch folder on every exit path. Failure to
        # delete is reported but not fatal (best-effort, as before).
        try:
            shutil.rmtree(temp_dir)
        except OSError as e:
            print(f"Error deleting temporary folder: {e}")

    print(f"Modified .qgz replaced the original file: {qgz_file}")

## Usage

In [6]:
# Single project archive to patch (absolute Windows path, hard-coded for this run).
qgz_file_path = r"D:\PhenoCrop\3_qgis\PHENO_CROP\20240819 PHENO_CROP M3M 30m MS 70 75.qgz"

# Pair 1: removes one leading "../" from the extraction-polygon path.
# NOTE: this pair is overwritten by the assignments below before any call is
# made, so as written it has no effect in this cell.
search_text = "../../3_Extraction Polygons/3. FINAL MASKS PYTHON"
replace_text = "../3_Extraction Polygons/3. FINAL MASKS PYTHON"

# Pair 2: removes one leading "../" from the pix4d MS path. These are the
# values search_text / replace_text hold when the cell finishes.
search_text = "../../../2_pix4d/PHENO_CROP/MS"
replace_text = "../../2_pix4d/PHENO_CROP/MS"
# The call is left commented out, so running this cell modifies nothing;
# uncomment it to apply the pair currently assigned above.
# replace_text_in_qgz(qgz_file_path, search_text, replace_text)

Found .qgs file: temp_qgz_extracted\20240819 PHENO_CROP M3M 30m MS 70 75.qgs
Replaced 12 instance(s) of '../../../2_pix4d/PHENO_CROP/MS' with '../../2_pix4d/PHENO_CROP/MS'
Error deleting temporary folder: [WinError 5] Access is denied: 'temp_qgz_extracted'
Modified .qgz saved as: D:\PhenoCrop\3_qgis\PHENO_CROP\20240819 PHENO_CROP M3M 30m MS 70 75_modified.qgz


## Looping it on all the files in the source path

In [22]:
# Folder whose .qgz project archives are to be patched (hard-coded Windows path).
folder_path = r"D:\PhenoCrop\3_qgis\PHENO_CROP"
# Non-recursive search for "*.qgz" directly inside folder_path.
# NOTE: find_files_in_folder returns [""] (not an empty list) when nothing matches.
qgz_files_to_modify =find_files_in_folder(folder_path, "qgz")
# Uncomment to display the collected paths in the notebook.
# qgz_files_to_modify

In [20]:
# Apply both path corrections to every archive collected in qgz_files_to_modify.
# Both replace_text_in_qgz calls are currently commented out, so as written the
# loop only rebinds search_text / replace_text and changes no files.
# NOTE(review): qgz_files_to_modify may be [""] when no archive was found;
# guard against the empty string before re-enabling the calls.
for file in qgz_files_to_modify:

    # Correct the path to the orthomosaics
    search_text = "../../../2_pix4d/PHENO_CROP/MS"
    replace_text = "../../2_pix4d/PHENO_CROP/MS"
    # replace_text_in_qgz(file, search_text, replace_text)

    # Correct the path to the extraction polygon geojson
    search_text = "../../3_Extraction Polygons/3. FINAL MASKS PYTHON"
    replace_text = "../3_Extraction Polygons/3. FINAL MASKS PYTHON"
    # replace_text_in_qgz(file, search_text, replace_text)

Found .qgs file: temp_qgz_extracted\20240603 PHENO_CROP P4M 20m MS 80 85.qgs
Replaced 14 instance(s) of '../../../2_pix4d/PHENO_CROP/MS' with '../../2_pix4d/PHENO_CROP/MS'
Error deleting temporary folder: [WinError 5] Access is denied: 'temp_qgz_extracted'
Modified .qgz saved as: D:\PhenoCrop\3_qgis\PHENO_CROP\20240603 PHENO_CROP P4M 20m MS 80 85.qgz
Found .qgs file: temp_qgz_extracted\20240603 PHENO_CROP P4M 20m MS 80 85.qgs
Replaced 2 instance(s) of '../../3_Extraction Polygons/3. FINAL MASKS PYTHON' with '../3_Extraction Polygons/3. FINAL MASKS PYTHON'
Error deleting temporary folder: [WinError 5] Access is denied: 'temp_qgz_extracted'
Modified .qgz saved as: D:\PhenoCrop\3_qgis\PHENO_CROP\20240603 PHENO_CROP P4M 20m MS 80 85.qgz
Found .qgs file: temp_qgz_extracted\20240606 PHENO_CROP M3M 20m MS 80 85.qgs
Replaced 12 instance(s) of '../../../2_pix4d/PHENO_CROP/MS' with '../../2_pix4d/PHENO_CROP/MS'
Error deleting temporary folder: [WinError 5] Access is denied: 'temp_qgz_extracted'
