In [None]:
import os

def create_river_folders(base_directory, folder_names=None):
    """
    Create folders for river monitoring stations in the specified directory.

    Parameters:
    base_directory (str): The path where folders should be created
    folder_names (iterable of str, optional): Names of the folders to create.
        When omitted, the module-level ``folder_names`` variable is used, which
        is the only source this function read before the parameter existed.

    Raises:
    NameError: If ``folder_names`` is not passed and no module-level
        ``folder_names`` exists (for example when cells are run out of order).
    """
    if folder_names is None:
        # Backward-compatible fallback: earlier versions silently depended on a
        # notebook-level list defined in another cell.
        try:
            folder_names = globals()["folder_names"]
        except KeyError:
            raise NameError(
                "folder_names is not defined: pass folder_names=[...] or run the "
                "cell that builds the folder_names list first"
            ) from None

    # A lone string would otherwise be iterated character by character.
    if isinstance(folder_names, str):
        folder_names = [folder_names]
    # Materialize once so generators can be both iterated and counted.
    folder_names = list(folder_names)

    # Create the base directory if it doesn't exist
    if not os.path.exists(base_directory):
        os.makedirs(base_directory)
        print(f"Created base directory: {base_directory}")

    # Create each folder
    created_count = 0
    skipped_count = 0

    for folder_name in folder_names:
        folder_path = os.path.join(base_directory, folder_name)

        if not os.path.exists(folder_path):
            os.makedirs(folder_path)
            created_count += 1
            print(f"Created: {folder_name}")
        else:
            skipped_count += 1
            print(f"Already exists: {folder_name}")

    print(f"\n✓ Summary: {created_count} folders created, {skipped_count} already existed")
    print(f"Total folders: {len(folder_names)}")

# Run it: create the river folders under the Reaches directory.
# NOTE(review): create_river_folders() reads a module-level `folder_names` list that
# this cell never defines (it is assigned in a later cell of this notebook), so on a
# fresh kernel this call raises NameError unless that cell has been run first.
target_directory = r"E:\Dissertation\Data\RiverMapping\Reaches"
create_river_folders(target_directory)

In [None]:
import os
import geopandas as gpd
from pathlib import Path

def convert_gpkg_to_shapefile(
    source_base=r"E:\Dissertation\Data\Zhao_etal_2025",
    target_base=r"E:\Dissertation\Data\RiverMapping\Reaches",
    planform_types=("B", "HSW", "LSW", "Me"),
    target_crs='EPSG:3395',
):
    """
    Convert .gpkg files to shapefiles with specific attributes and reprojection.

    For every river folder found in ``target_base``, the planform folders of
    ``source_base`` are searched in order for ``<planform>/<river>/<river>.gpkg``.
    The first file found is converted: all original attributes are dropped, a single
    'ds_order' attribute set to 1 is added to every feature, the geometry is
    reprojected to ``target_crs`` and the result is written to
    ``target_base/<river>/<river>.shp``. Failures are reported and do not stop the run.

    Parameters:
    source_base (str): Root folder holding one sub-folder per planform type
    target_base (str): Folder whose sub-folders name the rivers to convert; the
                       shapefiles are written into those sub-folders
    planform_types (iterable of str): Planform folder names, searched in order
    target_crs (str): CRS passed to ``GeoDataFrame.to_crs`` (default EPSG:3395,
                      WGS 84 / World Mercator)

    Raises:
    OSError: If ``target_base`` cannot be listed (for example it does not exist).
    """
    planform_types = list(planform_types)

    # Counters for summary
    success_count = 0
    error_count = 0
    not_found_count = 0
    errors = []

    # Get list of all river folders from the target directory
    river_folders = [f for f in os.listdir(target_base)
                     if os.path.isdir(os.path.join(target_base, f))]

    print(f"Found {len(river_folders)} river reach folders")
    print(f"Searching through {len(planform_types)} planform types\n")

    # Process each river and planform combination
    for river_name in river_folders:
        found_gpkg = False

        for planform in planform_types:
            source_file = os.path.join(source_base, planform, river_name, f"{river_name}.gpkg")

            if not os.path.exists(source_file):
                continue

            found_gpkg = True

            try:
                # Read the geopackage
                gdf = gpd.read_file(source_file)

                # An empty layer would otherwise produce a one-row shapefile with a
                # missing geometry and be counted as a success.
                if len(gdf) == 0:
                    raise ValueError("GeoPackage contains no features")

                # Verify it has exactly one feature
                if len(gdf) != 1:
                    print(f"⚠ Warning: {river_name} ({planform}) has {len(gdf)} features, expected 1")

                # Create new GeoDataFrame with only the ds_order attribute. One
                # attribute row is built per feature and the geometry is passed as a
                # plain list, so index alignment cannot silently drop every feature
                # after the first.
                gdf_new = gpd.GeoDataFrame(
                    {'ds_order': [1] * len(gdf)},
                    geometry=list(gdf.geometry),
                    crs=gdf.crs
                )

                # Reproject (default EPSG:3395, WGS 84 / World Mercator)
                gdf_reprojected = gdf_new.to_crs(target_crs)

                # Construct output path
                output_dir = os.path.join(target_base, river_name)
                output_file = os.path.join(output_dir, f"{river_name}.shp")

                # Save as shapefile
                gdf_reprojected.to_file(output_file)

                success_count += 1
                print(f"✓ Converted: {river_name} (from {planform})")

            except Exception as e:
                error_count += 1
                error_msg = f"{river_name} ({planform})"
                errors.append(error_msg)
                print(f"✗ Error: {river_name} ({planform}) - {str(e)}")
                print(f"   Skipping and continuing with remaining rivers...")

            # Stop after the first matching planform (whether success or error)
            break

        if not found_gpkg:
            not_found_count += 1
            print(f"○ Not found: {river_name} (checked all planform types)")

    # Print summary
    print(f"\n{'='*60}")
    print(f"SUMMARY")
    print(f"{'='*60}")
    print(f"✓ Successfully converted: {success_count}")
    print(f"○ GPKG files not found: {not_found_count}")
    print(f"✗ Errors encountered: {error_count}")
    print(f"Total river folders processed: {len(river_folders)}")

    if errors:
        print(f"\n{'='*60}")
        print(f"FAILED RIVERS")
        print(f"{'='*60}")
        for error in errors:
            print(f"  - {error}")

# Entry point for this cell: runs the conversion with progress banners before and after.
# Inside a notebook kernel __name__ is "__main__", so this executes every time the
# cell is run.
if __name__ == "__main__":
    print("Starting GPKG to Shapefile conversion...\n")
    convert_gpkg_to_shapefile()
    print("\nConversion process complete!")

In [None]:
import os

# Quick sanity check: does the Reaches directory exist, and how many river
# folders does it currently contain?
target_base = r"E:\Dissertation\Data\RiverMapping\Reaches"

print(f"Does directory exist? {os.path.exists(target_base)}")
if os.path.exists(target_base):
    # Only sub-directories count as river folders; loose files are ignored.
    folders = [f for f in os.listdir(target_base) if os.path.isdir(os.path.join(target_base, f))]
    print(f"Number of folders found: {len(folders)}")
    if folders:
        print(f"First few folders: {folders[:5]}")

In [None]:
import os

def get_folder_names_from_directory(directory_path):
    """
    List the sub-folders found directly inside a directory.

    Parameters:
    directory_path (str): Path to the directory containing folders

    Returns:
    list: Folder names (files are excluded), sorted alphabetically
    """
    subdirectories = []
    for entry in os.listdir(directory_path):
        # Keep directories only; plain files are skipped.
        if os.path.isdir(os.path.join(directory_path, entry)):
            subdirectories.append(entry)

    # Alphabetical order keeps the result stable between runs.
    return sorted(subdirectories)

# Use it: collect the river folder names found under the "B" planform directory.
# The resulting module-level `folder_names` list stays in the kernel namespace.
directory = r"E:\Dissertation\Data\Zhao_etal_2025\B"
folder_names = get_folder_names_from_directory(directory)

# Print as a Python list that you can copy-paste
print(f"Found {len(folder_names)} folders:\n")
print("folder_names = [")
for i, name in enumerate(folder_names):
    # Every entry except the last gets a trailing comma.
    if i < len(folder_names) - 1:
        print(f"    '{name}',")
    else:
        print(f"    '{name}'")
print("]")

# Also print just the count and first few examples
print(f"\nTotal: {len(folder_names)} folders")
if folder_names:
    print(f"First 5: {folder_names[:5]}")
    print(f"Last 5: {folder_names[-5:]}")

## This cell creates one folder per river name in the given directory
It can optionally create an additional subfolder (e.g. "Cleaned") inside each river_name folder.

In [None]:
import os

def create_unique_river_folders(base_directory, subfolder_name=None):
    """
    Create one folder per unique river name, optionally with a nested subfolder.

    The built-in list of river names contains repeats; each name is used once,
    in order of first appearance, and the repeats are reported.

    Parameters:
    base_directory (str): The path where folders should be created
    subfolder_name (str, optional): If provided, a subfolder with this name is
                                    created inside each river folder
    """
    from collections import Counter

    # Raw list of folder names with duplicates
    raw_folder_names = [
        'Amazonas_Tamshiyacu', 'AmuDarya_Kerki', 'Amur_Khabarovsk',
        'Amyl_Kachulka', 'Apalachicola_NearBlountstown', 'Araguaia_Aruana',
        'Araguaia_LuizAlves', 'Araguaia_SaoFelixDoAraguaia', 'Beas_MandiPlain',
        'Beni_Rurrenabaque', 'Benue_Ibi', 'Bhareli_NTRoadCrossing',
        'Brahmaputra_Bahadurabad', 'Brahmaputra_Pandu', 'Brahmaputra_Yangcun',
        'Chenab_Akhnoor', 'Cuiaba_PortoDoAlegre', 'Demini_PostoAjuricaba',
        'Gandak_Dumariaghat', 'Gandak_Triveni', 'Ganges_Paksey',
        'Guapore_Pimenteiras', 'HuangHe_Huayuankou', 'HuangHe_TanglaiQu',
        'Ica_IpirangaVelho', 'Indus_Kotri', 'Irrawaddy_Katha',
        'Irrawaddy_Sagaing', 'Itacuai_LadarioJusante', 'Jurua_EirunepeMontante',
        'Jurua_SantosDumont', 'Katun_Srostki', 'Kokcha_Khojaghar',
        'Krishna_Vijayawada', 'Lena_Tabaga', 'Liard_UpperCrossing',
        'Logone_Bongor', 'Logone_Lai', 'Madeira_Humaita',
        'Mamore_Guajara-Mirim', 'Mamore_PuertoSiles', 'Maranon_Borja',
        'Maranon_SanRegis', 'Mortes_SantoAntonioDoLeverger', 'Napo_Bellavista',
        'Napo_NvoRocafuerte', 'Naryn_UstKekirim', 'Ob_Prokhorkino',
        'Panj_NizPyandzh', 'Parana_Corrientes', 'Peace_FifthMeridian',
        'Pilcomayo_VillaMontes', 'Purus_Canutama', 'Purus_Labrea',
        'Purus_SeringalDaCaridade', 'Purus_ValparaisoMontante', 'Red_Index',
        'Rufiji_Stigler', 'Salinas_SanAugustin', 'SaoFrancisco_BomJesusDaLapa',
        'SaoFrancisco_Morpara', 'SaptKosi_Baltara', 'SaptKosi_Chatara-Kothu',
        'Selenga_Naushki', 'Solimoes_SantoAntonioDoIca', 'Solimoes_SaoPauloDeOlivenca',
        'Solimoes_Tabatinga', 'Taku_NearTulsequa', 'Tanana_NearHardingLake',
        'Tarauaca_Envira', 'Tista_AndersonBr', 'Tista_Kaunia',
        'Tisza_Vylok', 'Trinity_Romayor', 'Ucayali_Atalaya',
        'Ucayali_Pucallpa', 'Ucayali_Requena', 'Ural_Kushum',
        'White_DevallsBluff', 'White_Petersburg', 'Yukon_NearStevensVillage',
        'Zambezi_Sesheke', 'Beni', 'Beni', 'Beni', 'Beni', 'Beni',
        'Beni', 'Beni', 'Beni', 'Beni', 'Beni', 'Bermejo', 'Bermejo',
        'Bermejo', 'Bermejo', 'Bermejo', 'Bermejo', 'Bermejo', 'Bermejo',
        'Bermejo', 'Bermejo', 'Bermejo', 'Bermejo', 'Bermejo', 'Bermejo',
        'Bermejo', 'Bermejo', 'Bermejo', 'Bermejo', 'Bermejo', 'Bermejo',
        'Koyukuk_Huslia', 'Yukon_Beaver', 'Aladan_VerkhoyanskiyPerevoz',
        'Amazonas_Jatuarana', 'Amur_Komsomolsk', 'Benue_Umaisha',
        'BolshayaKet_Rodyonovka', 'Brahmaputra_Pasighat', 'Chari_Bousso',
        'Chari_Guelengdeng', 'Chari_Ndjamena', 'Chari_Sahr', 'Fraser_Hope',
        'Gandak_Devghat', 'Helmand_Kajaki', 'Helmand_Malakhan', 'Indus_Attock',
        'Irtysh_Bobrovsky', 'Irtysh_Hanti-Mansisk', 'Irtysh_Pavlodar',
        'Irtysh_Semiyarskoje', 'Jutai_PortoSeguro', 'Kamchatka_Kozyrevsk',
        'Kan_Kansk', 'MadreDeDios_CachuelaEsperanza', 'Magdalena_Calamar',
        'Magdalena_PuertoBerrio', 'Manas_Mathanguri', 'Mbam_Goura',
        'Mekong_Kratie', 'Niger_Tossaye', 'Ob_Barnaul', 'Ob_Kolpashevo',
        'Ob_Mogochin', 'Ob_Phominskoje', 'Orinoco_CiudadBolivar',
        'Orinoco_Musinacio', 'Paraguay_Asuncion', 'Paraguay_PortoMurtinho',
        'Parana_Chapeton', 'Porcupine_NearFortYukon', 'Sangha_Ouesso',
        'Solimoes_Itapeua', 'Solimoes_Manacapuru', 'SonghuaJiang_Haerbin',
        'Vilyuy_KhatyrykKhoma', 'Yangtze_Datong', 'Yellowstone_NearSidney',
        'Yukon_Eagle', 'Zambezi_LukuluMission', 'Zambezi_Matundo-Cais'
    ]

    # dict keys are unique and keep insertion order, so this drops repeats
    # while preserving the order of first appearance.
    folder_names = list(dict.fromkeys(raw_folder_names))
    duplicate_total = len(raw_folder_names) - len(folder_names)

    # Report on duplicates
    if duplicate_total:
        print("Duplicate rivers found and removed:")
        for river, occurrences in Counter(raw_folder_names).items():
            if occurrences > 1:
                print(f"  - {river}: appeared {occurrences} times, keeping 1")
        print()

    # Create the base directory if it doesn't exist
    if not os.path.exists(base_directory):
        os.makedirs(base_directory)
        print(f"Created base directory: {base_directory}\n")

    # Tallies for the final summary
    n_created = n_existing = 0
    n_sub_created = n_sub_existing = 0

    for river in folder_names:
        river_path = os.path.join(base_directory, river)

        if os.path.exists(river_path):
            n_existing += 1
            print(f"Already exists: {river}")
        else:
            os.makedirs(river_path)
            n_created += 1
            print(f"Created: {river}")

        # The nested subfolder is optional
        if not subfolder_name:
            continue

        nested_path = os.path.join(river_path, subfolder_name)
        if os.path.exists(nested_path):
            n_sub_existing += 1
        else:
            os.makedirs(nested_path)
            n_sub_created += 1

    divider = '=' * 60
    print(f"\n{divider}")
    print("SUMMARY")
    print(divider)
    print(f"Original list: {len(raw_folder_names)} entries")
    print(f"Duplicates removed: {duplicate_total}")
    print(f"Unique folders: {len(folder_names)}")
    print(f"✓ Folders created: {n_created}")
    print(f"○ Already existed: {n_existing}")

    if subfolder_name:
        print(f"\nSubfolder '{subfolder_name}':")
        print(f"✓ Created: {n_sub_created}")
        print(f"○ Already existed: {n_sub_existing}")

# Entry point for this cell: builds the RiverMasks folder tree.
# Inside a notebook kernel __name__ is "__main__", so this executes every time the
# cell is run.
if __name__ == "__main__":
    # Specify your target directory here
    target_directory = r"E:\Dissertation\Data\RiverMapping\RiverMasks"  # Change this to your desired path
    
    # Optional: specify a subfolder name to create inside each river folder
    # Set to None if you don't want subfolders
    subfolder = "Cleaned"  # Example: "masks" or "data" or None
    
    create_unique_river_folders(target_directory, subfolder_name=subfolder)

This cell moves the Greenberg et al. (2024) masks from their original folder structure into the new RiverMapping folder structure.

In [None]:
import os
import shutil

def move_greenberg_masks(
    source_base=r"E:\Dissertation\Data\Greenberg_etal_2024\RiverData",
    target_base=r"E:\Dissertation\Data\RiverMapping\RiverMasks",
    planform_types=("B", "HSW", "LSW", "Me"),
):
    """
    Move .tif mask files from Greenberg et al. directory structure to RiverMapping directory.

    River names are collected from ``source_base/<planform>/``. For each river the
    planform folders are searched in order and the first one that yields .tif files
    is used. Inside a river folder the mask folder is located as follows:

    1. More than one ``WaterLevel*`` folder (counting the river folder and a nested
       ``<river_name>`` subfolder together): ambiguous, the river is skipped.
    2. Exactly one ``WaterLevel*`` folder directly in the river folder:
       ``<river>/<WaterLevel*>/mask``.
    3. A nested ``<river_name>`` subfolder: ``<river>/<river_name>/<WaterLevel*>/mask``,
       or ``<river>/<river_name>/mask`` when there is no WaterLevel folder.
    4. Otherwise ``<river>/mask``.

    Every .tif found is moved to ``target_base/<river_name>/Cleaned``. A summary is
    printed at the end; nothing is returned.

    Parameters:
    source_base (str): Root folder holding one sub-folder per planform type
    target_base (str): Root folder that receives ``<river_name>/Cleaned`` folders
    planform_types (iterable of str): Planform folder names, searched in order
    """
    planform_types = list(planform_types)
    max_listed = 20  # cap on how many mask-less rivers are printed in the summary

    def list_subdirs(path):
        """Return the names of the immediate sub-directories of ``path``."""
        return [d for d in os.listdir(path) if os.path.isdir(os.path.join(path, d))]

    def only_waterlevel(names):
        """Keep the folder names that start with 'WaterLevel'."""
        return [f for f in names if f.startswith('WaterLevel')]

    # Counters for summary
    rivers_processed = 0
    files_moved = 0
    rivers_not_found = 0
    rivers_with_multiple_folders = 0
    errors = []
    rivers_with_masks = []
    rivers_without_masks = []
    rivers_with_ambiguous_structure = []

    # Get list of all potential river names by scanning all planform directories
    all_river_names = set()
    for planform in planform_types:
        planform_dir = os.path.join(source_base, planform)
        if os.path.isdir(planform_dir):
            all_river_names.update(list_subdirs(planform_dir))

    all_river_names = sorted(all_river_names)
    print(f"Found {len(all_river_names)} unique river names across all planform types")
    print(f"Searching through {len(planform_types)} planform types\n")

    # Process each river
    for river_name in all_river_names:
        found_masks = False

        # Search through planform types for this river
        for planform in planform_types:
            river_dir = os.path.join(source_base, planform, river_name)

            if not os.path.isdir(river_dir):
                continue

            # Check what's inside the river directory
            items_in_river_dir = list_subdirs(river_dir)
            waterlevel_folders = only_waterlevel(items_in_river_dir)
            has_mask_direct = 'mask' in items_in_river_dir
            has_river_subfolder = river_name in items_in_river_dir

            # Contents of the nested <river_name> subfolder, listed once and reused
            river_subfolder = os.path.join(river_dir, river_name)
            subfolders = list_subdirs(river_subfolder) if has_river_subfolder else []
            subfolder_waterlevels = only_waterlevel(subfolders)

            # WaterLevel folders from both levels
            all_waterlevel_folders = waterlevel_folders + subfolder_waterlevels

            mask_dir = None

            # Case 1: Multiple WaterLevel folders across both levels - ambiguous
            if len(all_waterlevel_folders) > 1:
                found_masks = True  # Mark as found to avoid "not found" message
                rivers_with_ambiguous_structure.append(river_name)
                rivers_with_multiple_folders += 1
                print(f"⚠ Warning: {river_name} ({planform}) has multiple WaterLevel folders: {all_waterlevel_folders}")
                print(f"   Skipping due to ambiguous structure...")
                break

            # Case 2: WaterLevel folder directly in river_dir
            elif len(waterlevel_folders) == 1:
                potential_mask_dir = os.path.join(river_dir, waterlevel_folders[0], "mask")
                if os.path.isdir(potential_mask_dir):
                    mask_dir = potential_mask_dir

            # Case 3: WaterLevel folder inside river_name subfolder
            elif has_river_subfolder:
                if len(subfolder_waterlevels) == 1:
                    potential_mask_dir = os.path.join(river_subfolder, subfolder_waterlevels[0], "mask")
                    if os.path.isdir(potential_mask_dir):
                        mask_dir = potential_mask_dir
                elif 'mask' in subfolders:
                    # No WaterLevel, but mask exists directly in river_name subfolder
                    mask_dir = os.path.join(river_subfolder, "mask")

            # Case 4: mask folder directly in river_dir (no river_name subfolder, no WaterLevel)
            elif has_mask_direct:
                mask_dir = os.path.join(river_dir, "mask")

            if not mask_dir:
                continue

            # Get all .tif files in the mask directory
            tif_files = [f for f in os.listdir(mask_dir) if f.lower().endswith('.tif')]
            if not tif_files:
                continue

            found_masks = True
            files_moved_for_river = 0

            try:
                # Create target directory
                target_dir = os.path.join(target_base, river_name, "Cleaned")
                os.makedirs(target_dir, exist_ok=True)

                # Move each .tif file
                for tif_file in tif_files:
                    shutil.move(os.path.join(mask_dir, tif_file),
                                os.path.join(target_dir, tif_file))
                    files_moved_for_river += 1

                rivers_processed += 1
                rivers_with_masks.append(river_name)
                print(f"✓ Moved {files_moved_for_river} file(s): {river_name} (from {planform})")

            except Exception as e:
                error_msg = f"{river_name} ({planform})"
                errors.append(error_msg)
                print(f"✗ Error: {river_name} ({planform}) - {str(e)}")
                print(f"   Skipping and continuing with remaining rivers...")

            finally:
                # Count files that really moved, even if a later file in the
                # same river failed, so the summary total stays accurate.
                files_moved += files_moved_for_river

            # Break after finding first matching planform with masks
            break

        if not found_masks:
            rivers_without_masks.append(river_name)
            rivers_not_found += 1

    # Print summary
    print(f"\n{'='*60}")
    print(f"SUMMARY")
    print(f"{'='*60}")
    print(f"✓ Rivers with masks processed: {rivers_processed}")
    print(f"✓ Total .tif files moved: {files_moved}")
    print(f"○ Rivers without mask folders: {rivers_not_found}")
    print(f"⚠ Rivers with ambiguous structure (skipped): {rivers_with_multiple_folders}")
    print(f"✗ Errors encountered: {len(errors)}")
    print(f"Total unique rivers scanned: {len(all_river_names)}")

    if rivers_with_ambiguous_structure:
        print(f"\n{'='*60}")
        print(f"RIVERS WITH AMBIGUOUS STRUCTURE (Multiple WaterLevel Folders)")
        print(f"{'='*60}")
        for river in rivers_with_ambiguous_structure:
            print(f"  - {river}")

    if errors:
        print(f"\n{'='*60}")
        print(f"FAILED RIVERS")
        print(f"{'='*60}")
        for error in errors:
            print(f"  - {error}")

    # Print rivers without masks (useful for debugging). The list is always shown,
    # truncated to the first `max_listed` names with a count of the remainder.
    if rivers_without_masks:
        print(f"\n{'='*60}")
        print(f"RIVERS WITHOUT MASK FOLDERS (showing up to 20)")
        print(f"{'='*60}")
        for river in rivers_without_masks[:max_listed]:
            print(f"  - {river}")
        if len(rivers_without_masks) > max_listed:
            print(f"  ... and {len(rivers_without_masks) - max_listed} more")

# Entry point for this cell: runs the Greenberg mask migration with progress banners.
# Inside a notebook kernel __name__ is "__main__", so this executes every time the
# cell is run. The migration moves files (it does not copy them).
if __name__ == "__main__":
    print("Starting mask file migration...\n")
    move_greenberg_masks()
    print("\nMask migration complete!")

This cell moves the Zhao et al. (2025) masks from their original folder structure into the new RiverMapping folder structure.

In [None]:
def move_zhao_masks(
    source_base=r"E:\Dissertation\Data\Zhao_etal_2025",
    target_base=r"E:\Dissertation\Data\RiverMapping\RiverMasks",
    planform_types=("B", "HSW", "LSW", "Me"),
):
    """
    Move .tif mask files from Zhao et al. directory structure to RiverMapping directory.

    River names are collected from ``source_base/<planform>/``. For each river the
    planform folders are searched in order for
    ``<planform>/<river_name>/PreparedImagery_annual/<river_name>``; the first such
    folder containing .tif files is used and all of its .tif files are moved to
    ``target_base/<river_name>/Cleaned``. A summary is printed at the end; nothing
    is returned.

    Parameters:
    source_base (str): Root folder holding one sub-folder per planform type
    target_base (str): Root folder that receives ``<river_name>/Cleaned`` folders
    planform_types (iterable of str): Planform folder names, searched in order
    """
    planform_types = list(planform_types)
    max_listed = 20  # cap on how many mask-less rivers are printed in the summary

    # Counters for summary
    rivers_processed = 0
    files_moved = 0
    rivers_not_found = 0
    errors = []
    rivers_with_masks = []
    rivers_without_masks = []

    # Get list of all potential river names by scanning all planform directories
    all_river_names = set()
    for planform in planform_types:
        planform_dir = os.path.join(source_base, planform)
        if os.path.isdir(planform_dir):
            all_river_names.update(
                d for d in os.listdir(planform_dir)
                if os.path.isdir(os.path.join(planform_dir, d))
            )

    all_river_names = sorted(all_river_names)
    print(f"Found {len(all_river_names)} unique river names across all planform types")
    print(f"Searching through {len(planform_types)} planform types\n")

    # Process each river
    for river_name in all_river_names:
        found_masks = False

        # Search through planform types for this river
        for planform in planform_types:
            # Source layout: planform/river_name/PreparedImagery_annual/river_name
            mask_dir = os.path.join(
                source_base, planform, river_name, "PreparedImagery_annual", river_name
            )

            if not os.path.isdir(mask_dir):
                continue

            # Get all .tif files in the directory
            tif_files = [f for f in os.listdir(mask_dir) if f.lower().endswith('.tif')]
            if not tif_files:
                continue

            found_masks = True
            files_moved_for_river = 0

            try:
                # Create target directory
                target_dir = os.path.join(target_base, river_name, "Cleaned")
                os.makedirs(target_dir, exist_ok=True)

                # Move each .tif file
                for tif_file in tif_files:
                    shutil.move(os.path.join(mask_dir, tif_file),
                                os.path.join(target_dir, tif_file))
                    files_moved_for_river += 1

                rivers_processed += 1
                rivers_with_masks.append(river_name)
                print(f"✓ Moved {files_moved_for_river} file(s): {river_name} (from {planform})")

            except Exception as e:
                error_msg = f"{river_name} ({planform})"
                errors.append(error_msg)
                print(f"✗ Error: {river_name} ({planform}) - {str(e)}")
                print(f"   Skipping and continuing with remaining rivers...")

            finally:
                # Count files that really moved, even if a later file in the
                # same river failed, so the summary total stays accurate.
                files_moved += files_moved_for_river

            # Break after finding first matching planform with masks
            break

        if not found_masks:
            rivers_without_masks.append(river_name)
            rivers_not_found += 1

    # Print summary
    print(f"\n{'='*60}")
    print(f"SUMMARY")
    print(f"{'='*60}")
    print(f"✓ Rivers with masks processed: {rivers_processed}")
    print(f"✓ Total .tif files moved: {files_moved}")
    print(f"○ Rivers without mask folders: {rivers_not_found}")
    print(f"✗ Errors encountered: {len(errors)}")
    print(f"Total unique rivers scanned: {len(all_river_names)}")

    if errors:
        print(f"\n{'='*60}")
        print(f"FAILED RIVERS")
        print(f"{'='*60}")
        for error in errors:
            print(f"  - {error}")

    # Print rivers without masks (useful for debugging). The list is always shown,
    # truncated to the first `max_listed` names with a count of the remainder.
    if rivers_without_masks:
        print(f"\n{'='*60}")
        print(f"RIVERS WITHOUT MASK FOLDERS (showing up to 20)")
        print(f"{'='*60}")
        for river in rivers_without_masks[:max_listed]:
            print(f"  - {river}")
        if len(rivers_without_masks) > max_listed:
            print(f"  ... and {len(rivers_without_masks) - max_listed} more")


# Entry point for this cell: runs the Greenberg migration and then the Zhao migration.
# Inside a notebook kernel __name__ is "__main__", so this executes every time the
# cell is run.
# NOTE(review): move_greenberg_masks() is also invoked by the earlier Greenberg cell,
# so a top-to-bottom run calls it twice — confirm the repeat is intended.
if __name__ == "__main__":
    print("Starting Greenberg mask file migration...\n")
    move_greenberg_masks()
    print("\nGreenberg mask migration complete!")
    
    print("\n" + "="*60)
    print("\nStarting Zhao mask file migration...\n")
    move_zhao_masks()
    print("\nZhao mask migration complete!")

Preliminary QC of river masks

In [2]:
import os
import numpy as np
import pandas as pd
import rasterio
from pathlib import Path
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from datetime import datetime

def analyze_mask_statistics(mask_dir):
    """
    Compute wet/dry pixel statistics for every .tif mask in a directory.

    Only the first band of each raster is read. A mask containing a value of 255 is
    counted with 255 as "wet"; otherwise 1 is counted as "wet". In both cases 0 is
    "dry" and every remaining pixel is counted as "other". Files that cannot be read
    are reported and recorded with zero counts plus an 'error' entry.

    Parameters:
    -----------
    mask_dir : str
        Path to directory containing .tif mask files

    Returns:
    --------
    pandas.DataFrame or None
        One row of statistics per mask (sorted by filename), or None when the
        directory holds no .tif files
    """
    import warnings

    mask_names = sorted(name for name in os.listdir(mask_dir) if name.lower().endswith('.tif'))
    if len(mask_names) == 0:
        return None

    tolerance = 0.01  # pixel values are matched within this distance of 0 / 1 / 255
    records = []

    for tif_file in mask_names:
        try:
            # Suppress the NotGeoreferencedWarning
            with warnings.catch_warnings():
                warnings.filterwarnings('ignore', category=rasterio.errors.NotGeoreferencedWarning)
                with rasterio.open(os.path.join(mask_dir, tif_file)) as src:
                    band = src.read(1)  # first band only

            # Record the distinct values before the dtype is changed
            distinct_values = np.unique(band)

            # Float copy for consistent comparisons
            band = band.astype(np.float64)
            n_total = band.size

            # 0/255 masks (Zhao study) versus 0/1 masks (Greenberg study)
            wet_value = 255 if np.any(np.abs(band - 255) < tolerance) else 1
            n_wet = np.sum(np.abs(band - wet_value) < tolerance)
            n_dry = np.sum(np.abs(band - 0) < tolerance)
            n_other = n_total - n_wet - n_dry

            if n_total > 0:
                share_wet = (n_wet / n_total) * 100
                share_dry = (n_dry / n_total) * 100
            else:
                share_wet = 0
                share_dry = 0

            record = {
                'filename': tif_file,
                'total_pixels': n_total,
                'wet_pixels': n_wet,
                'dry_pixels': n_dry,
                'other_pixels': n_other,
                'pct_wet': share_wet,
                'pct_dry': share_dry,
                'unique_values': str(distinct_values.tolist())
            }

        except Exception as e:
            print(f"  Error reading {tif_file}: {e}")
            record = {
                'filename': tif_file,
                'total_pixels': 0,
                'wet_pixels': 0,
                'dry_pixels': 0,
                'other_pixels': 0,
                'pct_wet': 0,
                'pct_dry': 0,
                'unique_values': '',
                'error': str(e)
            }

        records.append(record)

    return pd.DataFrame(records)

def flag_problematic_masks(df, z_threshold=3):
    """
    Add quality-control flag columns to a table of mask statistics.

    The frame is modified in place and also returned. Added columns: 'z_score'
    (wet-pixel count standardized over the frame, or 0 when the standard deviation
    is not positive), 'flag_all_dry', 'flag_all_wet', 'flag_extreme_outlier',
    'flag_non_binary', 'flag_tiny_wet' and the combined 'flagged'.

    Parameters:
    -----------
    df : pandas.DataFrame
        DataFrame with 'wet_pixels', 'pct_wet' and 'other_pixels' columns
    z_threshold : float
        Absolute z-score above which a mask counts as an extreme outlier

    Returns:
    --------
    pandas.DataFrame
        The same DataFrame with the flag columns added
    """
    wet = df['wet_pixels']
    spread = wet.std()

    # Standardize wet-pixel counts; fall back to 0 when there is no usable spread
    df['z_score'] = (wet - wet.mean()) / spread if spread > 0 else 0

    # One rule per problem type, in the order the columns are added
    flag_rules = {
        'flag_all_dry': wet == 0,
        'flag_all_wet': df['pct_wet'] > 99.9,
        'flag_extreme_outlier': np.abs(df['z_score']) > z_threshold,
        'flag_non_binary': df['other_pixels'] > 0,
        'flag_tiny_wet': (df['pct_wet'] < 0.1) & (df['pct_wet'] > 0),
    }

    # A mask is flagged overall when any single rule fires
    any_rule = None
    for column, rule in flag_rules.items():
        df[column] = rule
        any_rule = rule if any_rule is None else (any_rule | rule)
    df['flagged'] = any_rule

    return df

def create_contact_sheet(mask_dir, df, output_path, max_images=40):
    """
    Create a contact sheet showing all masks with flagged ones highlighted.

    Flagged masks get a red border, a red title and a "FLAGGED" marker.
    A mask that cannot be read or drawn is reported on stdout and its grid
    cell is left empty. When there is nothing to show (empty ``df`` or
    ``max_images`` of 0) the function returns without writing a file.

    Parameters:
    -----------
    mask_dir : str
        Path to directory containing masks
    df : pandas.DataFrame
        DataFrame with mask statistics and flags; the columns 'filename',
        'pct_wet' and 'flagged' are read
    output_path : str
        Path to save the contact sheet image
    max_images : int
        Maximum number of images to show
    """
    # Local import (as in the original), hoisted out of the per-image loop
    import warnings

    shown = df.iloc[:max_images]
    n_images = len(shown)
    if n_images == 0:
        # Avoids a 0/0 division when computing the grid below
        return

    # Calculate grid dimensions
    n_cols = min(8, n_images)
    n_rows = int(np.ceil(n_images / n_cols))

    fig = plt.figure(figsize=(20, n_rows * 2.5))
    try:
        for idx, (_, row) in enumerate(shown.iterrows()):
            tif_file = row['filename']
            file_path = os.path.join(mask_dir, tif_file)

            try:
                # Suppress the NotGeoreferencedWarning
                with warnings.catch_warnings():
                    warnings.filterwarnings('ignore', category=rasterio.errors.NotGeoreferencedWarning)
                    with rasterio.open(file_path) as src:
                        data = src.read(1)

                is_flagged = bool(row['flagged'])

                ax = fig.add_subplot(n_rows, n_cols, idx + 1)
                ax.imshow(data, cmap='Blues', vmin=0, vmax=1)

                # Hide only the ticks: ax.axis('off') would also hide the
                # spines and with them the red border for flagged masks.
                ax.set_xticks([])
                ax.set_yticks([])
                for spine in ax.spines.values():
                    spine.set_visible(is_flagged)
                    if is_flagged:
                        spine.set_edgecolor('red')
                        spine.set_linewidth(3)

                # Add title with basic info
                title = f"{tif_file[:15]}...\n"
                title += f"Wet: {row['pct_wet']:.1f}%"
                if is_flagged:
                    title += "\n⚠ FLAGGED"

                ax.set_title(title, fontsize=8, color='red' if is_flagged else 'black')

            except Exception as e:
                # Best effort: one bad mask must not abort the whole sheet
                print(f"  Error visualizing {tif_file}: {e}")

        fig.tight_layout()
        fig.savefig(output_path, dpi=150, bbox_inches='tight')
    finally:
        # Always release the figure, even when saving fails
        plt.close(fig)

def create_time_series_plot(df, output_path, river_name, z_threshold=3):
    """
    Create a time series plot of wet pixels over time.

    Masks are plotted in row order against their position in ``df``;
    flagged masks are overdrawn in red at the same positions, whatever
    index ``df`` carries.

    Parameters:
    -----------
    df : pandas.DataFrame
        DataFrame with mask statistics; the columns 'wet_pixels' and
        'flagged' are read
    output_path : str
        Path to save the plot
    river_name : str
        Name of the river for the title
    z_threshold : float
        Number of standard deviations at which the dotted outlier band is
        drawn around the mean (default 3)
    """
    fig, ax = plt.subplots(figsize=(14, 6))
    try:
        wet_pixels = df['wet_pixels'].to_numpy()
        x = np.arange(len(df))

        # Plot all points
        ax.plot(x, wet_pixels, 'o-', color='steelblue', markersize=4, label='Normal')

        # Highlight flagged points. Positions (not index labels) are used so
        # the markers stay on the line when df has a non-default index.
        flagged_positions = np.flatnonzero(df['flagged'].to_numpy(dtype=bool))
        if flagged_positions.size > 0:
            ax.plot(flagged_positions, wet_pixels[flagged_positions], 'ro',
                    markersize=8, label='Flagged', zorder=5)

        # Add mean line
        mean_wet = df['wet_pixels'].mean()
        ax.axhline(mean_wet, color='green', linestyle='--', alpha=0.7, label=f'Mean: {mean_wet:.0f}')

        # Add the outlier band; skipped when the spread is undefined (NaN)
        std_wet = df['wet_pixels'].std()
        if np.isfinite(std_wet):
            band = z_threshold * std_wet
            ax.axhline(mean_wet + band, color='red', linestyle=':', alpha=0.5,
                       label=f'±{z_threshold:g} σ')
            ax.axhline(mean_wet - band, color='red', linestyle=':', alpha=0.5)

        ax.set_xlabel('Mask Index', fontsize=12)
        ax.set_ylabel('Wet Pixels', fontsize=12)
        ax.set_title(f'{river_name} - Wet Pixel Count Over Time', fontsize=14, fontweight='bold')
        ax.legend()
        ax.grid(True, alpha=0.3)

        fig.tight_layout()
        fig.savefig(output_path, dpi=150, bbox_inches='tight')
    finally:
        # Always release the figure, even when plotting or saving fails
        plt.close(fig)

# Column layout of QC_tracking.csv; every summary row carries all of them.
_QC_TRACKING_COLUMNS = [
    'river_name', 'status', 'total_masks', 'flagged_masks',
    'pct_flagged', 'qc_complete', 'date_screened', 'notes',
]


def _qc_summary_record(river_name, status, total_masks=0, flagged_masks=0):
    """Build one QC_tracking row with the full set of tracking columns."""
    pct_flagged = (flagged_masks / total_masks * 100) if total_masks > 0 else 0.0
    return {
        'river_name': river_name,
        'status': status,
        'total_masks': total_masks,
        'flagged_masks': flagged_masks,
        'pct_flagged': pct_flagged,
        'qc_complete': False,
        'date_screened': datetime.now().strftime('%Y-%m-%d'),
        'notes': ''
    }


def screen_all_rivers(base_dir, output_dir, z_threshold=3, create_visualizations=True):
    """
    Screen all river mask directories and generate reports.

    For every sub-folder of ``base_dir`` the masks in its "Cleaned" folder
    are analyzed and flagged, a per-river statistics CSV is written, and one
    row is added to ``QC_tracking.csv`` in ``output_dir``. Rivers without a
    "Cleaned" folder or without statistics are recorded as skipped.

    Note: ``QC_tracking.csv`` is rewritten on every run with
    ``qc_complete`` set to False for all rivers.

    Parameters:
    -----------
    base_dir : str
        Base directory containing river folders
    output_dir : str
        Directory to save QC reports
    z_threshold : float
        Z-score threshold for flagging outliers
    create_visualizations : bool
        Whether to create contact sheets and plots (slower)
    """
    # Create output directory
    os.makedirs(output_dir, exist_ok=True)

    # Get all river directories
    river_dirs = sorted(
        d for d in os.listdir(base_dir)
        if os.path.isdir(os.path.join(base_dir, d))
    )

    print(f"Found {len(river_dirs)} river directories\n")

    # Track overall QC status
    qc_summary = []

    for river_idx, river_name in enumerate(river_dirs, 1):
        print(f"[{river_idx}/{len(river_dirs)}] Processing {river_name}...", end='', flush=True)

        # Path to masks
        mask_dir = os.path.join(base_dir, river_name, "Cleaned")

        if not os.path.exists(mask_dir):
            print(f" ⚠ No 'Cleaned' folder found, skipping")
            qc_summary.append(_qc_summary_record(river_name, 'No masks found'))
            continue

        # Analyze masks
        print(f" Analyzing...", end='', flush=True)
        df = analyze_mask_statistics(mask_dir)

        if df is None or len(df) == 0:
            print(f" ⚠ No .tif files found, skipping")
            qc_summary.append(_qc_summary_record(river_name, 'No .tif files'))
            continue

        # Flag problematic masks
        print(f" Flagging...", end='', flush=True)
        df = flag_problematic_masks(df, z_threshold)

        # Create river-specific output directory
        river_output_dir = os.path.join(output_dir, river_name)
        os.makedirs(river_output_dir, exist_ok=True)

        # Save statistics CSV
        print(f" Saving CSV...", end='', flush=True)
        csv_path = os.path.join(river_output_dir, f"{river_name}_statistics.csv")
        df.to_csv(csv_path, index=False)

        if create_visualizations:
            # Plots are best effort: a failure is reported inline and the
            # river is still recorded as processed.
            print(f" Contact sheet...", end='', flush=True)
            contact_sheet_path = os.path.join(river_output_dir, f"{river_name}_contact_sheet.png")
            try:
                create_contact_sheet(mask_dir, df, contact_sheet_path)
            except Exception as e:
                print(f" [Contact sheet error: {e}]", end='', flush=True)

            print(f" Time series...", end='', flush=True)
            timeseries_path = os.path.join(river_output_dir, f"{river_name}_timeseries.png")
            try:
                create_time_series_plot(df, timeseries_path, river_name)
            except Exception as e:
                print(f" [Time series error: {e}]", end='', flush=True)

        # Summary stats (plain int so the record and the CSV hold a native value)
        total_masks = len(df)
        flagged_count = int(df['flagged'].sum())

        print(f" ✓ {total_masks} masks, {flagged_count} flagged")

        qc_summary.append(
            _qc_summary_record(river_name, 'Processed', total_masks, flagged_count)
        )

    # Save overall QC tracking file. Explicit columns keep the header stable
    # even when no river folder was found at all.
    qc_df = pd.DataFrame(qc_summary, columns=_QC_TRACKING_COLUMNS)
    qc_tracking_path = os.path.join(output_dir, 'QC_tracking.csv')
    qc_df.to_csv(qc_tracking_path, index=False)

    print(f"\n{'='*60}")
    print(f"SCREENING COMPLETE")
    print(f"{'='*60}")
    print(f"Total rivers screened: {len(river_dirs)}")
    print(f"QC tracking file saved to: {qc_tracking_path}")
    print(f"Individual river reports saved to: {output_dir}")
    print(f"\nRivers with flagged masks:")
    for record in qc_summary:
        if record['flagged_masks'] > 0:
            print(f"  - {record['river_name']}: {record['flagged_masks']}/{record['total_masks']} masks flagged")

if __name__ == "__main__":
    # Folder holding one sub-folder per river (edit to match your machine)
    base_directory = r"E:\Dissertation\Data\RiverMapping\RiverMasks"
    # Folder that receives the per-river reports and QC_tracking.csv
    output_directory = r"E:\Dissertation\Data\RiverMapping\MaskQC"

    # CSV-only screening; pass create_visualizations=True to also write the
    # contact sheets and time series plots (slower).
    screen_all_rivers(
        base_dir=base_directory,
        output_dir=output_directory,
        z_threshold=3,
        create_visualizations=False,
    )

Found 135 river directories

[1/135] Processing Aladan_VerkhoyanskiyPerevoz... Analyzing... Flagging... Saving CSV... ✓ 20 masks, 0 flagged
[2/135] Processing Amazonas_Jatuarana... Analyzing... Flagging... Saving CSV... ✓ 38 masks, 0 flagged
[3/135] Processing Amazonas_Tamshiyacu... Analyzing... Flagging... Saving CSV... ✓ 31 masks, 1 flagged
[4/135] Processing AmuDarya_Kerki... Analyzing... Flagging... Saving CSV... ✓ 29 masks, 0 flagged
[5/135] Processing Amur_Khabarovsk... Analyzing... Flagging... Saving CSV... ✓ 28 masks, 0 flagged
[6/135] Processing Amur_Komsomolsk... Analyzing... Flagging... Saving CSV... ✓ 39 masks, 2 flagged
[7/135] Processing Amyl_Kachulka... Analyzing... Flagging... Saving CSV... ✓ 24 masks, 1 flagged
[8/135] Processing Apalachicola_NearBlountstown... Analyzing... Flagging... Saving CSV... ✓ 37 masks, 1 flagged
[9/135] Processing Araguaia_Aruana... Analyzing... Flagging... Saving CSV... ✓ 36 masks, 0 flagged
[10/135] Processing Araguaia_LuizAlves... Analyzing

[82/135] Processing Ob_Mogochin... Analyzing... Flagging... Saving CSV... ✓ 39 masks, 15 flagged
[83/135] Processing Ob_Phominskoje... Analyzing... Flagging... Saving CSV... ✓ 32 masks, 1 flagged
[84/135] Processing Ob_Prokhorkino... Analyzing... Flagging... Saving CSV... ✓ 18 masks, 0 flagged
[85/135] Processing Orinoco_CiudadBolivar... Analyzing... Flagging... Saving CSV... ✓ 39 masks, 12 flagged
[86/135] Processing Orinoco_Musinacio... Analyzing... Flagging... Saving CSV... ✓ 39 masks, 5 flagged
[87/135] Processing Panj_NizPyandzh... Analyzing... Flagging... Saving CSV... ✓ 31 masks, 1 flagged
[88/135] Processing Paraguay_Asuncion... Analyzing... Flagging... Saving CSV... ✓ 39 masks, 2 flagged
[89/135] Processing Paraguay_PortoMurtinho... Analyzing... Flagging... Saving CSV... ✓ 39 masks, 2 flagged
[90/135] Processing Parana_Chapeton... Analyzing... Flagging... Saving CSV... ✓ 39 masks, 1 flagged
[91/135] Processing Parana_Corrientes... Analyzing... Flagging... Saving CSV... ✓ 29 ma

## Interactive QC viewer

In [1]:
import os
import pandas as pd
import numpy as np
import rasterio
import matplotlib
matplotlib.use('TkAgg')  # Use interactive backend
import matplotlib.pyplot as plt
from matplotlib.widgets import Button
import warnings

# Enable interactive mode
plt.ion()

class MaskQCViewer:
    """
    Interactive viewer for QC'ing river masks.
    Shows flagged masks first, allows quick deletion marking and QC completion.

    The viewer reads ``QC_tracking.csv`` and the per-river statistics CSVs
    from ``qc_dir`` and the mask rasters from
    ``<base_mask_dir>/<river>/Cleaned``. Nothing is deleted: masks marked
    with [D] are listed in ``masks_to_delete.txt`` in the river's QC folder
    when the river is finished.

    Keyboard shortcuts are case-insensitive, so they also work with Shift
    or Caps Lock active.
    """

    def __init__(self, qc_dir, base_mask_dir):
        self.qc_dir = qc_dir
        self.base_mask_dir = base_mask_dir
        self.current_river_idx = 0
        self.current_mask_idx = 0
        self.masks_to_delete = []
        # Set by view_river() / show_mask(); declared here so the full
        # instance state is visible in one place.
        self.current_df = None
        self.current_river_name = None
        self.fig = None

        # Load QC tracking
        self.qc_tracking = pd.read_csv(os.path.join(qc_dir, 'QC_tracking.csv'))

        # An all-empty 'notes' column is read back as float NaN; keep it as
        # an object column so finish_river() can store text in it.
        if 'notes' not in self.qc_tracking.columns:
            self.qc_tracking['notes'] = ''
        self.qc_tracking['notes'] = self.qc_tracking['notes'].astype(object)

        # Filter to rivers that need QC (have flagged masks or not yet QC'd)
        needs_qc = (
            (self.qc_tracking['flagged_masks'] > 0) |
            self.qc_tracking['qc_complete'].eq(False)
        )
        self.rivers_to_qc = self.qc_tracking[needs_qc].copy()

        # Worst rivers first; the column is absent when the tracking file
        # contains no processed river.
        if 'pct_flagged' in self.rivers_to_qc.columns:
            self.rivers_to_qc = self.rivers_to_qc.sort_values('pct_flagged', ascending=False)

        print(f"Found {len(self.rivers_to_qc)} rivers needing QC")
        print(f"Rivers with flagged masks: {(self.rivers_to_qc['flagged_masks'] > 0).sum()}")

    def load_river_data(self, river_name):
        """Load statistics CSV for a river; returns None when it is missing."""
        csv_path = os.path.join(self.qc_dir, river_name, f"{river_name}_statistics.csv")
        if not os.path.exists(csv_path):
            return None

        df = pd.read_csv(csv_path)
        # Sort by flagged first, then by z_score magnitude
        df['abs_z'] = df['z_score'].abs()
        return df.sort_values(['flagged', 'abs_z'], ascending=[False, False])

    def view_river(self, river_idx):
        """Start QC for a specific river."""
        if river_idx >= len(self.rivers_to_qc):
            print("All rivers QC'd!")
            return

        self.current_river_idx = river_idx
        river_row = self.rivers_to_qc.iloc[river_idx]
        river_name = river_row['river_name']

        print(f"\n{'='*60}")
        print(f"River {river_idx + 1}/{len(self.rivers_to_qc)}: {river_name}")
        print(f"Total masks: {river_row['total_masks']}, Flagged: {river_row['flagged_masks']}")
        print(f"{'='*60}")

        # Load river data
        self.current_df = self.load_river_data(river_name)
        if self.current_df is None:
            print(f"No statistics found for {river_name}, skipping...")
            self.view_river(river_idx + 1)
            return

        self.current_river_name = river_name
        self.current_mask_idx = 0
        self.masks_to_delete = []

        # Show first mask
        self.show_mask()

    @staticmethod
    def _read_mask(mask_path):
        """Read band 1 of a mask raster, silencing NotGeoreferencedWarning."""
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', category=rasterio.errors.NotGeoreferencedWarning)
            with rasterio.open(mask_path) as src:
                return src.read(1)

    def show_mask(self):
        """Display current mask with statistics and controls."""
        if self.current_mask_idx >= len(self.current_df):
            self.finish_river()
            return

        row = self.current_df.iloc[self.current_mask_idx]
        mask_path = os.path.join(self.base_mask_dir, self.current_river_name,
                                 "Cleaned", row['filename'])

        # An unreadable mask must not break the session: show a placeholder
        # so it can still be marked for deletion or kept.
        read_error = None
        try:
            data = self._read_mask(mask_path)
        except Exception as e:
            data = None
            read_error = e
            print(f"  Error reading {row['filename']}: {e}")

        # Create figure
        plt.close('all')
        self.fig = plt.figure(figsize=(14, 8))
        ax = self.fig.add_subplot(1, 1, 1)

        if data is not None:
            # Normalize for display
            peak = data.max()
            data_normalized = data / peak if peak > 0 else data
            ax.imshow(data_normalized, cmap='Blues', vmin=0, vmax=1)
        else:
            ax.text(0.5, 0.5, f"Could not read mask\n{read_error}",
                    ha='center', va='center', color='red', transform=ax.transAxes)
        ax.axis('off')

        # Build title with statistics
        title = f"{self.current_river_name} - Mask {self.current_mask_idx + 1}/{len(self.current_df)}\n"
        title += f"{row['filename']}\n"
        title += f"Wet: {row['pct_wet']:.2f}% | Z-score: {row['z_score']:.2f}\n"

        if row['flagged']:
            title += "⚠ FLAGGED: "
            flags = []
            if row['flag_all_dry']: flags.append("All Dry")
            if row['flag_all_wet']: flags.append("All Wet")
            if row['flag_extreme_outlier']: flags.append("Extreme Outlier")
            if row['flag_non_binary']: flags.append("Non-Binary")
            if row['flag_tiny_wet']: flags.append("Tiny Wet Area")
            title += ", ".join(flags)

        ax.set_title(title, fontsize=11, fontweight='bold',
                    color='red' if row['flagged'] else 'black')

        # Add navigation info
        info_text = f"Press: [D]elete | [K]eep | [N]ext | [P]rev | [F]inish River | [Q]uit"
        self.fig.text(0.5, 0.02, info_text, ha='center', fontsize=10,
                bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))

        # Connect keyboard events
        self.cid = self.fig.canvas.mpl_connect('key_press_event', self.on_key)

        self.fig.tight_layout()
        plt.draw()
        plt.pause(0.001)  # Allow GUI to update

    def _redraw(self):
        """Close the current figure and display the mask at the current index."""
        plt.close(self.fig)
        self.show_mask()

    def on_key(self, event):
        """Handle keyboard shortcuts (case-insensitive)."""
        print(f"Key pressed: {event.key}")  # Debug output

        # event.key can be None, and is upper-case with Shift / Caps Lock
        key = (event.key or '').lower()

        if key == 'd':
            # Mark for deletion
            row = self.current_df.iloc[self.current_mask_idx]
            if row['filename'] not in self.masks_to_delete:
                self.masks_to_delete.append(row['filename'])
                print(f"  Marked for deletion: {row['filename']}")
            self.current_mask_idx += 1
            self._redraw()

        elif key == 'k':
            # Keep and move to next; this also undoes an earlier [D] on the
            # same mask (reachable again via [P]).
            filename = self.current_df.iloc[self.current_mask_idx]['filename']
            if filename in self.masks_to_delete:
                self.masks_to_delete.remove(filename)
            print(f"  Kept: {filename}")
            self.current_mask_idx += 1
            self._redraw()

        elif key == 'n':
            # Next mask (stops at the last one)
            self.current_mask_idx = min(self.current_mask_idx + 1, len(self.current_df) - 1)
            self._redraw()

        elif key == 'p':
            # Previous mask (stops at the first one)
            self.current_mask_idx = max(self.current_mask_idx - 1, 0)
            self._redraw()

        elif key == 'f':
            # Finish this river
            plt.close(self.fig)
            self.finish_river()

        elif key == 'q':
            # Quit: saves the tracking file only; deletion marks of the
            # river in progress are not written.
            print("\nQuitting QC session...")
            self.save_progress()
            plt.close('all')

    def finish_river(self):
        """Complete QC for current river and move to next."""
        # Save deletion list
        if self.masks_to_delete:
            delete_list_path = os.path.join(self.qc_dir, self.current_river_name,
                                           "masks_to_delete.txt")
            with open(delete_list_path, 'w') as f:
                f.write("\n".join(self.masks_to_delete))
            print(f"\n  Saved {len(self.masks_to_delete)} masks to delete")

        # Mark as QC complete and record how many masks were marked
        is_current = self.qc_tracking['river_name'] == self.current_river_name
        self.qc_tracking.loc[is_current, 'qc_complete'] = True
        self.qc_tracking.loc[is_current, 'notes'] = f"{len(self.masks_to_delete)} marked for deletion"

        # Save tracking file
        self.save_progress()

        plt.close('all')

        # Move to next river
        self.view_river(self.current_river_idx + 1)

    def save_progress(self):
        """Save QC tracking file."""
        self.qc_tracking.to_csv(os.path.join(self.qc_dir, 'QC_tracking.csv'), index=False)
        print("  Progress saved")

    def start_qc(self):
        """Start the QC process."""
        print(f"\n{'='*60}")
        print("INTERACTIVE MASK QC VIEWER")
        print(f"{'='*60}")
        print("\nKeyboard shortcuts:")
        print("  D - Mark for deletion and advance")
        print("  K - Keep mask and advance")
        print("  N - Next mask (without marking)")
        print("  P - Previous mask")
        print("  F - Finish current river and move to next")
        print("  Q - Quit (progress is saved)")
        print(f"\n{'='*60}\n")

        # Start with first river
        self.view_river(0)

if __name__ == "__main__":
    # Folder written by the screening step (QC_tracking.csv + per-river CSVs)
    qc_directory = r"E:\Dissertation\Data\RiverMapping\MaskQC"
    # Folder holding one sub-folder of masks per river
    mask_directory = r"E:\Dissertation\Data\RiverMapping\RiverMasks"

    # Build the viewer, then hand control to the interactive session
    viewer = MaskQCViewer(qc_dir=qc_directory, base_mask_dir=mask_directory)
    viewer.start_qc()

Found 135 rivers needing QC
Rivers with flagged masks: 81

INTERACTIVE MASK QC VIEWER

Keyboard shortcuts:
  D - Mark for deletion and advance
  K - Keep mask and advance
  N - Next mask (without marking)
  P - Previous mask
  F - Finish current river and move to next
  Q - Quit (progress is saved)



River 1/135: Logone_Bongor
Total masks: 37, Flagged: 21
Key pressed: shift
Key pressed: N
Key pressed: n
Key pressed: n
Key pressed: n
Key pressed: n
Key pressed: n
Key pressed: n
Key pressed: n
Key pressed: n
Key pressed: n
Key pressed: n
Key pressed: n
Key pressed: n
Key pressed: n
Key pressed: n
Key pressed: n
Key pressed: n
Key pressed: n
Key pressed: n
Key pressed: n
Key pressed: n
Key pressed: n
Key pressed: n
Key pressed: n
