In [16]:
import arcpy
import os

def diagnose_and_repair_geometries(input_fc, output_gdb, run_repair=True):
    """
    Diagnose and optionally repair invalid geometries from a feature class.

    Args:
        input_fc (str): Path to input feature class (e.g. .sde or .gdb feature class)
        output_gdb (str): Path to output file geodatabase (will be created if doesn't exist)
        run_repair (bool): Whether to attempt geometry repair using buffer(0)

    Returns:
        dict: Summary of diagnostics and (optional) repairs
    """
    arcpy.env.overwriteOutput = True

    # Create output GDB if it doesn't exist
    if not arcpy.Exists(output_gdb):
        arcpy.CreateFileGDB_management(os.path.dirname(output_gdb), os.path.basename(output_gdb))

    spatial_ref = arcpy.Describe(input_fc).spatialReference

    # Output feature class names
    invalid_fc = os.path.join(output_gdb, "InvalidGeometries")
    repaired_fc = os.path.join(output_gdb, "RepairedGeometries") if run_repair else None

    # Create output schemas
    arcpy.CreateFeatureclass_management(out_path=output_gdb, out_name="InvalidGeometries",
                                        geometry_type="POLYGON", spatial_reference=spatial_ref)
    if run_repair:
        arcpy.CreateFeatureclass_management(out_path=output_gdb, out_name="RepairedGeometries",
                                            geometry_type="POLYGON", spatial_reference=spatial_ref)

    invalid_count = 0
    repaired_count = 0

    fields = ["OID@", "SHAPE@", "treefarm_id", "parcelnumber"]


    with arcpy.da.SearchCursor(input_fc, fields) as search_cursor, \
         arcpy.da.InsertCursor(invalid_fc, ["SHAPE@"]) as invalid_writer, \
         (arcpy.da.InsertCursor(repaired_fc, ["SHAPE@"]) if run_repair else None) as repaired_writer:

        while True:
            try:
                row = next(search_cursor)
            except StopIteration:
                break
            except Exception as fetch_err:
                print(f"❌ Failed to read row from cursor: {fetch_err}")
                continue

            try:
                oid, shape, treefarm_id, parcelnumber = row

                if shape is None:
                    print(f"⚠️ Null shape at OID {oid}")
                    continue

                if not shape.isValid:
                    invalid_writer.insertRow((shape,))
                    invalid_count += 1

                    if run_repair:
                        try:
                            repaired_shape = shape.buffer(0)
                            if repaired_shape and repaired_shape.isValid:
                                repaired_writer.insertRow((repaired_shape,))
                                repaired_count += 1
                            else:
                                print(f"⚠️ Repaired still invalid (OID {oid})")
                        except Exception as repair_err:
                            print(f"❌ Repair failed at OID {oid}: {repair_err}")

            except Exception as row_process_err:
                print(f"❌ Failed to process geometry at row: {row_process_err}")


    summary = {
        "🔎 Total Invalid Geometries": invalid_count,
        "🛠️  Total Repaired Geometries": repaired_count if run_repair else "Not run",
        "📍 Invalid Geometries Output": invalid_fc,
        "🧽 Repaired Geometries Output": repaired_fc if run_repair else "Not run"
    }

    print("\n📋 Geometry Validation Summary:")
    for k, v in summary.items():
        print(f"  {k}: {v}")

    return summary

result = diagnose_and_repair_geometries(
    input_fc = r"C:\Mac\Home\Documents\ArcGIS\Projects\ATFS_GeomErrors\SQLServer-100-atfs_gdb(dbeaver).sde\atfs_gdb.dbo.TreeFarm",
    output_gdb=r"C:\temp\TreeFarm_Validation.gdb",
    run_repair=True
)



❌ Failed to read row from cursor: The number of points is less than required for feature [atfs_gdb.dbo.TreeFarm][STATE_ID = 0]
❌ Failed to read row from cursor: The number of points is less than required for feature [atfs_gdb.dbo.TreeFarm][STATE_ID = 0]
❌ Failed to read row from cursor: The number of points is less than required for feature [atfs_gdb.dbo.TreeFarm][STATE_ID = 0]
❌ Failed to read row from cursor: The number of points is less than required for feature [atfs_gdb.dbo.TreeFarm][STATE_ID = 0]
❌ Failed to read row from cursor: The number of points is less than required for feature [atfs_gdb.dbo.TreeFarm][STATE_ID = 0]
❌ Failed to read row from cursor: The number of points is less than required for feature [atfs_gdb.dbo.TreeFarm][STATE_ID = 0]
❌ Failed to read row from cursor: The number of points is less than required for feature [atfs_gdb.dbo.TreeFarm][STATE_ID = 0]
❌ Failed to read row from cursor: The number of points is less than required for feature [atfs_gdb.dbo.TreeFarm

KeyboardInterrupt: 

In [None]:
import arcpy
import os
import csv

unreadable_count = 0

def diagnose_and_repair_geometries(input_fc, output_gdb, run_repair=True):
    arcpy.env.overwriteOutput = True

    # Ensure output GDB exists
    if not arcpy.Exists(output_gdb):
        arcpy.CreateFileGDB_management(os.path.dirname(output_gdb), os.path.basename(output_gdb))

    spatial_ref = arcpy.Describe(input_fc).spatialReference

    invalid_fc = os.path.join(output_gdb, "InvalidGeometries")
    repaired_fc = os.path.join(output_gdb, "RepairedGeometries") if run_repair else None
    log_csv = os.path.join(output_gdb, "GeometryDiagnostics.csv")

    arcpy.CreateFeatureclass_management(out_path=output_gdb, out_name="InvalidGeometries",
                                        geometry_type="POLYGON", spatial_reference=spatial_ref)
    if run_repair:
        arcpy.CreateFeatureclass_management(out_path=output_gdb, out_name="RepairedGeometries",
                                            geometry_type="POLYGON", spatial_reference=spatial_ref)

    invalid_count = 0
    repaired_count = 0
    unreadable_count = 0

    fields = ["OID@", "SHAPE@", "treefarm_id", "parcelnumber"]



    with open(log_csv, mode="w", newline="") as csvfile:
        logwriter = csv.writer(csvfile)
        logwriter.writerow(["OID", "TreeFarmID", "ParcelNumber", "Status", "Details"])

        with arcpy.da.SearchCursor(input_fc, fields) as search_cursor, \
             arcpy.da.InsertCursor(invalid_fc, ["SHAPE@"]) as invalid_writer, \
             (arcpy.da.InsertCursor(repaired_fc, ["SHAPE@"]) if run_repair else None) as repaired_writer:

            while True:
                try:
                    row = next(search_cursor)
                except StopIteration:
                    break
                except Exception as fetch_err:
                    unreadable_count += 1
                    # ❌ Log as unreadable with minimal info
                    logwriter.writerow(["?", "?", "?", "Unreadable", str(fetch_err)])
                    continue

                try:
                    oid, shape, treefarm_id, parcelnumber = row

                    if shape is None:
                        logwriter.writerow([oid, treefarm_id, parcelnumber, "Null Geometry", "Shape is None"])
                        continue

                    if not shape.isValid:
                        invalid_writer.insertRow((shape,))
                        invalid_count += 1
                        logwriter.writerow([oid, treefarm_id, parcelnumber, "Invalid", "Failed isValid()"])

                        if run_repair:
                            try:
                                repaired_shape = shape.buffer(0)
                                if repaired_shape and repaired_shape.isValid:
                                    repaired_writer.insertRow((repaired_shape,))
                                    repaired_count += 1
                                    logwriter.writerow([oid, treefarm_id, parcelnumber, "Repaired", "Buffer(0) succeeded"])
                                else:
                                    logwriter.writerow([oid, treefarm_id, parcelnumber, "Repair Failed", "Still invalid"])
                            except Exception as repair_err:
                                logwriter.writerow([oid, treefarm_id, parcelnumber, "Repair Error", str(repair_err)])
                    else:
                        # Log valid if you want full audit
                        # logwriter.writerow([oid, treefarm_id, parcelnumber, "Valid", ""])
                        pass

                except Exception as row_process_err:
                    unreadable_count += 1
                    logwriter.writerow(["?", "?", "?", "Processing Error", str(row_process_err)])
                    continue





    summary = {
        "🔎 Invalid Geometries": invalid_count,
        "🧽 Repaired Geometries": repaired_count if run_repair else "Not run",
        "🚫 Unreadable Features": unreadable_count,
        "📍 Invalid Features Output": invalid_fc,
        "🚫 Unreadable Features Skipped": unreadable_count,
        "🛠️  Repaired Features Output": repaired_fc if run_repair else "Not run",
        "📝 Diagnostics CSV": log_csv
    }

    print("\n📋 Geometry Validation Summary:")
    for k, v in summary.items():
        print(f"  {k}: {v}")

    return summary

result = diagnose_and_repair_geometries(
    input_fc = r"C:\Mac\Home\Documents\ArcGIS\Projects\ATFS_GeomErrors\SQLServer-100-atfs_gdb(dbeaver).sde\atfs_gdb.dbo.TreeFarm",
    output_gdb=r"C:\temp\TreeFarm_Validation.gdb",
    run_repair=True
)


In [None]:
import arcpy
import os
import csv
import time
from datetime import datetime

def diagnose_and_repair_geometries(input_fc, output_gdb, run_repair=True):
    """
    Diagnose and repair geometries with comprehensive logging and progress tracking.
    """
    print(f"🚀 Starting geometry validation at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    start_time = time.time()

    arcpy.env.overwriteOutput = True

    # Get total record count for progress tracking
    try:
        total_records = int(arcpy.GetCount_management(input_fc).getOutput(0))
        print(f"📊 Total records to process: {total_records:,}")
    except Exception as e:
        print(f"⚠️ Could not get record count: {e}")
        total_records = None

    # Ensure output GDB exists
    if not arcpy.Exists(output_gdb):
        print(f"📁 Creating output geodatabase: {output_gdb}")
        arcpy.CreateFileGDB_management(os.path.dirname(output_gdb), os.path.basename(output_gdb))

    try:
        spatial_ref = arcpy.Describe(input_fc).spatialReference
        print(f"🗺️ Spatial Reference: {spatial_ref.name}")
    except Exception as e:
        print(f"❌ Failed to get spatial reference: {e}")
        return {"error": f"Failed to get spatial reference: {e}"}

    # Output paths
    invalid_fc = os.path.join(output_gdb, "InvalidGeometries")
    repaired_fc = os.path.join(output_gdb, "RepairedGeometries") if run_repair else None
    log_csv = os.path.join(os.path.dirname(output_gdb), "GeometryDiagnostics.csv")  # Save CSV outside GDB

    # Create output feature classes
    print("🏗️ Creating output feature classes...")
    try:
        arcpy.CreateFeatureclass_management(out_path=output_gdb, out_name="InvalidGeometries",
                                            geometry_type="POLYGON", spatial_reference=spatial_ref)

        # Add the data fields to invalid geometries FC
        arcpy.AddField_management(invalid_fc, "ORIG_OID", "LONG")
        arcpy.AddField_management(invalid_fc, "treefarm_id", "TEXT", field_length=50)
        arcpy.AddField_management(invalid_fc, "parcelnumber", "TEXT", field_length=50)

        if run_repair:
            arcpy.CreateFeatureclass_management(out_path=output_gdb, out_name="RepairedGeometries",
                                                geometry_type="POLYGON", spatial_reference=spatial_ref)
            # Add the data fields to repaired geometries FC
            arcpy.AddField_management(repaired_fc, "ORIG_OID", "LONG")
            arcpy.AddField_management(repaired_fc, "treefarm_id", "TEXT", field_length=50)
            arcpy.AddField_management(repaired_fc, "parcelnumber", "TEXT", field_length=50)

    except Exception as e:
        print(f"❌ Failed to create output feature classes: {e}")
        return {"error": f"Failed to create output feature classes: {e}"}

    # Initialize counters
    invalid_count = 0
    repaired_count = 0
    unreadable_count = 0
    null_geometry_count = 0
    valid_count = 0
    processed_count = 0

    fields = ["OID@", "SHAPE@", "treefarm_id", "parcelnumber"]

    print("📝 Creating diagnostics CSV and starting processing...")

    # Progress tracking variables
    last_report_time = time.time()
    report_interval = 1000  # Report every 1000 records

    try:
        with open(log_csv, mode="w", newline="") as csvfile:
            logwriter = csv.writer(csvfile)
            logwriter.writerow(["OID", "TreeFarmID", "ParcelNumber", "Status", "Details", "Timestamp"])

            # Use regular for loop instead of while True for better performance
            with arcpy.da.SearchCursor(input_fc, fields) as search_cursor:

                # Create insert cursors with data fields
                invalid_insert_fields = ["SHAPE@", "ORIG_OID", "treefarm_id", "parcelnumber"]
                repaired_insert_fields = ["SHAPE@", "ORIG_OID", "treefarm_id", "parcelnumber"]

                with arcpy.da.InsertCursor(invalid_fc, invalid_insert_fields) as invalid_writer, \
                     (arcpy.da.InsertCursor(repaired_fc, repaired_insert_fields) if run_repair else None) as repaired_writer:

                    for row in search_cursor:
                        try:
                            processed_count += 1

                            # Progress reporting
                            if processed_count % report_interval == 0 or (time.time() - last_report_time) >= 30:
                                elapsed = time.time() - start_time
                                if total_records:
                                    progress = (processed_count / total_records) * 100
                                    estimated_total = elapsed * total_records / processed_count
                                    remaining = estimated_total - elapsed
                                    print(f"⏳ Progress: {processed_count:,}/{total_records:,} ({progress:.1f}%) - "
                                          f"Invalid: {invalid_count:,} - Valid: {valid_count:,} - "
                                          f"Elapsed: {elapsed/60:.1f}min - Est. remaining: {remaining/60:.1f}min")
                                else:
                                    print(f"⏳ Processed: {processed_count:,} - Invalid: {invalid_count:,} - "
                                          f"Valid: {valid_count:,} - Elapsed: {elapsed/60:.1f}min")
                                last_report_time = time.time()

                            # Process the row
                            oid, shape, treefarm_id, parcelnumber = row
                            current_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

                            # Handle null geometries
                            if shape is None:
                                null_geometry_count += 1
                                logwriter.writerow([oid, treefarm_id, parcelnumber, "Null Geometry", "Shape is None", current_time])
                                continue

                            # Check geometry validity
                            if not shape.isValid:
                                invalid_count += 1

                                # Insert into invalid geometries FC with all data
                                invalid_writer.insertRow([shape, oid, treefarm_id, parcelnumber])
                                logwriter.writerow([oid, treefarm_id, parcelnumber, "Invalid", "Failed isValid()", current_time])

                                if run_repair:
                                    try:
                                        repaired_shape = shape.buffer(0)
                                        if repaired_shape and repaired_shape.isValid:
                                            repaired_count += 1
                                            # Insert into repaired geometries FC with all data
                                            repaired_writer.insertRow([repaired_shape, oid, treefarm_id, parcelnumber])
                                            logwriter.writerow([oid, treefarm_id, parcelnumber, "Repaired", "Buffer(0) succeeded", current_time])
                                        else:
                                            logwriter.writerow([oid, treefarm_id, parcelnumber, "Repair Failed", "Still invalid after buffer(0)", current_time])
                                    except Exception as repair_err:
                                        logwriter.writerow([oid, treefarm_id, parcelnumber, "Repair Error", str(repair_err), current_time])
                            else:
                                valid_count += 1
                                # Optionally log valid geometries (uncomment if needed)
                                # logwriter.writerow([oid, treefarm_id, parcelnumber, "Valid", "", current_time])

                        except Exception as row_process_err:
                            unreadable_count += 1
                            current_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
                            logwriter.writerow(["?", "?", "?", "Processing Error", str(row_process_err), current_time])
                            print(f"❌ Error processing row {processed_count}: {row_process_err}")
                            continue

    except Exception as e:
        print(f"❌ Critical error during processing: {e}")
        return {"error": f"Critical error during processing: {e}"}

    # Final summary
    end_time = time.time()
    total_time = end_time - start_time

    print(f"\n✅ Processing completed in {total_time/60:.1f} minutes")

    # Get actual record counts from output feature classes
    try:
        invalid_fc_count = int(arcpy.GetCount_management(invalid_fc).getOutput(0))
        repaired_fc_count = int(arcpy.GetCount_management(repaired_fc).getOutput(0)) if run_repair else 0
    except:
        invalid_fc_count = "Unknown"
        repaired_fc_count = "Unknown"

    summary = {
        "📊 Total Records Processed": processed_count,
        "✅ Valid Geometries": valid_count,
        "🔎 Invalid Geometries": invalid_count,
        "🧽 Repaired Geometries": repaired_count if run_repair else "Not run",
        "⚠️ Null Geometries": null_geometry_count,
        "🚫 Unreadable Features": unreadable_count,
        "⏱️ Processing Time (minutes)": round(total_time/60, 1),
        "📍 Invalid Features Output": invalid_fc,
        "📊 Invalid FC Record Count": invalid_fc_count,
        "🛠️ Repaired Features Output": repaired_fc if run_repair else "Not run",
        "📊 Repaired FC Record Count": repaired_fc_count if run_repair else "Not run",
        "📝 Diagnostics CSV": log_csv
    }

    print("\n📋 Geometry Validation Summary:")
    for k, v in summary.items():
        print(f"  {k}: {v}")

    # Performance statistics
    if total_time > 0:
        records_per_second = processed_count / total_time
        print(f"\n📈 Performance: {records_per_second:.1f} records/second")

    return summary

# Main execution
if __name__ == "__main__":
    try:
        result = diagnose_and_repair_geometries(
            input_fc=r"C:\Mac\Home\Documents\ArcGIS\Projects\ATFS_GeomErrors\SQLServer-100-atfs_gdb(dbeaver).sde\atfs_gdb.dbo.TreeFarm",
            output_gdb=r"C:\temp\TreeFarm_Validation.gdb",
            run_repair=True
        )

        print(f"\n🎉 Script completed successfully!")

    except Exception as main_err:
        print(f"❌ Script failed with error: {main_err}")
        import traceback
        traceback.print_exc()