In [17]:
import arcpy
import pandas as pd
import os
from time import time

arcpy.env.overwriteOutput = True

def join_and_create_feature_class(input_fc, input_field, join_fc, join_field):
    start_time = time()

    def gdb_to_dataframe(fc):
        if os.path.splitext(fc)[1].lower() == ".csv":
            df = pd.read_csv(fc)
            print(f"CSV Columns: {df.columns.tolist()}")  # Debug: print CSV columns
            return df
        else:
            fields = [field.name for field in arcpy.ListFields(fc)]
            data = [row for row in arcpy.da.SearchCursor(fc, fields)]
            df = pd.DataFrame(data, columns=fields)
            print(f"Feature Class Columns: {df.columns.tolist()}")  # Debug: print FC columns
            return df
    
    input_df = gdb_to_dataframe(input_fc)
    join_df = gdb_to_dataframe(join_fc)
    
    # Perform the join using pandas
    joined_df = pd.merge(input_df, join_df, left_on=input_field, right_on=join_field, how='left')
    
    # Filter for matched records
    matched_records = joined_df[joined_df[join_field].notna()]
    
    # Calculate the percentage of success based on join_fc
    total_records_join_fc = len(join_df)
    matched_percentage = (len(matched_records) / total_records_join_fc) * 100
    
    print(f"Matched Records (Total: {len(matched_records)} / {total_records_join_fc} - {matched_percentage:.2f}%)")
    
    if os.path.splitext(join_fc)[1].lower() == ".csv":
        csv_folder = os.path.dirname(join_fc)
        gdb_name = os.path.splitext(os.path.basename(join_fc))[0] + ".gdb"
        gdb_path = os.path.join(csv_folder, gdb_name)
        
        if not arcpy.Exists(gdb_path):
            join_fc_gdb = arcpy.management.CreateFileGDB(csv_folder, os.path.splitext(os.path.basename(join_fc))[0])[0]
        else:
            join_fc_gdb = gdb_path
    else:
        join_fc_gdb = os.path.dirname(arcpy.Describe(join_fc).catalogPath)
    
    out_fc_name = f"{os.path.basename(input_fc)}_joined_on_{os.path.basename(join_fc)}_{matched_percentage:.2f}".replace(".", "_")
    out_fc = os.path.join(join_fc_gdb, out_fc_name)
    
    # Create in-memory feature class to improve performance
    temp_fc = arcpy.management.CreateFeatureclass("in_memory", out_fc_name, "POINT", spatial_reference=arcpy.Describe(input_fc).spatialReference)
    
    # Add fields dynamically
    for field in join_df.columns:
        if field not in [join_field]:
            arcpy.management.AddField(temp_fc, field, "TEXT")
    
    # Load the geometries and attributes into a dictionary
    geometry_dict = {}
    with arcpy.da.SearchCursor(input_fc, ["OID@", "SHAPE@"] + [input_field]) as search_cursor:
        for row in search_cursor:
            oid = row[0]
            shape = row[1]
            field_value = row[2]
            geometry_dict[field_value] = shape
    
    
    # Insert the matched records
    insert_fields = ["SHAPE@"] + [col for col in matched_records.columns if col not in [join_field, 'OBJECTID']]
    with arcpy.da.InsertCursor(temp_fc, insert_fields) as insert_cursor:
        for _, matched_row in matched_records.iterrows():
            shape = geometry_dict.get(matched_row[input_field])
            if shape:
                insert_values = [shape] + [matched_row[field] for field in insert_fields[1:]]
                insert_cursor.insertRow(insert_values)
    
    arcpy.management.CopyFeatures(temp_fc, out_fc)
    arcpy.management.Delete(temp_fc)
    
    print(f"Feature class created at {out_fc}")
    print(f"Processing completed in {time() - start_time:.2f} seconds")

# Example usage:
join_and_create_feature_class(
    input_fc=r"C:\data\DM_ALL_STRUCS.gdb\DM_ALL_STRUCS",
    input_field="SAP_FLOC_ID",
    join_fc=r"P:\PROJECTS\2024Proj\2024_IGPE_ProblemStatements_RemoteLocations_Analysis\Data\subset.csv",
    join_field="Floc"
)


Feature Class Columns: ['OBJECTID', 'SHAPE', 'M3D_FID', 'M3D_SCE_STRUCTURE_NO', 'SAP_FLOC_ID', 'SAP_FLOC_OBJ_TYP', 'SAP_E_TECH_OBJECT_TYPE', 'M3D_SOURCE_STR_TYPE', 'SAP_PLANT_SECTION', 'SAP_PLANNER_GROUP', 'SAP_E_HIGH_FIRE_AREA', 'SAP_E_EDISON_COMMUNICATION', 'COMMON_ID', 'ID', 'CDS_LABEL_TEXT_BASE', 'CDS_LABEL_TEXT_STACK', 'CDS_CIRCUIT_CONCAT', 'SAP_CITY', 'CDS_EQUIPMENT_ENTITY', 'CDS_SAP_EQUIPMENT_ENTITY']
CSV Columns: ['TSD', 'Notification', 'Floc', 'FlocType', 'FLOC_Status', 'Problem_Stat', 'Notf_Equipment', 'Notf_EquipType', 'EQ_Status', 'PCD_Flag', 'Sys_Status_All', 'Usr_Status_All', 'Priority', 'Notf_Date', 'Notf_End_Date', 'YearNotfDue', 'Notf_Ref_Date', 'NotfPatrolType', 'NotfObjectPartCode', 'NotfDamageCode', 'Tier_Level', 'Latitude', 'Longitude', 'Work_Type', 'Level_1', 'Level_2', 'Level_3', 'NotfCircuitName', 'NotfCircuitNumber', 'WorkOrder', 'WO_Desc']
Matched Records (Total: 48 / 50 - 96.00%)


RuntimeError: Cannot find field 'M3D_FID'