In [1]:
import pandas as pd

# Lookup table: one row per substation, with descriptive attributes.
substations = pd.DataFrame(
    [
        (1, 'ALDER', 'North', 100),
        (2, 'BIRCH', 'East', 200),
        (3, 'CEDAR', 'West', 300),
    ],
    columns=['SubstationID', 'SubstationName', 'Location', 'Capacity'],
)

# Record table: several records may reference the same substation name.
records = pd.DataFrame(
    [
        (101, 'ALDER', 10),
        (102, 'ALDER', 20),
        (103, 'BIRCH', 30),
        (104, 'CEDAR', 40),
        (105, 'CEDAR', 50),
        (106, 'CEDAR', 60),
    ],
    columns=['RecordID', 'SubstationName', 'Value'],
)

print("Substations DataFrame:")
print(substations)
print("\nRecords DataFrame:")
print(records)

# Left join: keep every record and attach the attributes of the substation
# whose SubstationName matches.
joined_df = records.merge(substations, how='left', on='SubstationName')

print("\nJoined DataFrame:")
print(joined_df)


Substations DataFrame:
   SubstationID SubstationName Location  Capacity
0             1          ALDER    North       100
1             2          BIRCH     East       200
2             3          CEDAR     West       300

Records DataFrame:
   RecordID SubstationName  Value
0       101          ALDER     10
1       102          ALDER     20
2       103          BIRCH     30
3       104          CEDAR     40
4       105          CEDAR     50
5       106          CEDAR     60

Joined DataFrame:
   RecordID SubstationName  Value  SubstationID Location  Capacity
0       101          ALDER     10             1    North       100
1       102          ALDER     20             1    North       100
2       103          BIRCH     30             2     East       200
3       104          CEDAR     40             3     West       300
4       105          CEDAR     50             3     West       300
5       106          CEDAR     60             3     West       300


In [16]:
import pandas as pd
import arcpy

def feature_class_to_dataframe(feature_class, fields):
    """
    Convert an ArcGIS feature class to a pandas DataFrame.

    Every row returned by an ``arcpy.da.SearchCursor`` over ``fields`` is
    read into memory and becomes one DataFrame row; the DataFrame columns
    are labelled with ``fields`` in the same order.

    Parameters:
    feature_class (str): Path to the feature class.
    fields (list): List of fields to include in the DataFrame.

    Returns:
    pd.DataFrame: The resulting DataFrame.
    """
    # Use the cursor as a context manager so it is released as soon as the
    # rows are read, instead of lingering until garbage collection.
    with arcpy.da.SearchCursor(feature_class, fields) as cursor:
        data = list(cursor)
    return pd.DataFrame(data, columns=fields)

def join_dataframes(in_df, join_df, in_field, join_field, how='left', validate=None):
    """
    Join the attributes of ``in_df`` onto the rows of ``join_df``.

    ``join_df`` is the left side of the merge and ``in_df`` the right side.
    With the default ``how='left'`` every row of ``join_df`` is kept, and
    rows with no matching key in ``in_df`` get missing values in the
    ``in_df`` columns. Column names present in both frames (other than a
    shared key name) receive pandas' default suffixes: ``_x`` for
    ``join_df`` and ``_y`` for ``in_df``.

    Parameters:
    in_df (pd.DataFrame): The lookup DataFrame (e.g., substations).
    join_df (pd.DataFrame): The DataFrame whose rows are kept (e.g., records).
    in_field (str): The field name in ``in_df`` to join on.
    join_field (str): The field name in ``join_df`` to join on.
    how (str): Merge type passed to ``pd.merge``; defaults to ``'left'``.
    validate (str or None): Optional cardinality check passed to
        ``pd.merge``. Use ``'many_to_one'`` to fail when ``in_field`` is not
        unique in ``in_df``. The default ``None`` performs no check.

    Returns:
    pd.DataFrame: The resulting joined DataFrame.

    Raises:
    pd.errors.MergeError: If ``validate`` is given and the keys violate it.
    """
    # Without validation, duplicate keys in in_df silently multiply the rows
    # of join_df; callers that need a true many-to-one join should opt in.
    return pd.merge(
        join_df,
        in_df,
        left_on=join_field,
        right_on=in_field,
        how=how,
        validate=validate,
    )

# Source feature classes inside the project geodatabase.
substations_fc = r"P:\PROJECTS\Special_Projects\Ready_Grid_Jun_Wen\Working\Ready_Grid_Jun_Wen.gdb\TCGMDMART_CDS_SUB_V_SUBSTATION_DM"
records_fc = r"P:\PROJECTS\Special_Projects\Ready_Grid_Jun_Wen\Working\Ready_Grid_Jun_Wen.gdb\LGPR_CEVT"

# Collect the name of every field that arcpy.ListFields reports for each
# feature class, then echo both lists for inspection.
substation_fields = [field.name for field in arcpy.ListFields(substations_fc)]
record_fields = [field.name for field in arcpy.ListFields(records_fc)]
print(substation_fields)
print(record_fields)

# Load both feature classes into DataFrames with all of their fields.
substations_df = feature_class_to_dataframe(substations_fc, substation_fields)
records_df = feature_class_to_dataframe(records_fc, record_fields)

# Attach substation attributes to each record by matching the records'
# 'SUB' column against the substations' 'GESW_NAME' column.
result_df = join_dataframes(
    in_df=substations_df,
    join_df=records_df,
    in_field='GESW_NAME',
    join_field='SUB',
)

# Write the joined table to a CSV file (no feature class is created here).
result_fc = r"P:\PROJECTS\Special_Projects\Ready_Grid_Jun_Wen\Working\Result.csv"
result_df.to_csv(result_fc, index=False)
print("Result saved to:", result_fc)



['OBJECTID', 'Shape', 'M3D_FID', 'M3D_SCE_FLOC', 'GESW_A_BANK_NUM', 'GESW_GESW_ID', 'SAP_E_IN_SERVICE_DATE', 'GESW_ISO_', 'GESW_NAME', 'GESW_OWNER', 'GESW_SUBSTATION_NO', 'GESW_SUBST_DESC', 'GESW_SUBST_ID', 'GESW_SUBST_STAT_CD', 'GESW_SWITCH_CNTR_CD', 'GESW_SWITCH_CNTR_NAM', 'SAP_CITY', 'SAP_DESCRIPTION', 'SAP_DISTRICT', 'SAP_DISTRICT_NAME', 'SAP_E_SUB_TYPE', 'SAP_E_SWITCHING_CENTER', 'SAP_E_SYSTEM_VOLTAGE', 'SAP_E_TECH_OBJECT_TYPE', 'SAP_FLOC_DESCRIPTION', 'SAP_FLOC_ID', 'SAP_MANUFACTURER', 'SAP_MANUFSERIALNO', 'SAP_POSTAL_CODE', 'SAP_MAIN_WORKCTR', 'SAP_STARTUPDATE', 'SAP_STREET', 'COMMON_ID', 'SAP_EQUIP_OBJ_TYPE', 'SAP_FLOC_OBJ_TYP', 'SAP_PLANT_SECTION', 'CDS_CIRCUIT_CONCAT', 'M3D_ID_SUBSTATION_TYPE_VAL', 'long', 'lat']
['OBJECTID', 'Application_Intake', 'App_Intake__Year_Month_', 'Project_Name', 'Confidence', 'Load_Sector', 'Status', 'System', 'Region', 'Substation', 'Circuit', 'Study_Start_Date', 'Year_Month', 'Study_Complete_Date', 'Study_Due_Date', 'Past_Due', 'Study_Status', 'R