In [1]:
### import
import arcpy
import os
import shutil
import pandas as pd

In [2]:
### input parameter
# Root output folder: every geodatabase and the census CSV produced by this
# notebook are written underneath it. The hard-coded path is the notebook
# testing value; the commented-out line reads it from the first tool parameter.
folder = rf"C:\Users\xianl\Desktop\running" # for ipynb testing
#folder=arcpy.GetParameterAsText(0)
# WARNING: an existing folder at this path is deleted recursively and then
# recreated empty, so each run starts from a clean directory.
if os.path.exists(folder):
    shutil.rmtree(folder)
os.makedirs(folder)

### environment
# Make the fresh folder the default workspace and allow tools to overwrite
# existing outputs.
arcpy.env.workspace = folder
arcpy.env.overwriteOutput = True

### create file geodatabase
# One file geodatabase per processing stage. These module-level names are
# referenced by the functions defined later (e.g. running_gdb.getOutput(0)),
# so this cell must run before any of them is called.
running_gdb = arcpy.management.CreateFileGDB(folder, "running.gdb")
centroids_gdb = arcpy.management.CreateFileGDB(folder, "centroids.gdb")
boundaries_gdb = arcpy.management.CreateFileGDB(folder, "boundaries.gdb")
spaces_gdb = arcpy.management.CreateFileGDB(folder, "spaces.gdb")
result_gdb = arcpy.management.CreateFileGDB(folder, "result.gdb")
sa_gdb = arcpy.management.CreateFileGDB(folder, "sa.gdb")

In [28]:
### Define Functions
def prepare_population_centroids_data():
    """Download, standardise and merge the population centroid layers.

    Three sources are copied into ``running_gdb``: LSOA 2021 (England and
    Wales), Data Zone 2011 (Scotland) and Super Data Zone 2021 (Northern
    Ireland, read as a table and converted to points from its X/Y columns in
    spatial reference 29902).  Each layer is reduced to a text ``CODE`` (the
    source area code) and a text ``TYPE`` ("LSOA", "DZ" or "SDZ"), then the
    three are merged into ``centroids`` inside ``centroids_gdb``.

    Relies on the module-level ``running_gdb`` and ``centroids_gdb`` results.
    Returns nothing; prints a confirmation message when finished.
    """
    keep_fields = ("CODE", "TYPE", "OBJECTID", "Shape")

    def _standardise_fields(dataset, code_expression, type_expression):
        """Add and fill CODE/TYPE, then drop every other deletable field."""
        arcpy.AddField_management(dataset, "CODE", "TEXT")
        arcpy.CalculateField_management(dataset, "CODE", code_expression, "PYTHON3")
        arcpy.AddField_management(dataset, "TYPE", "TEXT")
        arcpy.CalculateField_management(dataset, "TYPE", type_expression, "PYTHON3")
        # Required fields (object id, geometry, ...) cannot be deleted, so they
        # are never submitted even if they are named differently from keep_fields.
        surplus = [
            field.name
            for field in arcpy.ListFields(dataset)
            if field.name not in keep_fields and not field.required
        ]
        # One DeleteField call for all surplus fields instead of one per field.
        if surplus:
            arcpy.DeleteField_management(dataset, surplus)

    running_gdb_path = running_gdb.getOutput(0)

    # Read LSOA2021_centroids (England and Wales)
    LSOA2021_centroids_url = "https://services3.arcgis.com/xDMb8Us7jzsHQ7bn/arcgis/rest/services/LSOA2021_centroids/FeatureServer/0"
    LSOA2021_centroids = arcpy.conversion.FeatureClassToFeatureClass(
        LSOA2021_centroids_url, running_gdb, "LSOA2021_centroids"
    )
    _standardise_fields(LSOA2021_centroids, "!LSOA21CD!", '"LSOA"')

    # Read DZ2011_centroids (Scotland)
    DZ2011_centroids_url = "https://services3.arcgis.com/xDMb8Us7jzsHQ7bn/arcgis/rest/services/DZ2011_centroids/FeatureServer/0"
    DZ2011_centroids = arcpy.conversion.FeatureClassToFeatureClass(
        DZ2011_centroids_url, running_gdb, "DZ2011_centroids"
    )
    _standardise_fields(DZ2011_centroids, "!DataZone!", '"DZ"')

    # Read SDZ2021_centroids (Northern Ireland): imported as a plain table and
    # turned into points from its X / Y columns.
    SDZ2021_centroids_url = "https://services3.arcgis.com/xDMb8Us7jzsHQ7bn/arcgis/rest/services/SDZ2021_centroids/FeatureServer/0"
    SDZ2021_centroids_raw = arcpy.TableToTable_conversion(
        SDZ2021_centroids_url, running_gdb, "SDZ2021_centroids_raw_table"
    )
    SDZ2021_centroids = arcpy.management.XYTableToPoint(
        SDZ2021_centroids_raw,
        os.path.join(running_gdb_path, "SDZ2021_centroids"),
        "X",
        "Y",
        coordinate_system=arcpy.SpatialReference(29902),
    )
    _standardise_fields(SDZ2021_centroids, "!SDZ2021_code!", '"SDZ"')

    # Merge Population Centroids
    centroids_to_merge = [
        os.path.join(running_gdb_path, layer_name)
        for layer_name in ("LSOA2021_centroids", "DZ2011_centroids", "SDZ2021_centroids")
    ]
    arcpy.management.Merge(
        centroids_to_merge, os.path.join(centroids_gdb.getOutput(0), "centroids")
    )
    # Print
    print("Population Centroids Data Prepared")
    
def prepare_boundaries_data():
    """Download, standardise and merge the statistical-area boundary layers.

    Three polygon layers are copied into ``running_gdb``: LSOA 2021 (England
    and Wales), Data Zone 2011 (Scotland) and Super Data Zone 2021 (Northern
    Ireland).  Each is reduced to text fields ``CODE``, ``NAME`` and ``TYPE``
    ("LSOA", "DZ" or "SDZ") and the three are merged into ``boundaries``
    inside ``boundaries_gdb``.

    Relies on the module-level ``running_gdb`` and ``boundaries_gdb`` results.
    Returns nothing; prints a confirmation message when finished.
    """
    keep_fields = (
        "CODE", "TYPE", "OBJECTID", "Shape", "NAME", "Shape_Length", "Shape_Area",
    )

    def _standardise_fields(dataset, field_expressions):
        """Add and fill each (field, expression) pair, then drop all other deletable fields."""
        for field_name, expression in field_expressions:
            arcpy.AddField_management(dataset, field_name, "TEXT")
            arcpy.CalculateField_management(dataset, field_name, expression, "PYTHON3")
        # Required fields (object id, geometry, ...) cannot be deleted, so they
        # are never submitted even if they are named differently from keep_fields.
        surplus = [
            field.name
            for field in arcpy.ListFields(dataset)
            if field.name not in keep_fields and not field.required
        ]
        # One DeleteField call for all surplus fields instead of one per field.
        if surplus:
            arcpy.DeleteField_management(dataset, surplus)

    running_gdb_path = running_gdb.getOutput(0)

    # Read LSOA2021_boundaries (England and Wales)
    LSOA2021_boundaries_url = "https://services3.arcgis.com/xDMb8Us7jzsHQ7bn/arcgis/rest/services/LSOA2021_boundaries/FeatureServer/0"
    LSOA2021_boundaries = arcpy.conversion.FeatureClassToFeatureClass(
        LSOA2021_boundaries_url, running_gdb, "LSOA2021_boundaries"
    )
    _standardise_fields(
        LSOA2021_boundaries,
        [("CODE", "!LSOA21CD!"), ("NAME", "!LSOA21NM!"), ("TYPE", '"LSOA"')],
    )

    # Read DZ2011_boundaries (Scotland)
    DZ2011_boundaries_url = "https://services3.arcgis.com/xDMb8Us7jzsHQ7bn/arcgis/rest/services/DZ2011_boundaries/FeatureServer/0"
    DZ2011_boundaries = arcpy.conversion.FeatureClassToFeatureClass(
        DZ2011_boundaries_url, running_gdb, "DZ2011_boundaries"
    )
    # The source "Name" field is renamed before "NAME" is added and filled from
    # it -- presumably because the two names would clash in the geodatabase;
    # "old_name" is then removed with the other surplus fields.
    arcpy.AlterField_management(DZ2011_boundaries, "Name", "old_name", "old_name")
    _standardise_fields(
        DZ2011_boundaries,
        [("CODE", "!DataZone!"), ("NAME", "!old_name!"), ("TYPE", '"DZ"')],
    )

    # Read SDZ2021_boundaries (Northern Ireland)
    SDZ2021_boundaries_url = "https://services3.arcgis.com/xDMb8Us7jzsHQ7bn/arcgis/rest/services/SDZ2021_boundaries/FeatureServer/0"
    SDZ2021_boundaries = arcpy.conversion.FeatureClassToFeatureClass(
        SDZ2021_boundaries_url, running_gdb, "SDZ2021_boundaries"
    )
    _standardise_fields(
        SDZ2021_boundaries,
        [("CODE", "!SDZ2021_cd!"), ("NAME", "!SDZ2021_nm!"), ("TYPE", '"SDZ"')],
    )

    # Merge Boundaries
    boundaries_to_merge = [
        os.path.join(running_gdb_path, layer_name)
        for layer_name in ("LSOA2021_boundaries", "DZ2011_boundaries", "SDZ2021_boundaries")
    ]
    arcpy.management.Merge(
        boundaries_to_merge, os.path.join(boundaries_gdb.getOutput(0), "boundaries")
    )
    # Print
    print("Boundaries Data Prepared")

def prepare_ww_space_data():
    """Copy the Warm Welcome spaces layer into ``spaces_gdb`` and tidy its fields.

    The feature service is exported as ``spaces``; nine text fields are added
    and filled from existing source fields, after which every field not in the
    retained list is deleted.  Returns nothing; prints a confirmation message.
    """
    spaces_url = "https://services3.arcgis.com/xDMb8Us7jzsHQ7bn/arcgis/rest/services/WW_Spaces_for_map_data/FeatureServer/0"
    spaces = arcpy.conversion.FeatureClassToFeatureClass(spaces_url, spaces_gdb, "spaces")

    # (new text field, expression it is calculated from), processed in order.
    # NOTE(review): several new names differ from their source field only by
    # letter case (e.g. "Monday" from "monday"). If the geodatabase treats field
    # names case-insensitively, AddField may not create a separate field and the
    # clean-up loop below would then remove the source data -- confirm against
    # the exported schema.
    derived_fields = (
        ("Monday", "!monday!"),
        ("Tuesday", "!tuesday!"),
        ("Wednesday", "!wednesday!"),
        ("Thursday", "!thursday!"),
        ("Friday", "!friday!"),
        ("Saturday", "!saturday!"),
        ("Sunday", "!sunday!"),
        ("Space_Open", "!Space_Open_!"),
        ("Space_From", "!Space_from_2022!"),
    )
    for new_field, source_expression in derived_fields:
        arcpy.AddField_management(spaces, new_field, "TEXT")
        arcpy.CalculateField_management(spaces, new_field, source_expression, "PYTHON3")

    # Everything outside this set is removed, one field at a time.
    retained = {
        "OBJECTID", "Shape", "Name", "Space_Type", "Description", "Region",
        "Postcode", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday",
        "Saturday", "Sunday", "Space_Open", "Hide", "Space_From", "Open_Space_Page",
    }
    for existing in arcpy.ListFields(spaces):
        if existing.name in retained:
            continue
        arcpy.DeleteField_management(spaces, existing.name)
    # Print
    print("Warm Welcome Spaces List Prepared")
    
def calculate_census_data():
    """Download the census tables, derive per-area indicators and save them.

    Fifteen CSV items are read (household composition, ethnic group,
    disability, children in household and population density, each for LSOA
    2021, SDZ 2021 and DZ 2011).  For every theme a per-area ratio (or, for
    population density, the cleaned value) is computed, the three geographies
    are stacked, and all indicators are outer-merged on ``CODE``.  The result
    is written to ``census.csv`` inside the module-level ``folder`` with the
    columns CODE, lone_parent, TYPE, pakistani, bangladeshi, disability,
    children, population_density.

    Notes:
        * ``TYPE`` comes from the lone-parent table only, so a code missing
          from that table has an empty TYPE after the outer merge.
        * No Bangladeshi ratio is computed for SDZ areas; they are NaN.

    Returns nothing; prints a confirmation message when finished.
    """
    # Read Census Data
    LSOA2021_household_composition_raw = pd.read_csv("https://uclondon.maps.arcgis.com/sharing/rest/content/items/559506e1399f4d2bb90722369d3feb49/data")
    SDZ2021_household_composition_raw = pd.read_csv("https://uclondon.maps.arcgis.com/sharing/rest/content/items/bfb6e7b747cc4431a08271da41bcaed7/data")
    DZ2011_household_composition_raw = pd.read_csv("https://uclondon.maps.arcgis.com/sharing/rest/content/items/3680a5405a58459d8530e8127d608fd3/data")
    LSOA2021_ethnic_group_raw = pd.read_csv("https://uclondon.maps.arcgis.com/sharing/rest/content/items/983853858aa2495594d0b4920f5e5d51/data")
    SDZ2021_ethnic_group_raw = pd.read_csv("https://uclondon.maps.arcgis.com/sharing/rest/content/items/2a103898869c4eb889ab2650c2608cc8/data")
    DZ2011_ethnic_group_raw = pd.read_csv("https://uclondon.maps.arcgis.com/sharing/rest/content/items/91210ca62c4349caacd36afec57af35e/data")
    LSOA2021_disability_raw = pd.read_csv("https://uclondon.maps.arcgis.com/sharing/rest/content/items/7dbd6768e023401888b72a27d5bbcbb4/data")
    SDZ2021_disability_raw = pd.read_csv("https://uclondon.maps.arcgis.com/sharing/rest/content/items/187862478c2e4f82ac56bfd4fbd76268/data")
    DZ2011_disability_raw = pd.read_csv("https://uclondon.maps.arcgis.com/sharing/rest/content/items/ab3ebb587a014e058da63acd1de18f3f/data")
    LSOA2021_children_in_household_raw = pd.read_csv("https://uclondon.maps.arcgis.com/sharing/rest/content/items/802531e1c41b4b02a3d0345291de832a/data")
    SDZ2021_children_in_household_raw = pd.read_csv("https://uclondon.maps.arcgis.com/sharing/rest/content/items/fbbdf647ca8b41a2b56fc3905f9eb2a0/data")
    DZ2011_children_in_household_raw = pd.read_csv("https://uclondon.maps.arcgis.com/sharing/rest/content/items/98f295d623b34a7e927f429ed0b722ba/data")
    LSOA2021_population_density_raw = pd.read_csv("https://uclondon.maps.arcgis.com/sharing/rest/content/items/03743a238669499a9cbf78840bcaebc6/data")
    SDZ2021_population_density_raw = pd.read_csv("https://uclondon.maps.arcgis.com/sharing/rest/content/items/f9d9df321c004725aea865103bb39e48/data")
    DZ2011_population_density_raw = pd.read_csv("https://uclondon.maps.arcgis.com/sharing/rest/content/items/68612a500d6b4a6e8961d93500460db8/data")

    # Calculate Ratio
    def calculate_target_ratio(
        df, target, areas_code_col, target_col, observation_col, area_type
    ):
        """Per-area share of ``observation_col`` in rows whose label contains ``target``.

        Labels in ``target_col`` are compared case-insensitively against the
        (lower-case) ``target`` as a plain substring.  Returns a frame with
        columns CODE, RATIO, TYPE; an area without any matching row gets NaN.
        """
        # Lower-case a separate Series so the caller's frame is left untouched.
        labels = df[target_col].str.lower()
        # Literal match (not a regex); rows with a missing label never match
        # instead of breaking the boolean mask.
        is_target = labels.str.contains(target, regex=False, na=False)
        target_totals = df[is_target].groupby(areas_code_col)[observation_col].sum()
        area_totals = df.groupby(areas_code_col)[observation_col].sum()
        result_df = (target_totals / area_totals).reset_index(name="RATIO")
        result_df = result_df.rename(columns={areas_code_col: "CODE"})
        result_df["TYPE"] = area_type
        return result_df

    def calculate_target_ratio_DZ(df, target, areas_code_col, total_col, area_type):
        """Row-wise ratio: sum of the columns whose name contains ``target`` over ``total_col``.

        Returns a frame with columns CODE, RATIO, TYPE that keeps ``df``'s index.
        """
        columns_of_interest = [col for col in df.columns if target in col.lower()]
        result_df = pd.DataFrame()
        result_df["CODE"] = df[areas_code_col]
        result_df["RATIO"] = df[columns_of_interest].sum(axis=1) / df[total_col]
        result_df["TYPE"] = area_type
        return result_df

    # Clean Population Density
    def clean_population_density(df, target, areas_code_col, area_type):
        """Extract the single density column whose name contains ``target``.

        Returns a frame with columns CODE, TYPE, population_density.
        Raises ValueError unless exactly one column matches.
        """
        columns_of_interest = [col for col in df.columns if target in col.lower()]
        if len(columns_of_interest) != 1:
            raise ValueError(
                f"Expected exactly one column containing {target!r}, "
                f"found {columns_of_interest!r}"
            )
        density = df[columns_of_interest[0]]
        if area_type == "LSOA":
            # NOTE(review): only LSOA values are divided by 100 -- presumably to
            # bring them to the same unit as the other two tables; confirm.
            density = density / 100
        result_df = pd.DataFrame()
        result_df["CODE"] = df[areas_code_col]
        result_df["TYPE"] = area_type
        result_df["population_density"] = density
        return result_df

    # --- LSOA 2021 (long-format tables: one row per area and category) ---
    lsoa_code = "Lower layer Super Output Areas Code"
    lone_parent_LSOA = calculate_target_ratio(
        LSOA2021_household_composition_raw, "lone parent", lsoa_code,
        "Household composition (15 categories)", "Observation", "LSOA",
    )
    bangladeshi_LSOA = calculate_target_ratio(
        LSOA2021_ethnic_group_raw, "bangladeshi", lsoa_code,
        "Ethnic group (20 categories)", "Observation", "LSOA",
    )
    pakistani_LSOA = calculate_target_ratio(
        LSOA2021_ethnic_group_raw, "pakistani", lsoa_code,
        "Ethnic group (20 categories)", "Observation", "LSOA",
    )
    disability_LSOA = calculate_target_ratio(
        LSOA2021_disability_raw, "day-to-day activities limited a", lsoa_code,
        "Disability (5 categories)", "Observation", "LSOA",
    )
    children_LSOA = calculate_target_ratio(
        LSOA2021_children_in_household_raw, "three or more children", lsoa_code,
        "Adults and children in household (11 categories)", "Observation", "LSOA",
    )
    population_density_LSOA = clean_population_density(
        LSOA2021_population_density_raw, "observation", lsoa_code, "LSOA",
    )

    # --- SDZ 2021 (long-format tables) ---
    sdz_code = "Census 2021 Super Data Zone Code"
    lone_parent_SDZ = calculate_target_ratio(
        SDZ2021_household_composition_raw, "lone parent", sdz_code,
        "Household Composition Label", "Count", "SDZ",
    )
    pakistani_SDZ = calculate_target_ratio(
        SDZ2021_ethnic_group_raw, "pakistani", sdz_code,
        "Ethnic Group Label", "Count", "SDZ",
    )
    disability_SDZ = calculate_target_ratio(
        SDZ2021_disability_raw, "day-to-day activities limited a", sdz_code,
        "Health Problem or Disability (Long-term) Label", "Count", "SDZ",
    )
    children_SDZ = calculate_target_ratio(
        SDZ2021_children_in_household_raw, "three or more children", sdz_code,
        "Adults and Children Label", "Count", "SDZ",
    )
    population_density_SDZ = clean_population_density(
        SDZ2021_population_density_raw, "population density", "Geography code", "SDZ",
    )

    # --- DZ 2011 (wide tables: one row per area, one column per category) ---
    dz_code = "Datazone 2011"
    children_DZ = calculate_target_ratio_DZ(
        DZ2011_children_in_household_raw, "three or more dependent children",
        dz_code, "All families in households", "DZ",
    )
    disability_DZ = calculate_target_ratio_DZ(
        DZ2011_disability_raw, "day-to-day activities limited a",
        dz_code, "All people", "DZ",
    )
    lone_parent_DZ = calculate_target_ratio_DZ(
        DZ2011_household_composition_raw, "lone parent",
        dz_code, "All households", "DZ",
    )
    pakistani_DZ = calculate_target_ratio_DZ(
        DZ2011_ethnic_group_raw, "pakistani", dz_code, "All people", "DZ"
    )
    bangladeshi_DZ = calculate_target_ratio_DZ(
        DZ2011_ethnic_group_raw, "bangladeshi", dz_code, "All people", "DZ"
    )
    population_density_DZ = clean_population_density(
        DZ2011_population_density_raw, "density", dz_code, "DZ",
    )

    # Merge Census Data
    # (output column, per-geography frames) in the order the columns appear in
    # the CSV.  The first table keeps its TYPE column; it is dropped from the
    # others so the merge on CODE introduces no duplicate columns.
    indicator_tables = [
        ("lone_parent", [lone_parent_LSOA, lone_parent_SDZ, lone_parent_DZ]),
        ("pakistani", [pakistani_LSOA, pakistani_SDZ, pakistani_DZ]),
        ("bangladeshi", [bangladeshi_LSOA, bangladeshi_DZ]),
        ("disability", [disability_LSOA, disability_SDZ, disability_DZ]),
        ("children", [children_LSOA, children_SDZ, children_DZ]),
        (
            "population_density",
            [population_density_LSOA, population_density_SDZ, population_density_DZ],
        ),
    ]
    census = None
    for column_name, frames in indicator_tables:
        stacked = (
            pd.concat(frames, axis=0)
            .reset_index(drop=True)
            .rename(columns={"RATIO": column_name})
        )
        if census is None:
            census = stacked
        else:
            census = pd.merge(
                census,
                stacked.drop(columns=["TYPE"], errors="ignore"),
                on="CODE",
                how="outer",
            )
    census = census.reset_index(drop=True).dropna(how="all")
    census.to_csv(rf"{folder}\census.csv", index=False)
    print("Census Data Prepared")
    
def split_by_count(input_fc, count=999):
    """Split a feature class into numbered chunks of at most ``count`` features.

    When ``input_fc`` holds more than ``count`` features it is replaced by
    sibling feature classes named ``<name>_1``, ``<name>_2``, ... in the same
    location, each containing up to ``count`` features in object-id cursor
    order, and the original is deleted.  When it holds ``count`` features or
    fewer nothing is changed.

    Args:
        input_fc: Path of the feature class to split.
        count: Maximum number of features per chunk; must be at least 1.

    Raises:
        ValueError: If ``count`` is smaller than 1.
    """
    if count < 1:
        # A non-positive chunk size would build an empty "IN ()" clause, and a
        # negative one could delete the input without writing any chunk.
        raise ValueError(f"count must be a positive integer, got {count!r}")

    # Read the object ids through the OID@ token so the actual name of the
    # object-id field does not matter; the cursor is closed deterministically.
    with arcpy.da.SearchCursor(input_fc, ["OID@"]) as cursor:
        objectids = [row[0] for row in cursor]
    total_count = len(objectids)
    # Nothing to split
    if total_count <= count:
        return

    oid_field = arcpy.Describe(input_fc).OIDFieldName
    # Chunks are written next to the input, named after it without extension
    folder_path, fc_name = os.path.split(input_fc)
    fc_name_noext = os.path.splitext(fc_name)[0]

    arcpy.management.MakeFeatureLayer(input_fc, "temp_layer")
    try:
        for chunk_num, start_index in enumerate(range(0, total_count, count), start=1):
            current_ids = objectids[start_index:start_index + count]
            sql_query = "{} IN ({})".format(oid_field, ",".join(map(str, current_ids)))
            # Select the subset of features and export it
            arcpy.management.SelectLayerByAttribute(
                "temp_layer", "NEW_SELECTION", sql_query
            )
            output_fc = os.path.join(folder_path, f"{fc_name_noext}_{chunk_num}")
            arcpy.management.CopyFeatures("temp_layer", output_fc)
    finally:
        # Always release the temporary layer, even when a step above failed
        arcpy.management.Delete("temp_layer")
    # Only reached when every chunk was written: remove the original
    arcpy.management.Delete(input_fc)

def FeatureClassGenerator(workspace, wild_card, feature_type, recursive):
    """Yield ``(full_path, name)`` for the feature classes of a workspace.

    Feature classes at the workspace root are always listed.  When
    ``recursive`` is true, those inside each dataset returned by
    ``arcpy.ListDatasets()`` are listed as well.

    Args:
        workspace: Workspace (e.g. a file geodatabase) to scan.
        wild_card: Name filter passed to ``arcpy.ListFeatureClasses``.
        feature_type: Feature-type filter passed to ``arcpy.ListFeatureClasses``.
        recursive: Boolean, or one of the keywords "RECURSIVE" /
            "NOT_RECURSIVE" (case-insensitive).

    The workspace environment is overridden with ``arcpy.EnvManager`` for as
    long as the generator is being consumed.
    """
    # A keyword such as "NOT_RECURSIVE" is a non-empty (truthy) string, so it
    # has to be interpreted explicitly rather than tested for truthiness.
    if isinstance(recursive, str):
        recursive = recursive.strip().upper() not in ("", "NOT_RECURSIVE", "FALSE")

    with arcpy.EnvManager(workspace=workspace):
        # "" stands for the workspace root
        dataset_list = [""]
        if recursive:
            dataset_list.extend(arcpy.ListDatasets() or [])
        # This loop must run whether or not recursion was requested; otherwise
        # a non-recursive call yields nothing.
        for dataset in dataset_list:
            featureclasses = arcpy.ListFeatureClasses(wild_card, feature_type, dataset)
            for fc in featureclasses or []:
                yield os.path.join(workspace, dataset, fc), fc

def get_latest_feature_dataset_in_gdb(gdb_path):
    """Return the last dataset name listed in ``gdb_path``, or None if there is none.

    Side effect: ``arcpy.env.workspace`` is set to ``gdb_path`` and left that
    way.  "Latest" means the final entry of ``arcpy.ListDatasets()``.
    """
    arcpy.env.workspace = gdb_path
    listed = arcpy.ListDatasets()
    return listed[-1] if listed else None

def sa_analysis(_folder_):
    """Solve a walking-time service area for every feature class in spaces.gdb.

    For each feature class found in ``<folder>/spaces.gdb`` a service-area
    layer (travel mode "Walking Time", cutoff 30) is built against the ArcGIS
    Online network service, the features are loaded as facilities, the layer
    is solved, and the resulting ``SAPolygons`` feature class is copied to
    ``<folder>/sa.gdb/sa_<name>``.  An empty input produces an empty polygon
    feature class with ``FromBreak`` / ``ToBreak`` fields instead.

    Args:
        _folder_: Root folder that contains ``spaces.gdb`` and ``sa.gdb``; a
            scratch ``working.gdb`` is created in it.  When empty or None the
            module-level ``folder`` is used.

    Raises:
        RuntimeError: If no dataset is found in ``working.gdb`` after solving.

    Returns nothing; prints a confirmation message when finished.
    """
    # To allow overwriting outputs change overwriteOutput option to True.
    arcpy.env.overwriteOutput = True
    Network_Data_Source = "https://www.arcgis.com/"

    # Honour the argument instead of silently replacing it with the global.
    base_folder = _folder_ or folder
    _spaces_gdb = os.path.join(base_folder, "spaces.gdb")
    _sa_gdb = os.path.join(base_folder, "sa.gdb")
    _working_gdb = arcpy.management.CreateFileGDB(
        out_folder_path=base_folder, out_name="working"
    )[0]

    def _export_sa_polygons(feature_dataset_name, output_name):
        """Copy each SAPolygons feature class of the dataset to sa.gdb/<output_name>."""
        arcpy.env.workspace = os.path.join(_working_gdb, feature_dataset_name)
        for candidate in arcpy.ListFeatureClasses() or []:
            if "SAPolygons" in candidate:
                arcpy.CopyFeatures_management(
                    candidate, os.path.join(_sa_gdb, output_name)
                )

    for point, names in FeatureClassGenerator(_spaces_gdb, "", "", "NOT_RECURSIVE"):
        output_name = f"sa_{names}"

        # Check for an empty input first, so no network layer (and no leftover
        # dataset in working.gdb) is created for it.
        feature_count = int(arcpy.GetCount_management(point).getOutput(0))
        if feature_count == 0:
            # Create an empty polygon feature class named sa_{names} with two float fields
            arcpy.CreateFeatureclass_management(
                out_path=_sa_gdb, out_name=output_name, geometry_type="POLYGON"
            )
            output_fc_path = os.path.join(_sa_gdb, output_name)
            arcpy.AddField_management(output_fc_path, "FromBreak", "FLOAT")
            arcpy.AddField_management(output_fc_path, "ToBreak", "FLOAT")
            continue

        # The helpers below change the workspace, so reset it on every pass.
        arcpy.env.workspace = _working_gdb
        # Process: Make Service Area Analysis Layer (Make Service Area Analysis Layer)
        Service_Area_layer = arcpy.na.MakeServiceAreaAnalysisLayer(
            network_data_source=Network_Data_Source,
            travel_mode="Walking Time",
            cutoffs=[30],
            time_of_day="2023/9/1",
            polygon_detail="STANDARD",
            geometry_at_overlaps="OVERLAP",
            polygon_trim_distance="50 Meters",
            geometry_at_cutoffs="DISKS",
        )[0]
        # Process: Add Facilities (Add Locations)
        Service_Area_layer_with_names = arcpy.na.AddLocations(
            in_network_analysis_layer=Service_Area_layer,
            sub_layer="Facilities",
            in_table=point,
        )[0]
        # Process: Solve (Solve)
        arcpy.na.Solve(in_network_analysis_layer=Service_Area_layer_with_names)

        # The solved layer's feature classes are looked up in the last dataset
        # listed in working.gdb; export its polygons, then remove the dataset.
        latest_dataset = get_latest_feature_dataset_in_gdb(_working_gdb)
        if latest_dataset is None:
            raise RuntimeError(
                f"No service-area dataset found in {_working_gdb} after solving {names}"
            )
        _export_sa_polygons(latest_dataset, output_name)
        arcpy.Delete_management(os.path.join(_working_gdb, latest_dataset))
    print("Service Area Analysis Completed")
            
def get_common_name(fcs):
    """Return the shared character prefix of ``fcs`` without trailing underscores.

    Returns None when fewer than two names are supplied.  The prefix is
    character-based (``os.path.commonprefix``) and may be an empty string.
    """
    if len(fcs) < 2:
        return None
    shared_prefix = os.path.commonprefix(fcs)
    return shared_prefix.rstrip("_")

def group_feature_classes_by_field(gdb):
    """Group the feature classes of ``gdb`` by the second underscore-separated token.

    Only names with at least three tokens (two or more underscores) are
    grouped; all others are ignored.  Returns a dict mapping the token to the
    list of feature-class names, in listing order.

    Side effect: ``arcpy.env.workspace`` is set to ``gdb`` and left that way.
    """
    arcpy.env.workspace = gdb
    grouped_fcs = {}
    for fc_name in arcpy.ListFeatureClasses():
        tokens = fc_name.split("_")
        if len(tokens) <= 2:
            continue
        grouped_fcs.setdefault(tokens[1], []).append(fc_name)
    return grouped_fcs

def merge_and_rename_feature_classes_by_group(gdb):
    """Merge each group of chunked feature classes in ``gdb`` back into one.

    Groups come from ``group_feature_classes_by_field``.  A group is merged
    only when ``get_common_name`` returns a non-empty name (so it needs at
    least two members): the members are merged into ``<common>_merged``, that
    output is renamed by dropping its last underscore-separated token, and
    the member feature classes are deleted.

    Names are relative, so this relies on the workspace set while grouping.
    """
    for fcs in group_feature_classes_by_field(gdb).values():
        common_name = get_common_name(fcs)
        if not common_name:
            continue
        # Merge into a temporary name first
        temp_output_fc = f"{common_name}_merged"
        arcpy.Merge_management(fcs, temp_output_fc)
        # Strip everything from the last underscore onwards for the final name
        final_name = temp_output_fc.rsplit("_", 1)[0]
        arcpy.Rename_management(temp_output_fc, final_name)
        # The merged copy replaces its sources
        for source_fc in fcs:
            arcpy.Delete_management(source_fc)

def result_calculation(boundaries, centroids, sa_spaces):
    access_count = fr"{folder}\running.gdb\access_count"
    access_count_boundaries = fr"{folder}\running.gdb\access_count_boundaries"
    # Spatial Join
    arcpy.analysis.SpatialJoin(
        target_features=centroids,
        join_features=sa_spaces,
        out_feature_class=access_count,
        search_radius="50 Meters",
    )
    # Field mapping
    field_mappings = arcpy.FieldMappings()
    # Field map for boundaries CODE
    field_map_boundaries_CODE = arcpy.FieldMap()
    field_map_boundaries_CODE.addInputField(boundaries, "CODE")
    field_mappings.addFieldMap(field_map_boundaries_CODE)
    # Field map for boundaries NAME
    field_map_boundaries_NAME = arcpy.FieldMap()
    field_map_boundaries_NAME.addInputField(boundaries, "Name")
    field_mappings.addFieldMap(field_map_boundaries_NAME)
    # Field map for boundaries NAME
    field_map_boundaries_TYPE = arcpy.FieldMap()
    field_map_boundaries_TYPE.addInputField(boundaries, "TYPE")
    field_mappings.addFieldMap(field_map_boundaries_TYPE)
    # Field map for Count
    join_count_field_map = arcpy.FieldMap()
    join_count_field_map.addInputField(access_count, "Join_Count")
    join_count_field = join_count_field_map.outputField
    join_count_field.name = "COUNT"
    join_count_field.aliasName = "COUNT"
    join_count_field_map.outputField = join_count_field
    field_mappings.addFieldMap(join_count_field_map)
    # Spatial Join
    arcpy.analysis.SpatialJoin(
        target_features=boundaries,
        join_features=access_count,
        out_feature_class=access_count_boundaries,
        field_mapping=field_mappings,
    )
    # Delete Field
    field_mappings_output = arcpy.FieldMappings()
    field_map_CODE = arcpy.FieldMap()
    field_map_CODE.addInputField(access_count_boundaries, "CODE")
    field_mappings_output.addFieldMap(field_map_CODE)
    field_map_NAME = arcpy.FieldMap()
    field_map_NAME.addInputField(access_count_boundaries, "NAME")
    field_mappings_output.addFieldMap(field_map_NAME)
    field_map_TYPE = arcpy.FieldMap()
    field_map_TYPE.addInputField(access_count_boundaries, "TYPE")
    field_mappings_output.addFieldMap(field_map_TYPE)
    field_map_count = arcpy.FieldMap()
    field_map_count.addInputField(access_count_boundaries, "COUNT")
    field_mappings_output.addFieldMap(field_map_count)
    arcpy.conversion.FeatureClassToFeatureClass(
        in_features=access_count_boundaries,
        out_path=rf"{folder}\running.gdb",
        out_name="accessibility",
        field_mapping=field_mappings_output,
    )
    ### Join Table and Calculate Warm Bank Desert
    census = arcpy.conversion.TableToTable(
        rf"{folder}\census.csv", rf"{folder}\running.gdb", "census"
    )
    temp_field_name = "CODE_TYPE"
    arcpy.AddField_management(rf"{folder}\running.gdb\accessibility", "TYPE_CODE", "TEXT")
    arcpy.AddField_management(rf"{folder}\running.gdb\census", "TYPE_CODE", "TEXT")
    expression = "!TYPE! + '_' + !CODE!"
    arcpy.CalculateField_management(
        rf"{folder}\running.gdb\accessibility", "TYPE_CODE", expression, "PYTHON3"
    )
    arcpy.CalculateField_management(
        rf"{folder}\running.gdb\census", "TYPE_CODE", expression, "PYTHON3"
    )

    arcpy.management.JoinField(
        rf"{folder}\running.gdb\accessibility",
        "TYPE_CODE",
        rf"{folder}\running.gdb\census",
        "TYPE_CODE",
    )
    arcpy.DeleteField_management(rf"{folder}\running.gdb\accessibility", "TYPE_CODE")
    arcpy.DeleteField_management(rf"{folder}\running.gdb\census", "TYPE_CODE")
    fields_to_keep = [
        "CODE",
        "NAME",
        "TYPE",
        "lone_parent",
        "pakistani",
        "bangladeshi",
        "disability",
        "children",
        "population_density"
    ]
    field_to_rename = "COUNT"
    new_field_name = "accessibility"
    field_mappings = arcpy.FieldMappings()
    for field_name in fields_to_keep:
        field_map = arcpy.FieldMap()
        field_map.addInputField(rf"{folder}\running.gdb\accessibility", field_name)
        field_mappings.addFieldMap(field_map)
    field_map = arcpy.FieldMap()
    field_map.addInputField(rf"{folder}\running.gdb\accessibility", field_to_rename)
    output_field = field_map.outputField
    output_field.name = new_field_name
    output_field.aliasName = new_field_name
    field_map.outputField = output_field
    field_mappings.addFieldMap(field_map)
    arcpy.conversion.FeatureClassToFeatureClass(
        in_features=rf"{folder}\running.gdb\accessibility",
        out_path=rf"{folder}\result.gdb",
        out_name="result",
        field_mapping=field_mappings,
    )

    columns = [
        "accessibility",
        "lone_parent",
        "pakistani",
        "bangladeshi",
        "disability",
        "children"
    ]
    wbd_fields = [
        "WBD_lone_parent",
        "WBD_pakistani",
        "WBD_bangladeshi",
        "WBD_disability",
        "WBD_children"
    ]

    for field in wbd_fields:
        if len(arcpy.ListFields(rf"{folder}\result.gdb\result", field)) == 0:
            arcpy.AddField_management(rf"{folder}\result.gdb\result", field, "SHORT")

    mean_values = {}
    for col in columns:
        with arcpy.da.SearchCursor(rf"{folder}\result.gdb\result", col) as cursor:
            values = [row[0] for row in cursor if row[0] is not None]
            mean_values[col] = sum(values) / len(values)
            
    fields_to_update = [
            "lone_parent",
            "WBD_lone_parent",
            "pakistani",
            "WBD_pakistani",
            "bangladeshi",
            "WBD_bangladeshi",
            "disability",
            "WBD_disability",
            "children",
            "WBD_children"
        ]            
    
    def generate_expression(field, wbd_field, mean_value, mean_accessibility):
        expression = f"""Reclass(!{field}!, !{wbd_field}!, {mean_value}, {mean_accessibility})"""
        return expression
    
    code_block = """
def UpdateWBD(value, accessibility, mean_value, mean_accessibility):
    if value is None or accessibility is None:
        return 0
    if value > mean_value and accessibility < mean_accessibility:
        return 1
    else:
        return 0
"""
    
    mean_accessibility = mean_values["accessibility"]

    # lone_parent
    expression_lone_parent = f"UpdateWBD(!lone_parent!, !accessibility!, {mean_values['lone_parent']}, {mean_accessibility})"
    arcpy.CalculateField_management(
        rf"{folder}\result.gdb\result", 
        "WBD_lone_parent", 
        expression_lone_parent, 
        "PYTHON3", 
        code_block
    )

    # pakistani
    expression_pakistani = f"UpdateWBD(!pakistani!, !accessibility!, {mean_values['pakistani']}, {mean_accessibility})"
    arcpy.CalculateField_management(
        rf"{folder}\result.gdb\result", 
        "WBD_pakistani", 
        expression_pakistani, 
        "PYTHON3", 
        code_block
    )

    # bangladeshi
    expression_bangladeshi = f"UpdateWBD(!bangladeshi!, !accessibility!, {mean_values['bangladeshi']}, {mean_accessibility})"
    arcpy.CalculateField_management(
        rf"{folder}\result.gdb\result", 
        "WBD_bangladeshi", 
        expression_bangladeshi, 
        "PYTHON3", 
        code_block
    )

    # disability
    expression_disability = f"UpdateWBD(!disability!, !accessibility!, {mean_values['disability']}, {mean_accessibility})"
    arcpy.CalculateField_management(
        rf"{folder}\result.gdb\result", 
        "WBD_disability", 
        expression_disability, 
        "PYTHON3", 
        code_block
    )

    # children
    expression_children = f"UpdateWBD(!children!, !accessibility!, {mean_values['children']}, {mean_accessibility})"
    arcpy.CalculateField_management(
        rf"{folder}\result.gdb\result", 
        "WBD_children", 
        expression_children, 
        "PYTHON3", 
        code_block
    )
    
    field_mappings = arcpy.FieldMappings()
    final_fields_to_keep = {
        "CODE": "Area_Code",
        "NAME": "Area_Name",
        "TYPE": "Area_Type",
        "accessibility": "Accessibility",
        "population_density": "Population_Density",
        "lone_parent": "Lone_Parent",
        "WBD_lone_parent": "WBD_Lone_Parent",
        "pakistani": "Pakistani",
        "WBD_pakistani": "WBD_Pakistani",
        "bangladeshi": "Bangladeshi",
        "WBD_bangladeshi": "WBD_Bangladeshi",
        "disability": "Disability",
        "WBD_disability": "WBD_Disability",
        "children": "Three_Children",
        "WBD_children": "WBD_Three_Children"
    }
    
    for old_field, new_field in final_fields_to_keep.items():
        arcpy.AlterField_management(rf"{folder}\result.gdb\result", old_field, new_field, new_field)

    field_mappings = arcpy.FieldMappings()
    for new_field in final_fields_to_keep.values():
        field_map = arcpy.FieldMap()
        field_map.addInputField(rf"{folder}\result.gdb\result", new_field)
        field_mappings.addFieldMap(field_map)

    arcpy.FeatureClassToFeatureClass_conversion(rf"{folder}\result.gdb\result", 
                                                rf"{folder}\result.gdb", 
                                                "Warm_Bank_Deserts", 
                                                field_mapping=field_mappings)

    print("Result Calculation Completed")

In [4]:
### Prepare Datasets
# WKT definition of the British National Grid projected coordinate system,
# passed below as the output coordinate system environment setting.
british_national_grid_wkt = 'PROJCS["British_National_Grid",GEOGCS["GCS_OSGB_1936",DATUM["D_OSGB_1936",SPHEROID["Airy_1830",6377563.396,299.3249646]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Transverse_Mercator"],PARAMETER["False_Easting",400000.0],PARAMETER["False_Northing",-100000.0],PARAMETER["Central_Meridian",-2.0],PARAMETER["Scale_Factor",0.9996012717],PARAMETER["Latitude_Of_Origin",49.0],UNIT["Meter",1.0]]'

# The environment overrides only apply inside this `with` block; both the
# workspace and the scratch workspace point at the run folder.
with arcpy.EnvManager(
    outputCoordinateSystem=british_national_grid_wkt,
    scratchWorkspace=folder,
    workspace=folder,
):
    # Input data preparation (no arguments; each step is defined earlier in
    # the notebook).
    prepare_population_centroids_data()
    prepare_boundaries_data()
    prepare_ww_space_data()
    calculate_census_data()

    # Service-area pipeline: split the spaces feature class, run the analysis
    # on the run folder, then merge/rename what ended up in sa.gdb.
    # NOTE(review): the steps presumably depend on each other in this order - confirm
    # before reordering.
    split_by_count(rf"{folder}\spaces.gdb\spaces")
    sa_analysis(folder)
    merge_and_rename_feature_classes_by_group(rf"{folder}\sa.gdb")
    print("Prepared!")

Population Centroids Data Prepared
Boundaries Data Prepared
Warm Welcome Spaces List Prepared
Census Data Prepared
Service Area Analysis Completed
Prepared!


In [29]:
### Calculation Results
# WKT definition of the British National Grid projected coordinate system,
# passed below as the output coordinate system environment setting.
british_national_grid_wkt = 'PROJCS["British_National_Grid",GEOGCS["GCS_OSGB_1936",DATUM["D_OSGB_1936",SPHEROID["Airy_1830",6377563.396,299.3249646]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Transverse_Mercator"],PARAMETER["False_Easting",400000.0],PARAMETER["False_Northing",-100000.0],PARAMETER["Central_Meridian",-2.0],PARAMETER["Scale_Factor",0.9996012717],PARAMETER["Latitude_Of_Origin",49.0],UNIT["Meter",1.0]]'

# The environment overrides only apply inside this `with` block; both the
# workspace and the scratch workspace point at the run folder.
with arcpy.EnvManager(
    outputCoordinateSystem=british_national_grid_wkt,
    scratchWorkspace=folder,
    workspace=folder,
):
    # Inputs are the feature classes located in the per-topic geodatabases
    # under the run folder.
    result_calculation(
        boundaries=rf"{folder}\boundaries.gdb\boundaries",
        centroids=rf"{folder}\centroids.gdb\centroids",
        sa_spaces=rf"{folder}\sa.gdb\sa_spaces",
    )
    print("Success!")

Result Calculation Completed
Success!
