## clean_data_cross_at_least_three_bases

This notebook is designed to:

- Count how many bases each feature class is present in.
- Copy the feature classes that occur in at least 3 bases into a new folder called "03_clean_data_cross_at_least_three_bases".
- Upload the data to AGOL.
- Calculate the percentage of data coverage for each base.

In [1]:
import arcpy
import os

In [2]:
# Folder that holds the source geodatabases processed by this notebook.
source_folder = r"C:\Users\ma000551\Desktop\AFRICOM\AFRICOM_CLEAN_DATA\02_raw_no_duplicates_or_empty_feature_layers"

# Geodatabase names to process. CIP_311_AB201.gdb and
# Niamey_SDSFIE_311_Master_9Jan2023.gdb were disabled (commented out) in the
# earlier version of this list; the last two names are the entries that were
# added under its "update" marker.
_gdb_names = (
    "Chebelley_40_MASTER.gdb",
    "SDSFIE_05Apr2023.gdb",
    "AB201_JAN2024.gdb",
    "Niamey_SDSFIE_311_Master_18MAY2023.gdb",
)

# Full path of every geodatabase, built with an explicit Windows separator.
clean_gdbs_path = [source_folder + "\\" + gdb_file for gdb_file in _gdb_names]

#### create "03_clean_data_cross_at_least_three_bases" folder

In [4]:
# Parent folder under which the output folder is created.
basefolder = r"C:\Users\ma000551\Desktop\AFRICOM\AFRICOM_CLEAN_DATA"

# Name of the folder to be created
output_folder_name = "03_clean_data_cross_at_least_three_bases"

# Path to the new folder
new_folder_path = os.path.join(basefolder, output_folder_name)

# Create the folder (and any missing parents). exist_ok=True makes the cell safe
# to re-run without a separate existence check, and raises if the path is
# already taken by a regular file instead of silently continuing.
os.makedirs(new_folder_path, exist_ok=True)

#### Print how many bases each feature class is present in

In [12]:
# Lower-cased feature class name -> set of geodatabase names that contain it
fc_gdbs = {}

for gdb in clean_gdbs_path:
    # Set the workspace for the current gdb so the List* calls target it
    arcpy.env.workspace = gdb
    gdb_name = os.path.basename(gdb)

    # Collect feature classes stored at the geodatabase root ...
    fcs = list(arcpy.ListFeatureClasses() or [])

    # ... and those stored inside feature datasets. Previously, geodatabases
    # that had datasets were listed but never counted.
    for fd in arcpy.ListDatasets("*", "Feature") or []:
        fcs.extend(arcpy.ListFeatureClasses("*", "ALL", fd) or [])

    for fc in fcs:
        # Names are compared case-insensitively across geodatabases
        fc_gdbs.setdefault(fc.lower(), set()).add(gdb_name)

# Lower-cased feature class name -> number of geodatabases that contain it.
# Derived from fc_gdbs so the count always equals the number of distinct gdbs.
fc_counts = {fc_lower: len(gdb_names) for fc_lower, gdb_names in fc_gdbs.items()}

In [13]:
# Display the count recorded for each lower-cased feature class name.
fc_counts

{'installation_p': 2, 'installation_a': 4, 'site_p': 1, 'site_a': 1, 'zones_a_12_2022': 1, 'emergencymedicalpoint_p': 1, 'wall_l': 4, 'wall_a': 2, 'controlmonument_p': 2, 'esqdarc_a': 2, 'impactarea_a': 1, 'militarylandingzone_a': 1, 'militaryobservationposition_p': 1, 'spentmunitionsstorage_p': 1, 'spentmunitionsstorage_a': 2, 'pavementslab_a': 4, 'airaccidentpotentialzone_a': 1, 'airfieldimaginarysurface_a': 4, 'designatedtobaccouse_p': 1, 'futureprojectsite_a': 2, 'landmanagementzone_a': 2, 'landuse_a': 2, 'standoffdistancearc_a': 1, 'building_a': 4, 'structure_a': 4, 'tower_p': 2, 'tower_a': 3, 'accesscontrol_l': 4, 'accesscontrol_p': 4, 'fence_l': 4, 'restrictedarea_a': 1, 'vehicleparking_a': 2, 'pavementmarking_a': 2, 'pavementmarking_l': 1, 'airfield_a': 2, 'roadway_l': 1, 'roadway_a': 2, 'roadcenterline_l': 3, 'trafficcontrolpostsign_p': 2, 'airfield_l': 2, 'caccesspoint_p': 1, 'cconduit_l': 1, 'cfibercable_l': 1, 'eairfieldlight_p': 2, 'eexteriorlight_p': 4, 'egenerator_p': 4,

In [5]:
# Display, for each feature class name, the set of geodatabase names recorded for it.
# NOTE(review): the saved output of this cell shows mixed-case keys, whereas the
# counting cell lower-cases names -- the output looks stale; re-run to refresh.
fc_gdbs

{'Installation_P': {'Chebelley_40_MASTER.gdb'}, 'Installation_A': {'CIP_311_AB201.gdb', 'Chebelley_40_MASTER.gdb', 'SDSFIE_05Apr2023.gdb', 'Niamey_SDSFIE_311_Master_9Jan2023.gdb'}, 'Site_P': {'Chebelley_40_MASTER.gdb'}, 'Site_A': {'Chebelley_40_MASTER.gdb'}, 'Zones_A_12_2022': {'Chebelley_40_MASTER.gdb'}, 'EmergencyMedicalPoint_P': {'Chebelley_40_MASTER.gdb'}, 'Wall_L': {'CIP_311_AB201.gdb', 'Chebelley_40_MASTER.gdb', 'SDSFIE_05Apr2023.gdb', 'Niamey_SDSFIE_311_Master_9Jan2023.gdb'}, 'Wall_A': {'CIP_311_AB201.gdb', 'Chebelley_40_MASTER.gdb'}, 'ControlMonument_P': {'Chebelley_40_MASTER.gdb'}, 'EsqdArc_A': {'Chebelley_40_MASTER.gdb'}, 'ImpactArea_A': {'Chebelley_40_MASTER.gdb'}, 'MilitaryLandingZone_A': {'Chebelley_40_MASTER.gdb'}, 'MilitaryObservationPosition_P': {'Chebelley_40_MASTER.gdb'}, 'SpentMunitionsStorage_P': {'Chebelley_40_MASTER.gdb'}, 'SpentMunitionsStorage_A': {'Chebelley_40_MASTER.gdb'}, 'PavementSlab_A': {'CIP_311_AB201.gdb', 'Chebelley_40_MASTER.gdb', 'SDSFIE_05Apr2023.gd

#### Copy the feature classes that occur in at least 3 bases into "03_clean_data_cross_at_least_three_bases".

In [17]:
# loop through the counts to find feature classes occurring at least 3 times
for fc_name, count in fc_counts.items():
    
    if count >= 3:
        # Loop through the set of gdb names for the current feature class
        for gdb_name in fc_gdbs[fc_name]:
            # Create a folder with the source gdb name if it doesn't exist
            gdb_output_folder = os.path.join(new_folder_path, gdb_name)
            if not arcpy.Exists(gdb_output_folder):
                arcpy.management.CreateFileGDB(os.path.dirname(gdb_output_folder), gdb_name)


            # Copy the feature class to the output folder
            arcpy.conversion.FeatureClassToFeatureClass(os.path.join(source_folder, gdb_name, fc_name), gdb_output_folder, os.path.basename(fc_name))

            print(f"finishing copied {fc_name} to {gdb_name}")
print("Completed")

finishing copied installation_a to AB201_JAN2024.gdb
finishing copied installation_a to Niamey_SDSFIE_311_Master_18MAY2023.gdb
finishing copied installation_a to Chebelley_40_MASTER.gdb
finishing copied installation_a to SDSFIE_05Apr2023.gdb
finishing copied wall_l to AB201_JAN2024.gdb
finishing copied wall_l to Niamey_SDSFIE_311_Master_18MAY2023.gdb
finishing copied wall_l to Chebelley_40_MASTER.gdb
finishing copied wall_l to SDSFIE_05Apr2023.gdb
finishing copied pavementslab_a to AB201_JAN2024.gdb
finishing copied pavementslab_a to Niamey_SDSFIE_311_Master_18MAY2023.gdb
finishing copied pavementslab_a to Chebelley_40_MASTER.gdb
finishing copied pavementslab_a to SDSFIE_05Apr2023.gdb
finishing copied airfieldimaginarysurface_a to AB201_JAN2024.gdb
finishing copied airfieldimaginarysurface_a to Niamey_SDSFIE_311_Master_18MAY2023.gdb
finishing copied airfieldimaginarysurface_a to Chebelley_40_MASTER.gdb
finishing copied airfieldimaginarysurface_a to SDSFIE_05Apr2023.gdb
finishing copied

#### Copy culvert data into the new folder
Since the culvert data is named differently in different geodatabases, it may not appear in the previous result. This part finds feature classes whose names contain "culvert" in each geodatabase and copies them into the new folder.

In [5]:
# Loop through the rest of the geodatabases
for gdb in clean_gdbs_path:
    # Set the workspace for the current gdb
    arcpy.env.workspace = gdb
    gdb_name = os.path.basename(arcpy.env.workspace)
    
    # Define the output folder path
    output_folder = os.path.join(new_folder_path, gdb_name)
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    
    # List all datasets in gdbs
    feature_fds = arcpy.ListDatasets("", "")
    
    # If the database has datasets
    if feature_fds:
        for fd in feature_fds:
            # List all the feature classes
            fcs = arcpy.ListFeatureClasses("*culvert*", "ALL", fd)  # Using wildcard to find similar names
            for fc in fcs:
                arcpy.conversion.FeatureClassToFeatureClass(os.path.join(gdb, fd, fc), output_folder, os.path.basename(fc))
                print(f"Finished copying {fc} to {output_folder}")

    else:
        # List all feature classes in the current gdb
        fcs = arcpy.ListFeatureClasses("*culvert*", "ALL")  # Using wildcard to find similar names
        for fc in fcs:
            arcpy.conversion.FeatureClassToFeatureClass(os.path.join(gdb, fc), output_folder, os.path.basename(fc))
            print(f"Finished copying {fc} to {output_folder}")


Finished copying SwCulvert_L to C:\Users\ma000551\Desktop\AFRICOM\AFRICOM_CLEAN_DATA\03_clean_data_cross_at_least_three_bases\Chebelley_40_MASTER.gdb
Finished copying SwCulvert_L to C:\Users\ma000551\Desktop\AFRICOM\AFRICOM_CLEAN_DATA\03_clean_data_cross_at_least_three_bases\AB201_JAN2024.gdb
Finished copying dCulvert_L to C:\Users\ma000551\Desktop\AFRICOM\AFRICOM_CLEAN_DATA\03_clean_data_cross_at_least_three_bases\Niamey_SDSFIE_311_Master_18MAY2023.gdb


#### rename GDBs

| Original                    | Rename                              |
|-----------------------------|-------------------------------------|
| AB201_JAN2024               | manu_dayek_agadez_2024-01          |
| Niamey_SDSFIE_311_Master_18MAY2023 | diori_hamani_niamey_2023-05   |
| CIP_311_AB201               | manu_dayek_agadez_2022-04          |
| Niamey_SDSFIE_311_Master_9Jan2023 | diori_hamani_niamey_2023-01   |
| Chebelley_40_MASTER         | chabelley_2023-03                   |
| SDSFIE_05Apr2023            | manda_bay_2023-04                   |


#### upload to AGOL


### calculate the coverage check of each base
Create the fields "culvert", "building", and "impervious", coded as follows:
- data present: 1
- data not present, but the feature is present on visual inspection: -1
- data not present, and the feature is not present on visual inspection: 0
- To narrow the scope, we focus on the area where we have data; cells outside that scope are Null.

Then create the fields "has_data" and "lack_data" to summarise the values above.

In [3]:
arcpy.env.workspace = r'C:\Users\ma000551\Desktop\AFRICOM\FC_check\FC_check.gdb'

# Fishnet feature classes whose summary fields are filled in
feature_classes = ["fishnet_chabelley", "fishnet_AB201", "fishnet_Niamey", "fishnet_manda_bay"]

for fc in feature_classes:
    with arcpy.da.UpdateCursor(fc, ["culvert", "building", "impervious", "has_data", "lack_data"]) as cursor:
        for row in cursor:
            # The first three fields are the per-theme check values
            checks = row[:3]

            # Rows where every check value is null are left untouched
            if not any(value is not None for value in checks):
                continue

            # has_data = number of checks equal to 1,
            # lack_data = number of checks equal to -1
            row[3] = sum(1 for value in checks if value == 1)
            row[4] = sum(1 for value in checks if value == -1)

            cursor.updateRow(row)


#### Calculate the percentage of grid cells lacking data (cells with a non-zero "lack_data" count, i.e. at least one field assigned -1) out of all non-null grid cells

In [4]:
arcpy.env.workspace = r'C:\Users\ma000551\Desktop\AFRICOM\FC_check\FC_check.gdb'
# feature classes
feature_classes = ["fishnet_chabelley", "fishnet_AB201", "fishnet_Niamey", "fishnet_manda_bay"]

# attribute fields of the fishnets; only "lack_data" is summarised in this cell
attribute_fields = ["culvert", "building", "impervious", "has_data", "lack_data"]
field = "lack_data"

for fc in feature_classes:
    print(f"---{fc}---")

    # select rows where "lack_data" is not null and not equal to 0
    lack_data = arcpy.management.SelectLayerByAttribute(
        fc, "NEW_SELECTION", f"{field} IS NOT NULL AND {field} <> 0"
    )
    lack_data_count = int(arcpy.management.GetCount(lack_data).getOutput(0))

    # select rows where "lack_data" is not null (the denominator)
    total_count = arcpy.management.SelectLayerByAttribute(
        fc, "NEW_SELECTION", f"{field} IS NOT NULL"
    )
    total_count_non_null = int(arcpy.management.GetCount(total_count).getOutput(0))

    print ("lack data count: ", lack_data_count)
    print ("non null total count: ", total_count_non_null)

    # calculate the ratio; a fishnet with no non-null rows has no defined ratio,
    # so report that instead of failing with ZeroDivisionError
    if total_count_non_null == 0:
        print("lack data ratio: n/a (no non-null rows)")
    else:
        lack_data_ratio = lack_data_count / total_count_non_null
        print(f"lack data ratio: {lack_data_ratio:.2%}")
    print("")


---fishnet_chabelley---
lack data count:  37
non null total count:  442
lack data ratio: 8.37%
﻿
---fishnet_AB201---
lack data count:  25
non null total count:  1108
lack data ratio: 2.26%
﻿
---fishnet_Niamey---
lack data count:  174
non null total count:  1327
lack data ratio: 13.11%
﻿
---fishnet_manda_bay---
lack data count:  84
non null total count:  711
lack data ratio: 11.81%
﻿


### Convert layers to shapefiles

In [6]:
# NOTE: this cell reassigns source_folder and clean_gdbs_path, which are also
# defined near the top of the notebook with a different folder and gdb list.
source_folder = r"C:\Users\ma000551\Desktop\AFRICOM\02_raw_no_duplicates_or_empty_feature_layers"

# Geodatabases offered for shapefile conversion
_shp_gdb_names = (
    "Chebelley_40_MASTER.gdb",
    "CIP_311_AB201.gdb",
    "Niamey_SDSFIE_311_Master_9Jan2023.gdb",
    "SDSFIE_05Apr2023.gdb",
)
clean_gdbs_path = [source_folder + "\\" + gdb_file for gdb_file in _shp_gdb_names]

# Root folder that receives the exported shapefiles
output_folder = r"C:\Users\ma000551\Desktop\AFRICOM\Shapefiles"

In [7]:
def fc_shp(gdb_path, out_folder, selected_fcs):
    """Export the selected feature classes of one geodatabase as shapefiles.

    Args:
        gdb_path: Path of the geodatabase; it is set as the arcpy workspace so
            the feature class names in ``selected_fcs`` resolve against it.
        out_folder: Existing folder passed to the conversion tool as the
            output location.
        selected_fcs: Iterable of feature class names to convert.

    Side effects:
        Changes ``arcpy.env.workspace`` and prints one line per converted
        feature class.
    """
    arcpy.env.workspace = gdb_path

    for fc in selected_fcs:
        # The tool takes only the output folder; the previously computed
        # (and unused) explicit output file name has been removed.
        arcpy.conversion.FeatureClassToShapefile(fc, out_folder)
        print(f"{fc} converted to Shapefile successfully!")

In [8]:
# print all gdbs
print("All geodatabase:")
for i, gdb_path in enumerate(clean_gdbs_path):
    print(f"{i + 1}. {gdb_path}")

# choose gdb
choice = int(input("Enter the number of the geodatabase you want to convert: "))
gdb_path = clean_gdbs_path[choice - 1]

# set the workspace to selected gdb
arcpy.env.workspace = gdb_path

# select all feature classes
feature_classes = arcpy.ListFeatureClasses()

# print which feature classes are in the gdb
print(f"Available feature classes in {os.path.basename(gdb_path)}:")
for i, fc in enumerate(feature_classes):
    print(f"{i + 1}. {fc}")

# select the fcs
selected_choices = input("Enter the numbers of the feature classes you want to convert (separated by comma): ")
selected_choices = list(map(int, selected_choices.split(',')))
selected_fcs = [feature_classes[choice - 1] for choice in selected_choices]

# create folder
gdb_name = os.path.splitext(os.path.basename(gdb_path))[0]
out_folder = os.path.join(output_folder, gdb_name)
if not os.path.exists(out_folder):
    os.makedirs(out_folder)
    
# execute the function    
fc_shp(gdb_path, out_folder, selected_fcs)

All geodatabase:
1. C:\Users\ma000551\Desktop\AFRICOM\02_raw_no_duplicates_or_empty_feature_layers\Chebelley_40_MASTER.gdb
2. C:\Users\ma000551\Desktop\AFRICOM\02_raw_no_duplicates_or_empty_feature_layers\CIP_311_AB201.gdb
3. C:\Users\ma000551\Desktop\AFRICOM\02_raw_no_duplicates_or_empty_feature_layers\Niamey_SDSFIE_311_Master_9Jan2023.gdb
4. C:\Users\ma000551\Desktop\AFRICOM\02_raw_no_duplicates_or_empty_feature_layers\SDSFIE_05Apr2023.gdb
Enter the number of the geodatabase you want to convert: 1
Available feature classes in Chebelley_40_MASTER.gdb:
1. Installation_P
2. Installation_A
3. Site_P
4. Site_A
5. Zones_A_12_2022
6. EmergencyMedicalPoint_P
7. Wall_L
8. Wall_A
9. ControlMonument_P
10. EsqdArc_A
11. ImpactArea_A
12. MilitaryLandingZone_A
13. MilitaryObservationPosition_P
14. SpentMunitionsStorage_P
15. SpentMunitionsStorage_A
16. PavementSlab_A
17. AirAccidentPotentialZone_A
18. AirfieldImaginarySurface_A
19. DesignatedTobaccoUse_P
20. FutureProjectSite_A
21. LandManagementZ