# **Import and Set Paths**

In [None]:
!pip install geopandas
!pip install shapely
!pip install geofeather

In [None]:
import os
import sys
import pandas as pd
import warnings
import os
import sys
from io import StringIO

import warnings
# `message` is a regular expression matched against the start of the warning
# text, so the literal parentheses around "char 1" have to be escaped; left
# unescaped they form a group and the pattern never matches the real message.
warnings.filterwarnings("ignore", message=r"Expecting property name enclosed in double quotes: line 1 column 2 \(char 1\)", module="fiona.ogrext")

# Create a custom stream class to filter out specific messages
class FilteredStream:
    """Text-stream wrapper that hides one known-noisy fiona log line.

    Anything written that contains ``SUPPRESSED_MARKER`` is diverted into
    ``self.buffer`` (an in-memory ``StringIO``); everything else is forwarded
    to the wrapped stream unchanged.

    Args:
        stream: The underlying writable text stream (e.g. ``sys.stderr``).
    """

    # Substring that identifies the message to suppress
    SUPPRESSED_MARKER = "WARNING:fiona.ogrext:Expecting property name enclosed in double quotes"

    def __init__(self, stream):
        self.stream = stream
        self.buffer = StringIO()

    def write(self, data):
        """Forward ``data`` unless it contains the suppressed marker.

        Returns:
            int: ``len(data)``, mirroring the text-stream ``write`` contract so
            callers that check the return value keep working.
        """
        if self.SUPPRESSED_MARKER in data:
            self.buffer.write(data)  # Capture discarded data if needed
        else:
            self.stream.write(data)
        return len(data)

    def flush(self):
        """Flush the wrapped stream."""
        self.stream.flush()

    def __getattr__(self, name):
        """Delegate every other attribute (isatty, encoding, fileno, ...) to the wrapped stream.

        Only called when normal lookup fails, so ``write``, ``flush``,
        ``stream`` and ``buffer`` defined above always win.
        """
        if name == "stream":
            # Not initialised yet (e.g. during copy/unpickle): avoid infinite recursion
            raise AttributeError(name)
        return getattr(self.stream, name)

# Replace sys.stderr with our filtered stream.
# Guarded so that re-running this cell does not stack a new filter on top of
# the one installed by a previous run. The check is by class name because
# re-running the cell also re-creates the FilteredStream class object.
if type(sys.stderr).__name__ != "FilteredStream":
    sys.stderr = FilteredStream(sys.stderr)


# **Weights**

## Communications

### Cell Towers

**Dataset: Cellular Towers** https://hifld-geoplatform.hub.arcgis.com/datasets/geoplatform::cellular-towers-1/about

In [None]:
import geopandas as gpd
import pandas as pd
import os
import time

# Define the weights for StrucType
# Lookup table: StrucType value -> base weight. calculate_tower_weight reads it
# and falls back to the 'Other' entry for values that are not listed here.
struc_type_weights = {
    'GTOWER': 0.100,
    'LTOWER': 0.110,
    'MAST': 0.050,
    'POLE': 0.040,
    'MTOWER': 0.060,
    'TANK': 0.030,
    'B': 0.020,
    'BTWR': 0.005,
    'UPOLE': 0.010,
    'TOWER': 0.160,
    'Other': 0.345,  # Represents the sum of other structure types
    'Blank': 0.070
}

# Define the weight for LicStatus
# Lookup table: LicStatus value -> base weight. calculate_tower_weight scores
# any status that is not listed here as 0.000.
lic_status_weights = {
    'A': 0.500,  # Active
    'Inactive': 0.000  # Assuming that non-active statuses are not as important
}

# Sanitise a weight so it is never NULL or negative
def ensure_valid_weight(weight):
    """Return ``weight`` as a float, or 0.0 when it is null or negative.

    Args:
        weight: Candidate weight (number, ``None`` or NaN).

    Returns:
        float: ``float(weight)`` for non-negative input, otherwise ``0.0``.
    """
    # The null check runs first so that None is never compared with a number
    usable = not pd.isnull(weight) and weight >= 0
    return float(weight) if usable else 0.0

# Function to calculate the weight for each row
def calculate_tower_weight(row):
    """Return the combined importance weight for one tower record.

    The result is the 50/50 average of the structure-type weight and the
    license-status weight.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) providing ``StrucType``
            and ``LicStatus``.

    Returns:
        float: Non-negative combined weight.

    Raises:
        KeyError: If ``StrucType`` or ``LicStatus`` is absent from ``row``.
    """
    struc_type = row['StrucType']

    # Missing or empty structure types belong to the 'Blank' bucket. Without
    # this mapping they match no key and would be scored as 'Other'.
    if pd.isnull(struc_type) or (isinstance(struc_type, str) and not struc_type.strip()):
        struc_type = 'Blank'

    # Unlisted structure types share the 'Other' weight
    struc_type_weight = struc_type_weights.get(struc_type, struc_type_weights['Other'])

    # Any license status that is not listed counts as 0
    lic_status_weight = lic_status_weights.get(row['LicStatus'], 0.000)

    combined_weight = 0.5 * struc_type_weight + 0.5 * lic_status_weight

    # Ensure the weight is valid (non-negative and not null)
    return ensure_valid_weight(combined_weight)

# Score every row, then drop all attributes except weight and geometry
def apply_weights_and_strip_fields(gdf):
    """Add a ``Weight`` column to ``gdf`` and return only weight and geometry.

    The ``Weight`` column is written onto the frame that was passed in.

    Args:
        gdf: Frame with a ``geometry`` column and the attributes read by
            ``calculate_tower_weight``.

    Returns:
        A frame holding just the ``Weight`` and ``geometry`` columns.
    """
    print("Starting weight calculation and field stripping...")
    started = time.time()

    # Row-wise scoring
    gdf['Weight'] = gdf.apply(calculate_tower_weight, axis=1)
    reduced = gdf[['Weight', 'geometry']]

    print(f"Weight calculation and field stripping completed in {time.time() - started:.2f} seconds.")
    return reduced

# Load, weight, strip, save, and report the change in file size
def process_and_save_stripped_geojson(input_path, output_dir):
    """Weight a GeoJSON file, save the stripped result and print size statistics.

    Args:
        input_path: Path of the GeoJSON file to read.
        output_dir: Directory that receives the ``*_weighted_cleaned.geojson`` file.

    Returns:
        None. Progress and the size comparison are printed.
    """
    bytes_per_mb = 1024 * 1024

    print(f"Loading dataset from {input_path}...")
    source_gdf = gpd.read_file(input_path)
    print("Dataset loaded successfully.")

    # On-disk size of the source file, in MB
    size_before = os.path.getsize(input_path) / bytes_per_mb
    print(f"Initial file size: {size_before:.2f} MB")

    slim_gdf = apply_weights_and_strip_fields(source_gdf)

    # Output keeps the input's base name with the suffix swapped
    source_name = os.path.basename(input_path)
    target_name = source_name.replace(".geojson", "_weighted_cleaned.geojson")
    output_path = os.path.join(output_dir, target_name)

    print(f"Saving stripped and weighted dataset to {output_path}...")
    slim_gdf.to_file(output_path, driver='GeoJSON')

    size_after = os.path.getsize(output_path) / bytes_per_mb
    print(f"Final file size: {size_after:.2f} MB")

    saved_mb = size_before - size_after
    if size_before > 0:
        saved_pct = (saved_mb / size_before) * 100
    else:
        saved_pct = 0

    print(f"File size reduced by: {saved_mb:.2f} MB ({saved_pct:.2f}% reduction).")

# Define input and output paths
# NOTE(review): both paths are absolute from the filesystem root — confirm they
# match the environment this notebook runs in.
input_path = "/geoJSON/cleaned/Cellular_Towers_Cleaned.geojson"
output_dir = "/geoJSON/cleaned_weighted"

# Ensure output directory exists (exist_ok=True: no error if it is already there)
os.makedirs(output_dir, exist_ok=True)

# Process and save the dataset
process_and_save_stripped_geojson(input_path, output_dir)


### Microwave Service Towers

**Dataset: Microwave Service Towers** https://hifld-geoplatform.hub.arcgis.com/datasets/geoplatform::microwave-service-towers/about

In [None]:
# Mount Google Drive into the runtime at /content/drive (requires the google.colab package)
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import geopandas as gpd
import pandas as pd
import os
import time

# Define the weights for StrucType
# Lookup table: StrucType value -> base weight. calculate_tower_weight reads it
# and falls back to the 'Other' entry for values that are not listed here.
struc_type_weights = {
    'GTOWER': 0.100,
    'LTOWER': 0.150,
    'TOWER': 0.070,
    'MTOWER': 0.060,
    'TANK': 0.040,
    'MAST': 0.040,
    'POLE': 0.040,
    'BANT': 0.030,
    'SILO': 0.020,
    'Other': 0.350,  # Represents the sum of other structure types
    'Blank': 0.000   # Assuming blank or undefined entries have no importance
}

# Define the weight for LicStatus
# Lookup table: LicStatus value -> base weight. calculate_tower_weight scores
# any status that is not listed here as 0.000.
lic_status_weights = {
    'A': 0.500,  # Active
    'Inactive': 0.000  # Assuming that non-active statuses are not as important
}

# Sanitise a weight so it is never NULL or negative
def ensure_valid_weight(weight):
    """Return ``weight`` as a float, or 0.0 when it is null or negative.

    Args:
        weight: Candidate weight (number, ``None`` or NaN).

    Returns:
        float: ``float(weight)`` for non-negative input, otherwise ``0.0``.
    """
    # The null check runs first so that None is never compared with a number
    usable = not pd.isnull(weight) and weight >= 0
    return float(weight) if usable else 0.0

# Function to calculate the weight for each row
def calculate_tower_weight(row):
    """Return the combined importance weight for one tower record.

    The result is the 50/50 average of the structure-type weight and the
    license-status weight.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) providing ``StrucType``
            and ``LicStatus``.

    Returns:
        float: Non-negative combined weight.

    Raises:
        KeyError: If ``StrucType`` or ``LicStatus`` is absent from ``row``.
    """
    struc_type = row['StrucType']

    # Missing or empty structure types belong to the 'Blank' bucket. Without
    # this mapping they match no key and would be scored as 'Other'.
    if pd.isnull(struc_type) or (isinstance(struc_type, str) and not struc_type.strip()):
        struc_type = 'Blank'

    # Unlisted structure types share the 'Other' weight
    struc_type_weight = struc_type_weights.get(struc_type, struc_type_weights['Other'])

    # Any license status that is not listed counts as 0
    lic_status_weight = lic_status_weights.get(row['LicStatus'], 0.000)

    combined_weight = (
        0.5 * struc_type_weight +  # 50% weight to structure type
        0.5 * lic_status_weight    # 50% weight to license status
    )

    # Ensure the weight is valid (non-negative and not null)
    return ensure_valid_weight(combined_weight)

# Score every row, then drop all attributes except weight and geometry
def apply_weights_and_strip_fields(gdf):
    """Add a ``Weight`` column to ``gdf`` and return only weight and geometry.

    The ``Weight`` column is written onto the frame that was passed in.

    Args:
        gdf: Frame with a ``geometry`` column and the attributes read by
            ``calculate_tower_weight``.

    Returns:
        A frame holding just the ``Weight`` and ``geometry`` columns.
    """
    print("Starting weight calculation and field stripping...")
    started = time.time()

    # Row-wise scoring
    gdf['Weight'] = gdf.apply(calculate_tower_weight, axis=1)
    reduced = gdf[['Weight', 'geometry']]

    print(f"Weight calculation and field stripping completed in {time.time() - started:.2f} seconds.")
    return reduced

# Load, weight, strip, save, and report the change in file size
def process_and_save_stripped_geojson(input_path, output_dir):
    """Weight a GeoJSON file, save the stripped result and print size statistics.

    Args:
        input_path: Path of the GeoJSON file to read.
        output_dir: Directory that receives the ``*_weighted_cleaned.geojson`` file.

    Returns:
        None. Progress and the size comparison are printed.
    """
    bytes_per_mb = 1024 * 1024

    print(f"Loading dataset from {input_path}...")
    source_gdf = gpd.read_file(input_path)
    print("Dataset loaded successfully.")

    # On-disk size of the source file, in MB
    size_before = os.path.getsize(input_path) / bytes_per_mb
    print(f"Initial file size: {size_before:.2f} MB")

    slim_gdf = apply_weights_and_strip_fields(source_gdf)

    # Output keeps the input's base name with the suffix swapped
    source_name = os.path.basename(input_path)
    target_name = source_name.replace(".geojson", "_weighted_cleaned.geojson")
    output_path = os.path.join(output_dir, target_name)

    print(f"Saving stripped and weighted dataset to {output_path}...")
    slim_gdf.to_file(output_path, driver='GeoJSON')

    size_after = os.path.getsize(output_path) / bytes_per_mb
    print(f"Final file size: {size_after:.2f} MB")

    saved_mb = size_before - size_after
    if size_before > 0:
        saved_pct = (saved_mb / size_before) * 100
    else:
        saved_pct = 0

    print(f"File size reduced by: {saved_mb:.2f} MB ({saved_pct:.2f}% reduction).")

# Define input and output paths
# NOTE(review): both paths are absolute from the filesystem root — confirm they
# match the environment this notebook runs in.
input_path = "/geoJSON/cleaned/Microwave_Service_Towers_Cleaned.geojson"
output_dir = "/geoJSON/cleaned_weighted"

# Ensure output directory exists (exist_ok=True: no error if it is already there)
os.makedirs(output_dir, exist_ok=True)

# Process and save the dataset
process_and_save_stripped_geojson(input_path, output_dir)


## Education

### Colleges and Universities
**Dataset: Colleges and Universities -** https://hifld-geoplatform.hub.arcgis.com/datasets/geoplatform::colleges-and-universities/about

In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import os
import time

# Step 1: Define the weight schemas
# Each qualitative table maps a raw attribute value to a base weight.
# calculate_college_weight multiplies that base weight by the attribute's
# entry in qualitative_importance_grades.

# Define qualitative attribute weights
# TYPE value -> base weight (values not listed score 0.0)
type_weights = {
    3: 0.150,
    1: 0.300,
    2: 0.200,
    -3: 0.000
}

# SECTOR value -> base weight ('Other' is the fallback for unlisted values)
sector_weights = {
    2: 0.200,
    9: 0.150,
    4: 0.150,
    1: 0.100,
    'Other': 0.160
}

# LEVEL_ value -> base weight (values not listed score 0.0)
level_weights = {
    1: 0.300,
    3: 0.250,
    2: 0.200,
    -3: 0.000
}

# HI_OFFER value -> base weight ('Other' is the fallback for unlisted values)
hi_offer_weights = {
    0: 0.050,
    40: 0.300,
    20: 0.200,
    30: 0.150,
    'Other': 0.280
}

# DEG_GRANT value -> base weight (values not listed score 0.0)
deg_grant_weights = {
    1: 0.400,
    2: 0.100,
    -3: 0.000
}

# INST_SIZE value -> base weight ('Other' is the fallback for unlisted values)
inst_size_weights = {
    1: 0.200,
    2: 0.150,
    3: 0.100,
    4: 0.100,
    'Other': 0.050
}

# SIZE_SET value -> base weight ('Other' is the fallback for unlisted values)
size_set_weights = {
    -2: 0.050,
    18: 0.050,
    22: 0.050,
    'Other': 0.050
}

# Define overall importance grades for each qualitative property
# Keyed by column name; used as the multiplier for that column's base weight.
qualitative_importance_grades = {
    'TYPE': 0.650,
    'SECTOR': 0.760,
    'LEVEL_': 0.750,
    'HI_OFFER': 0.980,
    'DEG_GRANT': 0.500,
    'INST_SIZE': 0.600,
    'SIZE_SET': 0.200
}

# Define quantitative attribute weights
# Keyed by numeric column name; the value is that column's maximum contribution.
quantitative_weights = {
    'POPULATION': 0.150,
    'PT_ENROLL': 0.050,
    'FT_ENROLL': 0.150,
    'TOT_ENROLL': 0.150
}

# Sanitise a weight so it is never NULL or negative
def ensure_valid_weight(weight):
    """Return ``weight`` as a float, or 0.0 when it is null or negative.

    Args:
        weight: Candidate weight (number, ``None`` or NaN).

    Returns:
        float: ``float(weight)`` for non-negative input, otherwise ``0.0``.
    """
    # The null check runs first so that None is never compared with a number
    usable = not pd.isnull(weight) and weight >= 0
    return float(weight) if usable else 0.0

# Step 2: Calculate the weight for each row
def _scaled_quantity(row, column, weight, max_values=None):
    """Return ``weight`` scaled by the size of ``row[column]``.

    When ``max_values`` has an entry for ``column`` the value is divided by that
    dataset-wide maximum. Otherwise it is divided by ``max(value, 1)``, the
    historical per-row behaviour. Missing values contribute 0 and negative
    results are reset to 0 by ``ensure_valid_weight``.
    """
    value = row.get(column)
    if pd.isna(value):
        return 0
    if max_values is not None and column in max_values:
        denominator = max_values[column]
    else:
        denominator = np.nanmax([value, 1])
    return ensure_valid_weight((value / denominator) * weight)


def _positive_column_maxima(gdf, columns):
    """Return ``{column: maximum}`` for each of ``columns`` present in ``gdf``.

    Non-numeric entries are ignored. A column without any positive value maps
    to 1 so that dividing by the result is always defined.
    """
    maxima = {}
    for column in columns:
        if column in gdf.columns:
            peak = pd.to_numeric(gdf[column], errors='coerce').max()
            maxima[column] = peak if pd.notna(peak) and peak > 0 else 1
    return maxima


def calculate_college_weight(row, max_values=None):
    """Return the total importance weight for one college/university record.

    Args:
        row: Record exposing ``.get(column, default)`` (DataFrame row or dict).
        max_values: Optional mapping of quantitative column name to the
            dataset-wide maximum used for normalisation. When omitted, each
            value is divided by ``max(value, 1)`` as before, which gives every
            value >= 1 the full column weight.

    Returns:
        float: Non-negative sum of the qualitative and quantitative parts.
    """
    # (column, lookup table, key used when the column is absent, weight for unlisted values)
    qualitative = (
        ('TYPE', type_weights, 0, 0.0),
        ('SECTOR', sector_weights, 'Other', sector_weights['Other']),
        ('LEVEL_', level_weights, 0, 0.0),
        ('HI_OFFER', hi_offer_weights, 'Other', hi_offer_weights['Other']),
        ('DEG_GRANT', deg_grant_weights, 0, 0.0),
        ('INST_SIZE', inst_size_weights, 'Other', inst_size_weights['Other']),
        ('SIZE_SET', size_set_weights, 'Other', size_set_weights['Other']),
    )

    total_weight = 0.0
    for column, table, missing_key, fallback in qualitative:
        base = table.get(row.get(column, missing_key), fallback)
        total_weight += ensure_valid_weight(base) * qualitative_importance_grades[column]

    # Quantitative part: one scaled contribution per configured numeric column
    for column, weight in quantitative_weights.items():
        total_weight += _scaled_quantity(row, column, weight, max_values)

    return ensure_valid_weight(total_weight)

# Step 3: Apply the weights to the dataset and remove unnecessary fields
def apply_weights_and_strip_fields(gdf):
    """Add a ``Weight`` column to ``gdf`` and return only weight and geometry.

    Quantitative attributes are normalised against the largest value found in
    ``gdf`` so that bigger institutions receive proportionally more weight.
    The ``Weight`` column is written onto the frame that was passed in.

    Args:
        gdf: Frame with a ``geometry`` column plus the scored attributes.

    Returns:
        A frame holding just the ``Weight`` and ``geometry`` columns.
    """
    print("Starting weight calculation and field stripping...")
    start_time = time.time()

    # Computed once for the whole dataset, then handed to every row
    max_values = _positive_column_maxima(gdf, quantitative_weights)

    # Apply the weight calculation function to each row
    gdf['Weight'] = gdf.apply(calculate_college_weight, axis=1, max_values=max_values)

    # Keep only the 'Weight' and 'geometry' fields
    gdf_stripped = gdf[['Weight', 'geometry']]

    elapsed_time = time.time() - start_time
    print(f"Weight calculation and field stripping completed in {elapsed_time:.2f} seconds.")

    return gdf_stripped

# Step 4: Load, weight, save, and report the change in file size
def process_and_save_weighted_geojson(input_path, output_dir):
    """Weight a GeoJSON file, save the stripped result and print size statistics.

    Args:
        input_path: Path of the GeoJSON file to read.
        output_dir: Directory that receives the ``*_weighted_cleaned.geojson`` file.

    Returns:
        None. Progress and the size comparison are printed.
    """
    bytes_per_mb = 1024 * 1024

    print(f"Loading dataset from {input_path}...")
    source_gdf = gpd.read_file(input_path)
    print("Dataset loaded successfully.")

    # On-disk size of the source file, in MB
    size_before = os.path.getsize(input_path) / bytes_per_mb
    print(f"Initial file size: {size_before:.2f} MB")

    weighted_gdf = apply_weights_and_strip_fields(source_gdf)

    # Output keeps the input's base name with the suffix swapped
    source_name = os.path.basename(input_path)
    target_name = source_name.replace(".geojson", "_weighted_cleaned.geojson")
    output_path = os.path.join(output_dir, target_name)

    print(f"Saving weighted dataset to {output_path}...")
    weighted_gdf.to_file(output_path, driver='GeoJSON')

    size_after = os.path.getsize(output_path) / bytes_per_mb
    print(f"Final file size: {size_after:.2f} MB")

    saved_mb = size_before - size_after
    if size_before > 0:
        saved_pct = (saved_mb / size_before) * 100
    else:
        saved_pct = 0

    print(f"File size reduced by: {saved_mb:.2f} MB ({saved_pct:.2f}% reduction).")

# Define paths
# NOTE(review): both paths are absolute from the filesystem root — confirm they
# match the environment this notebook runs in.
input_path = "/geoJSON/cleaned/Colleges_and_Universities_Cleaned.geojson"
output_dir = "/geoJSON/cleaned_weighted"

# Ensure output directory exists (exist_ok=True: no error if it is already there)
os.makedirs(output_dir, exist_ok=True)

# Process and save the dataset
process_and_save_weighted_geojson(input_path, output_dir)


### Private Schools
**Dataset: Private Schools -** https://hifld-geoplatform.hub.arcgis.com/datasets/geoplatform::private-schools/about

In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import time
import os

# Step 1: Define the weight schemas
# Each qualitative table maps a raw attribute value to a base weight.
# calculate_school_weight multiplies that base weight by the attribute's entry
# in qualitative_importance_grades; values not listed in a table score 0.0.

# Define qualitative attribute weights
# TYPE value -> base weight
type_weights = {
    1: 0.500,
    2: 0.150,
    7: 0.120,
    4: 0.100,
    6: 0.070,
    3: 0.050,
    5: 0.010
}

# LEVEL_ value -> base weight
level_weights = {
    1: 0.600,
    3: 0.300,
    2: 0.100
}

# Define overall importance grades for each qualitative property
# Keyed by column name; used as the multiplier for that column's base weight.
qualitative_importance_grades = {
    'TYPE': 0.500,
    'LEVEL_': 0.500
}

# Define quantitative attribute weights
# Keyed by numeric column name; the value is that column's maximum contribution.
quantitative_weights = {
    'POPULATION': 0.250,
    'ENROLLMENT': 0.200,
    'FT_TEACHERS': 0.050
}

# Sanitise a weight so it is never null or negative
def ensure_valid_weight(weight):
    """Return ``weight`` as a float, or 0.0 when it is null or negative.

    Args:
        weight: Candidate weight (number, ``None`` or NaN).

    Returns:
        float: ``float(weight)`` for non-negative input, otherwise ``0.0``.
    """
    # The null check runs first so that None is never compared with a number
    usable = not pd.isnull(weight) and weight >= 0
    return float(weight) if usable else 0.0

# Step 2: Calculate the weight for each row
def _scaled_quantity(row, column, weight, max_values=None):
    """Return ``weight`` scaled by the size of ``row[column]``.

    When ``max_values`` has an entry for ``column`` the value is divided by that
    dataset-wide maximum. Otherwise it is divided by ``max(value, 1)``, the
    historical per-row behaviour. Missing values contribute 0 and negative
    results are reset to 0 by ``ensure_valid_weight``.
    """
    value = row.get(column)
    if pd.isna(value):
        return 0
    if max_values is not None and column in max_values:
        denominator = max_values[column]
    else:
        denominator = np.nanmax([value, 1])
    return ensure_valid_weight((value / denominator) * weight)


def _positive_column_maxima(gdf, columns):
    """Return ``{column: maximum}`` for each of ``columns`` present in ``gdf``.

    Non-numeric entries are ignored. A column without any positive value maps
    to 1 so that dividing by the result is always defined.
    """
    maxima = {}
    for column in columns:
        if column in gdf.columns:
            peak = pd.to_numeric(gdf[column], errors='coerce').max()
            maxima[column] = peak if pd.notna(peak) and peak > 0 else 1
    return maxima


def calculate_school_weight(row, max_values=None):
    """Return the total importance weight for one private-school record.

    Args:
        row: Record exposing ``.get(column, default)`` (DataFrame row or dict).
        max_values: Optional mapping of quantitative column name to the
            dataset-wide maximum used for normalisation. When omitted, each
            value is divided by ``max(value, 1)`` as before, which gives every
            value >= 1 the full column weight.

    Returns:
        float: Non-negative sum of the qualitative and quantitative parts.
    """
    # Qualitative part: table lookup times the column's importance grade
    type_weight = ensure_valid_weight(type_weights.get(row.get('TYPE', 0), 0.0)) * qualitative_importance_grades['TYPE']
    level_weight = ensure_valid_weight(level_weights.get(row.get('LEVEL_', 0), 0.0)) * qualitative_importance_grades['LEVEL_']

    total_weight = type_weight + level_weight

    # Quantitative part: one scaled contribution per configured numeric column
    for column, weight in quantitative_weights.items():
        total_weight += _scaled_quantity(row, column, weight, max_values)

    return ensure_valid_weight(total_weight)

# Step 3: Apply the weights to the dataset and remove unnecessary fields
def apply_weights_and_strip_fields(gdf):
    """Add a ``Weight`` column to ``gdf`` and return only weight and geometry.

    Quantitative attributes are normalised against the largest value found in
    ``gdf`` so that bigger schools receive proportionally more weight. The
    ``Weight`` column is written onto the frame that was passed in.

    Args:
        gdf: Frame with a ``geometry`` column plus the scored attributes.

    Returns:
        A frame holding just the ``Weight`` and ``geometry`` columns.
    """
    print("Starting weight calculation for each school...")
    start_time = time.time()

    # Computed once for the whole dataset, then handed to every row
    max_values = _positive_column_maxima(gdf, quantitative_weights)

    # Apply the weight calculation function to each row
    gdf['Weight'] = gdf.apply(calculate_school_weight, axis=1, max_values=max_values)

    # Keep only the 'Weight' and 'geometry' fields
    gdf_stripped = gdf[['Weight', 'geometry']]

    elapsed_time = time.time() - start_time
    print(f"Weight calculation and field stripping completed in {elapsed_time:.2f} seconds.")

    return gdf_stripped

# Step 4: Load, weight, save, and report the change in file size
def process_and_save_weighted_geojson(input_path, output_dir):
    """Weight a GeoJSON file, save the stripped result and print size statistics.

    Args:
        input_path: Path of the GeoJSON file to read.
        output_dir: Directory that receives the ``*_weighted_cleaned.geojson`` file.

    Returns:
        None. Progress and the size comparison are printed.
    """
    bytes_per_mb = 1024 * 1024

    print(f"Loading dataset from {input_path}...")
    source_gdf = gpd.read_file(input_path)
    print("Dataset loaded successfully.")

    # On-disk size of the source file, in MB
    size_before = os.path.getsize(input_path) / bytes_per_mb
    print(f"Initial file size: {size_before:.2f} MB")

    weighted_gdf = apply_weights_and_strip_fields(source_gdf)

    # Output keeps the input's base name with the suffix swapped
    source_name = os.path.basename(input_path)
    target_name = source_name.replace(".geojson", "_weighted_cleaned.geojson")
    output_path = os.path.join(output_dir, target_name)

    print(f"Saving weighted dataset to {output_path}...")
    weighted_gdf.to_file(output_path, driver='GeoJSON')

    size_after = os.path.getsize(output_path) / bytes_per_mb
    print(f"Final file size: {size_after:.2f} MB")

    saved_mb = size_before - size_after
    if size_before > 0:
        saved_pct = (saved_mb / size_before) * 100
    else:
        saved_pct = 0

    print(f"File size reduced by: {saved_mb:.2f} MB ({saved_pct:.2f}% reduction).")

# Define paths
# NOTE(review): both paths are absolute from the filesystem root — confirm they
# match the environment this notebook runs in.
input_path = "/geoJSON/cleaned/Private_Schools_Cleaned.geojson"
output_dir = "/geoJSON/cleaned_weighted"

# Ensure output directory exists (exist_ok=True: no error if it is already there)
os.makedirs(output_dir, exist_ok=True)

# Process and save the dataset
process_and_save_weighted_geojson(input_path, output_dir)


## Emergency Services

### Fire and Emergency Services
**Dataset: Fire and Emergency Medical Service (EMS) Stations -** https://hifld-geoplatform.hub.arcgis.com/datasets/geoplatform::fire-and-emergency-medical-service-ems-stations/about

In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import time
import os

# Step 1: Define the weight schemas
# Each table maps a raw attribute value to a base weight.
# calculate_ems_station_weight multiplies that base weight by the attribute's
# entry in qualitative_importance_grades; values not listed score 0.0.

# Define qualitative attribute weights
# DISTRIBUTION_POLICY value -> base weight
distribution_policy_weights = {
    'E4': 0.150
}

# FTYPE value -> base weight
ftype_weights = {
    740: 0.200
}

# DATA_SECURITY value -> base weight
data_security_weights = {
    5.0: 0.250
}

# Define overall importance grades for each qualitative property
# Keyed by column name; used as the multiplier for that column's base weight.
qualitative_importance_grades = {
    'DISTRIBUTION_POLICY': 0.150,
    'FTYPE': 0.200,
    'DATA_SECURITY': 0.250
}

# Sanitise a weight so it is never NULL or negative
def ensure_valid_weight(weight):
    """Return ``weight`` as a float, or 0.0 when it is null or negative.

    Args:
        weight: Candidate weight (number, ``None`` or NaN).

    Returns:
        float: ``float(weight)`` for non-negative input, otherwise ``0.0``.
    """
    # The null check runs first so that None is never compared with a number
    usable = not pd.isnull(weight) and weight >= 0
    return float(weight) if usable else 0.0

# Step 2: Score a single EMS station record
def calculate_ems_station_weight(row):
    """Return the total importance weight for one fire/EMS station record.

    Only qualitative attributes are scored; this dataset has no quantitative
    terms.

    Args:
        row: Record exposing ``.get(column, default)`` (DataFrame row or dict).

    Returns:
        float: Non-negative sum of the three graded attribute weights.
    """
    # (column, lookup table, value assumed when the column is absent)
    scored_columns = (
        ('DISTRIBUTION_POLICY', distribution_policy_weights, 0),
        ('FTYPE', ftype_weights, 0),
        ('DATA_SECURITY', data_security_weights, 0.0),
    )

    running_total = 0
    for column, table, absent_value in scored_columns:
        base = ensure_valid_weight(table.get(row.get(column, absent_value), 0.0))
        running_total = running_total + base * qualitative_importance_grades[column]

    return ensure_valid_weight(running_total)

# Step 3: Score every row, then drop all attributes except weight and geometry
def apply_weights_and_strip_fields(gdf):
    """Add a ``Weight`` column to ``gdf`` and return only weight and geometry.

    The ``Weight`` column is written onto the frame that was passed in.

    Args:
        gdf: Frame with a ``geometry`` column and the attributes read by
            ``calculate_ems_station_weight``.

    Returns:
        A frame holding just the ``Weight`` and ``geometry`` columns.
    """
    print("Starting weight calculation for each EMS station...")
    started = time.time()

    # Row-wise scoring
    gdf['Weight'] = gdf.apply(calculate_ems_station_weight, axis=1)
    reduced = gdf[['Weight', 'geometry']]

    print(f"Weight calculation and field stripping completed in {time.time() - started:.2f} seconds.")
    return reduced

# Step 4: Load, weight, save, and report the change in file size
def process_and_save_weighted_geojson(input_path, output_dir):
    """Weight a GeoJSON file, save the stripped result and print size statistics.

    Args:
        input_path: Path of the GeoJSON file to read.
        output_dir: Directory that receives the ``*_weighted_cleaned.geojson`` file.

    Returns:
        None. Progress and the size comparison are printed.
    """
    bytes_per_mb = 1024 * 1024

    print(f"Loading dataset from {input_path}...")
    source_gdf = gpd.read_file(input_path)
    print("Dataset loaded successfully.")

    # On-disk size of the source file, in MB
    size_before = os.path.getsize(input_path) / bytes_per_mb
    print(f"Initial file size: {size_before:.2f} MB")

    weighted_gdf = apply_weights_and_strip_fields(source_gdf)

    # Output keeps the input's base name with the suffix swapped
    source_name = os.path.basename(input_path)
    target_name = source_name.replace(".geojson", "_weighted_cleaned.geojson")
    output_path = os.path.join(output_dir, target_name)

    print(f"Saving weighted dataset to {output_path}...")
    weighted_gdf.to_file(output_path, driver='GeoJSON')

    size_after = os.path.getsize(output_path) / bytes_per_mb
    print(f"Final file size: {size_after:.2f} MB")

    saved_mb = size_before - size_after
    if size_before > 0:
        saved_pct = (saved_mb / size_before) * 100
    else:
        saved_pct = 0

    print(f"File size reduced by: {saved_mb:.2f} MB ({saved_pct:.2f}% reduction).")

# Define paths
# NOTE(review): both paths are absolute from the filesystem root — confirm they
# match the environment this notebook runs in.
input_path = "/geoJSON/cleaned/EMS_Stations_Cleaned.geojson"
output_dir = "/geoJSON/cleaned_weighted"

# Ensure output directory exists (exist_ok=True: no error if it is already there)
os.makedirs(output_dir, exist_ok=True)

# Process and save the dataset
process_and_save_weighted_geojson(input_path, output_dir)


### Local Law Enforcement
**Dataset: Local Law Enforcement Locations -** https://hifld-geoplatform.hub.arcgis.com/datasets/geoplatform::local-law-enforcement-locations/about

In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import time
import os

# Step 1: Define the weight schemas
# Each qualitative table maps a raw attribute value to a base weight.
# calculate_law_enforcement_weight multiplies that base weight by the
# attribute's entry in qualitative_importance_grades; values not listed in a
# table score 0.0.

# Define qualitative attribute weights
# TYPE value -> base weight
type_weights = {
    "LOCAL POLICE DEPARTMENT": 0.400,
    "SHERIFF'S OFFICE": 0.250,
    "SPECIAL JURISDICTION": 0.150,
    "PRIMARY STATE AGENCY": 0.150,
    "CONSTABLE/MARSHAL": 0.050
}

# STATUS value -> base weight
status_weights = {
    "OPEN": 0.950,
    "NOT AVAILABLE": 0.050
}

# Define overall importance grades for each qualitative property
# Keyed by column name; used as the multiplier for that column's base weight.
qualitative_importance_grades = {
    'TYPE': 0.650,
    'STATUS': 0.350
}

# Define quantitative attribute weights
# Keyed by numeric column name; the value is that column's maximum contribution.
quantitative_weights = {
    'POPULATION': 0.200,
    'FTSWORN': 0.400,
    'FTCIV': 0.200,
    'PTSWORN': 0.100,
    'PTCIV': 0.100
}

# Sanitise a weight so it is never NULL or negative
def ensure_valid_weight(weight):
    """Return ``weight`` as a float, or 0.0 when it is null or negative.

    Args:
        weight: Candidate weight (number, ``None`` or NaN).

    Returns:
        float: ``float(weight)`` for non-negative input, otherwise ``0.0``.
    """
    # The null check runs first so that None is never compared with a number
    usable = not pd.isnull(weight) and weight >= 0
    return float(weight) if usable else 0.0

# Step 2: Calculate the weight for each row
def _scaled_quantity(row, column, weight, max_values=None):
    """Return ``weight`` scaled by the size of ``row[column]``.

    When ``max_values`` has an entry for ``column`` the value is divided by that
    dataset-wide maximum. Otherwise it is divided by ``max(value, 1)``, the
    historical per-row behaviour. Missing values contribute 0 and negative
    results are reset to 0 by ``ensure_valid_weight``.
    """
    value = row.get(column)
    if pd.isna(value):
        return 0
    if max_values is not None and column in max_values:
        denominator = max_values[column]
    else:
        denominator = np.nanmax([value, 1])
    return ensure_valid_weight((value / denominator) * weight)


def _positive_column_maxima(gdf, columns):
    """Return ``{column: maximum}`` for each of ``columns`` present in ``gdf``.

    Non-numeric entries are ignored. A column without any positive value maps
    to 1 so that dividing by the result is always defined.
    """
    maxima = {}
    for column in columns:
        if column in gdf.columns:
            peak = pd.to_numeric(gdf[column], errors='coerce').max()
            maxima[column] = peak if pd.notna(peak) and peak > 0 else 1
    return maxima


def calculate_law_enforcement_weight(row, max_values=None):
    """Return the total importance weight for one law-enforcement record.

    Args:
        row: Record exposing ``.get(column, default)`` (DataFrame row or dict).
        max_values: Optional mapping of quantitative column name to the
            dataset-wide maximum used for normalisation. When omitted, each
            value is divided by ``max(value, 1)`` as before, which gives every
            value >= 1 the full column weight.

    Returns:
        float: Non-negative sum of the qualitative and quantitative parts.
    """
    # Qualitative part: table lookup times the column's importance grade
    type_weight = ensure_valid_weight(type_weights.get(row.get('TYPE', 0), 0.0)) * qualitative_importance_grades['TYPE']
    status_weight = ensure_valid_weight(status_weights.get(row.get('STATUS', 0), 0.0)) * qualitative_importance_grades['STATUS']

    total_weight = type_weight + status_weight

    # Quantitative part: one scaled contribution per configured numeric column
    for column, weight in quantitative_weights.items():
        total_weight += _scaled_quantity(row, column, weight, max_values)

    return ensure_valid_weight(total_weight)

# Step 3: Apply the weights to the dataset and remove unnecessary fields
def apply_weights_and_strip_fields(gdf):
    """Add a ``Weight`` column to ``gdf`` and return only weight and geometry.

    Quantitative attributes are normalised against the largest value found in
    ``gdf`` so that bigger agencies receive proportionally more weight. The
    ``Weight`` column is written onto the frame that was passed in.

    Args:
        gdf: Frame with a ``geometry`` column plus the scored attributes.

    Returns:
        A frame holding just the ``Weight`` and ``geometry`` columns.
    """
    print("Starting weight calculation for each law enforcement agency...")
    start_time = time.time()

    # Computed once for the whole dataset, then handed to every row
    max_values = _positive_column_maxima(gdf, quantitative_weights)

    # Apply the weight calculation function to each row
    gdf['Weight'] = gdf.apply(calculate_law_enforcement_weight, axis=1, max_values=max_values)

    # Keep only the 'Weight' and 'geometry' fields
    gdf_stripped = gdf[['Weight', 'geometry']]

    elapsed_time = time.time() - start_time
    print(f"Weight calculation and field stripping completed in {elapsed_time:.2f} seconds.")

    return gdf_stripped

# Step 4: Load, weight, save, and report the change in file size
def process_and_save_weighted_geojson(input_path, output_dir):
    """Weight a GeoJSON file, save the stripped result and print size statistics.

    Args:
        input_path: Path of the GeoJSON file to read.
        output_dir: Directory that receives the ``*_weighted_cleaned.geojson`` file.

    Returns:
        None. Progress and the size comparison are printed.
    """
    bytes_per_mb = 1024 * 1024

    print(f"Loading dataset from {input_path}...")
    source_gdf = gpd.read_file(input_path)
    print("Dataset loaded successfully.")

    # On-disk size of the source file, in MB
    size_before = os.path.getsize(input_path) / bytes_per_mb
    print(f"Initial file size: {size_before:.2f} MB")

    weighted_gdf = apply_weights_and_strip_fields(source_gdf)

    # Output keeps the input's base name with the suffix swapped
    source_name = os.path.basename(input_path)
    target_name = source_name.replace(".geojson", "_weighted_cleaned.geojson")
    output_path = os.path.join(output_dir, target_name)

    print(f"Saving weighted dataset to {output_path}...")
    weighted_gdf.to_file(output_path, driver='GeoJSON')

    size_after = os.path.getsize(output_path) / bytes_per_mb
    print(f"Final file size: {size_after:.2f} MB")

    saved_mb = size_before - size_after
    if size_before > 0:
        saved_pct = (saved_mb / size_before) * 100
    else:
        saved_pct = 0

    print(f"File size reduced by: {saved_mb:.2f} MB ({saved_pct:.2f}% reduction).")

# Define paths
# NOTE(review): both paths are absolute from the filesystem root — confirm they
# match the environment this notebook runs in.
input_path = "/geoJSON/cleaned/Local_Law_Enforcement_Cleaned.geojson"
output_dir = "/geoJSON/cleaned_weighted"

# Ensure output directory exists (exist_ok=True: no error if it is already there)
os.makedirs(output_dir, exist_ok=True)

# Process and save the dataset
process_and_save_weighted_geojson(input_path, output_dir)


## Energy

### Electric

#### Electric Power Transformers and Grids
**Dataset: Electric Power Transmission Lines -** https://hifld-geoplatform.opendata.arcgis.com/datasets/geoplatform::electric-power-transmission-lines/about

In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import time
import os

# Step 1: Define the weight schemas
# Each qualitative table maps a raw attribute value to a base weight.
# calculate_power_grid_weight multiplies that base weight by the attribute's
# entry in qualitative_importance_grades; values not listed in a table score 0.0.

# Define qualitative attribute weights
# TYPE value -> base weight
type_weights = {
    'AC; OVERHEAD': 0.500,
    'OVERHEAD': 0.200,
    'AC': 0.100,
    'NOT AVAILABLE': 0.000,
    'AC; UNDERGROUND': 0.050,
    'UNDERGROUND': 0.030,
    'DC; OVERHEAD': 0.010,
    'DC': 0.005,
    'DC; UNDERGROUND': 0.005
}

# STATUS value -> base weight
status_weights = {
    'IN SERVICE': 0.800,
    'NOT AVAILABLE': 0.000,
    'INACTIVE': 0.050,
    'UNDER CONSTRUCTION': 0.030,
    'PROPOSED': 0.020
}

# Define overall importance grades for each qualitative property
# Keyed by column name; used as the multiplier for that column's base weight.
qualitative_importance_grades = {
    'TYPE': 0.900,
    'STATUS': 0.900
}

# Define quantitative attribute weights
# Keyed by numeric column name; the value is that column's maximum contribution.
quantitative_weights = {
    'VOLTAGE': 0.200
}

# Sanitise a weight so it is never NULL or negative
def ensure_valid_weight(weight):
    """Return ``weight`` as a float, or 0.0 when it is null or negative.

    Args:
        weight: Candidate weight (number, ``None`` or NaN).

    Returns:
        float: ``float(weight)`` for non-negative input, otherwise ``0.0``.
    """
    # The null check runs first so that None is never compared with a number
    usable = not pd.isnull(weight) and weight >= 0
    return float(weight) if usable else 0.0

# Step 2: Calculate the weight for each row
def _scaled_quantity(row, column, weight, max_values=None):
    """Return ``weight`` scaled by the size of ``row[column]``.

    When ``max_values`` has an entry for ``column`` the value is divided by that
    dataset-wide maximum. Otherwise it is divided by ``max(value, 1)``, the
    historical per-row behaviour. Missing values contribute 0 and negative
    results are reset to 0 by ``ensure_valid_weight``.
    """
    value = row.get(column)
    if pd.isna(value):
        return 0
    if max_values is not None and column in max_values:
        denominator = max_values[column]
    else:
        denominator = np.nanmax([value, 1])
    return ensure_valid_weight((value / denominator) * weight)


def _positive_column_maxima(gdf, columns):
    """Return ``{column: maximum}`` for each of ``columns`` present in ``gdf``.

    Non-numeric entries are ignored. A column without any positive value maps
    to 1 so that dividing by the result is always defined.
    """
    maxima = {}
    for column in columns:
        if column in gdf.columns:
            peak = pd.to_numeric(gdf[column], errors='coerce').max()
            maxima[column] = peak if pd.notna(peak) and peak > 0 else 1
    return maxima


def calculate_power_grid_weight(row, max_values=None):
    """Return the total importance weight for one transmission-line record.

    Args:
        row: Record exposing ``.get(column, default)`` (DataFrame row or dict).
        max_values: Optional mapping of quantitative column name to the
            dataset-wide maximum used for normalisation. When omitted, each
            value is divided by ``max(value, 1)`` as before, which gives every
            value >= 1 the full column weight.

    Returns:
        float: Non-negative sum of the qualitative and quantitative parts.
    """
    # Qualitative part: table lookup times the column's importance grade
    type_weight = ensure_valid_weight(type_weights.get(row.get('TYPE', 0), 0.0)) * qualitative_importance_grades['TYPE']
    status_weight = ensure_valid_weight(status_weights.get(row.get('STATUS', 0), 0.0)) * qualitative_importance_grades['STATUS']

    total_weight = type_weight + status_weight

    # Quantitative part: one scaled contribution per configured numeric column
    for column, weight in quantitative_weights.items():
        total_weight += _scaled_quantity(row, column, weight, max_values)

    return ensure_valid_weight(total_weight)

# Step 3: Apply the weights to the dataset and remove unnecessary fields
def apply_weights_and_strip_fields(gdf):
    """Add a ``Weight`` column to ``gdf`` and return only weight and geometry.

    Quantitative attributes are normalised against the largest value found in
    ``gdf`` so that higher-voltage lines receive proportionally more weight.
    The ``Weight`` column is written onto the frame that was passed in.

    Args:
        gdf: Frame with a ``geometry`` column plus the scored attributes.

    Returns:
        A frame holding just the ``Weight`` and ``geometry`` columns.
    """
    print("Starting weight calculation for each power transmission line...")
    start_time = time.time()

    # Computed once for the whole dataset, then handed to every row
    max_values = _positive_column_maxima(gdf, quantitative_weights)

    # Apply the weight calculation function to each row
    gdf['Weight'] = gdf.apply(calculate_power_grid_weight, axis=1, max_values=max_values)

    # Keep only the 'Weight' and 'geometry' fields
    gdf_stripped = gdf[['Weight', 'geometry']]

    elapsed_time = time.time() - start_time
    print(f"Weight calculation and field stripping completed in {elapsed_time:.2f} seconds.")

    return gdf_stripped

# Step 4: Load, weight, save, and report the change in file size
def process_and_save_weighted_geojson(input_path, output_dir):
    """Weight a GeoJSON file, save the stripped result and print size statistics.

    Args:
        input_path: Path of the GeoJSON file to read.
        output_dir: Directory that receives the ``*_weighted_cleaned.geojson`` file.

    Returns:
        None. Progress and the size comparison are printed.
    """
    bytes_per_mb = 1024 * 1024

    print(f"Loading dataset from {input_path}...")
    source_gdf = gpd.read_file(input_path)
    print("Dataset loaded successfully.")

    # On-disk size of the source file, in MB
    size_before = os.path.getsize(input_path) / bytes_per_mb
    print(f"Initial file size: {size_before:.2f} MB")

    weighted_gdf = apply_weights_and_strip_fields(source_gdf)

    # Output keeps the input's base name with the suffix swapped
    source_name = os.path.basename(input_path)
    target_name = source_name.replace(".geojson", "_weighted_cleaned.geojson")
    output_path = os.path.join(output_dir, target_name)

    print(f"Saving weighted dataset to {output_path}...")
    weighted_gdf.to_file(output_path, driver='GeoJSON')

    size_after = os.path.getsize(output_path) / bytes_per_mb
    print(f"Final file size: {size_after:.2f} MB")

    saved_mb = size_before - size_after
    if size_before > 0:
        saved_pct = (saved_mb / size_before) * 100
    else:
        saved_pct = 0

    print(f"File size reduced by: {saved_mb:.2f} MB ({saved_pct:.2f}% reduction).")

# Define paths
# NOTE(review): both paths are absolute from the filesystem root -- confirm
# they match the environment this cell runs in.
input_path = "/geoJSON/cleaned/Power_Transmission_Lines_Cleaned.geojson"
output_dir = "/geoJSON/cleaned_weighted"

# Ensure output directory exists (exist_ok=True: no error if it is already there)
os.makedirs(output_dir, exist_ok=True)

# Process and save the dataset (runs immediately when the cell executes)
process_and_save_weighted_geojson(input_path, output_dir)


#### Peak Shaving Facilities
**Dataset: Peak Shaving Facilities -** https://hifld-geoplatform.hub.arcgis.com/datasets/geoplatform::peak-shaving-facilities/about

In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import time
import os

# Step 1: Define the weight schemas

# Score for each facility TYPE value
type_weights = {
    "LNG PEAK SHAVING": 0.800,
    "PROPANE AIR PEAK SHAVING": 0.200,
}

# Score for each facility STATUS value
status_weights = {
    "ACTIVE": 0.900,
    "UNDER CONSTRUCTION": 0.050,
    "UNKNOWN": 0.050,
}

# Importance grade multiplied into each categorical attribute's table score
qualitative_importance_grades = {
    "TYPE": 0.500,
    "STATUS": 0.500,
}

# Weight multiplied into each numeric attribute's scaled value
quantitative_weights = {
    "NUMTANKS": 0.200,
    "NUM_VAPE": 0.150,
    "MAXVAPCAP": 0.250,
    "TOTALCAP": 0.400,
}

# Sanitize a weight so it is never NULL or negative
def ensure_valid_weight(weight):
    """Coerce ``weight`` to a float, mapping null or negative input to 0.0.

    Args:
        weight: Candidate weight; may be None, NaN, or a real number.

    Returns:
        float: 0.0 for null (per ``pd.isnull``) or negative input, otherwise
        ``float(weight)``.
    """
    if pd.isnull(weight):
        return 0.0
    if weight < 0:
        return 0.0
    return float(weight)

# Step 2: Calculate the weight for each row
def calculate_peak_shaving_weight(row):
    """Compute the combined weight of one peak shaving facility record.

    Args:
        row: Record exposing ``.get(column, default)``, such as the row that
            ``DataFrame.apply(..., axis=1)`` passes in.

    Returns:
        float: Sanitized sum of the TYPE and STATUS contributions plus the
        NUMTANKS, NUM_VAPE, MAXVAPCAP and TOTALCAP contributions.
    """

    def category_score(column, table):
        # A missing column or a value absent from the table scores 0.0 before
        # the attribute's importance grade is applied.
        score = ensure_valid_weight(table.get(row.get(column, 0), 0.0))
        return score * qualitative_importance_grades[column]

    def numeric_score(column):
        value = row.get(column)
        if pd.isna(value):
            return 0
        # The divisor is max(value, 1), so any value >= 1 gives a ratio of 1
        # and earns the attribute's full weight.
        # NOTE(review): this scales by the row's own value rather than a
        # dataset-wide maximum -- confirm that is intended.
        ratio = value / np.nanmax([value, 1])
        return ensure_valid_weight(ratio * quantitative_weights[column])

    total_weight = (
        category_score('TYPE', type_weights)
        + category_score('STATUS', status_weights)
        + numeric_score('NUMTANKS')
        + numeric_score('NUM_VAPE')
        + numeric_score('MAXVAPCAP')
        + numeric_score('TOTALCAP')
    )
    return ensure_valid_weight(total_weight)

# Step 3: Apply the weights to the dataset and remove unnecessary fields
def apply_weights_and_strip_fields(gdf):
    """Add a 'Weight' column to ``gdf`` and return only 'Weight' and 'geometry'.

    Args:
        gdf: Frame of peak shaving facilities with a 'geometry' column.
            It is modified in place: a 'Weight' column is added.

    Returns:
        The selection ``gdf[['Weight', 'geometry']]``.
    """
    print("Starting weight calculation for each peak shaving facility...")
    started_at = time.time()

    # One weight per row, computed by the row-level scoring function
    gdf['Weight'] = gdf.apply(calculate_peak_shaving_weight, axis=1)

    # Everything except the weight and the geometry is dropped
    gdf_stripped = gdf[['Weight', 'geometry']]

    elapsed_time = time.time() - started_at
    print(f"Weight calculation and field stripping completed in {elapsed_time:.2f} seconds.")

    return gdf_stripped

# Step 4: Process and save the dataset with weights
def process_and_save_weighted_geojson(input_path, output_dir):
    """Load a GeoJSON file, weight it, write the slimmed copy and report sizes.

    Args:
        input_path: Path of the GeoJSON file to read.
        output_dir: Directory that receives the weighted file; this function
            does not create it.

    Returns:
        None. Progress and file sizes are printed.
    """
    bytes_per_mb = 1024 * 1024

    print(f"Loading dataset from {input_path}...")
    source_gdf = gpd.read_file(input_path)
    print("Dataset loaded successfully.")

    initial_size = os.path.getsize(input_path) / bytes_per_mb
    print(f"Initial file size: {initial_size:.2f} MB")

    weighted_gdf = apply_weights_and_strip_fields(source_gdf)

    # Output name: the input's base name with ".geojson" replaced by
    # "_weighted_cleaned.geojson".
    source_name = os.path.basename(input_path)
    output_path = os.path.join(
        output_dir, source_name.replace(".geojson", "_weighted_cleaned.geojson")
    )

    print(f"Saving weighted dataset to {output_path}...")
    weighted_gdf.to_file(output_path, driver='GeoJSON')

    final_size = os.path.getsize(output_path) / bytes_per_mb
    print(f"Final file size: {final_size:.2f} MB")

    # Guard the percentage against a zero-byte input file
    reduction = initial_size - final_size
    if initial_size > 0:
        reduction_percentage = (reduction / initial_size) * 100
    else:
        reduction_percentage = 0

    print(f"File size reduced by: {reduction:.2f} MB ({reduction_percentage:.2f}% reduction).")

# Define paths
# NOTE(review): these paths are absolute from the filesystem root, while the
# recorded cell output shows a /content/drive/MyDrive/... prefix -- confirm
# which location is correct for the environment this cell runs in.
input_path = "/geoJSON/cleaned/Peak_Shaving_Facilities_Cleaned.geojson"
output_dir = "/geoJSON/cleaned_weighted"

# Ensure output directory exists (exist_ok=True: no error if it is already there)
os.makedirs(output_dir, exist_ok=True)

# Process and save the dataset (runs immediately when the cell executes)
process_and_save_weighted_geojson(input_path, output_dir)


Loading dataset from /content/drive/MyDrive/CI_Sentinal/geoJSON/cleaned/Peak_Shaving_Facilities_Cleaned.geojson...
Dataset loaded successfully.
Initial file size: 0.03 MB
Starting weight calculation for each peak shaving facility...
Weight calculation and field stripping completed in 0.12 seconds.
Saving weighted dataset to /content/drive/MyDrive/CI_Sentinal/geoJSON/cleaned_weighted/Peak_Shaving_Facilities_Cleaned_weighted_cleaned.geojson...
Final file size: 0.01 MB
File size reduced by: 0.02 MB (59.92% reduction).


### Gas

#### Above Ground LNG Storage
**Dataset: Above Ground LNG Storage Facilities -** https://hifld-geoplatform.hub.arcgis.com/datasets/geoplatform::above-ground-lng-storage-facilities/about

In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import time
import os
from tqdm import tqdm

# Step 1: Define the weight schemas

# Score for each facility TYPE value
type_weights = {
    "VEHICULAR FUEL": 0.400,
    "STORAGE W LIQUEFACTION": 0.300,
    "STORAGE W/O LIQUEFACTION": 0.200,
    "STORAGE W BOTH": 0.050,
    "STRANDED UTILITY": 0.015,
    "NOT AVAILABLE": 0.015,
    "OTHER": 0.010,
    "STORAGE - BUNKERING": 0.010,
}

# Score for each facility STATUS value
status_weights = {
    "ACTIVE": 0.950,
    "UNDER CONSTRUCTION": 0.020,
    "ABANDONED": 0.015,
    "NOT AVAILABLE": 0.015,
}

# Score for each LNG_SOURCE value
lng_source_weights = {
    "TRUCK": 0.700,
    "LIQUEFACTION": 0.290,
    "NOT AVAILABLE": 0.010,
}

# Score for each CON_TYPE value
con_type_weights = {
    "VERTICAL TANK": 0.400,
    "REFRIGERATED TANK": 0.300,
    "ISO HORIZONTAL TANK": 0.250,
    "NOT AVAILABLE": 0.030,
    "VERTICAL TANK / ISO HORIZONTAL TANK": 0.020,
}

# Importance grade multiplied into each categorical attribute's table score
qualitative_importance_grades = {
    "TYPE": 0.300,
    "STATUS": 0.300,
    "LNG_SOURCE": 0.250,
    "CON_TYPE": 0.150,
}

# Weight multiplied into each numeric attribute's scaled value
quantitative_weights = {
    "NUMTANKS": 0.300,
    "TOTALCAP": 0.700,
}

# Sanitize a weight so it is never NULL or negative
def ensure_valid_weight(weight):
    """Return ``weight`` as a float, replacing null or negative input with 0.0.

    Args:
        weight: Candidate weight; may be None, NaN, or a real number.

    Returns:
        float: 0.0 when ``weight`` is null (per ``pd.isnull``) or below zero,
        otherwise the value converted to ``float``.
    """
    usable = not pd.isnull(weight) and weight >= 0
    return float(weight) if usable else 0.0

# Step 2: Calculate the weight for each row
def calculate_lng_storage_weight(row):
    """Compute the combined weight of one above-ground LNG storage record.

    Args:
        row: Record exposing ``.get(column, default)``, such as a row yielded
            by ``DataFrame.iterrows()``.

    Returns:
        float: Sanitized sum of the TYPE, STATUS, LNG_SOURCE and CON_TYPE
        contributions plus the NUMTANKS and TOTALCAP contributions.
    """

    def category_score(column, table):
        # A missing column or a value absent from the table scores 0.0 before
        # the attribute's importance grade is applied.
        score = ensure_valid_weight(table.get(row.get(column, 0), 0.0))
        return score * qualitative_importance_grades[column]

    def numeric_score(column):
        value = row.get(column)
        if pd.isna(value):
            return 0
        # The divisor is max(value, 1), so any value >= 1 gives a ratio of 1
        # and earns the attribute's full weight.
        # NOTE(review): this scales by the row's own value rather than a
        # dataset-wide maximum -- confirm that is intended.
        ratio = value / np.nanmax([value, 1])
        return ensure_valid_weight(ratio * quantitative_weights[column])

    total_weight = (
        category_score('TYPE', type_weights)
        + category_score('STATUS', status_weights)
        + category_score('LNG_SOURCE', lng_source_weights)
        + category_score('CON_TYPE', con_type_weights)
        + numeric_score('NUMTANKS')
        + numeric_score('TOTALCAP')
    )
    return ensure_valid_weight(total_weight)

# Step 3: Apply the weights to the dataset and remove unnecessary fields
def apply_weights_and_strip_fields(gdf):
    """Add a 'Weight' column to ``gdf`` and return only 'Weight' and 'geometry'.

    Args:
        gdf: Frame of LNG storage facilities with a 'geometry' column.
            It is modified in place: a 'Weight' column is added.

    Returns:
        The selection ``gdf[['Weight', 'geometry']]``.
    """
    print("Starting weight calculation for each LNG storage facility...")
    start_time = time.time()

    # Advance the bar once per processed row. The previous logic added 100
    # whenever ``index % 100 == 0``, which jumped to 100 on the first row,
    # overshot the total, and required integer index labels.
    weights = []
    with tqdm(total=len(gdf), desc="Calculating weights", unit="rows") as pbar:
        for _, row in gdf.iterrows():
            weights.append(calculate_lng_storage_weight(row))
            pbar.update(1)

    # Assign positionally in one step so the column exists even for an empty
    # frame and duplicate index labels cannot overwrite each other.
    gdf['Weight'] = weights

    # Keep only the 'Weight' and 'geometry' fields
    fields_to_keep = ['Weight', 'geometry']
    gdf_stripped = gdf[fields_to_keep]

    elapsed_time = time.time() - start_time
    print(f"Weight calculation and field stripping completed in {elapsed_time:.2f} seconds.")

    return gdf_stripped

# Step 4: Process and save the dataset with weights
def process_and_save_weighted_geojson(input_path, output_dir):
    """Load a GeoJSON file, weight it, write the slimmed copy and report sizes.

    Args:
        input_path: Path of the GeoJSON file to read.
        output_dir: Directory that receives the weighted file; this function
            does not create it.

    Returns:
        None. Progress and file sizes are printed.
    """
    bytes_per_mb = 1024 * 1024

    print(f"Loading dataset from {input_path}...")
    source_gdf = gpd.read_file(input_path)
    print("Dataset loaded successfully.")

    initial_size = os.path.getsize(input_path) / bytes_per_mb
    print(f"Initial file size: {initial_size:.2f} MB")

    weighted_gdf = apply_weights_and_strip_fields(source_gdf)

    # Output name: the input's base name with ".geojson" replaced by
    # "_weighted_cleaned.geojson".
    source_name = os.path.basename(input_path)
    output_path = os.path.join(
        output_dir, source_name.replace(".geojson", "_weighted_cleaned.geojson")
    )

    print(f"Saving weighted dataset to {output_path}...")
    weighted_gdf.to_file(output_path, driver='GeoJSON')

    final_size = os.path.getsize(output_path) / bytes_per_mb
    print(f"Final file size: {final_size:.2f} MB")

    # Guard the percentage against a zero-byte input file
    reduction = initial_size - final_size
    if initial_size > 0:
        reduction_percentage = (reduction / initial_size) * 100
    else:
        reduction_percentage = 0

    print(f"File size reduced by: {reduction:.2f} MB ({reduction_percentage:.2f}% reduction).")

# Define paths
# NOTE(review): both paths are absolute from the filesystem root -- confirm
# they match the environment this cell runs in.
input_path = "/geoJSON/cleaned/LNG_Storage_Facilities_Cleaned.geojson"
output_dir = "/geoJSON/cleaned_weighted"

# Ensure output directory exists (exist_ok=True: no error if it is already there)
os.makedirs(output_dir, exist_ok=True)

# Process and save the dataset (runs immediately when the cell executes)
process_and_save_weighted_geojson(input_path, output_dir)


#### Biodiesel Plants
**Dataset: Biodiesel Plants -** https://hifld-geoplatform.hub.arcgis.com/datasets/geoplatform::biodiesel-plants/about

In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import time
import os

# Step 1: Weight schema -- Cap_Mmgal is the only scored attribute, so it
# carries the full weight.
cap_mmgal_weight = 1.0

# Sanitize a weight so it is never NULL or negative
def ensure_valid_weight(weight):
    """Coerce ``weight`` to a float, mapping null or negative input to 0.0.

    Args:
        weight: Candidate weight; may be None, NaN, or a real number.

    Returns:
        float: 0.0 for null (per ``pd.isnull``) or negative input, otherwise
        ``float(weight)``.
    """
    if pd.isnull(weight):
        return 0.0
    if weight < 0:
        return 0.0
    return float(weight)

# Step 2: Calculate the weight for each row
def calculate_biodiesel_plant_weight(row):
    """Compute the weight of one biodiesel plant record from Cap_Mmgal alone.

    Args:
        row: Record exposing ``.get(column, default)``, such as the row that
            ``DataFrame.apply(..., axis=1)`` passes in.

    Returns:
        float: Sanitized Cap_Mmgal contribution; 0.0 when the value is null
        or the column is absent.
    """
    capacity = row.get('Cap_Mmgal')
    if pd.isna(capacity):
        cap_weight = 0
    else:
        # The divisor is max(capacity, 1), so any capacity >= 1 gives a ratio
        # of 1 and earns the full weight.
        # NOTE(review): this scales by the row's own value rather than a
        # dataset-wide maximum -- confirm that is intended.
        ratio = capacity / np.nanmax([capacity, 1])
        cap_weight = ensure_valid_weight(ratio * cap_mmgal_weight)

    # Cap_Mmgal is the only attribute, so its contribution is the total
    return ensure_valid_weight(cap_weight)

# Step 3: Apply the weights to the dataset and remove unnecessary fields
def apply_weights_and_strip_fields(gdf):
    """Add a 'Weight' column to ``gdf`` and return only 'Weight' and 'geometry'.

    Args:
        gdf: Frame of biodiesel plants with a 'geometry' column.
            It is modified in place: a 'Weight' column is added.

    Returns:
        The selection ``gdf[['Weight', 'geometry']]``.
    """
    print("Starting weight calculation for each biodiesel plant...")
    started_at = time.time()

    # One weight per row, computed by the row-level scoring function
    gdf['Weight'] = gdf.apply(calculate_biodiesel_plant_weight, axis=1)

    # Everything except the weight and the geometry is dropped
    gdf_stripped = gdf[['Weight', 'geometry']]

    elapsed_time = time.time() - started_at
    print(f"Weight calculation and field stripping completed in {elapsed_time:.2f} seconds.")

    return gdf_stripped

# Step 4: Process and save the dataset with weights
def process_and_save_weighted_geojson(input_path, output_dir):
    """Load a GeoJSON file, weight it, write the slimmed copy and report sizes.

    Args:
        input_path: Path of the GeoJSON file to read.
        output_dir: Directory that receives the weighted file; this function
            does not create it.

    Returns:
        None. Progress and file sizes are printed.
    """
    bytes_per_mb = 1024 * 1024

    print(f"Loading dataset from {input_path}...")
    source_gdf = gpd.read_file(input_path)
    print("Dataset loaded successfully.")

    initial_size = os.path.getsize(input_path) / bytes_per_mb
    print(f"Initial file size: {initial_size:.2f} MB")

    weighted_gdf = apply_weights_and_strip_fields(source_gdf)

    # Output name: the input's base name with ".geojson" replaced by
    # "_weighted_cleaned.geojson".
    source_name = os.path.basename(input_path)
    output_path = os.path.join(
        output_dir, source_name.replace(".geojson", "_weighted_cleaned.geojson")
    )

    print(f"Saving weighted dataset to {output_path}...")
    weighted_gdf.to_file(output_path, driver='GeoJSON')

    final_size = os.path.getsize(output_path) / bytes_per_mb
    print(f"Final file size: {final_size:.2f} MB")

    # Guard the percentage against a zero-byte input file
    reduction = initial_size - final_size
    if initial_size > 0:
        reduction_percentage = (reduction / initial_size) * 100
    else:
        reduction_percentage = 0

    print(f"File size reduced by: {reduction:.2f} MB ({reduction_percentage:.2f}% reduction).")

# Define paths
# NOTE(review): both paths are absolute from the filesystem root -- confirm
# they match the environment this cell runs in.
input_path = "/geoJSON/cleaned/Biodiesel_Plants_Cleaned.geojson"
output_dir = "/geoJSON/cleaned_weighted"

# Ensure output directory exists (exist_ok=True: no error if it is already there)
os.makedirs(output_dir, exist_ok=True)

# Process and save the dataset (runs immediately when the cell executes)
process_and_save_weighted_geojson(input_path, output_dir)


#### Ethanol Plants
**Dataset: Ethanol Plants -** https://hifld-geoplatform.hub.arcgis.com/datasets/geoplatform::ethanol-plants/about

In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import time
import os

# Step 1: Define the weight schemas

# Score for each plant TYPE value
type_weights = {
    "ETHANOL PLANT": 1.000,
}

# Score for each plant STATUS value
status_weights = {
    "IN SERVICE": 0.950,
    "UNDER CONSTRUCTION": 0.040,
    "IDLE": 0.010,
}

# Score for each FEEDSTOCK value (keys are the exact combined strings)
feedstock_weights = {
    "CORN KERNELS, MILO, GRAINS, SUGAR": 0.300,
    "CORN KERNELS": 0.200,
    "CORN KERNELS, MILO": 0.050,
    "GRAINS": 0.050,
    "BEVERAGE AND/OR BREWERY WASTE": 0.030,
    "MUNICIPAL SOLID WASTE": 0.025,
    "CORN KERNELS, GRAINS": 0.025,
    "WHEY": 0.020,
    "SUGAR": 0.020,
    "WOODY BIOMASS": 0.020,
    "CORN STOVER": 0.020,
    "CORN STOVER, ENERGY GRASSES": 0.020,
    "CORN KERNELS, SUGAR": 0.020,
    "WASTE DRIVEN": 0.010,
    "WOODY BIOMASS, WOODY SUGARS": 0.010,
    "CORN KERNELS, CORN KERNEL FIBER": 0.010,
    "MILO, GRAINS": 0.010,
    "ENERGY GRASSES": 0.010,
    "WOOD SUGARS": 0.010,
    "CORN KERNELS, MILO, GRAINS": 0.010,
    "CORN KERNELS, ENERGY TOBACCO": 0.010,
    "WASTE SUGAR, STARCHES": 0.010,
    "CORN STOVER, ENERGY GRASSES, WOODY BIOMASS": 0.010,
    "MUNICIPAL SOLID WASTE, WOODY BIOMASS": 0.010,
}

# Importance grade multiplied into each categorical attribute's table score
qualitative_importance_grades = {
    "TYPE": 0.300,
    "STATUS": 0.400,
    "FEEDSTOCK": 0.300,
}

# Weight multiplied into each numeric attribute's scaled value
quantitative_weights = {
    "CURRENTCAP": 0.700,
    "NAMEPLATE": 0.300,
}

# Sanitize a weight so it is never NULL or negative
def ensure_valid_weight(weight):
    """Return ``weight`` as a float, replacing null or negative input with 0.0.

    Args:
        weight: Candidate weight; may be None, NaN, or a real number.

    Returns:
        float: 0.0 when ``weight`` is null (per ``pd.isnull``) or below zero,
        otherwise the value converted to ``float``.
    """
    usable = not pd.isnull(weight) and weight >= 0
    return float(weight) if usable else 0.0

# Step 2: Calculate the weight for each row

def calculate_ethanol_plant_weight(row):
    """Compute the combined weight of one ethanol plant record.

    Args:
        row: Record exposing ``.get(column, default)``, such as the row that
            ``DataFrame.apply(..., axis=1)`` passes in.

    Returns:
        float: Sanitized sum of the TYPE, STATUS and FEEDSTOCK contributions
        plus the CURRENTCAP and NAMEPLATE contributions, rounded to 4
        decimal places.
    """

    def category_score(column, table):
        # A missing column or a value absent from the table scores 0.0 before
        # the attribute's importance grade is applied.
        score = ensure_valid_weight(table.get(row.get(column, 0), 0.0))
        return score * qualitative_importance_grades[column]

    def numeric_score(column):
        value = row.get(column)
        if pd.isna(value):
            return 0
        # The divisor is max(value, 1), so any value >= 1 gives a ratio of 1
        # and earns the attribute's full weight.
        # NOTE(review): this scales by the row's own value rather than a
        # dataset-wide maximum -- confirm that is intended.
        ratio = value / np.nanmax([value, 1])
        return ensure_valid_weight(ratio * quantitative_weights[column])

    total_weight = (
        category_score('TYPE', type_weights)
        + category_score('STATUS', status_weights)
        + category_score('FEEDSTOCK', feedstock_weights)
        + numeric_score('CURRENTCAP')
        + numeric_score('NAMEPLATE')
    )
    return round(ensure_valid_weight(total_weight), 4)

# Step 3: Apply the weights to the dataset and remove unnecessary fields

def apply_weights_and_strip_fields(gdf):
    """Add a 'Weight' column to ``gdf`` and return only 'Weight' and 'geometry'.

    Args:
        gdf: Frame of ethanol plants with a 'geometry' column.
            It is modified in place: a 'Weight' column is added.

    Returns:
        The selection ``gdf[['Weight', 'geometry']]``.
    """
    print("Starting weight calculation for each ethanol plant...")
    started_at = time.time()

    # One weight per row, computed by the row-level scoring function
    gdf['Weight'] = gdf.apply(calculate_ethanol_plant_weight, axis=1)

    # Everything except the weight and the geometry is dropped
    gdf_stripped = gdf[['Weight', 'geometry']]

    elapsed_time = time.time() - started_at
    print(f"Weight calculation and field stripping completed in {elapsed_time:.2f} seconds.")

    return gdf_stripped


# Step 4: Process and save the dataset with weights
def process_and_save_weighted_geojson(input_path, output_dir):
    """Load a GeoJSON file, weight it, write the slimmed copy and report sizes.

    Args:
        input_path: Path of the GeoJSON file to read.
        output_dir: Directory that receives the weighted file; this function
            does not create it.

    Returns:
        None. Progress and file sizes are printed.
    """
    bytes_per_mb = 1024 * 1024

    print(f"Loading dataset from {input_path}...")
    source_gdf = gpd.read_file(input_path)
    print("Dataset loaded successfully.")

    initial_size = os.path.getsize(input_path) / bytes_per_mb
    print(f"Initial file size: {initial_size:.2f} MB")

    weighted_gdf = apply_weights_and_strip_fields(source_gdf)

    # Output name: the input's base name with ".geojson" replaced by
    # "_weighted_cleaned.geojson".
    source_name = os.path.basename(input_path)
    output_path = os.path.join(
        output_dir, source_name.replace(".geojson", "_weighted_cleaned.geojson")
    )

    print(f"Saving weighted dataset to {output_path}...")
    weighted_gdf.to_file(output_path, driver='GeoJSON')

    final_size = os.path.getsize(output_path) / bytes_per_mb
    print(f"Final file size: {final_size:.2f} MB")

    # Guard the percentage against a zero-byte input file
    reduction = initial_size - final_size
    if initial_size > 0:
        reduction_percentage = (reduction / initial_size) * 100
    else:
        reduction_percentage = 0

    print(f"File size reduced by: {reduction:.2f} MB ({reduction_percentage:.2f}% reduction).")

# Define paths
# NOTE(review): both paths are absolute from the filesystem root -- confirm
# they match the environment this cell runs in.
input_path = "/geoJSON/cleaned/Ethanol_Plants_Cleaned.geojson"
output_dir = "/geoJSON/cleaned_weighted"

# Ensure output directory exists (exist_ok=True: no error if it is already there)
os.makedirs(output_dir, exist_ok=True)

# Process and save the dataset (runs immediately when the cell executes)
process_and_save_weighted_geojson(input_path, output_dir)


#### Hydrocarbon Gas Liquid Pipelines
**Dataset: Hydrocarbon Gas Liquid Pipelines -** https://hifld-geoplatform.hub.arcgis.com/datasets/geoplatform::hydrocarbon-gas-liquid-pipelines/about

In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import time
import os

# Step 1: Define the weight schemas

# Per-attribute weights for the two length columns
# NOTE(review): both columns hold numeric lengths despite the "qualitative"
# naming -- confirm whether this table is still needed.
qualitative_weights = {
    "Shape_Leng": 0.000001,
    "Shape__Length": 0.000001,
}

# Multiplier applied to each length column when a pipeline segment is scored
qualitative_importance_grades = {
    "Shape_Leng": 0.000001,
    "Shape__Length": 0.000001,
}

# Sanitize a weight (no NULL or negative values) and round it to 4 decimal places
def ensure_valid_weight(weight):
    """Return ``weight`` as a float rounded to 4 decimals, or 0.0 if invalid.

    Args:
        weight: Candidate weight; may be None, NaN, or a real number.

    Returns:
        float: 0.0 when ``weight`` is null (per ``pd.isnull``) or negative,
        otherwise ``round(float(weight), 4)``.
    """
    if pd.isnull(weight):
        return 0.0
    if weight < 0:
        return 0.0
    return round(float(weight), 4)

# Step 2: Calculate the weight for each row
def calculate_pipeline_weight(row):
    """Compute the weight of one pipeline segment from its length columns.

    Args:
        row: Record exposing ``.get(column, default)``, such as the row that
            ``DataFrame.apply(..., axis=1)`` passes in.

    Returns:
        float: Sanitized sum of Shape_Leng and Shape__Length, each multiplied
        by its entry in ``qualitative_importance_grades``.
    """

    def scaled_length(column):
        # A missing column is read as 0; a null value contributes 0.
        length = row.get(column, 0)
        if pd.isna(length):
            return 0
        return length * qualitative_importance_grades[column]

    # NOTE(review): both length columns are added together; if they describe
    # the same measurement the length is counted twice -- confirm.
    total_weight = scaled_length('Shape_Leng') + scaled_length('Shape__Length')

    # ensure_valid_weight also applies the rounding to 4 decimal places
    return ensure_valid_weight(total_weight)

# Step 3: Apply the weights to the dataset and remove unnecessary fields
def apply_weights_and_strip_fields(gdf):
    """Add a 'Weight' column to ``gdf`` and return only 'Weight' and 'geometry'.

    Args:
        gdf: Frame of pipeline segments with a 'geometry' column.
            It is modified in place: a 'Weight' column is added.

    Returns:
        The selection ``gdf[['Weight', 'geometry']]``.
    """
    print("Starting weight calculation for each pipeline segment...")
    started_at = time.time()

    # One weight per row, computed by the row-level scoring function
    gdf['Weight'] = gdf.apply(calculate_pipeline_weight, axis=1)

    # Everything except the weight and the geometry is dropped
    gdf_stripped = gdf[['Weight', 'geometry']]

    elapsed_time = time.time() - started_at
    print(f"Weight calculation and field stripping completed in {elapsed_time:.2f} seconds.")

    return gdf_stripped

# Step 4: Process and save the dataset with weights
def process_and_save_weighted_geojson(input_path, output_dir):
    """Load a GeoJSON file, weight it, write the slimmed copy and report sizes.

    Args:
        input_path: Path of the GeoJSON file to read.
        output_dir: Directory that receives the weighted file; this function
            does not create it.

    Returns:
        None. Progress and file sizes are printed.
    """
    bytes_per_mb = 1024 * 1024

    print(f"Loading dataset from {input_path}...")
    source_gdf = gpd.read_file(input_path)
    print("Dataset loaded successfully.")

    initial_size = os.path.getsize(input_path) / bytes_per_mb
    print(f"Initial file size: {initial_size:.2f} MB")

    weighted_gdf = apply_weights_and_strip_fields(source_gdf)

    # Output name: the input's base name with ".geojson" replaced by
    # "_weighted_cleaned.geojson".
    source_name = os.path.basename(input_path)
    output_path = os.path.join(
        output_dir, source_name.replace(".geojson", "_weighted_cleaned.geojson")
    )

    print(f"Saving weighted dataset to {output_path}...")
    weighted_gdf.to_file(output_path, driver='GeoJSON')

    final_size = os.path.getsize(output_path) / bytes_per_mb
    print(f"Final file size: {final_size:.2f} MB")

    # Guard the percentage against a zero-byte input file
    reduction = initial_size - final_size
    if initial_size > 0:
        reduction_percentage = (reduction / initial_size) * 100
    else:
        reduction_percentage = 0

    print(f"File size reduced by: {reduction:.2f} MB ({reduction_percentage:.2f}% reduction).")

# Define paths
# NOTE(review): both paths are absolute from the filesystem root -- confirm
# they match the environment this cell runs in.
input_path = "/geoJSON/cleaned/Hydrocarbon_Pipelines_Cleaned.geojson"
output_dir = "/geoJSON/cleaned_weighted"

# Ensure output directory exists (exist_ok=True: no error if it is already there)
os.makedirs(output_dir, exist_ok=True)

# Process and save the dataset (runs immediately when the cell executes)
process_and_save_weighted_geojson(input_path, output_dir)


#### LNG Import and Export Terminals
**Dataset: Liquified Natural Gas Import Exports and Terminals -** https://hifld-geoplatform.hub.arcgis.com/datasets/geoplatform::liquified-natural-gas-import-exports-and-terminals/about

In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import time
import os

# Step 1: Define the weight schemas

# Score for each terminal TYPE value
type_weights = {
    "IMPORT": 0.600,
    "EXPORT": 0.400,
}

# Score for each terminal STATUS value
status_weights = {
    "IN SERVICE": 0.800,
    "UNDER CONSTRUCTION": 0.160,
    "SUSPENDED": 0.040,
}

# Score for each CONTYPE value
contype_weights = {
    "LNG TANKER": 0.960,
    "ISO CONTAINER": 0.040,
}

# Score for each IE_PORT value
ie_port_weights = {
    "COVE POINT": 0.120,
    "ELBA ISLAND": 0.080,
    "HACKBERRY": 0.080,
    "SABINE PASS": 0.080,
    "FREEPORT": 0.080,
    "Other": 0.560,  # For all other ports
}

# Score for each IMPEXPCTRY value
impexpctry_weights = {
    "NOT APPLICABLE": 0.500,
    "Country Combinations": 0.500,
}

# Importance grade multiplied into each categorical attribute's table score
qualitative_importance_grades = {
    "TYPE": 0.250,
    "STATUS": 0.250,
    "CONTYPE": 0.250,
    "IE_PORT": 0.150,
    "IMPEXPCTRY": 0.100,
}

# Weight multiplied into each numeric attribute's scaled value
quantitative_weights = {
    "STORCAP": 0.200,
    "CURRENTCAP": 0.200,
    "BERTHS": 0.100,
    "STORAGE": 0.200,
    "APPCAP": 0.150,
    "VOLUME": 0.150,
}

# Sanitize a weight so it is never NULL or negative
def ensure_valid_weight(weight):
    """Coerce ``weight`` to a float, mapping null or negative input to 0.0.

    Args:
        weight: Candidate weight; may be None, NaN, or a real number.

    Returns:
        float: 0.0 for null (per ``pd.isnull``) or negative input, otherwise
        ``float(weight)``.
    """
    if pd.isnull(weight):
        return 0.0
    if weight < 0:
        return 0.0
    return float(weight)

# Step 2: Calculate the weight for each row
def calculate_lng_terminal_weight(row):
    """Compute the combined weight of one LNG import/export terminal record.

    Args:
        row: Record exposing ``.get(column, default)``, such as the row that
            ``DataFrame.apply(..., axis=1)`` passes in.

    Returns:
        float: Sanitized sum of the TYPE, STATUS, CONTYPE, IE_PORT and
        IMPEXPCTRY contributions plus the STORCAP, CURRENTCAP, BERTHS,
        STORAGE, APPCAP and VOLUME contributions, rounded to 4 decimal places.
    """

    def category_score(column, table, catch_all=None):
        if catch_all is None:
            # No catch-all entry: a missing column or unlisted value scores 0.0
            looked_up = table.get(row.get(column, 0), 0.0)
        else:
            value = row.get(column, catch_all)
            if pd.isna(value):
                # Null cells keep their previous score of 0.0
                looked_up = 0.0
            else:
                # Fix: values not listed in the table now use the catch-all
                # entry. Previously the catch-all key was only the default for
                # a missing column, so every unlisted port or country scored 0.0.
                looked_up = table.get(value, table[catch_all])
        return ensure_valid_weight(looked_up) * qualitative_importance_grades[column]

    def numeric_score(column):
        value = row.get(column)
        if pd.isna(value):
            return 0
        # The divisor is max(value, 1), so any value >= 1 gives a ratio of 1
        # and earns the attribute's full weight.
        # NOTE(review): this scales by the row's own value rather than a
        # dataset-wide maximum -- confirm that is intended.
        ratio = value / np.nanmax([value, 1])
        return ensure_valid_weight(ratio * quantitative_weights[column])

    # Combine qualitative and quantitative weights
    total_weight = (
        category_score('TYPE', type_weights)
        + category_score('STATUS', status_weights)
        + category_score('CONTYPE', contype_weights)
        + category_score('IE_PORT', ie_port_weights, catch_all='Other')
        + category_score('IMPEXPCTRY', impexpctry_weights, catch_all='Country Combinations')
        + numeric_score('STORCAP')
        + numeric_score('CURRENTCAP')
        + numeric_score('BERTHS')
        + numeric_score('STORAGE')
        + numeric_score('APPCAP')
        + numeric_score('VOLUME')
    )

    # Round the total weight to 4 decimal places
    return round(ensure_valid_weight(total_weight), 4)

# Step 3: Apply the weights to the dataset and remove unnecessary fields
def apply_weights_and_strip_fields(gdf):
    """Add a 'Weight' column to ``gdf`` and return only 'Weight' and 'geometry'.

    Args:
        gdf: Frame of LNG terminals with a 'geometry' column.
            It is modified in place: a 'Weight' column is added.

    Returns:
        The selection ``gdf[['Weight', 'geometry']]``.
    """
    print("Starting weight calculation for each LNG terminal...")
    started_at = time.time()

    # One weight per row, computed by the row-level scoring function
    gdf['Weight'] = gdf.apply(calculate_lng_terminal_weight, axis=1)

    # Everything except the weight and the geometry is dropped
    gdf_stripped = gdf[['Weight', 'geometry']]

    elapsed_time = time.time() - started_at
    print(f"Weight calculation and field stripping completed in {elapsed_time:.2f} seconds.")

    return gdf_stripped

# Step 4: Process and save the dataset with weights
def process_and_save_weighted_geojson(input_path, output_dir):
    """Load a GeoJSON file, weight it, write the slimmed copy and report sizes.

    Args:
        input_path: Path of the GeoJSON file to read.
        output_dir: Directory that receives the weighted file; this function
            does not create it.

    Returns:
        None. Progress and file sizes are printed.
    """
    bytes_per_mb = 1024 * 1024

    print(f"Loading dataset from {input_path}...")
    source_gdf = gpd.read_file(input_path)
    print("Dataset loaded successfully.")

    initial_size = os.path.getsize(input_path) / bytes_per_mb
    print(f"Initial file size: {initial_size:.2f} MB")

    weighted_gdf = apply_weights_and_strip_fields(source_gdf)

    # Output name: the input's base name with ".geojson" replaced by
    # "_weighted_cleaned.geojson".
    source_name = os.path.basename(input_path)
    output_path = os.path.join(
        output_dir, source_name.replace(".geojson", "_weighted_cleaned.geojson")
    )

    print(f"Saving weighted dataset to {output_path}...")
    weighted_gdf.to_file(output_path, driver='GeoJSON')

    final_size = os.path.getsize(output_path) / bytes_per_mb
    print(f"Final file size: {final_size:.2f} MB")

    # Guard the percentage against a zero-byte input file
    reduction = initial_size - final_size
    if initial_size > 0:
        reduction_percentage = (reduction / initial_size) * 100
    else:
        reduction_percentage = 0

    print(f"File size reduced by: {reduction:.2f} MB ({reduction_percentage:.2f}% reduction).")

# Define paths
# NOTE(review): both paths are absolute from the filesystem root -- confirm
# they match the environment this cell runs in.
input_path = "/geoJSON/cleaned/LNG_Terminals_Cleaned.geojson"
output_dir = "/geoJSON/cleaned_weighted"

# Ensure output directory exists (exist_ok=True: no error if it is already there)
os.makedirs(output_dir, exist_ok=True)

# Process and save the dataset (runs immediately when the cell executes)
process_and_save_weighted_geojson(input_path, output_dir)


#### Natural Gas Compressor Stations
**Dataset: Natural Gas Compressor Stations -** https://hifld-geoplatform.hub.arcgis.com/datasets/geoplatform::natural-gas-compressor-stations/about

In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import time
import os

# Step 1: Define the weight schemas

# Score for each station TYPE value
type_weights = {
    "NATURAL GAS COMPRESSOR STATION": 1.000,
}

# Score for each station STATUS value
status_weights = {
    "IN SERVICE": 0.800,
    "NOT AVAILABLE": 0.100,
    "SUSPENDED": 0.070,
    "ABANDONED": 0.025,
    "CLOSED": 0.005,
}

# Importance grade multiplied into each categorical attribute's table score
qualitative_importance_grades = {
    "TYPE": 0.500,
    "STATUS": 0.500,
}

# Weight multiplied into each numeric attribute's scaled value
quantitative_weights = {
    "NUM_UNITS": 0.150,
    "CERT_HP": 0.200,
    "PLANT_COST": 0.150,
    "EXP_FUEL": 0.100,
    "EXP_OTHER": 0.100,
    "GAS_COMPRE": 0.150,
    "OP_NUM_COM": 0.100,
}

# Function to ensure valid weight value (no NULL or negative weights)
def ensure_valid_weight(weight):
    """Return ``weight`` as a float, or 0.0 when it is null or negative."""
    usable = not pd.isnull(weight) and not weight < 0
    return float(weight) if usable else 0.0

# Step 2: Calculate the weight for each row
def _column_maxima(gdf, columns):
    """Return ``{column: largest numeric value}`` for each listed column present in ``gdf``.

    Entries that cannot be parsed as numbers are ignored; a column without any
    numeric entry maps to NaN.
    """
    return {
        column: pd.to_numeric(gdf[column], errors='coerce').max()
        for column in columns
        if column in gdf.columns
    }


def _scaled_share(value, column_max, share):
    """Return ``share`` scaled by ``value`` relative to ``column_max``.

    Missing, zero and negative values contribute 0.0. The divisor is never
    smaller than 1. When no usable ``column_max`` is given, the value itself is
    used as the reference, which reproduces the earlier per-row behaviour.
    """
    if pd.isna(value) or value <= 0:
        return 0.0
    reference = value if column_max is None or pd.isna(column_max) else column_max
    return float((value / max(reference, 1)) * share)


def calculate_compressor_station_weight(row, column_maxima=None):
    """Compute the importance weight of one compressor station record.

    The weight is the sum of the graded TYPE and STATUS scores plus, for every
    column in ``quantitative_weights``, that column's share scaled by the
    row's value relative to the column maximum.

    Args:
        row: Mapping-like record (dict or pandas Series) supporting ``.get``.
        column_maxima: Optional ``{column: maximum}`` mapping for the whole
            dataset. Previously each value was divided by ``max(value, 1)``,
            so every value >= 1 earned the full share regardless of size.
            Passing dataset maxima makes the contribution proportional. When
            omitted, the per-row behaviour is kept for existing callers.

    Returns:
        The non-negative total weight rounded to 4 decimal places.
    """
    if column_maxima is None:
        column_maxima = {}

    # Qualitative part: unknown or missing categories score 0.0
    total_weight = (
        ensure_valid_weight(type_weights.get(row.get('TYPE', 0), 0.0))
        * qualitative_importance_grades['TYPE']
    )
    total_weight += (
        ensure_valid_weight(status_weights.get(row.get('STATUS', 0), 0.0))
        * qualitative_importance_grades['STATUS']
    )

    # Quantitative part: one scaled share per configured numeric column
    for column, share in quantitative_weights.items():
        total_weight += _scaled_share(row.get(column), column_maxima.get(column), share)

    # Round the total weight to 4 decimal places
    return round(ensure_valid_weight(total_weight), 4)

# Step 3: Apply the weights to the dataset and remove unnecessary fields
def apply_weights_and_strip_fields(gdf):
    """Add a 'Weight' column to ``gdf`` and return only 'Weight' and 'geometry'.

    ``gdf`` is modified in place (the 'Weight' column is added to it). The
    elapsed time is printed.
    """
    print("Starting weight calculation for each compressor station...")
    start_time = time.time()

    # Compute the column maxima once so every row is scaled against the same reference
    column_maxima = _column_maxima(gdf, quantitative_weights)
    gdf['Weight'] = gdf.apply(
        lambda row: calculate_compressor_station_weight(row, column_maxima),
        axis=1,
    )

    # Keep only the 'Weight' and 'geometry' fields
    gdf_stripped = gdf[['Weight', 'geometry']]

    elapsed_time = time.time() - start_time
    print(f"Weight calculation and field stripping completed in {elapsed_time:.2f} seconds.")

    return gdf_stripped

# Step 4: Process and save the dataset with weights
def process_and_save_weighted_geojson(input_path, output_dir):
    """Load a GeoJSON layer, weight it, save it under ``output_dir`` and report sizes.

    The output file takes the input's base name with ``.geojson`` replaced by
    ``_weighted_cleaned.geojson``. Progress and file sizes (MB) are printed.
    """
    bytes_per_mb = 1024 * 1024

    print(f"Loading dataset from {input_path}...")
    source_gdf = gpd.read_file(input_path)
    print("Dataset loaded successfully.")

    # Size of the input file on disk
    initial_size = os.path.getsize(input_path) / bytes_per_mb
    print(f"Initial file size: {initial_size:.2f} MB")

    # Weight every feature and drop all other attributes
    weighted_gdf = apply_weights_and_strip_fields(source_gdf)

    source_name = os.path.basename(input_path)
    output_path = os.path.join(
        output_dir,
        source_name.replace(".geojson", "_weighted_cleaned.geojson"),
    )

    print(f"Saving weighted dataset to {output_path}...")
    weighted_gdf.to_file(output_path, driver='GeoJSON')

    # Size of the written file on disk
    final_size = os.path.getsize(output_path) / bytes_per_mb
    print(f"Final file size: {final_size:.2f} MB")

    reduction = initial_size - final_size
    if initial_size > 0:
        reduction_percentage = (reduction / initial_size) * 100
    else:
        reduction_percentage = 0

    print(f"File size reduced by: {reduction:.2f} MB ({reduction_percentage:.2f}% reduction).")

# Locations: directory receiving the weighted layer, and the cleaned input layer
output_dir = "/geoJSON/cleaned_weighted"
input_path = "/geoJSON/cleaned/Gas_Compressor_Stations_Cleaned.geojson"

# Create the output directory when it is not there yet (no error if it exists)
os.makedirs(output_dir, exist_ok=True)

# Run the load -> weight -> save pipeline for this dataset
process_and_save_weighted_geojson(input_path, output_dir)


#### Natural Gas Pipelines
**Dataset: Natural Gas Pipelines -** https://hifld-geoplatform.opendata.arcgis.com/datasets/geoplatform::natural-gas-pipelines/about

In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import time
import os

# Step 1: Define the weight schemas
# These module-level tables are read by calculate_pipeline_weight.

# Define qualitative attribute weights
# Score per TYPEPIPE value.
typepipe_weights = {
    'Interstate': 0.600,
    'Intrastate': 0.350,
    'Gathering': 0.040,
    'Other': 0.010
}

# Define overall importance grades for each qualitative property
# The grade multiplies the score looked up for TYPEPIPE.
qualitative_importance_grades = {
    'TYPEPIPE': 1.000
}

# Define quantitative attribute weights
# Share that the Shape_Leng column can add to a segment's total weight.
quantitative_weights = {
    'Shape_Leng': 1.000
}

# Function to ensure valid weight value (no NULL or negative weights)
def ensure_valid_weight(weight):
    """Return ``weight`` as a float, or 0.0 when it is null or negative."""
    usable = not pd.isnull(weight) and not weight < 0
    return float(weight) if usable else 0.0

# Step 2: Calculate the weight for each row
def _column_maxima(gdf, columns):
    """Return ``{column: largest numeric value}`` for each listed column present in ``gdf``.

    Entries that cannot be parsed as numbers are ignored; a column without any
    numeric entry maps to NaN.
    """
    return {
        column: pd.to_numeric(gdf[column], errors='coerce').max()
        for column in columns
        if column in gdf.columns
    }


def _scaled_share(value, column_max, share):
    """Return ``share`` scaled by ``value`` relative to ``column_max``.

    Missing, zero and negative values contribute 0.0. The divisor is never
    smaller than 1. When no usable ``column_max`` is given, the value itself is
    used as the reference, which reproduces the earlier per-row behaviour.
    """
    if pd.isna(value) or value <= 0:
        return 0.0
    reference = value if column_max is None or pd.isna(column_max) else column_max
    return float((value / max(reference, 1)) * share)


def calculate_pipeline_weight(row, column_maxima=None):
    """Compute the importance weight of one pipeline segment record.

    The weight is the graded TYPEPIPE score plus, for every column in
    ``quantitative_weights``, that column's share scaled by the row's value
    relative to the column maximum.

    Args:
        row: Mapping-like record (dict or pandas Series) supporting ``.get``.
        column_maxima: Optional ``{column: maximum}`` mapping for the whole
            dataset. Previously the length was divided by ``max(length, 1)``,
            so every segment with length >= 1 earned the full share. Passing
            dataset maxima makes the contribution proportional. When omitted,
            the per-row behaviour is kept for existing callers.

    Returns:
        The non-negative total weight rounded to 4 decimal places.
    """
    if column_maxima is None:
        column_maxima = {}

    # NOTE(review): 'Other' is only substituted when the TYPEPIPE field is
    # absent from the row; a present but unlisted value scores 0.0 — confirm
    # this is intended.
    total_weight = (
        ensure_valid_weight(typepipe_weights.get(row.get('TYPEPIPE', 'Other'), 0.0))
        * qualitative_importance_grades['TYPEPIPE']
    )

    # Quantitative part: one scaled share per configured numeric column
    for column, share in quantitative_weights.items():
        total_weight += _scaled_share(row.get(column), column_maxima.get(column), share)

    # Round the total weight to 4 decimal places
    return round(ensure_valid_weight(total_weight), 4)

# Step 3: Apply the weights, remove features with null geometry, and strip unnecessary fields
def apply_weights_and_filter(gdf):
    """Weight every row of ``gdf`` and return the non-null-geometry rows with
    only 'Weight' and 'geometry'.

    ``gdf`` is modified in place (the 'Weight' column is added to it). The
    elapsed time is printed.
    """
    print("Starting weight calculation for each pipeline segment...")
    start_time = time.time()

    # Compute the column maxima once so every row is scaled against the same reference
    column_maxima = _column_maxima(gdf, quantitative_weights)
    gdf['Weight'] = gdf.apply(
        lambda row: calculate_pipeline_weight(row, column_maxima),
        axis=1,
    )

    # Filter out rows with null geometry
    gdf_filtered = gdf[gdf.geometry.notnull()].copy()

    # Keep only the 'Weight' and 'geometry' fields
    gdf_stripped = gdf_filtered[['Weight', 'geometry']]

    elapsed_time = time.time() - start_time
    print(f"Weight calculation, filtering null geometries, and field stripping completed in {elapsed_time:.2f} seconds.")

    return gdf_stripped

# Step 4: Process and save the dataset with weights
def process_and_save_weighted_geojson(input_path, output_dir):
    """Load a GeoJSON layer, weight and filter it, save it under ``output_dir``
    and report sizes.

    The output file takes the input's base name with ``.geojson`` replaced by
    ``_weighted_cleaned.geojson``. Progress and file sizes (MB) are printed.
    """
    bytes_per_mb = 1024 * 1024

    print(f"Loading dataset from {input_path}...")
    source_gdf = gpd.read_file(input_path)
    print("Dataset loaded successfully.")

    # Size of the input file on disk
    initial_size = os.path.getsize(input_path) / bytes_per_mb
    print(f"Initial file size: {initial_size:.2f} MB")

    # Weight every feature, drop null geometries and all other attributes
    weighted_gdf = apply_weights_and_filter(source_gdf)

    source_name = os.path.basename(input_path)
    output_path = os.path.join(
        output_dir,
        source_name.replace(".geojson", "_weighted_cleaned.geojson"),
    )

    print(f"Saving weighted dataset to {output_path}...")
    weighted_gdf.to_file(output_path, driver='GeoJSON')

    # Size of the written file on disk
    final_size = os.path.getsize(output_path) / bytes_per_mb
    print(f"Final file size: {final_size:.2f} MB")

    reduction = initial_size - final_size
    if initial_size > 0:
        reduction_percentage = (reduction / initial_size) * 100
    else:
        reduction_percentage = 0

    print(f"File size reduced by: {reduction:.2f} MB ({reduction_percentage:.2f}% reduction).")

# Locations: directory receiving the weighted layer, and the cleaned input layer
# NOTE(review): this section is headed "Natural Gas Pipelines" but the input
# file is named NGL_Pipelines_Cleaned.geojson — confirm this is the intended file.
output_dir = "/geoJSON/cleaned_weighted"
input_path = "/geoJSON/cleaned/NGL_Pipelines_Cleaned.geojson"

# Create the output directory when it is not there yet (no error if it exists)
os.makedirs(output_dir, exist_ok=True)

# Run the load -> weight -> save pipeline for this dataset
process_and_save_weighted_geojson(input_path, output_dir)


#### Natural Gas Processing Plants
**Dataset: Natural Gas Processing Plants -** https://hifld-geoplatform.hub.arcgis.com/datasets/geoplatform::natural-gas-processing-plants/about

In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import time
import os

# Step 1: Define the weight schemas
# These module-level tables are read by calculate_ng_processing_weight.

# Define qualitative attribute weights
# Score per TYPE value; values not listed here score 0.0 at lookup time.
type_weights = {
    'NATURAL GAS LIQUIDS EXTRACTION PLANT': 0.300,
    'NATURAL GAS PROCESSING PLANT': 0.250,
    'NOT AVAILABLE': 0.100,
    'NATURAL GAS PROCESSING AND NATURAL GAS LIQUIDS EXTRACTION PLANT': 0.150,
    'CRUDE PETROLEUM AND NATURAL GAS EXTRACTION PLANT': 0.125,
    'SWEET NATURAL GAS PROCESSING PLANT': 0.075
}

# Score per STATUS value; values not listed here score 0.0 at lookup time.
status_weights = {
    'ACTIVE': 0.800,
    'NOT AVAILABLE': 0.150,
    'INACTIVE': 0.050
}

# Define overall importance grades for each qualitative property
# Each grade multiplies the score looked up for the property of the same name.
qualitative_importance_grades = {
    'TYPE': 0.500,
    'STATUS': 0.500
}

# Define quantitative attribute weights
# Keys are numeric column names; each value is the share that column can add
# to a plant's total weight.
quantitative_weights = {
    'GASCAP': 0.250,
    'PLANTFLOW': 0.250,
    'BTUCONTENT': 0.150,
    'PROCAMTBLS': 0.150,
    'GASSTORCAP': 0.100,
    'LIQSTORCAP': 0.100
}

# Function to ensure valid weight value (no NULL or negative weights)
def ensure_valid_weight(weight):
    """Return ``weight`` as a float, or 0.0 when it is null or negative."""
    usable = not pd.isnull(weight) and not weight < 0
    return float(weight) if usable else 0.0

# Step 2: Calculate the weight for each row
def _column_maxima(gdf, columns):
    """Return ``{column: largest numeric value}`` for each listed column present in ``gdf``.

    Entries that cannot be parsed as numbers are ignored; a column without any
    numeric entry maps to NaN.
    """
    return {
        column: pd.to_numeric(gdf[column], errors='coerce').max()
        for column in columns
        if column in gdf.columns
    }


def _scaled_share(value, column_max, share):
    """Return ``share`` scaled by ``value`` relative to ``column_max``.

    Missing, zero and negative values contribute 0.0. The divisor is never
    smaller than 1. When no usable ``column_max`` is given, the value itself is
    used as the reference, which reproduces the earlier per-row behaviour.
    """
    if pd.isna(value) or value <= 0:
        return 0.0
    reference = value if column_max is None or pd.isna(column_max) else column_max
    return float((value / max(reference, 1)) * share)


def calculate_ng_processing_weight(row, column_maxima=None):
    """Compute the importance weight of one natural gas processing plant record.

    The weight is the sum of the graded TYPE and STATUS scores plus, for every
    column in ``quantitative_weights``, that column's share scaled by the
    row's value relative to the column maximum.

    Args:
        row: Mapping-like record (dict or pandas Series) supporting ``.get``.
        column_maxima: Optional ``{column: maximum}`` mapping for the whole
            dataset. Previously each value was divided by ``max(value, 1)``,
            so every value >= 1 earned the full share regardless of size.
            Passing dataset maxima makes the contribution proportional. When
            omitted, the per-row behaviour is kept for existing callers.

    Returns:
        The non-negative total weight rounded to 4 decimal places.
    """
    if column_maxima is None:
        column_maxima = {}

    # Qualitative part: 'NOT AVAILABLE' is substituted only when the field is
    # absent from the row; present but unlisted values score 0.0
    total_weight = (
        ensure_valid_weight(type_weights.get(row.get('TYPE', 'NOT AVAILABLE'), 0.0))
        * qualitative_importance_grades['TYPE']
    )
    total_weight += (
        ensure_valid_weight(status_weights.get(row.get('STATUS', 'NOT AVAILABLE'), 0.0))
        * qualitative_importance_grades['STATUS']
    )

    # Quantitative part: one scaled share per configured numeric column
    for column, share in quantitative_weights.items():
        total_weight += _scaled_share(row.get(column), column_maxima.get(column), share)

    # Round the total weight to 4 decimal places
    return round(ensure_valid_weight(total_weight), 4)

# Step 3: Apply the weights, remove features with null geometry, and strip unnecessary fields
def apply_weights_and_filter(gdf):
    """Weight every row of ``gdf`` and return the non-null-geometry rows with
    only 'Weight' and 'geometry'.

    ``gdf`` is modified in place (the 'Weight' column is added to it). The
    elapsed time is printed.
    """
    print("Starting weight calculation for each natural gas processing plant...")
    start_time = time.time()

    # Compute the column maxima once so every row is scaled against the same reference
    column_maxima = _column_maxima(gdf, quantitative_weights)
    gdf['Weight'] = gdf.apply(
        lambda row: calculate_ng_processing_weight(row, column_maxima),
        axis=1,
    )

    # Filter out rows with null geometry
    gdf_filtered = gdf[gdf.geometry.notnull()].copy()

    # Keep only the 'Weight' and 'geometry' fields
    gdf_stripped = gdf_filtered[['Weight', 'geometry']]

    elapsed_time = time.time() - start_time
    print(f"Weight calculation, filtering null geometries, and field stripping completed in {elapsed_time:.2f} seconds.")

    return gdf_stripped

# Step 4: Process and save the dataset with weights
def process_and_save_weighted_geojson(input_path, output_dir):
    """Load a GeoJSON layer, weight and filter it, save it under ``output_dir``
    and report sizes.

    The output file takes the input's base name with ``.geojson`` replaced by
    ``_weighted_cleaned.geojson``. Progress and file sizes (MB) are printed.
    """
    bytes_per_mb = 1024 * 1024

    print(f"Loading dataset from {input_path}...")
    source_gdf = gpd.read_file(input_path)
    print("Dataset loaded successfully.")

    # Size of the input file on disk
    initial_size = os.path.getsize(input_path) / bytes_per_mb
    print(f"Initial file size: {initial_size:.2f} MB")

    # Weight every feature, drop null geometries and all other attributes
    weighted_gdf = apply_weights_and_filter(source_gdf)

    source_name = os.path.basename(input_path)
    output_path = os.path.join(
        output_dir,
        source_name.replace(".geojson", "_weighted_cleaned.geojson"),
    )

    print(f"Saving weighted dataset to {output_path}...")
    weighted_gdf.to_file(output_path, driver='GeoJSON')

    # Size of the written file on disk
    final_size = os.path.getsize(output_path) / bytes_per_mb
    print(f"Final file size: {final_size:.2f} MB")

    reduction = initial_size - final_size
    if initial_size > 0:
        reduction_percentage = (reduction / initial_size) * 100
    else:
        reduction_percentage = 0

    print(f"File size reduced by: {reduction:.2f} MB ({reduction_percentage:.2f}% reduction).")

# Locations: directory receiving the weighted layer, and the cleaned input layer
output_dir = "/geoJSON/cleaned_weighted"
input_path = "/geoJSON/cleaned/NG_Processing_Plants_Cleaned.geojson"

# Create the output directory when it is not there yet (no error if it exists)
os.makedirs(output_dir, exist_ok=True)

# Run the load -> weight -> save pipeline for this dataset
process_and_save_weighted_geojson(input_path, output_dir)


#### Petroleum Terminals
**Dataset: Petroleum Terminals -** https://hifld-geoplatform.hub.arcgis.com/datasets/geoplatform::petroleum-terminals/about

In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import time
import os

# Step 1: Define the weight schemas
# These module-level tables are read by calculate_terminal_weight.

# Define qualitative attribute weights
# Score per TYPE value; unlisted values fall back to 'Other Types'.
type_weights = {
    'BULK TERMINAL': 0.300,
    'MARINE TERMINAL': 0.250,
    'REFINERY TERMINAL': 0.100,
    'Other Types': 0.350
}

# Score per STATUS value; unlisted values score 0.0 at lookup time.
status_weights = {
    'IN SERVICE': 0.950,
    'DISMANTLED': 0.030,
    'NOT IN USE': 0.020
}

# Score per COMMODITY value; unlisted values fall back to 'Other Commodities'.
commodity_weights = {
    'REFINED': 0.250,
    'REFINED, CHEMICALS': 0.150,
    'NOT AVAILABLE': 0.050,
    'CRUDE': 0.100,
    'Other Commodities': 0.450
}

# Score per POSREL value; unlisted values fall back to 'Other Proximities'.
posrel_weights = {
    'WITHIN 40 FEET': 0.900,
    'WITHIN 1 MILE': 0.050,
    'Other Proximities': 0.050
}

# Score per value of the inbound transport columns
# (TRUCK_IN, PIPE_IN, MARINE_IN, RAIL_IN); unlisted values score 0.0.
transport_in_weights = {
    'YES': 0.700,
    'NO': 0.250,
    'NOT AVAILABLE': 0.050
}

# Score per value of the outbound transport columns
# (TRUCK_OUT, PIPE_OUT, MARINE_OUT, RAIL_OUT); unlisted values score 0.0.
transport_out_weights = {
    'YES': 0.700,
    'NO': 0.250,
    'NOT AVAILABLE': 0.050
}

# Define individual commodity-specific weights
# Keys are per-commodity flag columns; a column counts when its value is 'YES'.
commodity_specific_weights = {
    'ASPHALT': 0.080,
    'CHEMICALS': 0.100,
    'PROPANE': 0.050,
    'BUTANE': 0.040,
    'REFINED': 0.200,
    'ETHANOL': 0.060,
    'BIODIESEL': 0.050,
    'CRUDE_OIL': 0.150,
    'JETFUEL': 0.100,
    'GASOLINE': 0.120,
    'DISTILLATE': 0.200,
    'AVGAS': 0.050
}

# Define overall importance grades for each qualitative property
# Each grade multiplies the score looked up for the property of the same name;
# 'COMMODITIES' multiplies every matching commodity-specific weight.
qualitative_importance_grades = {
    'TYPE': 0.200,
    'STATUS': 0.200,
    'COMMODITY': 0.150,
    'POSREL': 0.100,
    'TRUCK_IN': 0.050,
    'TRUCK_OUT': 0.050,
    'PIPE_IN': 0.050,
    'PIPE_OUT': 0.050,
    'MARINE_IN': 0.050,
    'MARINE_OUT': 0.050,
    'RAIL_IN': 0.050,
    'RAIL_OUT': 0.050,
    'COMMODITIES': 0.100
}

# Define quantitative attribute weights
# Share that the CAPACITY column can add to a terminal's total weight.
quantitative_weights = {
    'CAPACITY': 0.200
}

# Step 2: Calculate the weight for each row

def _capacity_share(capacity, capacity_max, share):
    """Return ``share`` scaled by ``capacity / capacity_max``.

    Missing, zero and negative capacities contribute 0.0. Without a usable
    positive ``capacity_max`` the capacity is divided by itself, so any
    positive capacity earns the full ``share`` (the earlier behaviour).
    """
    if pd.isna(capacity) or capacity <= 0:
        return 0.0
    if capacity_max is None or pd.isna(capacity_max) or capacity_max <= 0:
        capacity_max = capacity
    return float((capacity / capacity_max) * share)


def calculate_terminal_weight(row, capacity_max=None):
    """Compute the importance weight of one petroleum terminal record.

    The weight sums the graded scores for TYPE, STATUS, COMMODITY, POSREL, the
    eight transport-mode columns and the per-commodity 'YES' flags, plus the
    CAPACITY share.

    Args:
        row: Record (dict or pandas Series) with the columns listed above.
        capacity_max: Optional largest CAPACITY in the dataset. Previously the
            capacity was divided by ``np.nanmax`` of the row's own value, so
            the ratio was always 1, a negative capacity also earned the full
            share, and a capacity of 0 produced 0/0 = NaN, which zeroed the
            whole weight. With ``capacity_max`` the share is proportional, and
            non-positive capacities contribute 0.0.

    Returns:
        The non-negative total weight rounded to 4 decimal places.
    """
    grades = qualitative_importance_grades

    # Categorical attributes with an explicit "other" bucket (STATUS has none)
    total_weight = type_weights.get(row['TYPE'], type_weights['Other Types']) * grades['TYPE']
    total_weight += status_weights.get(row['STATUS'], 0.0) * grades['STATUS']
    total_weight += (
        commodity_weights.get(row['COMMODITY'], commodity_weights['Other Commodities'])
        * grades['COMMODITY']
    )
    total_weight += (
        posrel_weights.get(row['POSREL'], posrel_weights['Other Proximities'])
        * grades['POSREL']
    )

    # Transport modes: inbound and outbound columns use their own score table
    transport_tables = (
        ('TRUCK_IN', transport_in_weights),
        ('TRUCK_OUT', transport_out_weights),
        ('PIPE_IN', transport_in_weights),
        ('PIPE_OUT', transport_out_weights),
        ('MARINE_IN', transport_in_weights),
        ('MARINE_OUT', transport_out_weights),
        ('RAIL_IN', transport_in_weights),
        ('RAIL_OUT', transport_out_weights),
    )
    for column, table in transport_tables:
        total_weight += table.get(row[column], 0.0) * grades[column]

    # Commodity-specific flags: only columns explicitly marked 'YES' count
    total_weight += sum(
        commodity_score * grades['COMMODITIES']
        for commodity, commodity_score in commodity_specific_weights.items()
        if row.get(commodity, 'NO') == 'YES'
    )

    # Quantitative part
    total_weight += _capacity_share(row['CAPACITY'], capacity_max, quantitative_weights['CAPACITY'])

    # Ensure valid and positive weight
    return round(max(0.0, total_weight), 4)

# Step 3: Apply the weights, remove features with null geometry, and strip unnecessary fields
def apply_weights_and_filter(gdf):
    """Weight every row of ``gdf`` and return the non-null-geometry rows with
    only 'Weight' and 'geometry'.

    ``gdf`` is modified in place (the 'Weight' column is added to it). The
    elapsed time is printed.
    """
    print("Starting weight calculation for each petroleum terminal...")
    start_time = time.time()

    # Compute the dataset-wide capacity maximum once; non-numeric entries are ignored
    capacity_max = pd.to_numeric(gdf['CAPACITY'], errors='coerce').max()
    gdf['Weight'] = gdf.apply(
        lambda row: calculate_terminal_weight(row, capacity_max),
        axis=1,
    )

    # Filter out rows with null geometry
    gdf_filtered = gdf[gdf.geometry.notnull()].copy()

    # Keep only the 'Weight' and 'geometry' fields
    gdf_stripped = gdf_filtered[['Weight', 'geometry']]

    elapsed_time = time.time() - start_time
    print(f"Weight calculation, filtering null geometries, and field stripping completed in {elapsed_time:.2f} seconds.")

    return gdf_stripped

# Step 4: Process and save the dataset with weights

def process_and_save_weighted_geojson(input_path, output_dir):
    """Load a GeoJSON layer, weight and filter it, save it under ``output_dir``
    and report sizes.

    The output file takes the input's base name with ``.geojson`` replaced by
    ``_weighted_cleaned.geojson``. Progress and file sizes (MB) are printed.
    """
    bytes_per_mb = 1024 * 1024

    print(f"Loading dataset from {input_path}...")
    source_gdf = gpd.read_file(input_path)
    print("Dataset loaded successfully.")

    # Size of the input file on disk
    initial_size = os.path.getsize(input_path) / bytes_per_mb
    print(f"Initial file size: {initial_size:.2f} MB")

    # Weight every feature, drop null geometries and all other attributes
    weighted_gdf = apply_weights_and_filter(source_gdf)

    source_name = os.path.basename(input_path)
    output_path = os.path.join(
        output_dir,
        source_name.replace(".geojson", "_weighted_cleaned.geojson"),
    )

    print(f"Saving weighted dataset to {output_path}...")
    weighted_gdf.to_file(output_path, driver='GeoJSON')

    # Size of the written file on disk
    final_size = os.path.getsize(output_path) / bytes_per_mb
    print(f"Final file size: {final_size:.2f} MB")

    reduction = initial_size - final_size
    if initial_size > 0:
        reduction_percentage = (reduction / initial_size) * 100
    else:
        reduction_percentage = 0

    print(f"File size reduced by: {reduction:.2f} MB ({reduction_percentage:.2f}% reduction).")

# Locations: directory receiving the weighted layer, and the cleaned input layer
output_dir = "/geoJSON/cleaned_weighted"
input_path = "/geoJSON/cleaned/Petroleum_Terminals_Cleaned.geojson"

# Create the output directory when it is not there yet (no error if it exists)
os.makedirs(output_dir, exist_ok=True)

# Run the load -> weight -> save pipeline for this dataset
process_and_save_weighted_geojson(input_path, output_dir)


### Oil

#### Natural Gas Wells
**Dataset: Oil and Natural Gas Wells -** https://hifld-geoplatform.hub.arcgis.com/datasets/geoplatform::oil-and-natural-gas-wells/about

In [None]:
from datetime import datetime

def calculate_well_importance(well):
    """Score a well between 0 and 1 from its status, product, age and depth.

    Four components are each scaled to the range 0..1 and carry a weight of
    0.25:

    * STATUS and PRODTYPE are looked up in fixed tables (unknown values: 0).
    * COMPDATE (``'%m/%d/%Y %I:%M:%S %p'``) decays linearly to 0 over 100 years.
    * TOTDEPTH is divided by 30000 and capped at 1.

    Unparseable, missing, non-string or sentinel values (for example ``-999``,
    ``None`` or NaN) in COMPDATE or TOTDEPTH contribute 0 instead of raising or
    lowering the score.

    Args:
        well: Mapping (dict or pandas Series) with the keys 'STATUS',
            'PRODTYPE', 'COMPDATE' and 'TOTDEPTH'.

    Returns:
        The importance score rounded to 3 decimal places.

    Raises:
        KeyError: If one of the four keys is absent from ``well``.
    """
    # Weights for each attribute
    weights = {
        'status_weight': 0.25,
        'prodtype_weight': 0.25,
        'compdate_weight': 0.25,
        'totdepth_weight': 0.25,
    }

    # Status importance mapping
    status_importance = {
        'PRODUCING WELL': 1.0,
        'ACTIVE WELL': 0.8,
        'NON-ACTIVE WELL': 0.4,
        'UNKNOWN WELL': 0.2,
        'WELL DEVELOPMENT': 0.6,
        'STORAGE WELL/MAINTENANCE WELL/OBSERVATION WELL': 0.5,
        'PRODUCING, NON-ACTIVE WELL': 0.7,
    }

    # Production type importance mapping
    prodtype_importance = {
        'OIL': 1.0,
        'GAS': 0.9,
        'OIL & NATURAL GAS': 1.0,
        'UNKNOWN': 0.2,
    }

    importance_score = 0.0

    # Categorical components
    status_score = status_importance.get(well['STATUS'], 0)
    importance_score += status_score * weights['status_weight']

    prodtype_score = prodtype_importance.get(well['PRODTYPE'], 0)
    importance_score += prodtype_score * weights['prodtype_weight']

    # Completion date factor (more recent = higher score)
    try:
        compdate = datetime.strptime(well['COMPDATE'], '%m/%d/%Y %I:%M:%S %p')
    except (ValueError, TypeError):
        # ValueError: text in another format; TypeError: non-string input such
        # as a numeric -999, None or NaN
        compdate_score = 0
    else:
        years_since_completion = (datetime.now() - compdate).days / 365.25
        # Assumes a 100-year relevance span; the upper clamp keeps a date in
        # the future from scoring above 1
        compdate_score = min(max(1 - (years_since_completion / 100), 0), 1)
    importance_score += compdate_score * weights['compdate_weight']

    # Total depth factor (deeper wells might be more significant due to larger investments)
    try:
        totdepth = float(well['TOTDEPTH'])
    except (ValueError, TypeError):
        totdepth = 0.0
    max_depth = 30000  # Example max depth for normalization
    # "totdepth > 0" is False for NaN, zero and negative sentinels such as
    # -999, so none of them can pull the score down or turn it into NaN
    totdepth_score = min(totdepth / max_depth, 1) if totdepth > 0 else 0
    importance_score += totdepth_score * weights['totdepth_weight']

    # Ensure the score does not exceed 1.0
    importance_score = min(importance_score, 1.0)

    return round(importance_score, 3)

# Example usage: score one hypothetical well record and print the result
example_well = dict(
    STATUS='PRODUCING WELL',
    PRODTYPE='OIL & NATURAL GAS',
    COMPDATE='4/9/1951 12:00:00 AM',
    TOTDEPTH=15000,
)

importance_score = calculate_well_importance(example_well)
print("Importance Score:", importance_score)


#### Oil Wells

#### Oil Refineries
**Dataset: Oil Refineries -** https://hifld-geoplatform.hub.arcgis.com/datasets/geoplatform::oil-refineries/about

In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import os
import time

# Step 1: Define the weight schemas
# These module-level tables are read by calculate_refinery_weight.

# Define qualitative attribute weights
# Score per TYPE value.
type_weights = {
    'LARGE COMPLEX REFINERY': 0.250,
    'MEDIUM SIZED MEDIUM COMPLEXITY REFINERY': 0.150,
    'VERY LARGE COMPLEX REFINERY': 0.200,
    'Other': 0.400
}

# Score per STATUS value; unlisted values score 0.0 at lookup time.
status_weights = {
    'IN SERVICE': 0.900,
    'CLOSED': 0.100
}

# Score per POSREL value; unlisted values score 0.0 at lookup time.
posrel_weights = {
    'WITHIN 166 FEET': 0.900,
    'WITHIN 1 MILE': 0.080,
    'WITHIN 40 FEET': 0.020
}

# Define overall importance grades for each qualitative property
# Each grade multiplies the score looked up for the property of the same name.
qualitative_importance_grades = {
    'TYPE': 0.300,
    'STATUS': 0.400,
    'POSREL': 0.300
}

# Define quantitative attribute weights
# Keys are numeric column names; each value is the share that column can add
# to a refinery's total weight.
quantitative_weights = {
    'CAPACITY': 0.200,
    'US_RANK': 0.100,
    'CRUDE': 0.150,
    'VACDIST': 0.100,
    'COKING': 0.050,
    'THERMALOP': 0.020,
    'CATCRACK': 0.100,
    'CATREFORM': 0.050,
    'CATHYDCRCK': 0.050,
    'CATHYDTRT': 0.100,
    'ALKY': 0.030,
    'POLDIM': 0.010,
    'AROMATIC': 0.020,
    'ISOMER': 0.030,
    'LUBES': 0.020,
    'OXYGENATES': 0.020,
    'HYDRGN': 0.010,
    'COKE': 0.030,
    'SULFUR': 0.020,
    'ASPHALT': 0.020
}

# Step 2: Calculate the weight for each row
def _quantitative_column_maxima(gdf):
    """Return ``{column: largest numeric value}`` for each ``quantitative_weights``
    column present in ``gdf``.

    Entries that cannot be parsed as numbers are ignored; a column without any
    numeric entry maps to NaN.
    """
    return {
        column: pd.to_numeric(gdf[column], errors='coerce').max()
        for column in quantitative_weights
        if column in gdf.columns
    }


def _share_of_maximum(value, column_max, share):
    """Return ``share`` scaled by ``value / column_max``.

    Missing, zero and negative values contribute 0.0, as does any value whose
    column has no positive maximum, so the division can never produce
    NaN or infinity.
    """
    if pd.isna(value) or value <= 0:
        return 0.0
    if column_max is None or pd.isna(column_max) or column_max <= 0:
        return 0.0
    return float((value / column_max) * share)


def calculate_refinery_weight(row, gdf, column_maxima=None):
    """Compute the importance weight of one refinery record.

    The weight is the sum of the graded TYPE, STATUS and POSREL scores plus,
    for every column in ``quantitative_weights``, that column's share scaled
    by the row's value relative to the dataset maximum of the column.

    Args:
        row: Mapping-like record (dict or pandas Series) supporting ``.get``.
        gdf: The dataset ``row`` belongs to; used to derive the column maxima
            when ``column_maxima`` is not supplied.
        column_maxima: Optional precomputed ``{column: maximum}`` mapping.
            Supplying it avoids rescanning every column for every row.

    Returns:
        The total weight rounded to 4 decimal places.
    """
    if column_maxima is None:
        column_maxima = _quantitative_column_maxima(gdf)

    # Qualitative part.
    # NOTE(review): 'Other' is only substituted when the TYPE field is absent
    # from the row; a present but unlisted TYPE scores 0.0 — confirm intended.
    total_weight = type_weights.get(row.get('TYPE', 'Other'), 0.0) * qualitative_importance_grades['TYPE']
    total_weight += status_weights.get(row.get('STATUS', ''), 0.0) * qualitative_importance_grades['STATUS']
    total_weight += posrel_weights.get(row.get('POSREL', ''), 0.0) * qualitative_importance_grades['POSREL']

    # Quantitative part: one scaled share per configured numeric column
    for column, share in quantitative_weights.items():
        total_weight += _share_of_maximum(row.get(column), column_maxima.get(column), share)

    return round(total_weight, 4)  # Ensure 4 decimal places and always return a number

# Step 3: Apply the weights to the dataset
def apply_weights_to_refineries_dataset(gdf):
    """Add a 'Weight' column to ``gdf`` (in place) and return ``gdf``.

    The elapsed time is printed.
    """
    print("Starting to calculate weights for each refinery...")
    start_time = time.time()

    # Column maxima do not change between rows, so compute them once up front
    column_maxima = _quantitative_column_maxima(gdf)
    gdf['Weight'] = gdf.apply(
        lambda row: calculate_refinery_weight(row, gdf, column_maxima),
        axis=1,
    )

    elapsed_time = time.time() - start_time
    print(f"Weights calculated. Time elapsed: {elapsed_time:.2f} seconds.")

    return gdf

# Step 4: Remove unwanted fields and keep only Weight and geometry
def remove_unnecessary_fields(gdf):
    """Return ``gdf`` restricted to its 'Weight' and 'geometry' columns."""
    print("Removing unnecessary fields, keeping only Weight and geometry...")
    kept_columns = ['Weight', 'geometry']
    return gdf[kept_columns]

# Step 5: Save the cleaned and weighted dataset
def process_and_save_weighted_geojson(input_path, output_path):
    """Load a GeoJSON layer, weight it, write the stripped result to
    ``output_path`` and report sizes.

    Progress and file sizes (MB) are printed along the way.
    """
    bytes_per_mb = 1024 * 1024

    print(f"Loading dataset from {input_path}...")
    source_gdf = gpd.read_file(input_path)
    print("Dataset loaded successfully.")

    # Size of the input file on disk
    initial_size = os.path.getsize(input_path) / bytes_per_mb
    print(f"Initial file size: {initial_size:.2f} MB")

    # Weight every feature, then drop everything except 'Weight' and 'geometry'
    cleaned_gdf = remove_unnecessary_fields(apply_weights_to_refineries_dataset(source_gdf))

    print(f"Saving weighted dataset to {output_path}...")
    cleaned_gdf.to_file(output_path, driver='GeoJSON')

    # Size of the written file on disk
    final_size = os.path.getsize(output_path) / bytes_per_mb
    print(f"Final file size: {final_size:.2f} MB")

    reduction = initial_size - final_size
    if initial_size > 0:
        reduction_percentage = (reduction / initial_size) * 100
    else:
        reduction_percentage = 0

    print(f"File size reduced by: {reduction:.2f} MB ({reduction_percentage:.2f}% reduction).")

# Step 6: Resolve the input/output locations and run the pipeline
output_dir = "/geoJSON/cleaned_weighted"
input_path = "/geoJSON/cleaned/Oil_Refineries_Cleaned.geojson"

# Create the output directory when it is not there yet (no error if it exists)
os.makedirs(output_dir, exist_ok=True)

# The output keeps the input's base name, with a "_weighted_cleaned" suffix
output_file_name = os.path.basename(input_path).replace(".geojson", "_weighted_cleaned.geojson")
output_path = os.path.join(output_dir, output_file_name)

# Run the load -> weight -> strip -> save pipeline
process_and_save_weighted_geojson(input_path, output_path)


Loading dataset from /content/drive/MyDrive/CI_Sentinal/geoJSON/cleaned/Oil_Refineries_Cleaned.geojson...
Dataset loaded successfully.
Initial file size: 0.10 MB
Starting to calculate weights for each refinery...
Weights calculated. Time elapsed: 0.42 seconds.
Removing unnecessary fields, keeping only Weight and geometry...
Saving weighted dataset to /content/drive/MyDrive/CI_Sentinal/geoJSON/cleaned_weighted/Oil_Refineries_Cleaned_weighted_cleaned.geojson...
Final file size: 0.02 MB
File size reduced by: 0.08 MB (78.50% reduction).


## Financials

### County Business Patterns
**Dataset: County Business Patterns -** https://hub.arcgis.com/datasets/USCensus::county-business-patterns-counties-2021/about

In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import time
import os

# Step 1: Define the weight schemas

# Define qualitative attribute weights
# NOTE(review): 'Other' outweighs the two named states -- confirm this is intended.
statefp_weights = {
    '48': 0.150,  # Texas
    '13': 0.100,  # Georgia
    'Other': 0.750  # Other states collectively
}

lsad_weights = {
    '06': 0.800,
    '13': 0.100,
    'Other': 0.100
}

# Define overall importance grades for each qualitative property
qualitative_importance_grades = {
    'STATEFP': 0.500,
    'LSAD': 0.500
}

# Define quantitative attribute weights
quantitative_weights = {
    'ESTAB_00': 0.200,
    'ESTAB_21': 0.020,
    'ESTAB_22': 0.020,
    'ESTAB_23': 0.050,
    'ESTAB_31_33': 0.050,
    'ESTAB_42': 0.050,
    'ESTAB_44_45': 0.070,
    'ESTAB_48_49': 0.040,
    'ESTAB_51': 0.040,
    'ESTAB_52': 0.050,
    'ESTAB_53': 0.050,
    'ESTAB_54': 0.070,
    'ESTAB_55': 0.030,
    'ESTAB_56': 0.050,
    'ESTAB_61': 0.040,
    'ESTAB_62': 0.070,
    'ESTAB_71': 0.040,
    'ESTAB_72': 0.050,
    'ESTAB_81': 0.050,
    'PAYANN_00': 0.070,
    'PAYANN_21': 0.020,
    'PAYANN_22': 0.020,
    'PAYANN_23': 0.040,
    'PAYANN_31_33': 0.070,
    'PAYANN_42': 0.050,
    'PAYANN_44_45': 0.050,
    'PAYANN_48_49': 0.050,
    'PAYANN_51': 0.060,
    'PAYANN_52': 0.060,
    'PAYANN_53': 0.040,
    'PAYANN_54': 0.060,
    'PAYANN_55': 0.060,
    'PAYANN_56': 0.050,
    'PAYANN_61': 0.050,
    'PAYANN_62': 0.060,
    'PAYANN_71': 0.040,
    'PAYANN_72': 0.050,
    'PAYANN_81': 0.040,
    'PAYANNAVG_CALC': 0.050
}

# Step 2: Calculate the weight for each row

def _positive_number(value):
    """Return ``value`` as a finite float > 0, or 0.0 if it is missing, non-numeric or <= 0."""
    try:
        number = float(value)
    except (TypeError, ValueError):
        return 0.0
    return number if np.isfinite(number) and number > 0 else 0.0


def calculate_cbp_weight(row, column_maxima=None):
    """Compute the weight of one County Business Patterns record.

    The weight is the sum of:
      * the STATEFP and LSAD lookups, each scaled by its importance grade
        (unknown or missing codes use the table's 'Other' entry), and
      * for every column in ``quantitative_weights``, ``value / column max``
        (capped at 1.0) times that column's weight.

    Args:
        row: Mapping-like record (pandas Series or dict) exposing ``.get``.
        column_maxima: Optional mapping ``column -> maximum value across the
            dataset``. A single row cannot be normalised without it, so when
            it is omitted (or lacks a column) that column contributes 0.

    Returns:
        float: Non-negative weight rounded to 4 decimal places.
    """
    if column_maxima is None:
        column_maxima = {}

    # Calculate qualitative weights
    statefp_weight = statefp_weights.get(row.get('STATEFP'), statefp_weights['Other']) * qualitative_importance_grades['STATEFP']
    lsad_weight = lsad_weights.get(row.get('LSAD'), lsad_weights['Other']) * qualitative_importance_grades['LSAD']

    # Calculate quantitative weights: each value is scaled by its column's
    # dataset-wide maximum, not by other cells of the same row.
    quantitative_weight = 0.0
    for column, weight in quantitative_weights.items():
        max_value = _positive_number(column_maxima.get(column))
        if max_value <= 0:
            continue  # no usable reference for this column
        value = _positive_number(row.get(column))
        quantitative_weight += min(value / max_value, 1.0) * weight

    # Combine qualitative and quantitative weights
    total_weight = statefp_weight + lsad_weight + quantitative_weight

    # Ensure weight is positive and rounded to 4 decimal places
    return round(max(0.0, total_weight), 4)

# Step 3: Apply the weights to the dataset and remove any null geometries

def apply_weights_and_filter(gdf):
    """Add a 'Weight' column to ``gdf`` (in place) and return a copy without null geometries.

    Column maxima for the quantitative attributes are computed once over the
    whole frame (non-numeric cells are ignored) and handed to
    ``calculate_cbp_weight`` for every row.

    Args:
        gdf: Frame with a ``geometry`` column and the CBP attribute columns.

    Returns:
        A copy of ``gdf`` restricted to rows whose geometry is not null.
    """
    print("Starting to calculate weights for each County Business Pattern record...")
    start_time = time.time()

    # Dataset-wide maxima used to normalise each quantitative column
    column_maxima = {
        column: pd.to_numeric(gdf[column], errors='coerce').max()
        for column in quantitative_weights
        if column in gdf.columns
    }

    # Apply the weight calculation function to each row
    gdf['Weight'] = gdf.apply(lambda row: calculate_cbp_weight(row, column_maxima), axis=1)

    # Remove null geometries
    gdf_cleaned = gdf[gdf.geometry.notnull()].copy()

    end_time = time.time()
    elapsed_time = end_time - start_time
    print(f"Weights calculated. Time elapsed: {elapsed_time:.2f} seconds.")

    return gdf_cleaned

# Step 4: Process and save the dataset with weights and remove unnecessary fields

def process_and_save_weighted_geojson(input_path, output_dir):
    """Weight the County Business Patterns GeoJSON and write a slimmed copy.

    Reads ``input_path``, runs ``apply_weights_and_filter``, keeps only the
    'Weight' and 'geometry' columns and writes the result into ``output_dir``
    under the input file name with ``.geojson`` replaced by
    ``_weighted_cleaned.geojson``. Progress and file sizes are printed.
    """
    bytes_per_mb = 1024 * 1024

    print(f"Loading dataset from {input_path}...")
    source_frame = gpd.read_file(input_path)
    print("Dataset loaded successfully.")

    # Size of the input on disk, in MB
    initial_size = os.path.getsize(input_path) / bytes_per_mb
    print(f"Initial file size: {initial_size:.2f} MB")

    # Score every record, drop null geometries, then strip the attribute columns
    slim_frame = apply_weights_and_filter(source_frame)[['Weight', 'geometry']]

    target_name = os.path.basename(input_path).replace(".geojson", "_weighted_cleaned.geojson")
    output_path = os.path.join(output_dir, target_name)

    print(f"Saving weighted dataset to {output_path}...")
    slim_frame.to_file(output_path, driver='GeoJSON')

    # Size of the output on disk, in MB
    final_size = os.path.getsize(output_path) / bytes_per_mb
    print(f"Final file size: {final_size:.2f} MB")

    # Report how much smaller the output is (guarding against an empty input file)
    reduction = initial_size - final_size
    if initial_size > 0:
        reduction_percentage = (reduction / initial_size) * 100
    else:
        reduction_percentage = 0

    print(f"File size reduced by: {reduction:.2f} MB ({reduction_percentage:.2f}% reduction).")

# Destination folder and source GeoJSON for the County Business Patterns run
output_dir = "/geoJSON/cleaned_weighted"
input_path = "/geoJSON/cleaned/CBP_2021_Cleaned.geojson"

# Create the destination folder if it is not there yet
os.makedirs(output_dir, exist_ok=True)

# Run the weighting pipeline
process_and_save_weighted_geojson(input_path, output_dir)


### FDIC Insured Banks
**Dataset: FDIC Insured Banks -** https://hifld-geoplatform.hub.arcgis.com/datasets/geoplatform::fdic-insured-banks/about

In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import time
import os

# Step 1: Define the weight schemas

# Define BKCLASS weights
bkclass_weights = {
    'N': 0.3,  # National bank class
    'SM': 0.2,  # State Member
    'NM': 0.1,  # Non-Member
    'SI': 0.1,  # Savings Institution
    'Other': 0.05
}

# Define overall importance grades for each qualitative property
qualitative_importance_grades = {
    'BKCLASS': 0.1
}

# Define quantitative attribute weights
quantitative_weights = {
    'ASSET': 0.5,
    'DEPDOM': 0.4
}

# Step 2: Calculate the weight for each row

def _numeric_or_zero(value):
    """Return ``value`` as a finite float, or 0.0 when it is missing or not numeric."""
    try:
        number = float(value)
    except (TypeError, ValueError):
        return 0.0
    return number if np.isfinite(number) else 0.0


def calculate_bank_weight(row, max_asset=1e12, max_depdom=1e11):
    """Compute the weight of one FDIC bank record.

    The weight is the BKCLASS lookup (unknown or missing classes use 'Other')
    times its importance grade, plus ``ASSET / max_asset`` and
    ``DEPDOM / max_depdom`` each times its quantitative weight.

    Args:
        row: Mapping-like record (pandas Series or dict) exposing ``.get``.
        max_asset: Reference value ASSET is divided by. The default is the
            example ceiling the notebook used as a constant.
        max_depdom: Reference value DEPDOM is divided by (same remark).

    Returns:
        float: Non-negative weight rounded to 4 decimal places. Ratios are not
        capped, so values above the reference yield contributions above the
        attribute's weight.
    """
    # Null / non-numeric cells (None, NaN, text placeholders) count as 0
    # instead of raising on the comparison below.
    asset_value = _numeric_or_zero(row.get('ASSET', 0.0))
    depdom_value = _numeric_or_zero(row.get('DEPDOM', 0.0))
    bkclass_value = row.get('BKCLASS', 'Other')  # Default to 'Other' if BKCLASS is missing

    # Calculate qualitative weights for BKCLASS
    bkclass_weight = bkclass_weights.get(bkclass_value, bkclass_weights['Other']) * qualitative_importance_grades['BKCLASS']

    # Calculate quantitative weights for ASSET and DEPDOM
    asset_weight = (asset_value / max_asset) * quantitative_weights['ASSET'] if asset_value > 0 and max_asset > 0 else 0
    depdom_weight = (depdom_value / max_depdom) * quantitative_weights['DEPDOM'] if depdom_value > 0 and max_depdom > 0 else 0

    # Combine qualitative and quantitative weights
    total_weight = bkclass_weight + asset_weight + depdom_weight

    # Ensure valid and positive weight, rounded to 4 decimals
    return round(max(0.0, total_weight), 4)

# Step 3: Apply the weights, remove features with null geometry, and strip unnecessary fields
def apply_weights_and_filter(gdf):
    """Score every FDIC bank and return only the weight and geometry of located ones.

    A 'Weight' column is added to ``gdf`` in place via
    ``calculate_bank_weight``; the returned frame holds the 'Weight' and
    'geometry' columns of the rows whose geometry is not null. The elapsed
    time is printed.
    """
    print("Starting weight calculation for each FDIC bank...")
    started_at = time.time()

    # One weight per row
    gdf['Weight'] = gdf.apply(calculate_bank_weight, axis=1)

    # Rows without a geometry are dropped, then everything but the two
    # output columns is discarded
    has_geometry = gdf.geometry.notnull()
    slim = gdf[has_geometry].copy()[['Weight', 'geometry']]

    elapsed_time = time.time() - started_at
    print(f"Weight calculation, filtering null geometries, and field stripping completed in {elapsed_time:.2f} seconds.")

    return slim

# Step 4: Process and save the dataset with weights

def process_and_save_weighted_geojson(input_path, output_dir):
    """Weight the FDIC banks GeoJSON and write the reduced copy into ``output_dir``.

    The output keeps the input file name with ``.geojson`` replaced by
    ``_weighted_cleaned.geojson``. Progress and before/after file sizes are
    printed.
    """
    bytes_per_mb = 1024 * 1024

    print(f"Loading dataset from {input_path}...")
    banks = gpd.read_file(input_path)
    print("Dataset loaded successfully.")

    # Input size on disk, in MB
    initial_size = os.path.getsize(input_path) / bytes_per_mb
    print(f"Initial file size: {initial_size:.2f} MB")

    # Weighting, null-geometry filtering and column stripping happen in one call
    weighted_banks = apply_weights_and_filter(banks)

    target_name = os.path.basename(input_path).replace(".geojson", "_weighted_cleaned.geojson")
    output_path = os.path.join(output_dir, target_name)

    print(f"Saving weighted dataset to {output_path}...")
    weighted_banks.to_file(output_path, driver='GeoJSON')

    # Output size on disk, in MB
    final_size = os.path.getsize(output_path) / bytes_per_mb
    print(f"Final file size: {final_size:.2f} MB")

    # Size saved, absolute and relative (0% when the input is empty)
    reduction = initial_size - final_size
    if initial_size > 0:
        reduction_percentage = (reduction / initial_size) * 100
    else:
        reduction_percentage = 0

    print(f"File size reduced by: {reduction:.2f} MB ({reduction_percentage:.2f}% reduction).")

# Destination folder and source GeoJSON for the FDIC banks run
output_dir = "/geoJSON/cleaned_weighted"
input_path = "/geoJSON/cleaned/FDIC_Banks_Cleaned.geojson"

# Create the destination folder if it is not there yet
os.makedirs(output_dir, exist_ok=True)

# Run the weighting pipeline
process_and_save_weighted_geojson(input_path, output_dir)


### Gold Bullion Depositories
**Dataset: Gold Bullion Depositories -** https://hifld-geoplatform.hub.arcgis.com/datasets/geoplatform::gold-bullion-repositories/about

In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import time
import os

# Step 1: Weight lookup tables

# Score per HSIPTHEMES value (only this one theme string is scored)
hsipthemes_weights = {
    "CRITICAL INFRASTRUCTURE, PDD-63; BANKING, FINANCE, AND INSURANCE; BULLION REPOSITORIES": 1.0,
}

# Score per NAICSCODE value (integer keys)
naicscode_weights = {
    423940: 0.87,
    339911: 0.057,
    921130: 0.057,
    521110: 0.016,
}

# Share of the final weight contributed by each attribute
qualitative_importance_grades = {
    "HSIPTHEMES": 0.5,
    "NAICSCODE": 0.5,
}

# Step 2: Calculate the weight for each row

def calculate_gold_depository_weight(row):
    """Return the weight of one gold depository record.

    The HSIPTHEMES and NAICSCODE values are looked up in their score tables
    (0.0 when the value is absent or not listed), each score is multiplied by
    its importance grade, and the sum is floored at 0 and rounded to 4 places.
    """
    theme = row.get('HSIPTHEMES', '')
    naics = row.get('NAICSCODE', 0)

    theme_part = hsipthemes_weights.get(theme, 0.0) * qualitative_importance_grades['HSIPTHEMES']
    naics_part = naicscode_weights.get(naics, 0.0) * qualitative_importance_grades['NAICSCODE']

    combined = theme_part + naics_part
    return round(max(0.0, combined), 4)

# Step 3: Apply the weights and remove features with null geometry

def apply_weights_and_filter(gdf):
    """Score every gold depository and return only the weight and geometry of located ones.

    A 'Weight' column is added to ``gdf`` in place via
    ``calculate_gold_depository_weight``; the returned frame holds the
    'Weight' and 'geometry' columns of the rows whose geometry is not null.
    The elapsed time is printed.
    """
    print("Starting to calculate weights for each gold depository...")
    started_at = time.time()

    # One weight per row
    gdf['Weight'] = gdf.apply(calculate_gold_depository_weight, axis=1)

    # Drop rows without a geometry, then keep just the two output columns
    has_geometry = gdf.geometry.notnull()
    slim = gdf[has_geometry].copy()[['Weight', 'geometry']]

    elapsed_time = time.time() - started_at
    print(f"Weights calculated, null geometries filtered, and fields stripped in {elapsed_time:.2f} seconds.")

    return slim

# Step 4: Process and save the dataset with weights

def process_and_save_weighted_geojson(input_path, output_dir):
    """Weight the gold depositories GeoJSON and write the reduced copy into ``output_dir``.

    The output keeps the input file name with ``.geojson`` replaced by
    ``_weighted_cleaned.geojson``. Progress and before/after file sizes are
    printed.
    """
    bytes_per_mb = 1024 * 1024

    print(f"Loading dataset from {input_path}...")
    depositories = gpd.read_file(input_path)
    print("Dataset loaded successfully.")

    # Input size on disk, in MB
    initial_size = os.path.getsize(input_path) / bytes_per_mb
    print(f"Initial file size: {initial_size:.2f} MB")

    # Weighting, null-geometry filtering and column stripping happen in one call
    weighted_depositories = apply_weights_and_filter(depositories)

    target_name = os.path.basename(input_path).replace(".geojson", "_weighted_cleaned.geojson")
    output_path = os.path.join(output_dir, target_name)

    print(f"Saving weighted dataset to {output_path}...")
    weighted_depositories.to_file(output_path, driver='GeoJSON')

    # Output size on disk, in MB
    final_size = os.path.getsize(output_path) / bytes_per_mb
    print(f"Final file size: {final_size:.2f} MB")

    # Size saved, absolute and relative (0% when the input is empty)
    reduction = initial_size - final_size
    if initial_size > 0:
        reduction_percentage = (reduction / initial_size) * 100
    else:
        reduction_percentage = 0

    print(f"File size reduced by: {reduction:.2f} MB ({reduction_percentage:.2f}% reduction).")

# Destination folder and source GeoJSON for the gold depositories run
output_dir = "/geoJSON/cleaned_weighted"
input_path = "/geoJSON/cleaned/Gold_Repositories_Cleaned.geojson"

# Create the destination folder if it is not there yet
os.makedirs(output_dir, exist_ok=True)

# Run the weighting pipeline
process_and_save_weighted_geojson(input_path, output_dir)


## Food

### Public Refrigerated Warehouses
**Dataset: Public Refrigerated Warehouses -** https://hifld-geoplatform.hub.arcgis.com/datasets/geoplatform::public-refrigerated-warehouses/about

In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import time
import os

# Step 1: Define the weight schemas

# Define qualitative attribute weights
loc_name_weights = {
    'US_RoofTop': 0.500,
    'US_Streets': 0.400,
    'US_Zipcode': 0.050,
    'US_StreetName': 0.030,
    'US_Zip4': 0.020
}

# Define overall importance grades for the qualitative properties
qualitative_importance_grades = {
    'Loc_name': 0.500
}

# Define quantitative attribute weights
quantitative_weights = {
    'Volume': 1.000
}

# Step 2: Calculate the weight for each row

def _positive_volume(value):
    """Return ``value`` as a finite float > 0, or 0.0 if it is missing, non-numeric or <= 0."""
    try:
        number = float(value)
    except (TypeError, ValueError):
        return 0.0
    return number if np.isfinite(number) and number > 0 else 0.0


def calculate_warehouse_weight(row, max_volume=None):
    """Compute the weight of one refrigerated-warehouse record.

    The weight is the Loc_name lookup (0.0 when absent or unlisted) times its
    importance grade, plus ``Volume / max_volume`` (capped at 1.0) times the
    Volume weight.

    Args:
        row: Mapping-like record (pandas Series or dict) exposing ``.get``.
        max_volume: Largest Volume in the dataset, used as the normalisation
            reference. When omitted, the row's own volume is the reference,
            which reproduces the earlier behaviour where any positive volume
            earns the full Volume weight.

    Returns:
        float: Non-negative weight rounded to 4 decimal places.
        NOTE(review): the grades/weights above allow totals up to 1.25 --
        confirm that range is intended.
    """
    # Null / non-numeric volumes count as 0 instead of raising
    volume_value = _positive_volume(row.get('Volume', 0.0))

    # Calculate qualitative weights
    loc_name_weight = loc_name_weights.get(row.get('Loc_name'), 0.0) * qualitative_importance_grades['Loc_name']

    # Calculate quantitative weights against the dataset-wide reference
    reference = volume_value if max_volume is None else _positive_volume(max_volume)
    if volume_value > 0 and reference > 0:
        volume_weight = min(volume_value / reference, 1.0) * quantitative_weights['Volume']
    else:
        volume_weight = 0

    # Combine qualitative and quantitative weights
    total_weight = loc_name_weight + volume_weight

    # Ensure valid and positive weight, rounded to 4 decimals
    return round(max(0.0, total_weight), 4)

# Step 3: Apply the weights, remove features with null geometry, and strip unnecessary fields
def apply_weights_and_filter(gdf):
    """Score every warehouse and return only the weight and geometry of located ones.

    The maximum Volume is computed once over the whole frame (non-numeric
    cells ignored) and passed to ``calculate_warehouse_weight`` for each row.
    A 'Weight' column is added to ``gdf`` in place; the returned frame holds
    the 'Weight' and 'geometry' columns of rows whose geometry is not null.
    """
    print("Starting weight calculation for each refrigerated warehouse...")
    start_time = time.time()

    # Dataset-wide reference for normalising Volume
    if 'Volume' in gdf.columns:
        max_volume = pd.to_numeric(gdf['Volume'], errors='coerce').max()
    else:
        max_volume = None

    # Apply the weight calculation function to each row
    gdf['Weight'] = gdf.apply(lambda row: calculate_warehouse_weight(row, max_volume), axis=1)

    # Filter out rows with null geometry
    gdf_filtered = gdf[gdf.geometry.notnull()].copy()

    # Keep only the 'Weight' and 'geometry' fields
    fields_to_keep = ['Weight', 'geometry']
    gdf_stripped = gdf_filtered[fields_to_keep]

    end_time = time.time()
    elapsed_time = end_time - start_time
    print(f"Weight calculation, filtering null geometries, and field stripping completed in {elapsed_time:.2f} seconds.")

    return gdf_stripped

# Step 4: Process and save the dataset with weights

def process_and_save_weighted_geojson(input_path, output_dir):
    """Weight the refrigerated warehouses GeoJSON and write the reduced copy into ``output_dir``.

    The output keeps the input file name with ``.geojson`` replaced by
    ``_weighted_cleaned.geojson``. Progress and before/after file sizes are
    printed.
    """
    bytes_per_mb = 1024 * 1024

    print(f"Loading dataset from {input_path}...")
    warehouses = gpd.read_file(input_path)
    print("Dataset loaded successfully.")

    # Input size on disk, in MB
    initial_size = os.path.getsize(input_path) / bytes_per_mb
    print(f"Initial file size: {initial_size:.2f} MB")

    # Weighting, null-geometry filtering and column stripping happen in one call
    weighted_warehouses = apply_weights_and_filter(warehouses)

    target_name = os.path.basename(input_path).replace(".geojson", "_weighted_cleaned.geojson")
    output_path = os.path.join(output_dir, target_name)

    print(f"Saving weighted dataset to {output_path}...")
    weighted_warehouses.to_file(output_path, driver='GeoJSON')

    # Output size on disk, in MB
    final_size = os.path.getsize(output_path) / bytes_per_mb
    print(f"Final file size: {final_size:.2f} MB")

    # Size saved, absolute and relative (0% when the input is empty)
    reduction = initial_size - final_size
    if initial_size > 0:
        reduction_percentage = (reduction / initial_size) * 100
    else:
        reduction_percentage = 0

    print(f"File size reduced by: {reduction:.2f} MB ({reduction_percentage:.2f}% reduction).")

# Destination folder and source GeoJSON for the refrigerated warehouses run
output_dir = "/geoJSON/cleaned_weighted"
input_path = "/geoJSON/cleaned/Refrigerated_Warehouses_Cleaned.geojson"

# Create the destination folder if it is not there yet
os.makedirs(output_dir, exist_ok=True)

# Run the weighting pipeline
process_and_save_weighted_geojson(input_path, output_dir)


## Government

### Courthouses
**Dataset: Courthouses -** https://hifld-geoplatform.hub.arcgis.com/datasets/geoplatform::courthouses-3/about

In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import os
import time
from datetime import datetime

# Step 1: Weight lookup tables

# Score per FTYPE value; 830 is the courthouse feature type
ftype_weights = {830: 1.0}

# Score per FCODE value; 83011 is the courthouse feature code
fcode_weights = {83011: 1.0}

# Share of the final weight contributed by each attribute
qualitative_importance_grades = {
    "FTYPE": 0.5,
    "FCODE": 0.5,
}

# Step 2: Calculate the weight for each courthouse

def calculate_courthouse_weight(row):
    """Return the weight of one courthouse record.

    The FTYPE and FCODE values are looked up in their score tables (0.0 when
    the value is absent or not listed), each score is multiplied by its
    importance grade, and the sum is floored at 0 and rounded to 4 places.
    """
    ftype = row.get('FTYPE', 0)
    fcode = row.get('FCODE', 0)

    ftype_part = ftype_weights.get(ftype, 0.0) * qualitative_importance_grades['FTYPE']
    fcode_part = fcode_weights.get(fcode, 0.0) * qualitative_importance_grades['FCODE']

    combined = ftype_part + fcode_part
    return round(max(0.0, combined), 4)

# Step 3: Apply the weights, remove features with null geometry, and strip unnecessary fields

def apply_weights_and_filter(gdf):
    """Score every courthouse and return only the weight and geometry of located ones.

    A 'Weight' column is added to ``gdf`` in place via
    ``calculate_courthouse_weight``; the returned frame holds the 'Weight'
    and 'geometry' columns of the rows whose geometry is not null. The
    elapsed time is printed.
    """
    print("Starting weight calculation for each courthouse...")
    started_at = time.time()

    # One weight per row
    gdf['Weight'] = gdf.apply(calculate_courthouse_weight, axis=1)

    # Drop rows without a geometry, then keep just the two output columns
    has_geometry = gdf.geometry.notnull()
    slim = gdf[has_geometry].copy()[['Weight', 'geometry']]

    elapsed_time = time.time() - started_at
    print(f"Weight calculation, filtering null geometries, and field stripping completed in {elapsed_time:.2f} seconds.")

    return slim

# Step 4: Process and save the dataset with weights

def process_and_save_weighted_geojson(input_path, output_dir):
    """Weight the courthouses GeoJSON and write the reduced copy into ``output_dir``.

    The output keeps the input file name with ``.geojson`` replaced by
    ``_weighted_cleaned.geojson``. Progress and before/after file sizes are
    printed.
    """
    bytes_per_mb = 1024 * 1024

    print(f"Loading dataset from {input_path}...")
    courthouses = gpd.read_file(input_path)
    print("Dataset loaded successfully.")

    # Input size on disk, in MB
    initial_size = os.path.getsize(input_path) / bytes_per_mb
    print(f"Initial file size: {initial_size:.2f} MB")

    # Weighting, null-geometry filtering and column stripping happen in one call
    weighted_courthouses = apply_weights_and_filter(courthouses)

    target_name = os.path.basename(input_path).replace(".geojson", "_weighted_cleaned.geojson")
    output_path = os.path.join(output_dir, target_name)

    print(f"Saving weighted dataset to {output_path}...")
    weighted_courthouses.to_file(output_path, driver='GeoJSON')

    # Output size on disk, in MB
    final_size = os.path.getsize(output_path) / bytes_per_mb
    print(f"Final file size: {final_size:.2f} MB")

    # Size saved, absolute and relative (0% when the input is empty)
    reduction = initial_size - final_size
    if initial_size > 0:
        reduction_percentage = (reduction / initial_size) * 100
    else:
        reduction_percentage = 0

    print(f"File size reduced by: {reduction:.2f} MB ({reduction_percentage:.2f}% reduction).")

# Destination folder and source GeoJSON for the courthouses run
output_dir = "/geoJSON/cleaned_weighted"
input_path = "/geoJSON/cleaned/Courthouses_Cleaned.geojson"

# Create the destination folder if it is not there yet
os.makedirs(output_dir, exist_ok=True)

# Run the weighting pipeline
process_and_save_weighted_geojson(input_path, output_dir)


### Major State Government Buildings
**Dataset: Major State Government Buildings -** https://hifld-geoplatform.hub.arcgis.com/datasets/geoplatform::major-state-government-buildings/about

In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import time
import os

# Step 1: Define the weight schemas

# Define qualitative attribute weights
naicscode_weights = {
    '921190': 1.000  # Government building NAICSCODE
}

fips_weights = {
    '48453': 0.400,  # Example FIPS codes with higher significance
    '41047': 0.350,
    '51760': 0.250,
    'Other': 0.050  # Default weight for other FIPS
}

agencies_weights = {
    'DEPARTMENT OF INFORMATION RESOURCES': 0.150,
    'DEPARTMENT OF TRANSPORTATION': 0.140,
    'DEPARTMENT OF EDUCATION': 0.135,
    'DEPARTMENT OF CORRECTIONS': 0.130,
    'SUPREME COURT': 0.125,
    'DEPARTMENT OF REVENUE': 0.120,
    'ATTORNEY GENERAL': 0.100,
    'Other': 0.100
}

num_agency_weights = {
    1: 0.500,
    2: 0.200,
    3: 0.150,
    4: 0.080,
    5: 0.050,
    'NOT AVAILABLE': 0.020
}

# Define overall importance grades for each qualitative property
qualitative_importance_grades = {
    'NAICSCODE': 0.350,
    'FIPS': 0.250,
    'AGENCIES': 0.250,
    'NUM_AGENCY': 0.150
}

# Step 2: Calculate the weight for each row

def _agency_count_key(raw_value):
    """Map a raw NUM_AGENCY cell to a ``num_agency_weights`` key.

    Whole numbers (int, float, numeric text) become ``int``; anything else
    (None, NaN, fractions, placeholder text) becomes ``'NOT AVAILABLE'``.
    """
    try:
        number = float(raw_value)
    except (TypeError, ValueError):
        return 'NOT AVAILABLE'
    if not np.isfinite(number) or number != int(number):
        return 'NOT AVAILABLE'
    return int(number)


def calculate_state_gov_building_weight(row):
    """Compute the weight of one major state government building record.

    Each of NAICSCODE, FIPS, AGENCIES and NUM_AGENCY is looked up in its score
    table and multiplied by its importance grade; the four parts are summed.
    Unlisted FIPS/AGENCIES values use the table's 'Other' entry, unlisted
    NAICSCODE values score 0.0, and NUM_AGENCY values that are not a listed
    whole number use the 'NOT AVAILABLE' entry.

    Args:
        row: Mapping-like record (pandas Series or dict) exposing ``.get``.

    Returns:
        float: Non-negative weight rounded to 4 decimal places.
    """
    # Calculate qualitative weights
    naicscode_weight = naicscode_weights.get(row.get('NAICSCODE', 'Other'), 0.0) * qualitative_importance_grades['NAICSCODE']
    fips_weight = fips_weights.get(row.get('FIPS', 'Other'), fips_weights['Other']) * qualitative_importance_grades['FIPS']
    agencies_weight = agencies_weights.get(row.get('AGENCIES', 'Other'), agencies_weights['Other']) * qualitative_importance_grades['AGENCIES']

    # NUM_AGENCY may arrive as text, a number or a null; normalise it without
    # assuming it is a string (calling .isdigit() on a non-string raises).
    num_agency_value = _agency_count_key(row.get('NUM_AGENCY', 'NOT AVAILABLE'))
    num_agency_weight = num_agency_weights.get(num_agency_value, num_agency_weights['NOT AVAILABLE']) * qualitative_importance_grades['NUM_AGENCY']

    # Combine qualitative weights
    total_weight = (
        naicscode_weight +
        fips_weight +
        agencies_weight +
        num_agency_weight
    )

    # Ensure valid and positive weight, rounded to 4 decimals
    return round(max(0.0, total_weight), 4)

# Step 3: Apply the weights, remove features with null geometry, and keep only required fields
def apply_weights_and_filter(gdf):
    """Score every state government building and return only the weight and geometry of located ones.

    A 'Weight' column is added to ``gdf`` in place via
    ``calculate_state_gov_building_weight``; the returned frame holds the
    'Weight' and 'geometry' columns of the rows whose geometry is not null.
    The elapsed time is printed.
    """
    print("Starting to calculate weights for each state government building...")
    started_at = time.time()

    # One weight per row
    gdf['Weight'] = gdf.apply(calculate_state_gov_building_weight, axis=1)

    # Drop rows without a geometry, then keep just the two output columns
    has_geometry = gdf.geometry.notnull()
    slim = gdf[has_geometry].copy()[['Weight', 'geometry']]

    elapsed_time = time.time() - started_at
    print(f"Weights calculated, null geometries filtered, and unnecessary fields removed in {elapsed_time:.2f} seconds.")

    return slim

# Step 4: Process and save the dataset with weights

def process_and_save_weighted_geojson(input_path, output_dir):
    """Weight the state government buildings GeoJSON and write the reduced copy into ``output_dir``.

    The output keeps the input file name with ``.geojson`` replaced by
    ``_weighted_cleaned.geojson``. Progress and before/after file sizes are
    printed.
    """
    bytes_per_mb = 1024 * 1024

    print(f"Loading dataset from {input_path}...")
    buildings = gpd.read_file(input_path)
    print("Dataset loaded successfully.")

    # Input size on disk, in MB
    initial_size = os.path.getsize(input_path) / bytes_per_mb
    print(f"Initial file size: {initial_size:.2f} MB")

    # Weighting, null-geometry filtering and column stripping happen in one call
    weighted_buildings = apply_weights_and_filter(buildings)

    target_name = os.path.basename(input_path).replace(".geojson", "_weighted_cleaned.geojson")
    output_path = os.path.join(output_dir, target_name)

    print(f"Saving weighted dataset to {output_path}...")
    weighted_buildings.to_file(output_path, driver='GeoJSON')

    # Output size on disk, in MB
    final_size = os.path.getsize(output_path) / bytes_per_mb
    print(f"Final file size: {final_size:.2f} MB")

    # Size saved, absolute and relative (0% when the input is empty)
    reduction = initial_size - final_size
    if initial_size > 0:
        reduction_percentage = (reduction / initial_size) * 100
    else:
        reduction_percentage = 0
    print(f"File size reduced by: {reduction:.2f} MB ({reduction_percentage:.2f}% reduction).")

# Destination folder and source GeoJSON for the state government buildings run
output_dir = "/geoJSON/cleaned_weighted"
input_path = "/geoJSON/cleaned/State_Government_Buildings_Cleaned.geojson"

# Create the destination folder if it is not there yet
os.makedirs(output_dir, exist_ok=True)

# Run the weighting pipeline
process_and_save_weighted_geojson(input_path, output_dir)


### State Capitol Buildings
**Dataset: State Capitol Buildings -** https://hifld-geoplatform.hub.arcgis.com/datasets/geoplatform::state-capitol-buildings-1/about

In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import time
import os

# Step 1: Weight lookup tables

# Score per DISTRIBUTION_POLICY code; "Other" is the catch-all entry
distribution_policy_weights = {"E4": 0.5, "E3": 0.3, "E2": 0.15, "Other": 0.05}

# Score per DATA_SECURITY level (integer keys 5 down to 1)
data_security_weights = {5: 0.4, 4: 0.3, 3: 0.2, 2: 0.1, 1: 0.05}

# Share of the final weight contributed by each attribute
qualitative_importance_grades = {
    "DISTRIBUTION_POLICY": 0.5,
    "DATA_SECURITY": 0.5,
}

# Step 2: Calculate the weight for each row

def calculate_state_capitol_weight(row):
    """Return the weight of one state capitol building record.

    DISTRIBUTION_POLICY is scored from its table (unlisted codes use the
    'Other' entry) and DATA_SECURITY from its table (unlisted levels score
    0.0). Each score is multiplied by its importance grade; the sum is
    floored at 0 and rounded to 4 places. Both columns must be present on
    ``row`` because they are read with ``row[...]``.
    """
    policy = row['DISTRIBUTION_POLICY']
    policy_score = distribution_policy_weights.get(policy, distribution_policy_weights['Other'])
    policy_part = policy_score * qualitative_importance_grades['DISTRIBUTION_POLICY']

    security = row['DATA_SECURITY']
    security_part = data_security_weights.get(security, 0.0) * qualitative_importance_grades['DATA_SECURITY']

    combined = policy_part + security_part
    return round(max(0.0, combined), 4)

# Step 3: Apply the weights, remove features with null geometry, and keep only required fields
def apply_weights_and_filter(gdf):
    """Score every state capitol building and return only the weight and geometry of located ones.

    A 'Weight' column is added to ``gdf`` in place via
    ``calculate_state_capitol_weight``; the returned frame holds the 'Weight'
    and 'geometry' columns of the rows whose geometry is not null. The
    elapsed time is printed.
    """
    print("Starting to calculate weights for each state capitol building...")
    started_at = time.time()

    # One weight per row
    gdf['Weight'] = gdf.apply(calculate_state_capitol_weight, axis=1)

    # Drop rows without a geometry, then keep just the two output columns
    has_geometry = gdf.geometry.notnull()
    slim = gdf[has_geometry].copy()[['Weight', 'geometry']]

    elapsed_time = time.time() - started_at
    print(f"Weights calculated, null geometries filtered, and unnecessary fields removed in {elapsed_time:.2f} seconds.")

    return slim

# Step 4: Process and save the dataset with weights

def process_and_save_weighted_geojson(input_path, output_dir):
    """Weight the state capitol buildings GeoJSON and write the reduced copy into ``output_dir``.

    The output keeps the input file name with ``.geojson`` replaced by
    ``_weighted_cleaned.geojson``. Progress and before/after file sizes are
    printed.
    """
    bytes_per_mb = 1024 * 1024

    print(f"Loading dataset from {input_path}...")
    capitols = gpd.read_file(input_path)
    print("Dataset loaded successfully.")

    # Input size on disk, in MB
    initial_size = os.path.getsize(input_path) / bytes_per_mb
    print(f"Initial file size: {initial_size:.2f} MB")

    # Weighting, null-geometry filtering and column stripping happen in one call
    weighted_capitols = apply_weights_and_filter(capitols)

    target_name = os.path.basename(input_path).replace(".geojson", "_weighted_cleaned.geojson")
    output_path = os.path.join(output_dir, target_name)

    print(f"Saving weighted dataset to {output_path}...")
    weighted_capitols.to_file(output_path, driver='GeoJSON')

    # Output size on disk, in MB
    final_size = os.path.getsize(output_path) / bytes_per_mb
    print(f"Final file size: {final_size:.2f} MB")

    # Size saved, absolute and relative (0% when the input is empty)
    reduction = initial_size - final_size
    if initial_size > 0:
        reduction_percentage = (reduction / initial_size) * 100
    else:
        reduction_percentage = 0
    print(f"File size reduced by: {reduction:.2f} MB ({reduction_percentage:.2f}% reduction).")

# Destination folder and source GeoJSON for the state capitol buildings run
output_dir = "/geoJSON/cleaned_weighted"
input_path = "/geoJSON/cleaned/State_Capitol_Buildings_Cleaned.geojson"

# Create the destination folder if it is not there yet
os.makedirs(output_dir, exist_ok=True)

# Run the weighting pipeline
process_and_save_weighted_geojson(input_path, output_dir)


### US Army Corps of Engineers Offices
**Dataset: US Army Corps of Engineers (USACE) Offices -** https://hifld-geoplatform.hub.arcgis.com/datasets/geoplatform::us-army-corps-of-engineers-usace-offices-/about

In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import time
import os

# Step 1: Define the weight schemas

# Define qualitative attribute weights for HQ_DIVISIO
hq_divisio_weights = {
    'Labs & Centers': 0.150,
    'CELRD': 0.150,
    'CENAD': 0.130,
    'CEMVD': 0.130,
    'CESAD': 0.110,
    'CENWD': 0.110,
    'CEPOD': 0.080,
    'CESPD': 0.080,
    'CESWD': 0.080,
    'HQUSACE': 0.040,
    'CETAD': 0.030,
    '249TH': 0.010
}

# Define qualitative attribute weights for TIME_ZONE
time_zone_weights = {
    'Eastern': 0.400,
    'Central': 0.350,
    'Pacific': 0.120,
    'Hawaii': 0.030,
    'Afghanistan': 0.010,
    'Alaska': 0.010,
    'Mountain': 0.010,
    'Korea': 0.010,
    'Japan': 0.010,
    'Other': 0.010
}

# Define overall importance grades for each qualitative property
qualitative_importance_grades = {
    'HQ_DIVISIO': 0.650,
    'TIME_ZONE': 0.350
}

# Step 2: Calculate the weight for each row

def calculate_usace_office_weight(row):
    """Compute the weight of one USACE office record.

    HQ_DIVISIO is scored from ``hq_divisio_weights`` (0.0 when absent or not
    listed; that table has no catch-all entry). TIME_ZONE is scored from
    ``time_zone_weights``; a missing, null or unlisted zone uses the table's
    'Other' entry. Each score is multiplied by its importance grade and the
    two parts are summed.

    Args:
        row: Mapping-like record (pandas Series or dict) exposing ``.get``.

    Returns:
        float: Non-negative weight rounded to 4 decimal places.
    """
    # Calculate qualitative weights
    hq_divisio_weight = hq_divisio_weights.get(row.get('HQ_DIVISIO', 'Other'), 0.0) * qualitative_importance_grades['HQ_DIVISIO']
    # Fall back to the 'Other' entry for zones that are present but unlisted,
    # not only when the column is missing.
    time_zone_weight = time_zone_weights.get(row.get('TIME_ZONE', 'Other'), time_zone_weights['Other']) * qualitative_importance_grades['TIME_ZONE']

    # Combine qualitative weights
    total_weight = hq_divisio_weight + time_zone_weight

    # Ensure the total weight is positive and rounded to 4 decimal places
    return round(max(0.0, total_weight), 4)

# Step 3: Apply the weights, remove features with null geometry, and keep only necessary fields
def apply_weights_and_filter(gdf):
    """Score every USACE office and return only the weight and geometry of located ones.

    A 'Weight' column is added to ``gdf`` in place via
    ``calculate_usace_office_weight``; the returned frame holds the 'Weight'
    and 'geometry' columns of the rows whose geometry is not null. The
    elapsed time is printed.
    """
    print("Starting to calculate weights for each USACE office...")
    started_at = time.time()

    # One weight per row
    gdf['Weight'] = gdf.apply(calculate_usace_office_weight, axis=1)

    # Drop rows without a geometry, then keep just the two output columns
    has_geometry = gdf.geometry.notnull()
    slim = gdf[has_geometry].copy()[['Weight', 'geometry']]

    elapsed_time = time.time() - started_at
    print(f"Weights calculated, null geometries filtered, and unnecessary fields removed in {elapsed_time:.2f} seconds.")

    return slim

# Step 4: Process and save the dataset with weights

def process_and_save_weighted_geojson(input_path, output_dir):
    """Load the USACE GeoJSON, weight it, save it, and report the size change.

    The output file is written into ``output_dir`` under the input's base
    name with ".geojson" replaced by "_weighted_cleaned.geojson".
    """
    bytes_per_mb = 1024 * 1024

    # Read the source features
    print(f"Loading dataset from {input_path}...")
    offices = gpd.read_file(input_path)
    print("Dataset loaded successfully.")

    # Size on disk before processing, in MB
    size_before = os.path.getsize(input_path) / bytes_per_mb
    print(f"Initial file size: {size_before:.2f} MB")

    # Score rows, drop null geometries, keep only Weight + geometry
    weighted = apply_weights_and_filter(offices)

    # Derive the destination from the input file name
    base_name = os.path.basename(input_path)
    output_path = os.path.join(
        output_dir,
        base_name.replace(".geojson", "_weighted_cleaned.geojson"),
    )

    print(f"Saving weighted dataset to {output_path}...")
    weighted.to_file(output_path, driver='GeoJSON')

    # Size on disk after processing, in MB
    size_after = os.path.getsize(output_path) / bytes_per_mb
    print(f"Final file size: {size_after:.2f} MB")

    # Absolute and relative shrinkage (guarding against an empty input file)
    saved = size_before - size_after
    if size_before > 0:
        saved_pct = (saved / size_before) * 100
    else:
        saved_pct = 0
    print(f"File size reduced by: {saved:.2f} MB ({saved_pct:.2f}% reduction).")

# Define paths
# Input: the cleaned USACE offices GeoJSON; output: directory that receives the weighted file.
input_path = "/geoJSON/cleaned/USACE_Offices_Cleaned.geojson"
output_dir = "/geoJSON/cleaned_weighted"

# Ensure output directory exists (exist_ok=True makes this a no-op if it is already there)
os.makedirs(output_dir, exist_ok=True)

# Process and save the dataset
# NOTE: process_and_save_weighted_geojson is redefined by several sections of this file;
# this call runs whichever definition was executed most recently.
process_and_save_weighted_geojson(input_path, output_dir)


## Healthcare

### Pharmacies
**Dataset: Pharmacies -** https://hifld-geoplatform.hub.arcgis.com/datasets/geoplatform::pharmacies-/about

In [None]:
import geopandas as gpd
import pandas as pd
import time
import os

# Step 1: Define the weight schemas

# Define qualitative attribute weights
# Keys are string codes; calculate_pharmacy_weight treats a missing ENT_TYPE as '3'.
ent_type_weights = {
    '2': 1.000,  # Adjusted to match the example data ('ENT_TYPE': '2')
    '3': 0.000
}

# Per-provider weights keyed by the exact PROVID_11 string.
# Providers not listed here score 0.0 in calculate_pharmacy_weight.
provid_11_weights = {
    'WAL-MART PHARMACY': 0.300,
    'CVS PHARMACY': 0.280,
    'TARGET PHARMACY': 0.140,
    'KROGER PHARMACY': 0.110,
    'SAVON PHARMACY': 0.050,
    'SAMS PHARMACY': 0.040,
    'Other': 0.000  # Default case
}

# Define overall importance grades for each qualitative property
# Each attribute weight is multiplied by its grade before the two are summed.
# This module-level name is rebound by every dataset section of this file.
qualitative_importance_grades = {
    'ENT_TYPE': 0.500,
    'PROVID_11': 0.500
}

# Step 2: Calculate the weight for each row
def calculate_pharmacy_weight(row):
    """Return the weight of one pharmacy record, rounded to 4 decimals.

    The score is the ENT_TYPE weight plus the PROVID_11 weight, each scaled
    by its importance grade. A missing ENT_TYPE is treated as '3', a missing
    PROVID_11 as 'Other'; values absent from the tables contribute 0.0.
    """
    # Raw per-attribute scores from the lookup tables
    ent_type_score = ent_type_weights.get(row.get('ENT_TYPE', '3'), 0.0)
    provider_score = provid_11_weights.get(row.get('PROVID_11', 'Other'), 0.0)

    # Scale each score by the importance of its attribute and add them up
    combined = (
        ent_type_score * qualitative_importance_grades['ENT_TYPE']
        + provider_score * qualitative_importance_grades['PROVID_11']
    )

    return round(combined, 4)

# Step 3: Apply the weights to the dataset
def apply_weights_to_pharmacies_dataset(gdf):
    """Score every pharmacy and return only the geometry and weight columns.

    A 'Weight' column is added to ``gdf`` in place; the returned frame is
    the ['geometry', 'Weight'] selection of it.
    """
    print("Starting to calculate weights for each pharmacy...")
    t0 = time.time()

    # One calculate_pharmacy_weight call per row
    gdf['Weight'] = gdf.apply(calculate_pharmacy_weight, axis=1)

    # Drop every attribute except the geometry and the computed weight
    trimmed = gdf[['geometry', 'Weight']]

    print(f"Weights calculated. Time elapsed: {time.time() - t0:.2f} seconds.")

    return trimmed

# Step 4: Process and save the dataset with weights
def process_and_save_weighted_geojson(input_path, output_path):
    """Read the pharmacies GeoJSON, weight it, and write it to ``output_path``.

    Unlike the other sections' variants, this one takes the full output file
    path rather than an output directory.
    """
    # Read
    print(f"Loading dataset from {input_path}...")
    pharmacies = gpd.read_file(input_path)
    print("Dataset loaded successfully.")

    # Weight
    weighted = apply_weights_to_pharmacies_dataset(pharmacies)

    # Write
    print(f"Saving weighted dataset to {output_path}...")
    weighted.to_file(output_path, driver='GeoJSON')
    print("Weighted dataset saved successfully.")

# Define paths
# Input: the cleaned pharmacies GeoJSON; output_dir: directory that receives the weighted file.
input_path = "/geoJSON/cleaned/Pharmacies_Cleaned.geojson"
output_dir = "/geoJSON/cleaned_weighted"

# Ensure output directory exists (exist_ok=True makes this a no-op if it is already there)
os.makedirs(output_dir, exist_ok=True)

# Output file path
# The file name is fixed here instead of being derived from the input name.
output_path = os.path.join(output_dir, "Pharmacies_Weighted_cleaned.geojson")

# Process and save the dataset
# NOTE: the pharmacies variant of process_and_save_weighted_geojson expects a full
# output file path as its second argument, not a directory.
process_and_save_weighted_geojson(input_path, output_path)


### Dialysis Centers
**Dataset: Dialysis Centers -** https://hifld-geoplatform.hub.arcgis.com/datasets/geoplatform::dialysis-centers/about

In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import time
import os

# Step 1: Define the weight schemas

# Define qualitative attribute weights
# Only one facility type is rated; any other Type_ value scores 0.0 in calculate_dialysis_weight.
type_weights = {
    'Dialysis Center': 1.000
}

# Keys are lowercase; the lookup in calculate_dialysis_weight is an exact (case-sensitive) match.
status_weights = {
    'open': 0.999,
    'unknown': 0.001
}

# Define overall importance grades for each qualitative property
# Each attribute weight is multiplied by its grade before the two are summed.
# This module-level name is rebound by every dataset section of this file.
qualitative_importance_grades = {
    'Type_': 0.500,
    'Status': 0.500
}

# Step 2: Calculate the weight for each row

def calculate_dialysis_weight(row):
    """Return the weight of one dialysis-center record, rounded to 4 decimals.

    Sums the Type_ and Status lookup weights, each scaled by its importance
    grade, and clamps the result at 0.0. Missing attributes default to
    'Unknown'; values not found in the tables contribute 0.0.

    NOTE(review): the 'Unknown' default does not match the lowercase
    'unknown' key of status_weights, so a missing Status scores 0.0 —
    confirm that this is intended.
    """
    facility_type = row.get('Type_', 'Unknown')
    status = row.get('Status', 'Unknown')

    # Graded contribution of each qualitative attribute
    type_part = type_weights.get(facility_type, 0.0) * qualitative_importance_grades['Type_']
    status_part = status_weights.get(status, 0.0) * qualitative_importance_grades['Status']

    # Never return a negative weight
    clamped = max(0.0, type_part + status_part)
    return round(clamped, 4)

# Step 3: Apply the weights, remove features with null geometry, and keep only necessary fields
def apply_weights_and_filter(gdf):
    """Weight every dialysis center and return a slim copy with valid geometry.

    Adds a 'Weight' column to ``gdf`` in place, then returns the 'Weight'
    and 'geometry' columns of a copy restricted to rows whose geometry is
    not null.
    """
    print("Starting to calculate weights for each dialysis center...")
    clock_start = time.time()

    # Row-wise scoring written onto the caller's frame
    gdf['Weight'] = gdf.apply(calculate_dialysis_weight, axis=1)

    # Rows without a geometry are excluded from the copy
    valid_rows = gdf[gdf.geometry.notnull()].copy()

    # Only the weight and the geometry survive
    columns = ['Weight', 'geometry']
    result = valid_rows[columns]

    seconds = time.time() - clock_start
    print(f"Weights calculated, null geometries filtered, and unnecessary fields removed in {seconds:.2f} seconds.")

    return result

# Step 4: Process and save the dataset with weights

def process_and_save_weighted_geojson(input_path, output_dir):
    """Load the dialysis GeoJSON, weight it, save it, and report the size change.

    The result is written to ``output_dir`` using the input's base name with
    ".geojson" replaced by "_weighted_cleaned.geojson".
    """
    def _size_in_mb(path):
        # File size on disk converted from bytes to MB
        return os.path.getsize(path) / (1024 * 1024)

    print(f"Loading dataset from {input_path}...")
    centers = gpd.read_file(input_path)
    print("Dataset loaded successfully.")

    initial_mb = _size_in_mb(input_path)
    print(f"Initial file size: {initial_mb:.2f} MB")

    # Score rows, drop null geometries, keep only Weight + geometry
    weighted_centers = apply_weights_and_filter(centers)

    # Destination file name is derived from the input file name
    target_name = os.path.basename(input_path).replace(".geojson", "_weighted_cleaned.geojson")
    output_path = os.path.join(output_dir, target_name)

    print(f"Saving weighted dataset to {output_path}...")
    weighted_centers.to_file(output_path, driver='GeoJSON')

    final_mb = _size_in_mb(output_path)
    print(f"Final file size: {final_mb:.2f} MB")

    # Report shrinkage; an empty input file yields a 0% figure instead of dividing by zero
    delta_mb = initial_mb - final_mb
    delta_pct = 0 if not initial_mb > 0 else (delta_mb / initial_mb) * 100
    print(f"File size reduced by: {delta_mb:.2f} MB ({delta_pct:.2f}% reduction).")

# Define paths
# Input: the cleaned dialysis centers GeoJSON; output: directory that receives the weighted file.
input_path = "/geoJSON/cleaned/Dialysis_Centers_Cleaned.geojson"
output_dir = "/geoJSON/cleaned_weighted"

# Ensure output directory exists (exist_ok=True makes this a no-op if it is already there)
os.makedirs(output_dir, exist_ok=True)

# Process and save the dataset
# NOTE: process_and_save_weighted_geojson is redefined by several sections of this file;
# this call runs whichever definition was executed most recently.
process_and_save_weighted_geojson(input_path, output_dir)


### Hospitals
**Dataset: Hospitals -** https://hifld-geoplatform.hub.arcgis.com/datasets/geoplatform::hospitals/about

In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import time
import os

# Step 1: Define the weight schemas

# Define qualitative attribute weights
type_weights = {
    'GENERAL ACUTE CARE': 0.500,
    'CRITICAL ACCESS': 0.300,
    'PSYCHIATRIC': 0.200,
    'Other': 0.000  # This will handle other types not explicitly rated
}

status_weights = {
    'OPEN': 0.950,
    'CLOSED': 0.050
}

# Keys are integers; a NAICS_CODE stored as a string will not match.
naics_weights = {
    622110: 0.800,
    622310: 0.100,
    622210: 0.100
}

# NOTE(review): these values sum to 1.9, unlike the other tables — confirm intended.
trauma_weights = {
    'LEVEL I': 0.300,
    'LEVEL II': 0.200,
    'LEVEL III': 0.300,
    'LEVEL IV': 0.300,
    'TRH': 0.200,
    'TRF': 0.200,
    'CTH': 0.200,
    'NOT AVAILABLE': 0.200
}

helipad_weights = {
    'Y': 0.700,
    'N': 0.300
}

# Define overall importance grades for each qualitative property
qualitative_importance_grades = {
    'TYPE': 0.300,
    'STATUS': 0.250,
    'NAICS_CODE': 0.200,
    'TRAUMA': 0.150,
    'HELIPAD': 0.100
}

# Define quantitative attribute weights
quantitative_weights = {
    'BEDS': 1.000  # Since this is the only quantitative attribute, it gets full weight
}

# Step 2: Calculate the weight for each row

def calculate_hospital_weight(row, max_beds=None):
    """Return the weight of one hospital record, rounded to 4 decimals.

    The weight is the sum of five graded qualitative lookups (TYPE, STATUS,
    NAICS_CODE, TRAUMA, HELIPAD) plus a bed-count score.

    The bed-count score is ``BEDS / max_beds`` scaled by the BEDS weight.
    The previous implementation divided each row's BEDS by
    ``np.nanmax(row['BEDS'])`` — the maximum of a single scalar — so every
    hospital with at least one bed scored exactly 1.0 regardless of size.

    Args:
        row: Mapping-like record (e.g. a pandas Series) exposing ``.get``.
        max_beds: Largest BEDS value in the dataset, used as the scaling
            denominator. When omitted (or not a positive number) the row is
            scaled against its own value, which reproduces the old result
            for callers that do not pass a maximum.

    Returns:
        Non-negative float rounded to 4 decimal places.
    """
    # Calculate qualitative weights
    type_weight = type_weights.get(row.get('TYPE', 'Other'), type_weights['Other']) * qualitative_importance_grades['TYPE']
    status_weight = status_weights.get(row.get('STATUS', 'Other'), 0.0) * qualitative_importance_grades['STATUS']
    naics_weight = naics_weights.get(row.get('NAICS_CODE', 0), 0.0) * qualitative_importance_grades['NAICS_CODE']
    trauma_weight = trauma_weights.get(row.get('TRAUMA', 'NOT AVAILABLE'), 0.0) * qualitative_importance_grades['TRAUMA']
    helipad_weight = helipad_weights.get(row.get('HELIPAD', 'N'), 0.0) * qualitative_importance_grades['HELIPAD']

    # Calculate quantitative weights: null, zero and negative bed counts contribute nothing
    beds = row.get('BEDS')
    if pd.notna(beds) and beds > 0:
        has_dataset_max = max_beds is not None and pd.notna(max_beds) and max_beds > 0
        denominator = max_beds if has_dataset_max else beds
        beds_weight = (beds / denominator) * quantitative_weights['BEDS']
    else:
        beds_weight = 0

    # Combine qualitative and quantitative weights
    total_weight = type_weight + status_weight + naics_weight + trauma_weight + helipad_weight + beds_weight

    # Ensure the weight is non-negative and rounded to 4 decimal places
    return round(max(0.0, total_weight), 4)

# Step 3: Apply the weights and remove extra fields

def apply_weights_and_cleanup(gdf):
    """Score every hospital and return only the 'Weight' and 'geometry' columns.

    The dataset-wide maximum bed count is computed once and handed to
    calculate_hospital_weight so bed counts are scaled relative to the
    largest hospital. A 'Weight' column is added to ``gdf`` in place.
    """
    print("Starting to calculate weights for each hospital...")
    start_time = time.time()

    # Largest bed count in the dataset (non-numeric entries are ignored)
    if 'BEDS' in gdf.columns:
        max_beds = pd.to_numeric(gdf['BEDS'], errors='coerce').max()
    else:
        max_beds = None

    # Apply the weight calculation function to each row
    gdf['Weight'] = gdf.apply(calculate_hospital_weight, axis=1, args=(max_beds,))

    # Remove all fields except for 'Weight' and 'geometry'
    gdf = gdf[['Weight', 'geometry']]

    end_time = time.time()
    elapsed_time = end_time - start_time
    print(f"Weights calculated and fields removed. Time elapsed: {elapsed_time:.2f} seconds.")

    return gdf

# Step 4: Process and save the dataset with weights

def process_and_save_weighted_geojson(input_path, output_path):
    """Load the hospitals GeoJSON, weight it, save it, and report the size change.

    This variant takes the full output file path (not a directory) as its
    second argument.
    """
    mb = 1024 * 1024

    # Read the source features
    print(f"Loading dataset from {input_path}...")
    hospitals = gpd.read_file(input_path)
    print("Dataset loaded successfully.")

    # Size on disk before processing
    before_mb = os.path.getsize(input_path) / mb
    print(f"Initial file size: {before_mb:.2f} MB")

    # Score rows and strip all fields but Weight + geometry
    weighted_hospitals = apply_weights_and_cleanup(hospitals)

    print(f"Saving weighted dataset to {output_path}...")
    weighted_hospitals.to_file(output_path, driver='GeoJSON')

    # Size on disk after processing
    after_mb = os.path.getsize(output_path) / mb
    print(f"Final file size: {after_mb:.2f} MB")

    # Shrinkage in MB and percent; an empty input reports 0%
    shrink_mb = before_mb - after_mb
    if before_mb > 0:
        shrink_pct = (shrink_mb / before_mb) * 100
    else:
        shrink_pct = 0
    print(f"File size reduced by: {shrink_mb:.2f} MB ({shrink_pct:.2f}% reduction).")

# Define paths
# Input: the cleaned hospitals GeoJSON. The output file name is fixed here rather than
# being derived from the input name.
input_path = "/geoJSON/cleaned/Hospitals_Cleaned.geojson"
output_dir = "/geoJSON/cleaned_weighted"
output_file_name = "Hospitals_Weighted.geojson"
output_path = os.path.join(output_dir, output_file_name)

# Ensure output directory exists (exist_ok=True makes this a no-op if it is already there)
os.makedirs(output_dir, exist_ok=True)

# Process and save the dataset
# NOTE: the hospitals variant of process_and_save_weighted_geojson expects a full
# output file path as its second argument, not a directory.
process_and_save_weighted_geojson(input_path, output_path)


### Public Health Departments
**Dataset: Public Health Departments -** https://hifld-geoplatform.hub.arcgis.com/datasets/geoplatform::public-health-departments/about

In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import time
import os

# Step 1: Define the weight schemas

# Define qualitative attribute weights
hsipthemes_weights = {
    'CRITICAL INFRASTRUCTURE, PDD-63; PUBLIC HEALTH; HEALTH SERVICES; PUBLIC HEALTH OFFICES, STATE AND LOCAL': 1.000
}

govt_level_weights = {
    'LOCAL': 0.800,
    'STATE': 0.150,
    'TRIBAL': 0.030,
    'OTHER': 0.020
}

st_vendor_weights = {
    'NAVTEQ': 0.990,
    'TGS': 0.010,
    'GU_GOV': 0.000,
    'MP_GOV': 0.000,
    'AS_GOV': 0.000
}

st_version_weights = {
    '2009Q2': 1.000,
    '2006': 0.000,
    '2007': 0.000,
    '1990': 0.000
}

sdr_weights = {
    'YES': 0.950,
    'NO': 0.050,
    'Unspecified': 0.000
}

# Define overall importance grades for each qualitative property
qualitative_importance_grades = {
    'HSIPTHEMES': 0.300,
    'GOVT_LEVEL': 0.250,
    'ST_VENDOR': 0.200,
    'ST_VERSION': 0.150,
    'SDR': 0.100
}

# Define quantitative attribute weights
quantitative_weights = {
    'TOT_STAFF': 1.000  # Adjusting to give some impact for this quantitative measure
}

# Step 2: Calculate the weight for each row
def calculate_health_dept_weight(row, max_tot_staff=None):
    """Return the weight of one public-health-department record.

    The weight is the sum of five graded qualitative lookups (HSIPTHEMES,
    GOVT_LEVEL, ST_VENDOR, ST_VERSION, SDR) plus a staffing score, clamped
    at 0.0 and rounded to 4 decimals.

    The staffing score is ``TOT_STAFF / max_tot_staff`` scaled by the
    TOT_STAFF weight. Two defects of the previous implementation are fixed:

    * it used ``max(1, tot_staff)`` — the row's own value — as denominator,
      so every department with at least one staff member scored 1.0;
    * a negative TOT_STAFF produced a negative staffing term that could
      drive the whole weight to 0 through the final clamp. Non-positive
      staff counts now simply contribute nothing.

    Args:
        row: Mapping-like record (e.g. a pandas Series) exposing ``.get``.
        max_tot_staff: Largest TOT_STAFF value in the dataset. When omitted
            (or null) the row is scaled against its own value, which
            reproduces the old result for positive staff counts.

    Returns:
        Non-negative float rounded to 4 decimal places.
    """
    # Calculate qualitative weights
    hsipthemes_weight = hsipthemes_weights.get(row.get('HSIPTHEMES', ''), 0.0) * qualitative_importance_grades['HSIPTHEMES']
    govt_level_weight = govt_level_weights.get(row.get('GOVT_LEVEL', 'OTHER'), 0.0) * qualitative_importance_grades['GOVT_LEVEL']
    st_vendor_weight = st_vendor_weights.get(row.get('ST_VENDOR', ''), 0.0) * qualitative_importance_grades['ST_VENDOR']
    st_version_weight = st_version_weights.get(row.get('ST_VERSION', ''), 0.0) * qualitative_importance_grades['ST_VERSION']
    sdr_weight = sdr_weights.get(row.get('SDR', 'Unspecified'), 0.0) * qualitative_importance_grades['SDR']

    # Calculate quantitative weights
    tot_staff = row.get('TOT_STAFF', 0)
    if pd.isna(tot_staff) or tot_staff <= 0:
        tot_staff_weight = 0
    else:
        if max_tot_staff is None or pd.isna(max_tot_staff):
            scale = tot_staff  # no dataset maximum supplied: legacy self-scaling
        else:
            scale = max_tot_staff
        scale = max(1, scale)  # Prevent division by zero
        tot_staff_weight = (tot_staff / scale) * quantitative_weights['TOT_STAFF']

    # Combine qualitative and quantitative weights
    total_weight = (
        hsipthemes_weight +
        govt_level_weight +
        st_vendor_weight +
        st_version_weight +
        sdr_weight +
        tot_staff_weight
    )

    # Ensure the weight is non-negative and rounded to 4 decimal places
    return round(max(0.0, total_weight), 4)

# Step 3: Apply the weights to the dataset and filter unnecessary fields
def apply_weights_to_health_dept_dataset(gdf):
    """Score every department and return only 'Weight' and 'geometry'.

    The dataset-wide maximum TOT_STAFF is computed once and handed to
    calculate_health_dept_weight so staffing is scaled relative to the
    largest department. A 'Weight' column is added to ``gdf`` in place.
    """
    print("Starting to calculate weights for each public health department...")
    start_time = time.time()

    # Largest staff count in the dataset (non-numeric entries are ignored)
    if 'TOT_STAFF' in gdf.columns:
        max_tot_staff = pd.to_numeric(gdf['TOT_STAFF'], errors='coerce').max()
    else:
        max_tot_staff = None

    # Apply the weight calculation function to each row
    gdf['Weight'] = gdf.apply(calculate_health_dept_weight, axis=1, args=(max_tot_staff,))

    # Keep only relevant fields (in this case, 'Weight' and 'geometry')
    fields_to_keep = ['Weight', 'geometry']
    gdf_filtered = gdf[fields_to_keep]

    end_time = time.time()
    elapsed_time = end_time - start_time
    print(f"Weights calculated and unnecessary fields removed in {elapsed_time:.2f} seconds.")

    return gdf_filtered

# Step 4: Process and save the dataset with weights

def process_and_save_weighted_geojson(input_path, output_dir):
    """Load the public-health-department GeoJSON, weight it, save it, report sizes.

    The result is written to ``output_dir`` using the input's base name with
    ".geojson" replaced by "_weighted_cleaned.geojson".
    """
    to_mb = 1024 * 1024

    print(f"Loading dataset from {input_path}...")
    departments = gpd.read_file(input_path)
    print("Dataset loaded successfully.")

    # Input size in MB
    input_mb = os.path.getsize(input_path) / to_mb
    print(f"Initial file size: {input_mb:.2f} MB")

    # Score rows and keep only Weight + geometry
    weighted_departments = apply_weights_to_health_dept_dataset(departments)

    # Destination file name is derived from the input file name
    file_name = os.path.basename(input_path)
    file_name = file_name.replace(".geojson", "_weighted_cleaned.geojson")
    output_path = os.path.join(output_dir, file_name)

    print(f"Saving weighted dataset to {output_path}...")
    weighted_departments.to_file(output_path, driver='GeoJSON')

    # Output size in MB
    output_mb = os.path.getsize(output_path) / to_mb
    print(f"Final file size: {output_mb:.2f} MB")

    # Shrinkage in MB and percent; an empty input reports 0%
    saved_mb = input_mb - output_mb
    saved_pct = (saved_mb / input_mb) * 100 if input_mb > 0 else 0
    print(f"File size reduced by: {saved_mb:.2f} MB ({saved_pct:.2f}% reduction).")

# Define paths
# Input: the cleaned public health departments GeoJSON; output: directory for the weighted file.
input_path = "/geoJSON/cleaned/Public_Health_Departments_Cleaned.geojson"
output_dir = "/geoJSON/cleaned_weighted"

# Ensure output directory exists (exist_ok=True makes this a no-op if it is already there)
os.makedirs(output_dir, exist_ok=True)

# Process and save the dataset
# NOTE: process_and_save_weighted_geojson is redefined by several sections of this file;
# this call runs whichever definition was executed most recently.
process_and_save_weighted_geojson(input_path, output_dir)


### Urgent Care Facilities

In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import time
import os

# Step 1: Define the weight schemas

# Define qualitative attribute weights
# Only this exact HSIPTHEMES string is rated; any other value scores 0.0.
hsipthemes_weights = {
    'CRITICAL INFRASTRUCTURE, PDD-63; PUBLIC HEALTH; PRIMARY CARE FACILITIES (INCLUDING HOSPITALS); AMBULATORY SURGICAL FACILITIES': 1.000
}

st_vendor_weights = {
    'NAVTEQ': 0.990,
    'TGS': 0.010,
    'GU_GOV': 0.000,
    'MP_GOV': 0.000,
    'AS_GOV': 0.000
}

st_version_weights = {
    '2008Q1': 1.000,
    '2006': 0.000,
    '2007': 0.000,
    '1990': 0.000
}

# Define overall importance grades for each qualitative property
# Each attribute weight is multiplied by its grade before summing.
# This module-level name is rebound by every dataset section of this file.
qualitative_importance_grades = {
    'HSIPTHEMES': 0.500,
    'ST_VENDOR': 0.300,
    'ST_VERSION': 0.200
}

# No quantitative attributes in this case, but you can adjust if needed
# (calculate_urgent_care_weight does not read this dict).
quantitative_weights = {}

# Step 2: Calculate the weight for each row
def calculate_urgent_care_weight(row):
    """Return the weight of one urgent-care record, rounded to 4 decimals.

    Sums the graded HSIPTHEMES, ST_VENDOR and ST_VERSION lookup weights and
    clamps the result at 0.0. Missing attributes default to '' and values
    not present in a table contribute 0.0.
    """
    # (attribute, lookup table) pairs in the order they are accumulated
    graded_tables = (
        ('HSIPTHEMES', hsipthemes_weights),
        ('ST_VENDOR', st_vendor_weights),
        ('ST_VERSION', st_version_weights),
    )

    score = 0.0
    for attribute, table in graded_tables:
        score += table.get(row.get(attribute, ''), 0.0) * qualitative_importance_grades[attribute]

    # Never return a negative weight
    return round(max(0.0, score), 4)

# Step 3: Apply the weights to the dataset and filter unnecessary fields
def apply_weights_to_urgent_care_dataset(gdf):
    """Score every urgent-care facility and return only 'Weight' and 'geometry'.

    A 'Weight' column is added to ``gdf`` in place; the returned frame is
    the ['Weight', 'geometry'] selection of it.
    """
    print("Starting to calculate weights for each urgent care facility...")
    began = time.time()

    # One calculate_urgent_care_weight call per row
    gdf['Weight'] = gdf.apply(calculate_urgent_care_weight, axis=1)

    # Discard every attribute except the weight and the geometry
    slim = gdf[['Weight', 'geometry']]

    print(f"Weights calculated and unnecessary fields removed in {time.time() - began:.2f} seconds.")

    return slim

# Step 4: Process and save the dataset with weights

def process_and_save_weighted_geojson(input_path, output_dir):
    """Load the urgent-care GeoJSON, weight it, save it, and report the size change.

    The result is written to ``output_dir`` using the input's base name with
    ".geojson" replaced by "_weighted_cleaned.geojson".
    """
    def _megabytes(path):
        # On-disk size converted from bytes to MB
        return os.path.getsize(path) / (1024 * 1024)

    print(f"Loading dataset from {input_path}...")
    facilities = gpd.read_file(input_path)
    print("Dataset loaded successfully.")

    mb_in = _megabytes(input_path)
    print(f"Initial file size: {mb_in:.2f} MB")

    # Score rows and keep only Weight + geometry
    weighted_facilities = apply_weights_to_urgent_care_dataset(facilities)

    # Destination file name is derived from the input file name
    output_path = os.path.join(
        output_dir,
        os.path.basename(input_path).replace(".geojson", "_weighted_cleaned.geojson"),
    )

    print(f"Saving weighted dataset to {output_path}...")
    weighted_facilities.to_file(output_path, driver='GeoJSON')

    mb_out = _megabytes(output_path)
    print(f"Final file size: {mb_out:.2f} MB")

    # Shrinkage in MB and percent; an empty input reports 0%
    mb_saved = mb_in - mb_out
    if mb_in > 0:
        pct_saved = (mb_saved / mb_in) * 100
    else:
        pct_saved = 0
    print(f"File size reduced by: {mb_saved:.2f} MB ({pct_saved:.2f}% reduction).")

# Define paths
# Input: the cleaned urgent care facilities GeoJSON; output: directory for the weighted file.
input_path = "/geoJSON/cleaned/Urgent_Care_Facilities_Cleaned.geojson"
output_dir = "/geoJSON/cleaned_weighted"

# Ensure output directory exists (exist_ok=True makes this a no-op if it is already there)
os.makedirs(output_dir, exist_ok=True)

# Process and save the dataset
# NOTE: process_and_save_weighted_geojson is redefined by several sections of this file;
# this call runs whichever definition was executed most recently.
process_and_save_weighted_geojson(input_path, output_dir)


### VA Facilities

In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import time
import os

# Step 1: Define the weight schemas

# Define qualitative attribute weights for NAICSCODE
# Keys are strings; a NAICSCODE stored as a number will not match.
# NOTE(review): '62149' has five digits while the other codes have six — confirm the key.
naicscode_weights = {
    '62149': 0.700,   # Outpatient Care Centers
    '622110': 0.250,  # General Medical and Surgical Hospitals
    '623110': 0.040,  # Nursing Care Facilities
    '622310': 0.010   # Specialty Hospitals
}

# Define qualitative attribute weights for PRIM_SVC
prims_svc_weights = {
    'CBOC': 0.500,  # Community Based Outpatient Clinic
    'VCTR': 0.300,  # Veterans Center
    'VAMC': 0.150,  # Veterans Affairs Medical Center
    'VANH': 0.030,  # Veterans Affairs Nursing Home
    'IOC': 0.015,   # Independent Outpatient Clinic
    'RRTP': 0.005   # Residential Rehabilitation Treatment Program
}

# Define overall importance grades for each qualitative property
# Each attribute weight is multiplied by its grade before the two are summed.
# This module-level name is rebound by every dataset section of this file.
qualitative_importance_grades = {
    'NAICSCODE': 0.600,
    'PRIM_SVC': 0.400
}

# Step 2: Calculate the weight for each row
def calculate_va_facility_weight(row):
    """Return the weight of one VA facility record, rounded to 4 decimals.

    Sums the graded NAICSCODE and PRIM_SVC lookup weights and clamps the
    result at 0.0. Missing attributes default to '' and values not present
    in a table contribute 0.0.
    """
    naics_code = row.get('NAICSCODE', '')
    primary_service = row.get('PRIM_SVC', '')

    # Graded contribution of each qualitative attribute
    naics_part = naicscode_weights.get(naics_code, 0.0) * qualitative_importance_grades['NAICSCODE']
    service_part = prims_svc_weights.get(primary_service, 0.0) * qualitative_importance_grades['PRIM_SVC']

    # Never return a negative weight
    clamped = max(0.0, naics_part + service_part)
    return round(clamped, 4)

# Step 3: Apply the weights to the dataset
def apply_weights_to_va_facilities_dataset(gdf):
    """Score every VA facility and return only 'Weight' and 'geometry'.

    A 'Weight' column is added to ``gdf`` in place; the returned frame is
    the ['Weight', 'geometry'] selection of it.
    """
    print("Starting to calculate weights for each VA Medical Facility...")
    tic = time.time()

    # One calculate_va_facility_weight call per row
    gdf['Weight'] = gdf.apply(calculate_va_facility_weight, axis=1)

    # Discard every attribute except the weight and the geometry
    reduced = gdf[['Weight', 'geometry']]

    toc = time.time()
    print(f"Weights calculated. Time elapsed: {toc - tic:.2f} seconds.")

    return reduced

# Step 4: Process and save the dataset with weights
def process_and_save_weighted_geojson(input_path, output_dir):
    """Load the VA facilities GeoJSON, weight it, save it, and report the size change.

    The result is written to ``output_dir`` using the input's base name with
    ".geojson" replaced by "_weighted_cleaned.geojson".
    """
    mebibyte = 1024 * 1024

    print(f"Loading dataset from {input_path}...")
    va_facilities = gpd.read_file(input_path)
    print("Dataset loaded successfully.")

    # Input size in MB
    start_mb = os.path.getsize(input_path) / mebibyte
    print(f"Initial file size: {start_mb:.2f} MB")

    # Score rows and keep only Weight + geometry
    weighted_facilities = apply_weights_to_va_facilities_dataset(va_facilities)

    # Destination file name is derived from the input file name
    source_name = os.path.basename(input_path)
    output_path = os.path.join(
        output_dir,
        source_name.replace(".geojson", "_weighted_cleaned.geojson"),
    )

    print(f"Saving weighted dataset to {output_path}...")
    weighted_facilities.to_file(output_path, driver='GeoJSON')

    # Output size in MB
    end_mb = os.path.getsize(output_path) / mebibyte
    print(f"Final file size: {end_mb:.2f} MB")

    # Shrinkage in MB and percent; an empty input reports 0%
    diff_mb = start_mb - end_mb
    diff_pct = (diff_mb / start_mb) * 100 if start_mb > 0 else 0
    print(f"File size reduced by: {diff_mb:.2f} MB ({diff_pct:.2f}% reduction).")

# Define paths
# Input: the cleaned VA medical facilities GeoJSON; output: directory for the weighted file.
input_path = "/geoJSON/cleaned/VA_Medical_Facilities_Cleaned.geojson"
output_dir = "/geoJSON/cleaned_weighted"

# Ensure output directory exists (exist_ok=True makes this a no-op if it is already there)
os.makedirs(output_dir, exist_ok=True)

# Process and save the dataset
# NOTE: process_and_save_weighted_geojson is redefined by several sections of this file;
# this call runs whichever definition was executed most recently.
process_and_save_weighted_geojson(input_path, output_dir)


## Industrial

### Fortune 500 Headquarters
**Dataset: Fortune 500 Corporate Headquarters -** https://hifld-geoplatform.hub.arcgis.com/datasets/geoplatform::fortune-500-corporate-headquarters/about

In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import time
import os

# Step 1: Define the weight schemas

# Define the weights for quantitative attributes
# The four shares sum to 1.0. Each is multiplied by a normalised attribute value in
# calculate_fortune500_weight. This module-level name is rebound by other dataset sections.
quantitative_weights = {
    'RANK': 0.300,       # Lower ranks (e.g., 1) are more important
    'EMPLOYEES': 0.250,  # More employees indicate a larger, more significant company
    'REVENUES': 0.300,   # Higher revenue is more critical
    'PROFIT': 0.150      # Higher profit indicates financial success and importance
}

# Step 2: Calculate the weight for each row
def calculate_fortune500_weight(row, max_employees, max_revenues, max_profit):
    """Return the weight of one Fortune 500 record, rounded to 4 decimals.

    RANK is scored as (500 - rank) / 499, so rank 1 earns the full RANK
    share. EMPLOYEES, REVENUES and PROFIT are scored as a fraction of the
    supplied dataset maxima (each maximum is floored at 1). Missing, null or
    non-positive values contribute nothing, and the total is clamped at 0.0.
    """
    def _value_or_zero(field):
        # Missing and null attributes are treated as 0
        raw = row.get(field, 0)
        return raw if pd.notna(raw) else 0

    rank = _value_or_zero('RANK')
    employees = _value_or_zero('EMPLOYEES')
    revenues = _value_or_zero('REVENUES')
    profit = _value_or_zero('PROFIT')

    # Rank: a smaller number is better, hence the inverted scale
    if rank > 0:
        rank_part = ((500 - rank) / 499) * quantitative_weights['RANK']
    else:
        rank_part = 0

    # Size measures: fraction of the dataset maximum
    if employees > 0:
        employees_part = (employees / max(1, max_employees)) * quantitative_weights['EMPLOYEES']
    else:
        employees_part = 0

    if revenues > 0:
        revenues_part = (revenues / max(1, max_revenues)) * quantitative_weights['REVENUES']
    else:
        revenues_part = 0

    if profit > 0:
        profit_part = (profit / max(1, max_profit)) * quantitative_weights['PROFIT']
    else:
        profit_part = 0

    combined = rank_part + employees_part + revenues_part + profit_part

    # Never return a negative weight
    return round(max(0.0, combined), 4)

# Step 3: Apply the weights to the dataset
def apply_weights_to_fortune500_dataset(gdf):
    """Score every company and return only 'Weight' and 'geometry'.

    The NaN-ignoring maxima of EMPLOYEES, REVENUES and PROFIT are computed
    once and passed to calculate_fortune500_weight for scaling. A 'Weight'
    column is added to ``gdf`` in place.
    """
    print("Starting to calculate weights for each company...")
    began = time.time()

    # Dataset-wide maxima, in the positional order the row function expects
    scaling_maxima = (
        np.nanmax(gdf['EMPLOYEES']),
        np.nanmax(gdf['REVENUES']),
        np.nanmax(gdf['PROFIT']),
    )

    # One calculate_fortune500_weight call per row
    gdf['Weight'] = gdf.apply(calculate_fortune500_weight, axis=1, args=scaling_maxima)

    print(f"Weights calculated. Time elapsed: {time.time() - began:.2f} seconds.")

    # Discard every attribute except the weight and the geometry
    return gdf[['Weight', 'geometry']]

# Step 4: Process and save the dataset with weights
def process_and_save_weighted_geojson(input_path, output_dir):
    """Load the Fortune 500 GeoJSON, weight it, save it, and report the size change.

    The result is written to ``output_dir`` using the input's base name with
    ".geojson" replaced by "_weighted_cleaned.geojson".
    """
    def _mb(path):
        # On-disk size converted from bytes to MB
        return os.path.getsize(path) / (1024 * 1024)

    print(f"Loading dataset from {input_path}...")
    companies = gpd.read_file(input_path)
    print("Dataset loaded successfully.")

    original_mb = _mb(input_path)
    print(f"Initial file size: {original_mb:.2f} MB")

    # Score rows and keep only Weight + geometry
    weighted_companies = apply_weights_to_fortune500_dataset(companies)

    # Destination file name is derived from the input file name
    renamed = os.path.basename(input_path).replace(".geojson", "_weighted_cleaned.geojson")
    output_path = os.path.join(output_dir, renamed)

    print(f"Saving weighted dataset to {output_path}...")
    weighted_companies.to_file(output_path, driver='GeoJSON')

    written_mb = _mb(output_path)
    print(f"Final file size: {written_mb:.2f} MB")

    # Shrinkage in MB and percent; an empty input reports 0%
    shrink_mb = original_mb - written_mb
    if original_mb > 0:
        shrink_pct = (shrink_mb / original_mb) * 100
    else:
        shrink_pct = 0
    print(f"File size reduced by: {shrink_mb:.2f} MB ({shrink_pct:.2f}% reduction).")

# Define paths
# Input: the cleaned Fortune 500 headquarters GeoJSON; output: directory for the weighted file.
input_path = "/geoJSON/cleaned/Fortune_500_HQ_Cleaned.geojson"
output_dir = "/geoJSON/cleaned_weighted"

# Ensure output directory exists (exist_ok=True makes this a no-op if it is already there)
os.makedirs(output_dir, exist_ok=True)

# Process and save the dataset
# NOTE: process_and_save_weighted_geojson is redefined by several sections of this file;
# this call runs whichever definition was executed most recently.
process_and_save_weighted_geojson(input_path, output_dir)


### Manufacturing
**Dataset: General Manufacturing Facilities -** https://hifld-geoplatform.hub.arcgis.com/datasets/geoplatform::general-manufacturing-facilities/about

In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import time
import os
from tqdm import tqdm

# Step 1: Define the weight schemas

# Define qualitative attribute weights
# Each table maps an attribute value to its weight; values not listed score 0.0
# in calculate_manufacturing_weight.
product_weights = {
    'NEWSPAPER PUBLISHING': 0.400,
    'COMMERCIAL PRINTING': 0.350,
    'READY-MIXED CONCRETE': 0.250
}

# The SIC/NAICS tables are keyed by integers; codes stored as strings will not match.
sic_weights = {
    3089: 0.400,
    3599: 0.350,
    2759: 0.250
}

sic2_weights = {
    3599: 0.450,
    3089: 0.350,
    3499: 0.200
}

sic3_weights = {
    3599: 0.600,
    3089: 0.400
}

# NOTE(review): keys 3 and 1 are not four-digit codes like the other SIC keys — confirm.
sic4_weights = {
    3: 0.500,
    1: 0.300,
    3599: 0.200
}

naics_weights = {
    332710: 0.600,
    323119: 0.400
}

naics_descr_weights = {
    'NOT AVAILABLE': 0.200,
    'MACHINE SHOPS': 0.400,
    'OTHER COMMERCIAL PRINTING': 0.400
}

# Define overall importance grades for each qualitative property
# The seven grades sum to 1.0; each attribute weight is multiplied by its grade.
# This module-level name is rebound by every dataset section of this file.
qualitative_importance_grades = {
    'PRODUCT': 0.200,
    'SIC': 0.150,
    'SIC2': 0.150,
    'SIC3': 0.100,
    'SIC4': 0.100,
    'NAICS': 0.150,
    'NAICSDESCR': 0.150
}

# Define quantitative attribute weights
# Added on top of the qualitative total, so a facility's weight can exceed 1.0.
quantitative_weights = {
    'EMP': 0.500  # The number of employees is very important
}

# Step 2: Calculate the weight for each row

def calculate_manufacturing_weight(row, max_emp):
    """Return the weight of one manufacturing-facility record, rounded to 4 decimals.

    Seven graded qualitative lookups (PRODUCT, SIC, SIC2, SIC3, SIC4, NAICS,
    NAICSDESCR) are accumulated, then an employee score ``EMP / max_emp``
    scaled by the EMP weight is added. Missing attributes default to '' (or
    0 for EMP), unlisted values contribute 0.0, and the total is clamped at
    0.0.
    """
    # (attribute, lookup table) pairs in the order they are accumulated
    graded_tables = (
        ('PRODUCT', product_weights),
        ('SIC', sic_weights),
        ('SIC2', sic2_weights),
        ('SIC3', sic3_weights),
        ('SIC4', sic4_weights),
        ('NAICS', naics_weights),
        ('NAICSDESCR', naics_descr_weights),
    )

    score = 0.0
    for attribute, table in graded_tables:
        score += table.get(row.get(attribute, ''), 0.0) * qualitative_importance_grades[attribute]

    # Employee count: null or non-positive values contribute nothing
    raw_emp = row.get('EMP', 0)
    emp = raw_emp if pd.notna(raw_emp) else 0
    if emp > 0:
        score += (emp / max_emp) * quantitative_weights['EMP']
    else:
        score += 0

    # Never return a negative weight
    return round(max(0.0, score), 4)

# Step 3: Apply the weights to the dataset

def apply_weights_to_manufacturing_dataset(gdf):
    """Score every facility in ``gdf`` and return only Weight and geometry.

    A 'Weight' column is written onto the frame that was passed in, computed
    row by row with ``calculate_manufacturing_weight``. Elapsed time is
    printed.

    Args:
        gdf: GeoDataFrame of manufacturing facilities. The 'EMP' column is
            optional; without it the employee term is 0 for every row.

    Returns:
        ``gdf[['Weight', 'geometry']]``.
    """
    print("Starting to calculate weights for each manufacturing facility...")
    start_time = time.time()

    # Get the maximum EMP value for scaling.
    # Coerce to numbers first so a text-typed column cannot break max(); a
    # missing or all-null column falls back to 1, which also prevents
    # division by zero in the row function.
    if 'EMP' in gdf.columns:
        largest_emp = pd.to_numeric(gdf['EMP'], errors='coerce').max()
    else:
        largest_emp = float('nan')
    max_emp = 1 if pd.isna(largest_emp) else max(1, largest_emp)

    # Apply the weight calculation function to each row
    gdf['Weight'] = gdf.apply(calculate_manufacturing_weight, axis=1, args=(max_emp,))

    end_time = time.time()
    elapsed_time = end_time - start_time
    print(f"Weights calculated. Time elapsed: {elapsed_time:.2f} seconds.")

    # Keep only 'Weight' and 'geometry' columns
    gdf = gdf[['Weight', 'geometry']]

    return gdf

# Step 4: Process and save the dataset with weights

def process_and_save_weighted_geojson(input_path, output_dir):
    """Read the manufacturing GeoJSON, weight it and write the weighted copy.

    The file at ``input_path`` is loaded with GeoPandas, passed through
    ``apply_weights_to_manufacturing_dataset`` and saved in ``output_dir`` as
    ``<input name>_weighted_cleaned.geojson``. The file sizes before and
    after are printed in MB.

    Args:
        input_path: Path of the GeoJSON file to read.
        output_dir: Directory that receives the output; created when absent.
    """
    # Load the dataset
    print(f"Loading dataset from {input_path}...")
    gdf = gpd.read_file(input_path)
    print("Dataset loaded successfully.")

    # Measure initial file size
    initial_size = os.path.getsize(input_path) / (1024 * 1024)  # Convert to MB
    print(f"Initial file size: {initial_size:.2f} MB")

    # Apply weights
    gdf_weighted = apply_weights_to_manufacturing_dataset(gdf)

    # Construct the output path from the input's stem rather than with
    # str.replace(): replace() leaves the name untouched when the input does
    # not contain ".geojson" (e.g. ".json" or ".GEOJSON"), so the output would
    # silently reuse the input's own file name.
    stem = os.path.splitext(os.path.basename(input_path))[0]
    output_file_name = f"{stem}_weighted_cleaned.geojson"
    output_path = os.path.join(output_dir, output_file_name)

    # Create the target directory here so the function does not depend on the
    # caller having done it first
    os.makedirs(output_dir, exist_ok=True)

    # Save the weighted dataset
    print(f"Saving weighted dataset to {output_path}...")
    gdf_weighted.to_file(output_path, driver='GeoJSON')

    # Measure final file size
    final_size = os.path.getsize(output_path) / (1024 * 1024)  # Convert to MB
    print(f"Final file size: {final_size:.2f} MB")

    # Calculate file size reduction
    reduction = initial_size - final_size
    reduction_percentage = (reduction / initial_size) * 100 if initial_size > 0 else 0
    print(f"File size reduced by: {reduction:.2f} MB ({reduction_percentage:.2f}% reduction).")

# Define paths
# input_path: cleaned source GeoJSON; output_dir: folder for the weighted copy
input_path = "/geoJSON/cleaned/Manufacturing_Facilities_Cleaned.geojson"
output_dir = "/geoJSON/cleaned_weighted"

# Ensure output directory exists
# (exist_ok=True makes this a no-op when the directory is already there)
os.makedirs(output_dir, exist_ok=True)

# Process and save the dataset
# NOTE: process_and_save_weighted_geojson is redefined by several cells in this
# file; this call uses whichever definition was executed last.
process_and_save_weighted_geojson(input_path, output_dir)


## Military

### Military Installations
**Dataset: Military Installations, Ranges, and Training Areas (MIRTA) DoD Sites - Points -** https://hifld-geoplatform.hub.arcgis.com/datasets/geoplatform::military-installations-ranges-and-training-areas-mirta-dod-sites-points/about

In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import time
import os

# Step 1: Define the weight schemas

# Define qualitative attribute weights
site_reporting_component_weights = {
    'usn': 0.250,
    'armyNationalGuard': 0.230,
    'usaf': 0.200,
    'usa': 0.150,
    'airNationalGuard': 0.100,
    # Bucket for every component that has no entry of its own
    'Summary for Smaller Components': 0.070
}

site_operational_status_weights = {
    # NOTE(review): 5.000 is out of scale with every other weight in this
    # cell (the remaining statuses total 0.100) and lets a record's weight
    # exceed 1. It looks like a typo for 0.500 or 0.900 - confirm before use.
    'act': 5.000,
    'clsd': 0.050,
    'care': 0.040,
    # Bucket for every status that has no entry of its own
    'Summary for Other Statuses': 0.010
}

is_joint_base_weights = {
    'Yes': 0.600,
    'No': 0.400
}

is_firrma_site_weights = {
    'Yes': 0.600,
    'No': 0.400
}

is_cui_weights = {
    'Yes': 0.300,
    'No': 0.700
}

# Define overall importance grades for each qualitative property
# (each grade multiplies the per-value weight of its attribute)
qualitative_importance_grades = {
    'siteReportingComponent': 0.300,
    'siteOperationalStatus': 0.400,
    'isJointBase': 0.100,
    'isFirrmaSite': 0.100,
    'isCui': 0.100
}

# Step 2: Calculate the weight for each row

def calculate_military_installation_weight(row):
    """Return the importance weight of one military installation record.

    Each of the five attributes is looked up in its weight table and scaled
    by its importance grade; the five results are added.

    * A value with its own table entry uses that entry.
    * A non-empty text value without an entry uses the table's
      'Summary for ...' bucket when the table has one. Previously such values
      scored 0.0, so the bucket weights could never be reached.
    * The Yes/No flags are also matched case-insensitively ('yes', 'NO').
    * Missing, null and empty values score 0.0.

    Args:
        row: Record exposing ``.get`` (a DataFrame row or a dict).

    Returns:
        Non-negative float rounded to 4 decimal places.
    """
    def attribute_score(field, table, bucket_key=None, fold_case=False):
        value = row.get(field, '')
        weight = table.get(value)
        # Only real text gets the fallbacks; None, NaN and '' stay at 0.0.
        if weight is None and isinstance(value, str) and value.strip():
            if fold_case:
                weight = table.get(value.strip().capitalize())
            if weight is None and bucket_key is not None:
                weight = table[bucket_key]
        if weight is None:
            weight = 0.0
        return weight * qualitative_importance_grades[field]

    site_reporting_weight = attribute_score(
        'siteReportingComponent', site_reporting_component_weights,
        bucket_key='Summary for Smaller Components')
    operational_status_weight = attribute_score(
        'siteOperationalStatus', site_operational_status_weights,
        bucket_key='Summary for Other Statuses')
    joint_base_weight = attribute_score('isJointBase', is_joint_base_weights, fold_case=True)
    firrma_site_weight = attribute_score('isFirrmaSite', is_firrma_site_weights, fold_case=True)
    cui_weight = attribute_score('isCui', is_cui_weights, fold_case=True)

    # Combine qualitative weights and round to 4 decimal places
    total_weight = round((
        site_reporting_weight +
        operational_status_weight +
        joint_base_weight +
        firrma_site_weight +
        cui_weight
    ), 4)

    # Ensure the weight is positive
    return max(total_weight, 0.0)

# Step 3: Apply the weights to the dataset

def apply_weights_to_military_installations_dataset(gdf):
    """Score every installation and return the Weight/geometry columns.

    The 'Weight' column is written onto the frame that was passed in, one
    value per row from ``calculate_military_installation_weight``. The time
    taken is printed.

    Args:
        gdf: GeoDataFrame of military installations.

    Returns:
        ``gdf`` restricted to its 'Weight' and 'geometry' columns.
    """
    print("Starting to calculate weights for each military installation...")
    started_at = time.time()

    # Row-wise scoring
    row_weights = gdf.apply(calculate_military_installation_weight, axis=1)
    gdf['Weight'] = row_weights

    elapsed_time = time.time() - started_at
    print(f"Weights calculated. Time elapsed: {elapsed_time:.2f} seconds.")

    # Everything except the score and the geometry is dropped
    kept_columns = ['Weight', 'geometry']
    return gdf[kept_columns]

# Step 4: Process and save the dataset with weights

def process_and_save_weighted_geojson(input_path, output_dir):
    """Read the military installations GeoJSON, weight it and save the result.

    The file at ``input_path`` is loaded with GeoPandas, passed through
    ``apply_weights_to_military_installations_dataset`` and saved in
    ``output_dir`` as ``<input name>_weighted_cleaned.geojson``. The file
    sizes before and after are printed in MB.

    Args:
        input_path: Path of the GeoJSON file to read.
        output_dir: Directory that receives the output; created when absent.
    """
    # Load the dataset
    print(f"Loading dataset from {input_path}...")
    gdf = gpd.read_file(input_path)
    print("Dataset loaded successfully.")

    # Measure initial file size
    initial_size = os.path.getsize(input_path) / (1024 * 1024)  # Convert to MB
    print(f"Initial file size: {initial_size:.2f} MB")

    # Apply weights
    gdf_weighted = apply_weights_to_military_installations_dataset(gdf)

    # Construct the output path from the input's stem rather than with
    # str.replace(): replace() leaves the name untouched when the input does
    # not contain ".geojson" (e.g. ".json" or ".GEOJSON"), so the output would
    # silently reuse the input's own file name.
    stem = os.path.splitext(os.path.basename(input_path))[0]
    output_file_name = f"{stem}_weighted_cleaned.geojson"
    output_path = os.path.join(output_dir, output_file_name)

    # Create the target directory here so the function does not depend on the
    # caller having done it first
    os.makedirs(output_dir, exist_ok=True)

    # Save the weighted dataset
    print(f"Saving weighted dataset to {output_path}...")
    gdf_weighted.to_file(output_path, driver='GeoJSON')

    # Measure final file size
    final_size = os.path.getsize(output_path) / (1024 * 1024)  # Convert to MB
    print(f"Final file size: {final_size:.2f} MB")

    # Calculate file size reduction
    reduction = initial_size - final_size
    reduction_percentage = (reduction / initial_size) * 100 if initial_size > 0 else 0
    print(f"File size reduced by: {reduction:.2f} MB ({reduction_percentage:.2f}% reduction).")

# Define paths
# input_path: cleaned source GeoJSON; output_dir: folder for the weighted copy
input_path = "/geoJSON/cleaned/Military_Installations_Cleaned.geojson"
output_dir = "/geoJSON/cleaned_weighted"

# Ensure output directory exists
# (exist_ok=True makes this a no-op when the directory is already there)
os.makedirs(output_dir, exist_ok=True)

# Process and save the dataset
# NOTE: process_and_save_weighted_geojson is redefined by several cells in this
# file; this call uses whichever definition was executed last.
process_and_save_weighted_geojson(input_path, output_dir)


## Mines

### Agricultural Minerals Operations
**Dataset: Agricultural Minerals Operations -** https://hifld-geoplatform.hub.arcgis.com/datasets/geoplatform::agricultural-minerals-operations/about

In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import time
import os

# Step 1: Define the weight schemas

# Define qualitative attribute weights
commodity_weights = {
    'SULFUR': 0.400,
    'PEAT': 0.200,
    'PHOSPHATE': 0.150,
    'VERMICULITE': 0.100,
    'MAGNESIUM COMPOUNDS': 0.075,
    'POTASH': 0.075
}

# Per-code weights for PLANT_MIN
# NOTE(review): the codes presumably mean P = plant, M = mine, M/P = both -
# confirm against the dataset's field description.
plant_min_weights = {
    'P': 0.600,
    'M': 0.260,
    'M/P': 0.140
}

# Importance grade of each attribute; multiplies the per-value weight
qualitative_importance_grades = {
    'COMMODITY': 0.600,
    'PLANT_MIN': 0.400
}

# Step 2: Calculate the weight for each row
def calculate_mineral_weight(row):
    """Score one agricultural-minerals record.

    The record's COMMODITY and PLANT_MIN values are looked up in their weight
    tables, each result is scaled by the attribute's importance grade, and
    the two products are added. Absent, null and unlisted values score 0.0.

    Args:
        row: Record exposing ``.get`` (a DataFrame row or a dict).

    Returns:
        The combined weight, floored at 0.0 and rounded to 4 decimals.
    """
    def present_or_blank(field):
        # Absent and null values both become '' (which is not a table key)
        value = row.get(field)
        return value if pd.notna(value) else ''

    commodity_score = commodity_weights.get(present_or_blank('COMMODITY'), 0.0)
    plant_score = plant_min_weights.get(present_or_blank('PLANT_MIN'), 0.0)

    combined = (
        commodity_score * qualitative_importance_grades['COMMODITY']
        + plant_score * qualitative_importance_grades['PLANT_MIN']
    )

    # Floor at zero, then round
    return round(combined if combined > 0.0 else 0.0, 4)

# Step 3: Apply the weights to the dataset
def apply_weights_to_mineral_operations_dataset(gdf):
    """Score every operation and return the Weight/geometry columns.

    Writes a 'Weight' column onto the frame that was passed in, computed per
    row by ``calculate_mineral_weight``, and prints the time taken.

    Args:
        gdf: GeoDataFrame of agricultural minerals operations.

    Returns:
        ``gdf`` restricted to its 'Weight' and 'geometry' columns.
    """
    print("Starting to calculate weights for each operation...")
    clock_start = time.time()

    # One score per row
    scores = gdf.apply(calculate_mineral_weight, axis=1)
    gdf['Weight'] = scores

    elapsed_time = time.time() - clock_start
    print(f"Weights calculated. Time elapsed: {elapsed_time:.2f} seconds.")

    # Drop every column except the score and the geometry
    return gdf[['Weight', 'geometry']]

# Step 4: Process and save the dataset with weights
def process_and_save_weighted_geojson(input_path, output_dir):
    """Read the agricultural minerals GeoJSON, weight it and save the result.

    The file at ``input_path`` is loaded with GeoPandas, passed through
    ``apply_weights_to_mineral_operations_dataset`` and saved in
    ``output_dir`` as ``<input name>_weighted_cleaned.geojson``. The file
    sizes before and after are printed in MB.

    Args:
        input_path: Path of the GeoJSON file to read.
        output_dir: Directory that receives the output; created when absent.
    """
    # Load the dataset
    print(f"Loading dataset from {input_path}...")
    gdf = gpd.read_file(input_path)
    print("Dataset loaded successfully.")

    # Measure initial file size
    initial_size = os.path.getsize(input_path) / (1024 * 1024)  # Convert to MB
    print(f"Initial file size: {initial_size:.2f} MB")

    # Apply weights
    gdf_weighted = apply_weights_to_mineral_operations_dataset(gdf)

    # Construct the output path from the input's stem rather than with
    # str.replace(): replace() leaves the name untouched when the input does
    # not contain ".geojson" (e.g. ".json" or ".GEOJSON"), so the output would
    # silently reuse the input's own file name.
    stem = os.path.splitext(os.path.basename(input_path))[0]
    output_file_name = f"{stem}_weighted_cleaned.geojson"
    output_path = os.path.join(output_dir, output_file_name)

    # Create the target directory here so the function does not depend on the
    # caller having done it first
    os.makedirs(output_dir, exist_ok=True)

    # Save the weighted dataset
    print(f"Saving weighted dataset to {output_path}...")
    gdf_weighted.to_file(output_path, driver='GeoJSON')

    # Measure final file size
    final_size = os.path.getsize(output_path) / (1024 * 1024)  # Convert to MB
    print(f"Final file size: {final_size:.2f} MB")

    # Calculate file size reduction
    reduction = initial_size - final_size
    reduction_percentage = (reduction / initial_size) * 100 if initial_size > 0 else 0
    print(f"File size reduced by: {reduction:.2f} MB ({reduction_percentage:.2f}% reduction).")

# Define paths
# input_path: cleaned source GeoJSON; output_dir: folder for the weighted copy
input_path = "/geoJSON/cleaned/Agricultural_Minerals_Operations_Cleaned.geojson"
output_dir = "/geoJSON/cleaned_weighted"

# Ensure output directory exists
# (exist_ok=True makes this a no-op when the directory is already there)
os.makedirs(output_dir, exist_ok=True)

# Process and save the dataset
# NOTE: process_and_save_weighted_geojson is redefined by several cells in this
# file; this call uses whichever definition was executed last.
process_and_save_weighted_geojson(input_path, output_dir)


### Construction Minerals Operations
**Dataset: Construction Minerals Operations -** https://hifld-geoplatform.hub.arcgis.com/datasets/geoplatform::construction-minerals-operations/about

In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import time
import os

# Step 1: Define the weight schemas

# Define qualitative attribute weights for COMMODITY
commodity_weights = {
    "COMMON CLAY AND SHALE": 0.350,
    "DIMENSION STONE": 0.250,
    "CEMENT": 0.200,
    "PERLITE": 0.100,
    "GYPSUM": 0.050,
    "BALL CLAY": 0.030,
    "MICA": 0.010,
    "PUMICE": 0.010
}

# Per-code weights for PLANT_MIN
plant_min_weights = {
    "M/P": 0.400,
    "M": 0.350,
    "P": 0.250
}

# Importance grade of each attribute; multiplies the per-value weight
qualitative_importance_grades = {
    'COMMODITY': 0.650,
    'PLANT_MIN': 0.350
}

# Step 2: Calculate the weight for each row
def calculate_construction_minerals_weight(row):
    """Score one construction-minerals record.

    For COMMODITY and PLANT_MIN in turn, the record's value is looked up in
    the attribute's weight table and multiplied by the attribute's importance
    grade; the two products are added. A value that is absent from the record
    or from the table scores 0.0.

    Args:
        row: Record exposing ``.get`` (a DataFrame row or a dict).

    Returns:
        The combined weight, floored at 0.0 and rounded to 4 decimals.
    """
    attribute_tables = (
        ('COMMODITY', commodity_weights),
        ('PLANT_MIN', plant_min_weights),
    )

    # Only qualitative attributes exist for this dataset
    running_total = 0.0
    for field, table in attribute_tables:
        value_weight = table.get(row.get(field, ''), 0.0)
        running_total += value_weight * qualitative_importance_grades[field]

    floored = running_total if running_total > 0.0 else 0.0
    return round(floored, 4)

# Step 3: Apply the weights to the dataset
def apply_weights_to_construction_minerals_dataset(gdf):
    """Score every operation and return the Weight/geometry columns.

    Writes a 'Weight' column onto the frame that was passed in, computed per
    row by ``calculate_construction_minerals_weight``, and prints the time
    taken.

    Args:
        gdf: GeoDataFrame of construction minerals operations.

    Returns:
        ``gdf`` restricted to its 'Weight' and 'geometry' columns.
    """
    print("Starting to calculate weights for each construction mineral operation...")
    t0 = time.time()

    # One score per row
    gdf['Weight'] = gdf.apply(calculate_construction_minerals_weight, axis=1)

    t1 = time.time()
    elapsed_time = t1 - t0
    print(f"Weights calculated. Time elapsed: {elapsed_time:.2f} seconds.")

    # Only the score and the geometry are passed on
    weight_and_geometry = gdf[['Weight', 'geometry']]
    return weight_and_geometry

# Step 4: Process and save the dataset with weights
def process_and_save_weighted_geojson(input_path, output_dir):
    """Read the construction minerals GeoJSON, weight it and save the result.

    The file at ``input_path`` is loaded with GeoPandas, passed through
    ``apply_weights_to_construction_minerals_dataset`` and saved in
    ``output_dir`` as ``<input name>_weighted_cleaned.geojson``. The file
    sizes before and after are printed in MB.

    Args:
        input_path: Path of the GeoJSON file to read.
        output_dir: Directory that receives the output; created when absent.
    """
    # Load the dataset
    print(f"Loading dataset from {input_path}...")
    gdf = gpd.read_file(input_path)
    print("Dataset loaded successfully.")

    # Measure initial file size
    initial_size = os.path.getsize(input_path) / (1024 * 1024)  # Convert to MB
    print(f"Initial file size: {initial_size:.2f} MB")

    # Apply weights
    gdf_weighted = apply_weights_to_construction_minerals_dataset(gdf)

    # Construct the output path from the input's stem rather than with
    # str.replace(): replace() leaves the name untouched when the input does
    # not contain ".geojson" (e.g. ".json" or ".GEOJSON"), so the output would
    # silently reuse the input's own file name.
    stem = os.path.splitext(os.path.basename(input_path))[0]
    output_file_name = f"{stem}_weighted_cleaned.geojson"
    output_path = os.path.join(output_dir, output_file_name)

    # Create the target directory here so the function does not depend on the
    # caller having done it first
    os.makedirs(output_dir, exist_ok=True)

    # Save the weighted dataset
    print(f"Saving weighted dataset to {output_path}...")
    gdf_weighted.to_file(output_path, driver='GeoJSON')

    # Measure final file size
    final_size = os.path.getsize(output_path) / (1024 * 1024)  # Convert to MB
    print(f"Final file size: {final_size:.2f} MB")

    # Calculate file size reduction
    reduction = initial_size - final_size
    reduction_percentage = (reduction / initial_size) * 100 if initial_size > 0 else 0
    print(f"File size reduced by: {reduction:.2f} MB ({reduction_percentage:.2f}% reduction).")

# Define paths
# input_path: cleaned source GeoJSON; output_dir: folder for the weighted copy
input_path = "/geoJSON/cleaned/Construction_Minerals_Operations_Cleaned.geojson"
output_dir = "/geoJSON/cleaned_weighted"

# Ensure output directory exists
# (exist_ok=True makes this a no-op when the directory is already there)
os.makedirs(output_dir, exist_ok=True)

# Process and save the dataset
# NOTE: process_and_save_weighted_geojson is redefined by several cells in this
# file; this call uses whichever definition was executed last.
process_and_save_weighted_geojson(input_path, output_dir)


### Ferrous Metal Mines
**Dataset: Ferrous Metal Mines -** https://hifld-geoplatform.hub.arcgis.com/datasets/geoplatform::ferrous-metal-mines/about

In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import time
import os

# Step 1: Define the weight schemas

# Define qualitative attribute weights
commodity_weights = {
    'Iron': 0.350,
    'Molybdenum': 0.300,
    'Cobalt': 0.200,
    'Rhenium': 0.100,
    'Nickel': 0.050
}

# Per-code weights for PLANT_MINE
plant_mine_weights = {
    'M/P': 0.600,
    'M': 0.300,
    'P': 0.100
}

# Importance grade of each attribute; multiplies the per-value weight
qualitative_importance_grades = {
    'COMMODITY': 0.700,
    'PLANT_MINE': 0.300
}

# Step 2: Calculate the weight for each row
def calculate_mine_weight(row):
    """Score one ferrous-metal mine record.

    COMMODITY and PLANT_MINE are looked up in their weight tables, each
    result is scaled by the attribute's importance grade, and the two
    products are added. Absent, null and unlisted values score 0.0.

    Args:
        row: Record exposing ``.get`` (a DataFrame row or a dict).

    Returns:
        The combined weight, floored at 0.0 and rounded to 4 decimals.
    """
    # Read both fields, replacing nulls with '' (which is not a table key)
    commodity = row.get('COMMODITY', '')
    if not pd.notna(commodity):
        commodity = ''

    plant_mine = row.get('PLANT_MINE', '')
    if not pd.notna(plant_mine):
        plant_mine = ''

    # Table weight times importance grade, per attribute
    commodity_part = commodity_weights.get(commodity, 0.0) * qualitative_importance_grades['COMMODITY']
    plant_mine_part = plant_mine_weights.get(plant_mine, 0.0) * qualitative_importance_grades['PLANT_MINE']

    combined = commodity_part + plant_mine_part
    if not combined > 0.0:
        combined = 0.0
    return round(combined, 4)

# Step 3: Apply the weights to the dataset
def apply_weights_to_mines_dataset(gdf):
    """Score every mine and return the Weight/geometry columns.

    Writes a 'Weight' column onto the frame that was passed in, computed per
    row by ``calculate_mine_weight``, and prints the time taken.

    Args:
        gdf: GeoDataFrame of ferrous metal mines.

    Returns:
        ``gdf`` restricted to its 'Weight' and 'geometry' columns.
    """
    print("Starting to calculate weights for each mine...")
    began = time.time()

    # One score per row
    per_row_weight = gdf.apply(calculate_mine_weight, axis=1)
    gdf['Weight'] = per_row_weight

    elapsed_time = time.time() - began
    print(f"Weights calculated. Time elapsed: {elapsed_time:.2f} seconds.")

    # Drop every column except the score and the geometry
    output_columns = ['Weight', 'geometry']
    return gdf[output_columns]

# Step 4: Process and save the dataset with weights
def process_and_save_weighted_geojson(input_path, output_dir):
    """Read the ferrous metal mines GeoJSON, weight it and save the result.

    The file at ``input_path`` is loaded with GeoPandas, passed through
    ``apply_weights_to_mines_dataset`` and saved in ``output_dir`` as
    ``<input name>_weighted_cleaned.geojson``. The file sizes before and
    after are printed in MB.

    Args:
        input_path: Path of the GeoJSON file to read.
        output_dir: Directory that receives the output; created when absent.
    """
    # Load the dataset
    print(f"Loading dataset from {input_path}...")
    gdf = gpd.read_file(input_path)
    print("Dataset loaded successfully.")

    # Measure initial file size
    initial_size = os.path.getsize(input_path) / (1024 * 1024)  # Convert to MB
    print(f"Initial file size: {initial_size:.2f} MB")

    # Apply weights
    gdf_weighted = apply_weights_to_mines_dataset(gdf)

    # Construct the output path from the input's stem rather than with
    # str.replace(): replace() leaves the name untouched when the input does
    # not contain ".geojson" (e.g. ".json" or ".GEOJSON"), so the output would
    # silently reuse the input's own file name.
    stem = os.path.splitext(os.path.basename(input_path))[0]
    output_file_name = f"{stem}_weighted_cleaned.geojson"
    output_path = os.path.join(output_dir, output_file_name)

    # Create the target directory here so the function does not depend on the
    # caller having done it first
    os.makedirs(output_dir, exist_ok=True)

    # Save the weighted dataset
    print(f"Saving weighted dataset to {output_path}...")
    gdf_weighted.to_file(output_path, driver='GeoJSON')

    # Measure final file size
    final_size = os.path.getsize(output_path) / (1024 * 1024)  # Convert to MB
    print(f"Final file size: {final_size:.2f} MB")

    # Calculate file size reduction
    reduction = initial_size - final_size
    reduction_percentage = (reduction / initial_size) * 100 if initial_size > 0 else 0
    print(f"File size reduced by: {reduction:.2f} MB ({reduction_percentage:.2f}% reduction).")

# Define paths
# input_path: cleaned source GeoJSON; output_dir: folder for the weighted copy
input_path = "/geoJSON/cleaned/Ferrous_Metal_Mines_Cleaned.geojson"
output_dir = "/geoJSON/cleaned_weighted"

# Ensure output directory exists
# (exist_ok=True makes this a no-op when the directory is already there)
os.makedirs(output_dir, exist_ok=True)

# Process and save the dataset
# NOTE: process_and_save_weighted_geojson is redefined by several cells in this
# file; this call uses whichever definition was executed last.
process_and_save_weighted_geojson(input_path, output_dir)


### Ferrous Metal Processing Plants
**Dataset: Ferrous Metal Processing Plants -** https://hifld-geoplatform.hub.arcgis.com/datasets/geoplatform::ferrous-metal-processing-plants/about

In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import time
import os

# Step 1: Define the weight schemas

# Define qualitative attribute weights
commodity_weights = {
    'Iron': 0.300,
    'Molybdenum': 0.250,
    'Silicon': 0.200,
    'Cobalt': 0.150,
    'Rhenium': 0.050,
    'Columbium and(or) tantalum': 0.030,
    'Nickel': 0.030,
    'Tungsten': 0.030,
    'Chromium': 0.020,
    'Columbium': 0.020,
    'Manganese': 0.020
}

# Per-code weights for PLANT_MINE
plant_mine_weights = {
    'P': 0.500,
    'M/P': 0.350,
    'M': 0.150
}

# Importance grade of each attribute; multiplies the per-value weight
qualitative_importance_grades = {
    'COMMODITY': 0.650,
    'PLANT_MINE': 0.350
}

# Step 2: Calculate the weight for each row
def calculate_ferrous_plant_weight(row):
    """Score one ferrous-metal processing plant record.

    COMMODITY and PLANT_MINE are looked up in their weight tables, each
    result is scaled by the attribute's importance grade, and the two
    products are added. Absent, null and unlisted values score 0.0.

    Args:
        row: Record exposing ``.get`` (a DataFrame row or a dict).

    Returns:
        The combined weight, floored at 0.0 and rounded to 4 decimals.
    """
    def graded(field, table):
        # Nulls are replaced by '' (not a table key) before the lookup
        raw = row.get(field, '')
        key = raw if pd.notna(raw) else ''
        return table.get(key, 0.0) * qualitative_importance_grades[field]

    from_commodity = graded('COMMODITY', commodity_weights)
    from_plant_mine = graded('PLANT_MINE', plant_mine_weights)
    combined = from_commodity + from_plant_mine

    # Floor at zero, then round
    return round(combined if combined > 0.0 else 0.0, 4)

# Step 3: Apply the weights to the dataset
def apply_weights_to_ferrous_plants_dataset(gdf):
    """Add a 'Weight' column to ``gdf`` and return the same frame.

    The weights come from ``calculate_ferrous_plant_weight``, applied row by
    row. All original columns are kept. The time taken is printed.

    Args:
        gdf: GeoDataFrame of ferrous metal processing plants.

    Returns:
        The frame that was passed in, now carrying the 'Weight' column.
    """
    print("Starting to calculate weights for each ferrous metal processing plant...")
    clock_start = time.time()

    # One score per row
    plant_weights = gdf.apply(calculate_ferrous_plant_weight, axis=1)
    gdf['Weight'] = plant_weights

    elapsed_time = time.time() - clock_start
    print(f"Weights calculated. Time elapsed: {elapsed_time:.2f} seconds.")

    return gdf

# Step 4: Process and save the dataset with weights
def process_and_save_weighted_geojson(input_path, output_dir):
    """Read the ferrous processing plants GeoJSON, weight it and save it.

    The file at ``input_path`` is loaded with GeoPandas, passed through
    ``apply_weights_to_ferrous_plants_dataset``, reduced to its 'Weight' and
    'geometry' columns and saved in ``output_dir`` as
    ``<input name>_weighted_cleaned.geojson``. The file sizes before and
    after are printed in MB.

    Args:
        input_path: Path of the GeoJSON file to read.
        output_dir: Directory that receives the output; created when absent.
    """
    # Load the dataset
    print(f"Loading dataset from {input_path}...")
    gdf = gpd.read_file(input_path)
    print("Dataset loaded successfully.")

    # Measure initial file size
    initial_size = os.path.getsize(input_path) / (1024 * 1024)  # Convert to MB
    print(f"Initial file size: {initial_size:.2f} MB")

    # Apply weights
    gdf_weighted = apply_weights_to_ferrous_plants_dataset(gdf)

    # Drop all fields except 'Weight' and 'geometry'
    gdf_weighted = gdf_weighted[['Weight', 'geometry']]

    # Construct the output path from the input's stem rather than with
    # str.replace(): replace() leaves the name untouched when the input does
    # not contain ".geojson" (e.g. ".json" or ".GEOJSON"), so the output would
    # silently reuse the input's own file name.
    stem = os.path.splitext(os.path.basename(input_path))[0]
    output_file_name = f"{stem}_weighted_cleaned.geojson"
    output_path = os.path.join(output_dir, output_file_name)

    # Create the target directory here so the function does not depend on the
    # caller having done it first
    os.makedirs(output_dir, exist_ok=True)

    # Save the weighted dataset
    print(f"Saving weighted dataset to {output_path}...")
    gdf_weighted.to_file(output_path, driver='GeoJSON')

    # Measure final file size
    final_size = os.path.getsize(output_path) / (1024 * 1024)  # Convert to MB
    print(f"Final file size: {final_size:.2f} MB")

    # Calculate file size reduction
    reduction = initial_size - final_size
    reduction_percentage = (reduction / initial_size) * 100 if initial_size > 0 else 0
    print(f"File size reduced by: {reduction:.2f} MB ({reduction_percentage:.2f}% reduction).")

# Define paths
# input_path: cleaned source GeoJSON; output_dir: folder for the weighted copy
input_path = "/geoJSON/cleaned/Ferrous_Metal_Processing_Plants_Cleaned.geojson"
output_dir = "/geoJSON/cleaned_weighted"

# Ensure output directory exists
# (exist_ok=True makes this a no-op when the directory is already there)
os.makedirs(output_dir, exist_ok=True)

# Process and save the dataset
# NOTE: process_and_save_weighted_geojson is redefined by several cells in this
# file; this call uses whichever definition was executed last.
process_and_save_weighted_geojson(input_path, output_dir)


### Mines and Mineral Resources
**Dataset: Mines and Mineral Resources -** https://hifld-geoplatform.hub.arcgis.com/datasets/geoplatform::mines-and-mineral-resources/about

In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import time
import os

# Step 1: Define the weight schemas

# Define qualitative attribute weights
naicscode_weights = {
    '212111': 0.300,
    '212112': 0.250,
    '212311': 0.200,
    '212325': 0.150,
    '212399': 0.100
}

# Per-code weights for MINE_TYPE; 'Others' is the fallback for unlisted codes
mine_type_weights = {
    '12': 0.250,
    '11': 0.200,
    '06': 0.150,
    '05': 0.150,
    '04': 0.100,
    'Others': 0.150
}

# Per-code weights for STAT_CODE; 'Others' is the fallback for unlisted codes
stat_code_weights = {
    'A': 0.500,
    '1': 0.250,
    '2': 0.150,
    'Others': 0.100
}

# Importance grade of each attribute; multiplies the per-value weight
qualitative_importance_grades = {
    'NAICSCODE': 0.400,
    'MINE_TYPE': 0.350,
    'STAT_CODE': 0.250
}

# Step 2: Calculate the weight for each row
def calculate_mine_weight(row):
    """Score one mines-and-mineral-resources record.

    NAICSCODE, MINE_TYPE and STAT_CODE are each looked up in their weight
    table and scaled by the attribute's importance grade; the three products
    are added. An unlisted or absent NAICSCODE scores 0.0, whereas an
    unlisted or absent MINE_TYPE or STAT_CODE scores that table's 'Others'
    weight. The table keys are strings, so numeric codes do not match.

    Args:
        row: Record exposing ``.get`` (a DataFrame row or a dict).

    Returns:
        The combined weight, floored at 0.0 and rounded to 4 decimals.
    """
    grades = qualitative_importance_grades
    other_mine_type = mine_type_weights['Others']
    other_stat_code = stat_code_weights['Others']

    naics_part = naicscode_weights.get(row.get('NAICSCODE', 'Unknown'), 0.0) * grades['NAICSCODE']
    mine_type_part = mine_type_weights.get(row.get('MINE_TYPE', 'Others'), other_mine_type) * grades['MINE_TYPE']
    stat_code_part = stat_code_weights.get(row.get('STAT_CODE', 'Others'), other_stat_code) * grades['STAT_CODE']

    combined = naics_part + mine_type_part + stat_code_part

    if combined > 0.0:
        return round(combined, 4)
    return round(0.0, 4)

# Step 3: Apply the weights to the dataset
def apply_weights_to_mines_dataset(gdf):
    """Score every mining operation and return the Weight/geometry columns.

    Writes a 'Weight' column onto the frame that was passed in, computed per
    row by ``calculate_mine_weight``, and prints the time taken.

    Args:
        gdf: GeoDataFrame of mines and mineral resources.

    Returns:
        ``gdf`` restricted to its 'Weight' and 'geometry' columns.
    """
    print("Starting to calculate weights for each mining operation...")
    t_begin = time.time()

    # One score per row
    gdf['Weight'] = gdf.apply(calculate_mine_weight, axis=1)

    t_finish = time.time()
    elapsed_time = t_finish - t_begin
    print(f"Weights calculated. Time elapsed: {elapsed_time:.2f} seconds.")

    # Only the score and the geometry are passed on
    slimmed = gdf[['Weight', 'geometry']]
    return slimmed

# Step 4: Process and save the dataset with weights
def process_and_save_weighted_geojson(input_path, output_dir):
    """Read the mines and mineral resources GeoJSON, weight it and save it.

    The file at ``input_path`` is loaded with GeoPandas, passed through
    ``apply_weights_to_mines_dataset`` and saved in ``output_dir`` as
    ``<input name>_weighted_cleaned.geojson``. The file sizes before and
    after are printed in MB.

    Args:
        input_path: Path of the GeoJSON file to read.
        output_dir: Directory that receives the output; created when absent.
    """
    # Load the dataset
    print(f"Loading dataset from {input_path}...")
    gdf = gpd.read_file(input_path)
    print("Dataset loaded successfully.")

    # Measure initial file size
    initial_size = os.path.getsize(input_path) / (1024 * 1024)  # Convert to MB
    print(f"Initial file size: {initial_size:.2f} MB")

    # Apply weights
    gdf_weighted = apply_weights_to_mines_dataset(gdf)

    # Construct the output path from the input's stem rather than with
    # str.replace(): replace() leaves the name untouched when the input does
    # not contain ".geojson" (e.g. ".json" or ".GEOJSON"), so the output would
    # silently reuse the input's own file name.
    stem = os.path.splitext(os.path.basename(input_path))[0]
    output_file_name = f"{stem}_weighted_cleaned.geojson"
    output_path = os.path.join(output_dir, output_file_name)

    # Create the target directory here so the function does not depend on the
    # caller having done it first
    os.makedirs(output_dir, exist_ok=True)

    # Save the weighted dataset
    print(f"Saving weighted dataset to {output_path}...")
    gdf_weighted.to_file(output_path, driver='GeoJSON')

    # Measure final file size
    final_size = os.path.getsize(output_path) / (1024 * 1024)  # Convert to MB
    print(f"Final file size: {final_size:.2f} MB")

    # Calculate file size reduction
    reduction = initial_size - final_size
    reduction_percentage = (reduction / initial_size) * 100 if initial_size > 0 else 0
    print(f"File size reduced by: {reduction:.2f} MB ({reduction_percentage:.2f}% reduction).")

# Define paths
# input_path: cleaned source GeoJSON; output_dir: folder for the weighted copy
input_path = "/geoJSON/cleaned/Mines_and_Mineral_Resources_Cleaned.geojson"
output_dir = "/geoJSON/cleaned_weighted"

# Ensure output directory exists
# (exist_ok=True makes this a no-op when the directory is already there)
os.makedirs(output_dir, exist_ok=True)

# Process and save the dataset
# NOTE: process_and_save_weighted_geojson is redefined by several cells in this
# file; this call uses whichever definition was executed last.
process_and_save_weighted_geojson(input_path, output_dir)


### Nonferrous Metal Mines
**Dataset: Nonferrous Metal Mines -** https://hifld-geoplatform.hub.arcgis.com/datasets/geoplatform::nonferrous-metal-mines/explore

In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import time
import os
from tqdm import tqdm

# Step 1: Define the weight schemas

# Define qualitative attribute weights
commodity_weights = {
    'Silver': 0.300,
    'Gold': 0.250,
    'Copper': 0.200,
    'Zinc': 0.100,
    'Lead': 0.080,
    'Beryllium': 0.035,
    'Germanium': 0.035
}

# Per-code weights for PLANT_MINE
plant_mine_weights = {
    'M': 0.600,  # Mine
    'M/P': 0.250,  # Mine/Processing Plant
    'P': 0.150  # Processing Plant
}

# Importance grade of each attribute; multiplies the per-value weight
qualitative_importance_grades = {
    'COMMODITY': 0.700,
    'PLANT_MINE': 0.300
}

# Step 2: Calculate the weight for each row

def calculate_mine_weight(row):
    """Score one nonferrous-metal mine record.

    COMMODITY and PLANT_MINE are read with ``row[...]``, so both fields must
    exist on the record. Each value is looked up in its weight table
    (unlisted values score 0.0), scaled by the attribute's importance grade,
    and the two products are added.

    Args:
        row: Record indexable by field name (a DataFrame row or a dict).

    Returns:
        The combined weight, floored at 0.0 and rounded to 4 decimals.

    Raises:
        KeyError: If 'COMMODITY' or 'PLANT_MINE' is missing from ``row``.
    """
    commodity_value = row['COMMODITY']
    commodity_part = commodity_weights.get(commodity_value, 0.0)
    commodity_part = commodity_part * qualitative_importance_grades['COMMODITY']

    plant_mine_value = row['PLANT_MINE']
    plant_mine_part = plant_mine_weights.get(plant_mine_value, 0.0)
    plant_mine_part = plant_mine_part * qualitative_importance_grades['PLANT_MINE']

    # No quantitative attributes are used for this dataset
    combined = commodity_part + plant_mine_part

    floored = combined if combined > 0.0 else 0.0
    return round(floored, 4)

# Step 3: Apply the weights to the dataset and remove fields

def apply_weights_to_mines_dataset(gdf):
    """Score every mine and return the Weight/geometry columns.

    Writes a 'Weight' column onto the frame that was passed in, computed per
    row by ``calculate_mine_weight``, while a tqdm bar tracks progress. The
    time taken is printed.

    Args:
        gdf: GeoDataFrame of nonferrous metal mines.

    Returns:
        ``gdf`` restricted to its 'Weight' and 'geometry' columns.
    """
    print("Starting to calculate weights for each mine...")
    start_time = time.time()

    # The bar is sized to len(gdf), so it has to advance once per row. The
    # earlier version called update(1) a single time after all rows were
    # already done, leaving the bar at 1/len(gdf). The context manager also
    # closes the bar if scoring raises.
    with tqdm(total=len(gdf), desc="Processing rows", unit="rows") as progress_bar:
        def weigh_and_tick(row):
            weight = calculate_mine_weight(row)
            progress_bar.update(1)
            return weight

        # Apply the weight calculation function to each row
        gdf['Weight'] = gdf.apply(weigh_and_tick, axis=1)

    # Drop all columns except 'Weight' and 'geometry'
    gdf = gdf[['Weight', 'geometry']]

    end_time = time.time()
    elapsed_time = end_time - start_time
    print(f"Weights calculated. Time elapsed: {elapsed_time:.2f} seconds.")

    return gdf

# Step 4: Process and save the dataset with weights

def process_and_save_weighted_geojson(input_path, output_dir):
    """Read the nonferrous metal mines GeoJSON, weight it and save the result.

    The file at ``input_path`` is loaded with GeoPandas, passed through
    ``apply_weights_to_mines_dataset`` and saved in ``output_dir`` as
    ``<input name>_weighted_cleaned.geojson``. The file sizes before and
    after are printed in MB.

    Args:
        input_path: Path of the GeoJSON file to read.
        output_dir: Directory that receives the output; created when absent.
    """
    # Load the dataset
    print(f"Loading dataset from {input_path}...")
    gdf = gpd.read_file(input_path)
    print("Dataset loaded successfully.")

    # Measure initial file size
    initial_size = os.path.getsize(input_path) / (1024 * 1024)  # Convert to MB
    print(f"Initial file size: {initial_size:.2f} MB")

    # Apply weights
    gdf_weighted = apply_weights_to_mines_dataset(gdf)

    # Construct the output path from the input's stem rather than with
    # str.replace(): replace() leaves the name untouched when the input does
    # not contain ".geojson" (e.g. ".json" or ".GEOJSON"), so the output would
    # silently reuse the input's own file name.
    stem = os.path.splitext(os.path.basename(input_path))[0]
    output_file_name = f"{stem}_weighted_cleaned.geojson"
    output_path = os.path.join(output_dir, output_file_name)

    # Create the target directory here so the function does not depend on the
    # caller having done it first
    os.makedirs(output_dir, exist_ok=True)

    # Save the weighted dataset
    print(f"Saving weighted dataset to {output_path}...")
    gdf_weighted.to_file(output_path, driver='GeoJSON')

    # Measure final file size
    final_size = os.path.getsize(output_path) / (1024 * 1024)  # Convert to MB
    print(f"Final file size: {final_size:.2f} MB")

    # Calculate file size reduction
    reduction = initial_size - final_size
    reduction_percentage = (reduction / initial_size) * 100 if initial_size > 0 else 0
    print(f"File size reduced by: {reduction:.2f} MB ({reduction_percentage:.2f}% reduction).")

# Define paths
# input_path: cleaned source GeoJSON; output_dir: folder for the weighted copy
input_path = "/geoJSON/cleaned/Nonferrous_Metal_Mines_Cleaned.geojson"
output_dir = "/geoJSON/cleaned_weighted"

# Ensure output directory exists
# (exist_ok=True makes this a no-op when the directory is already there)
os.makedirs(output_dir, exist_ok=True)

# Process and save the dataset
# NOTE: process_and_save_weighted_geojson is redefined by several cells in this
# file; this call uses whichever definition was executed last.
process_and_save_weighted_geojson(input_path, output_dir)


### Nonferrous Metal Processing Plants
**Dataset: Nonferrous Metal Processing Plants -** https://hifld-geoplatform.hub.arcgis.com/datasets/geoplatform::nonferrous-metal-processing-plants/about

In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import time
import os

# Step 1: Define the weight schemas

# Define qualitative attribute weights
commodity_weights = {
    'ALUMINUM': 0.300,
    'COPPER': 0.250,
    'IRON': 0.150,
    'SILVER': 0.120,
    'GOLD': 0.080,
    'RHENIUM': 0.030,
    'ANTIMONY': 0.020,
    'LEAD': 0.020,
    'TITANIUM METAL': 0.020,
    'ZINC': 0.020,
    'BERYLLIUM': 0.010,
    'CADMIUM': 0.010,
    'MAGNESIUM METAL': 0.010,
    'SELENIUM': 0.010,
    'STRONTIUM': 0.010
}

# Per-code weights for PLANT_MIN (no entry for 'M' in this table)
plant_min_weights = {
    'P': 0.630,
    'M/P': 0.370
}

# Importance grade of each attribute; multiplies the per-value weight
qualitative_importance_grades = {
    'COMMODITY': 0.700,
    'PLANT_MIN': 0.300
}

# Step 2: Calculate the weight for each row

def calculate_metal_processing_plant_weight(row):
    """Score one nonferrous-metal processing plant record.

    For COMMODITY and PLANT_MIN in turn, the record's value is looked up in
    the attribute's weight table and multiplied by the attribute's importance
    grade; the two products are added. Absent and null values are read as
    'Other', which, like any unlisted value, scores 0.0.

    Args:
        row: Record exposing ``.get`` (a DataFrame row or a dict).

    Returns:
        The combined weight, floored at 0.0 and rounded to 4 decimals.
    """
    tables_by_field = {
        'COMMODITY': commodity_weights,
        'PLANT_MIN': plant_min_weights,
    }

    running_total = 0.0
    for field, table in tables_by_field.items():
        value = row.get(field, 'Other')
        if not pd.notna(value):
            value = 'Other'
        running_total += table.get(value, 0.0) * qualitative_importance_grades[field]

    if not running_total > 0.0:
        running_total = 0.0
    return round(running_total, 4)

# Step 3: Apply the weights to the dataset

def apply_weights_to_metal_processing_plants_dataset(gdf):
    """Add a 'Weight' column to ``gdf`` and return the same frame.

    The weights come from ``calculate_metal_processing_plant_weight``,
    applied row by row. All original columns are kept. The time taken is
    printed.

    Args:
        gdf: GeoDataFrame of nonferrous metal processing plants.

    Returns:
        The frame that was passed in, now carrying the 'Weight' column.
    """
    print("Starting to calculate weights for each plant...")
    began = time.time()

    # One score per row
    per_plant_weight = gdf.apply(calculate_metal_processing_plant_weight, axis=1)
    gdf['Weight'] = per_plant_weight

    elapsed_time = time.time() - began
    print(f"Weights calculated. Time elapsed: {elapsed_time:.2f} seconds.")

    return gdf

# Step 4: Process and save the dataset with weights

def process_and_save_weighted_geojson(input_path, output_dir):
    """Load a GeoJSON file, weight every plant, and save the slimmed result.

    The output is written into ``output_dir`` under the input's base name with
    its extension replaced by ``_weighted_cleaned.geojson``. The input size,
    output size and the reduction between them are printed.

    Args:
        input_path: Path of the GeoJSON file to read.
        output_dir: Directory that receives the weighted file.
    """
    # Load the dataset
    print(f"Loading dataset from {input_path}...")
    gdf = gpd.read_file(input_path)
    print("Dataset loaded successfully.")

    # Measure initial file size
    initial_size = os.path.getsize(input_path) / (1024 * 1024)  # Convert to MB
    print(f"Initial file size: {initial_size:.2f} MB")

    # Apply weights
    gdf_weighted = apply_weights_to_metal_processing_plants_dataset(gdf)

    # Keep only 'Weight' and 'geometry' columns
    gdf_weighted = gdf_weighted[['Weight', 'geometry']]

    # Construct output path. splitext swaps only the trailing extension, so a
    # ".geojson" occurring earlier in the name is left untouched and an input
    # with a different extension no longer keeps its own name unchanged.
    stem, _ = os.path.splitext(os.path.basename(input_path))
    output_path = os.path.join(output_dir, f"{stem}_weighted_cleaned.geojson")

    # Save the weighted dataset
    print(f"Saving weighted dataset to {output_path}...")
    gdf_weighted.to_file(output_path, driver='GeoJSON')

    # Measure final file size
    final_size = os.path.getsize(output_path) / (1024 * 1024)  # Convert to MB
    print(f"Final file size: {final_size:.2f} MB")

    # Calculate file size reduction (guarding against an empty input file)
    reduction = initial_size - final_size
    reduction_percentage = (reduction / initial_size) * 100 if initial_size > 0 else 0
    print(f"File size reduced by: {reduction:.2f} MB ({reduction_percentage:.2f}% reduction).")

# Define paths: the cleaned input file and the directory that collects the
# weighted outputs (both are absolute paths).
input_path = "/geoJSON/cleaned/Nonferrous_Metal_Processing_Plants_Cleaned.geojson"
output_dir = "/geoJSON/cleaned_weighted"

# Ensure output directory exists; exist_ok=True keeps a re-run of this cell
# from failing when the directory is already there.
os.makedirs(output_dir, exist_ok=True)

# Process and save the dataset
process_and_save_weighted_geojson(input_path, output_dir)


### Refractory Abrasive and Other Industrial Mineral Operations
**Dataset: Refractory Abrasive and Other Industrial Mineral Operations -** https://hifld-geoplatform.hub.arcgis.com/datasets/geoplatform::refractory-abrasive-and-other-industrial-mineral-operations/about

In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import time
import os

# Step 1: Define the weight schemas

# Define qualitative attribute weights
# Score per COMMODITY value; tied commodities are grouped into tiers and the
# insertion order runs from the highest score to the lowest.
commodity_weights = {
    'Gemstones': 0.250,
    'Bentonite': 0.200,
    'Talc': 0.150,
    'Zeolites': 0.100,
    'Silica': 0.080,
    'Feldspar': 0.070,
    'Diatomite': 0.050,
    'Boron': 0.040,
    **dict.fromkeys(('Pyrophyllite', 'Zircon'), 0.020),
    **dict.fromkeys(('Garnet', 'Olivine', 'Wollastonite'), 0.015),
    **dict.fromkeys(('Kyanite', 'Trona'), 0.010),
}

# Score per PLANT_MIN code.
plant_min_weights = {'M': 0.600, 'M/P': 0.250, 'P': 0.150}

# Share of the combined row weight contributed by each qualitative attribute.
qualitative_importance_grades = {'COMMODITY': 0.700, 'PLANT_MIN': 0.300}

# Step 2: Calculate the weight for each row
def calculate_mineral_operations_weight(row):
    """Return the combined qualitative weight of one mineral-operation record.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) that may carry the
            'COMMODITY' and 'PLANT_MIN' fields.

    Returns:
        The COMMODITY score and the PLANT_MIN score, each scaled by its
        importance grade, added together, clamped at 0.0 and rounded to
        4 decimal places. Values absent from the lookup tables score 0.0.
    """
    def _category(field):
        # A missing field and a null value are both read as the empty string.
        value = row.get(field, '')
        return value if pd.notna(value) else ''

    commodity_part = (
        commodity_weights.get(_category('COMMODITY'), 0.0)
        * qualitative_importance_grades['COMMODITY']
    )
    plant_min_part = (
        plant_min_weights.get(_category('PLANT_MIN'), 0.0)
        * qualitative_importance_grades['PLANT_MIN']
    )

    combined = commodity_part + plant_min_part
    return round(max(0.0, combined), 4)

# Step 3: Apply the weights to the dataset
def apply_weights_to_mineral_operations_dataset(gdf):
    """Score every operation and return only the weight and geometry columns.

    Args:
        gdf: Frame of operation records; a 'Weight' column is added to it in
            place before the two-column selection is taken.

    Returns:
        ``gdf`` restricted to the 'Weight' and 'geometry' columns.
    """
    print("Starting to calculate weights for each operation...")
    started = time.time()

    # Score every record row by row.
    row_weights = gdf.apply(calculate_mineral_operations_weight, axis=1)
    gdf['Weight'] = row_weights

    elapsed = time.time() - started
    print(f"Weights calculated. Time elapsed: {elapsed:.2f} seconds.")

    # Everything except the score and the shape is dropped.
    return gdf[['Weight', 'geometry']]

# Step 4: Process and save the dataset with weights
def process_and_save_weighted_geojson(input_path, output_dir):
    """Load a GeoJSON file, weight every operation, and save the slimmed result.

    The output is written into ``output_dir`` under the input's base name with
    its extension replaced by ``_weighted_cleaned.geojson``. The input size,
    output size and the reduction between them are printed.

    Args:
        input_path: Path of the GeoJSON file to read.
        output_dir: Directory that receives the weighted file.
    """
    # Load the dataset
    print(f"Loading dataset from {input_path}...")
    gdf = gpd.read_file(input_path)
    print("Dataset loaded successfully.")

    # Measure initial file size
    initial_size = os.path.getsize(input_path) / (1024 * 1024)  # Convert to MB
    print(f"Initial file size: {initial_size:.2f} MB")

    # Apply weights
    gdf_weighted = apply_weights_to_mineral_operations_dataset(gdf)

    # Construct output path. splitext swaps only the trailing extension, so a
    # ".geojson" occurring earlier in the name is left untouched and an input
    # with a different extension no longer keeps its own name unchanged.
    stem, _ = os.path.splitext(os.path.basename(input_path))
    output_path = os.path.join(output_dir, f"{stem}_weighted_cleaned.geojson")

    # Save the weighted dataset
    print(f"Saving weighted dataset to {output_path}...")
    gdf_weighted.to_file(output_path, driver='GeoJSON')

    # Measure final file size
    final_size = os.path.getsize(output_path) / (1024 * 1024)  # Convert to MB
    print(f"Final file size: {final_size:.2f} MB")

    # Calculate file size reduction (guarding against an empty input file)
    reduction = initial_size - final_size
    reduction_percentage = (reduction / initial_size) * 100 if initial_size > 0 else 0
    print(f"File size reduced by: {reduction:.2f} MB ({reduction_percentage:.2f}% reduction).")

# Define paths: the cleaned input file and the directory that collects the
# weighted outputs (both are absolute paths).
input_path = "/geoJSON/cleaned/Industrial_Mineral_Operations_Cleaned.geojson"
output_dir = "/geoJSON/cleaned_weighted"

# Ensure output directory exists; exist_ok=True keeps a re-run of this cell
# from failing when the directory is already there.
os.makedirs(output_dir, exist_ok=True)

# Process and save the dataset
process_and_save_weighted_geojson(input_path, output_dir)


### Sand and Gravel Operations
**Dataset: Sand and Gravel Operations -** https://hifld-geoplatform.hub.arcgis.com/datasets/geoplatform::sand-and-gravel-operations-1/about

In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import time
import os
from tqdm import tqdm

# Step 1: Define the weight schemas

# Define qualitative attribute weights
# Only one commodity is scored for this dataset.
commodity_weights = {'SAND & GRAVEL': 1.000}

# Score per OPER_TYPE value.
oper_type_weights = {
    'OPEN PIT': 0.600,
    'DREDGE': 0.300,
    **dict.fromkeys(('NOT AVAILABLE', 'SALES YARD'), 0.050),
}

# Share of the combined row weight contributed by each qualitative attribute.
qualitative_importance_grades = {'COMMODITY': 0.500, 'OPER_TYPE': 0.500}

# Step 2: Calculate the weight for each row

def calculate_sand_gravel_weight(row):
    """Return the combined qualitative weight of one sand-and-gravel record.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) that may carry the
            'COMMODITY' and 'OPER_TYPE' fields.

    Returns:
        The COMMODITY score and the OPER_TYPE score, each scaled by its
        importance grade, added together, clamped at 0.0 and rounded to
        4 decimal places.
    """
    # A field that is absent from the record is read as 'NOT AVAILABLE'; a
    # value that has no entry in its lookup table scores 0.0.
    commodity = row.get('COMMODITY', 'NOT AVAILABLE')
    oper_type = row.get('OPER_TYPE', 'NOT AVAILABLE')

    commodity_part = commodity_weights.get(commodity, 0.0) * qualitative_importance_grades['COMMODITY']
    oper_type_part = oper_type_weights.get(oper_type, 0.0) * qualitative_importance_grades['OPER_TYPE']

    combined = commodity_part + oper_type_part
    return round(max(0.0, combined), 4)

# Step 3: Apply the weights to the dataset

def apply_weights_to_sand_gravel_dataset(gdf):
    """Score every operation and return only the weight and geometry columns.

    Args:
        gdf: Frame of operation records; a 'Weight' column is added to it in
            place before the two-column selection is taken.

    Returns:
        ``gdf`` restricted to the 'Weight' and 'geometry' columns.
    """
    print("Starting to calculate weights for each operation...")
    started = time.time()

    # Score every record row by row.
    row_weights = gdf.apply(calculate_sand_gravel_weight, axis=1)
    gdf['Weight'] = row_weights

    elapsed = time.time() - started
    print(f"Weights calculated. Time elapsed: {elapsed:.2f} seconds.")

    # Everything except the score and the shape is dropped.
    return gdf[['Weight', 'geometry']]

# Step 4: Process and save the dataset with weights

def process_and_save_weighted_geojson(input_path, output_dir):
    """Load a GeoJSON file, weight every operation, and save the slimmed result.

    The output is written into ``output_dir`` under the input's base name with
    its extension replaced by ``_weighted_cleaned.geojson``. The input size,
    output size and the reduction between them are printed.

    Args:
        input_path: Path of the GeoJSON file to read.
        output_dir: Directory that receives the weighted file.
    """
    # Load the dataset
    print(f"Loading dataset from {input_path}...")
    gdf = gpd.read_file(input_path)
    print("Dataset loaded successfully.")

    # Measure initial file size
    initial_size = os.path.getsize(input_path) / (1024 * 1024)  # Convert to MB
    print(f"Initial file size: {initial_size:.2f} MB")

    # Apply weights
    gdf_weighted = apply_weights_to_sand_gravel_dataset(gdf)

    # Construct output path. splitext swaps only the trailing extension, so a
    # ".geojson" occurring earlier in the name is left untouched and an input
    # with a different extension no longer keeps its own name unchanged.
    stem, _ = os.path.splitext(os.path.basename(input_path))
    output_path = os.path.join(output_dir, f"{stem}_weighted_cleaned.geojson")

    # Save the weighted dataset
    print(f"Saving weighted dataset to {output_path}...")
    gdf_weighted.to_file(output_path, driver='GeoJSON')

    # Measure final file size
    final_size = os.path.getsize(output_path) / (1024 * 1024)  # Convert to MB
    print(f"Final file size: {final_size:.2f} MB")

    # Calculate file size reduction (guarding against an empty input file)
    reduction = initial_size - final_size
    reduction_percentage = (reduction / initial_size) * 100 if initial_size > 0 else 0
    print(f"File size reduced by: {reduction:.2f} MB ({reduction_percentage:.2f}% reduction).")

# Define paths: the cleaned input file and the directory that collects the
# weighted outputs (both are absolute paths).
input_path = "/geoJSON/cleaned/Sand_and_Gravel_Operations_Cleaned.geojson"
output_dir = "/geoJSON/cleaned_weighted"

# Ensure output directory exists; exist_ok=True keeps a re-run of this cell
# from failing when the directory is already there.
os.makedirs(output_dir, exist_ok=True)

# Process and save the dataset
process_and_save_weighted_geojson(input_path, output_dir)


### Uranium and Vanadium Deposits
**Dataset: Uranium and Vanadium Deposits -** https://hifld-geoplatform.hub.arcgis.com/datasets/geoplatform::uranium-and-vanadium-deposits/about

In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import time
import os

# Step 1: Define the weight schemas

# Define qualitative attribute weights
# Every table below carries an 'Other' entry; calculate_deposit_weight falls
# back to it for any value that has no entry of its own.

# Score per COM_MAJOR value.
com_major_weights = {
    'Uranium': 0.500,
    'Uranium, Vanadium': 0.200,
    'Not Available': 0.150,
    'Other': 0.150,
}

# Score per DEV_STAT value.
dev_stat_weights = {
    'Past Producer': 0.400,
    'Occurrence': 0.300,
    'Producer': 0.200,
    'Prospect': 0.100,
    'Other': 0.000,
}

# Score per ORE value.
ore_weights = {'Uraninite': 0.600, 'Uraninite, Uranophane': 0.200, 'Other': 0.200}

# Score per OREBODY_FM value.
orebody_fm_weights = {
    'Irregular': 0.300,
    'Tabular': 0.250,
    'Not Available': 0.200,
    'Other': 0.250,
}

# Score per WORK_TYPE value.
work_type_weights = {
    'Underground': 0.400,
    'Surface': 0.300,
    'Not Available': 0.200,
    'Surface/Underground': 0.100,
    'Other': 0.000,
}

# Score per MODEL value.
model_weights = {'Not Available': 0.800, 'Other': 0.200}

# Score per HROCK_UNIT value.
hrock_unit_weights = {
    'Chinle Formation, Moss Back Member': 0.300,
    'Mount Holly Complex': 0.250,
    'Not Available': 0.200,
    'Other': 0.250,
}

# Score per HROCK_TYPE value.
hrock_type_weights = {
    'Sandstone': 0.300,
    'Mudstone, Sandstone': 0.250,
    'Not Available': 0.200,
    'Other': 0.250,
}

# Score per AROCK_UNIT and AROCK_TYPE value.
arock_unit_weights = {'Not Available': 0.900, 'Other': 0.100}
arock_type_weights = {'Not Available': 0.900, 'Other': 0.100}

# Share of the combined row weight contributed by each qualitative attribute.
qualitative_importance_grades = {
    'COM_MAJOR': 0.150,
    'DEV_STAT': 0.150,
    'ORE': 0.200,
    'OREBODY_FM': 0.100,
    'WORK_TYPE': 0.100,
    'MODEL': 0.050,
    'HROCK_UNIT': 0.050,
    'HROCK_TYPE': 0.050,
    'AROCK_UNIT': 0.050,
    'AROCK_TYPE': 0.050,
}

# Step 2: Calculate the weight for each row
def calculate_deposit_weight(row):
    """Return the combined qualitative weight of one deposit record.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) that may carry the ten
            scored attribute fields.

    Returns:
        The sum of every attribute's score scaled by its importance grade,
        clamped at 0.0 and rounded to 4 decimal places.
    """
    # (field name, lookup table) pairs, in the order their parts are summed.
    scored_fields = (
        ('COM_MAJOR', com_major_weights),
        ('DEV_STAT', dev_stat_weights),
        ('ORE', ore_weights),
        ('OREBODY_FM', orebody_fm_weights),
        ('WORK_TYPE', work_type_weights),
        ('MODEL', model_weights),
        ('HROCK_UNIT', hrock_unit_weights),
        ('HROCK_TYPE', hrock_type_weights),
        ('AROCK_UNIT', arock_unit_weights),
        ('AROCK_TYPE', arock_type_weights),
    )

    combined = 0.0
    for field, table in scored_fields:
        # A missing field is read as 'Other'; a value without its own entry
        # takes the table's 'Other' score.
        category = row.get(field, 'Other')
        score = table.get(category, table['Other'])
        combined += score * qualitative_importance_grades[field]

    return round(max(0.0, combined), 4)

# Step 3: Apply the weights to the dataset
def apply_weights_to_deposits_dataset(gdf):
    """Score every deposit and return only the weight and geometry columns.

    Args:
        gdf: Frame of deposit records; a 'Weight' column is added to it in
            place before the two-column selection is taken.

    Returns:
        ``gdf`` restricted to the 'Weight' and 'geometry' columns.
    """
    print("Starting to calculate weights for each deposit...")
    started = time.time()

    # Score every record row by row.
    row_weights = gdf.apply(calculate_deposit_weight, axis=1)
    gdf['Weight'] = row_weights

    elapsed = time.time() - started
    print(f"Weights calculated. Time elapsed: {elapsed:.2f} seconds.")

    # Everything except the score and the shape is dropped.
    return gdf[['Weight', 'geometry']]

# Step 4: Process and save the dataset with weights
def process_and_save_weighted_geojson(input_path, output_dir):
    """Load a GeoJSON file, weight every deposit, and save the slimmed result.

    The output is written into ``output_dir`` under the input's base name with
    its extension replaced by ``_weighted_cleaned.geojson``. The input size,
    output size and the reduction between them are printed.

    Args:
        input_path: Path of the GeoJSON file to read.
        output_dir: Directory that receives the weighted file.
    """
    # Load the dataset
    print(f"Loading dataset from {input_path}...")
    gdf = gpd.read_file(input_path)
    print("Dataset loaded successfully.")

    # Measure initial file size
    initial_size = os.path.getsize(input_path) / (1024 * 1024)  # Convert to MB
    print(f"Initial file size: {initial_size:.2f} MB")

    # Apply weights
    gdf_weighted = apply_weights_to_deposits_dataset(gdf)

    # Construct output path. splitext swaps only the trailing extension, so a
    # ".geojson" occurring earlier in the name is left untouched and an input
    # with a different extension no longer keeps its own name unchanged.
    stem, _ = os.path.splitext(os.path.basename(input_path))
    output_path = os.path.join(output_dir, f"{stem}_weighted_cleaned.geojson")

    # Save the weighted dataset
    print(f"Saving weighted dataset to {output_path}...")
    gdf_weighted.to_file(output_path, driver='GeoJSON')

    # Measure final file size
    final_size = os.path.getsize(output_path) / (1024 * 1024)  # Convert to MB
    print(f"Final file size: {final_size:.2f} MB")

    # Calculate file size reduction (guarding against an empty input file)
    reduction = initial_size - final_size
    reduction_percentage = (reduction / initial_size) * 100 if initial_size > 0 else 0
    print(f"File size reduced by: {reduction:.2f} MB ({reduction_percentage:.2f}% reduction).")

# Define paths: the cleaned input file and the directory that collects the
# weighted outputs (both are absolute paths).
input_path = "/geoJSON/cleaned/Uranium_and_Vanadium_Deposits_Cleaned.geojson"
output_dir = "/geoJSON/cleaned_weighted"

# Ensure output directory exists; exist_ok=True keeps a re-run of this cell
# from failing when the directory is already there.
os.makedirs(output_dir, exist_ok=True)

# Process and save the dataset
process_and_save_weighted_geojson(input_path, output_dir)


## Transportation

### Airports
**Dataset: US Airports -** https://hub.arcgis.com/datasets/Aviation::us-airports/about

In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import time
import os

# Step 1: Define the weight schemas

# Define qualitative attribute weights
# Score per FACILITY_USE_CODE value.
facility_use_code_weights = {'PR': 0.600, 'PU': 0.400}

# Score per OWNERSHIP_TYPE_CODE value.
ownership_type_code_weights = {
    'PR': 0.600,
    'PU': 0.250,
    'Other': 0.150,  # Other includes MR, MA, MN, CG
}

# Score per REGION_CODE value: every entry, including the 'Other' entry that
# balances the remaining regions, carries the same score.
region_code_weights = dict.fromkeys(('AGL', 'ASW', 'ASO', 'Other'), 0.250)

# Score per SITE_TYPE_CODE value.
site_type_code_weights = {
    'A': 0.600,
    'H': 0.250,
    'Other': 0.150,  # Other includes C, U, G, B
}

# Score per ARPT_STATUS value.
arpt_status_weights = {'O': 0.800, 'CI': 0.100, 'CP': 0.100}

# Score per DIRECTION_CODE value: distributed evenly among all directions,
# with the same score for 'Other'.
direction_code_weights = dict.fromkeys(
    ('N', 'NE', 'E', 'SE', 'S', 'SW', 'W', 'NW', 'Other'),
    0.111,
)

# Score per FUEL_TYPES value.
fuel_types_weights = {
    '100LL': 0.600,
    'A': 0.400,
    'Other': 0.000,  # Assume no additional importance for other types not mentioned
}

# Share of the row weight contributed by each qualitative attribute.
qualitative_importance_grades = {
    'FACILITY_USE_CODE': 0.150,
    'OWNERSHIP_TYPE_CODE': 0.150,
    'REGION_CODE': 0.100,
    'SITE_TYPE_CODE': 0.150,
    'ARPT_STATUS': 0.150,
    'DIRECTION_CODE': 0.150,
    'FUEL_TYPES': 0.150,
}

# Multiplier applied to each scaled quantitative attribute.
quantitative_weights = {
    'ELEV': 0.200,
    'BASED_SINGLE_ENG': 0.150,
    'BASED_MULTI_ENG': 0.100,
    'BASED_JET_ENG': 0.150,
    'BASED_HEL': 0.100,
    'BASED_GLIDERS': 0.050,
    'BASED_MIL_ACFT': 0.150,
    'BASED_ULTRALGT_ACFT': 0.050,
    'COMMERCIAL_OPS': 0.250,
    'AIR_TAXI_OPS': 0.150,
    'LOCAL_OPS': 0.200,
    'ITNRNT_OPS': 0.200,
    'MIL_ACFT_OPS': 0.150,
}

# Step 2: Calculate the weight for each row

def _scaled_quantitative_weight(value, ceiling, weight):
    """Return ``weight`` scaled by ``value / ceiling``; 0 for unusable values.

    Missing (None/NaN) and non-positive values contribute nothing. The divisor
    is floored at 1. When no usable ``ceiling`` is supplied the value is
    divided by itself, which reproduces the historical per-row behaviour.
    """
    if pd.isna(value) or value <= 0:
        return 0
    if ceiling is None or pd.isna(ceiling):
        ceiling = value
    return (value / max(1, ceiling)) * weight


def calculate_airport_weight(row, max_values=None):
    """Return the combined qualitative and quantitative weight of one airport.

    Args:
        row: Record (e.g. a DataFrame row) carrying the qualitative code
            fields and every field named in ``quantitative_weights``.
        max_values: Optional mapping from quantitative field name to the
            largest value of that field across the dataset. Each positive
            quantitative value is divided by its maximum (floored at 1) before
            being multiplied by the field's weight. When omitted, each value
            is divided by itself instead, so any value >= 1 earns the field's
            full weight regardless of magnitude; that is the behaviour this
            function had before ``max_values`` existed and is kept only for
            callers that still pass a row alone.

    Returns:
        The sum of all parts, clamped at 0.0 and rounded to 4 decimal places.
    """
    # Qualitative parts. Codes without a table entry score 0.0 for facility
    # use and airport status, and the table's 'Other' score everywhere else.
    total_weight = (
        facility_use_code_weights.get(row['FACILITY_USE_CODE'], 0.0) * qualitative_importance_grades['FACILITY_USE_CODE']
        + ownership_type_code_weights.get(row['OWNERSHIP_TYPE_CODE'], ownership_type_code_weights['Other']) * qualitative_importance_grades['OWNERSHIP_TYPE_CODE']
        + region_code_weights.get(row['REGION_CODE'], region_code_weights['Other']) * qualitative_importance_grades['REGION_CODE']
        + site_type_code_weights.get(row['SITE_TYPE_CODE'], site_type_code_weights['Other']) * qualitative_importance_grades['SITE_TYPE_CODE']
        + arpt_status_weights.get(row['ARPT_STATUS'], 0.0) * qualitative_importance_grades['ARPT_STATUS']
        + direction_code_weights.get(row['DIRECTION_CODE'], direction_code_weights['Other']) * qualitative_importance_grades['DIRECTION_CODE']
        + fuel_types_weights.get(row['FUEL_TYPES'], fuel_types_weights['Other']) * qualitative_importance_grades['FUEL_TYPES']
    )

    # Quantitative parts, added in the order the table lists them.
    for column, weight in quantitative_weights.items():
        ceiling = None if max_values is None else max_values.get(column)
        total_weight += _scaled_quantitative_weight(row[column], ceiling, weight)

    # Ensure weight is positive, rounded to 4 decimal places, and no negative values
    return round(max(0.0, total_weight), 4)

# Step 3: Apply the weights to the dataset
def apply_weights_to_airports_dataset(gdf):
    """Score every airport and return only the weight and geometry columns.

    Args:
        gdf: Frame of airport records; a 'Weight' column is added to it in
            place before the two-column selection is taken.

    Returns:
        ``gdf`` restricted to the 'Weight' and 'geometry' columns.
    """
    print("Starting to calculate weights for each airport...")
    start_time = time.time()

    # Dataset-wide maxima, so that each quantitative attribute is scaled
    # relative to the largest airport instead of relative to itself.
    max_values = {
        column: pd.to_numeric(gdf[column], errors='coerce').max()
        for column in quantitative_weights
    }

    # Apply the weight calculation function to each row
    gdf['Weight'] = gdf.apply(calculate_airport_weight, axis=1, max_values=max_values)

    end_time = time.time()
    elapsed_time = end_time - start_time
    print(f"Weights calculated. Time elapsed: {elapsed_time:.2f} seconds.")

    return gdf[['Weight', 'geometry']]

# Step 4: Process and save the dataset with weights
def process_and_save_weighted_geojson(input_path, output_dir):
    """Load a GeoJSON file, weight every airport, and save the slimmed result.

    The output is written into ``output_dir`` under the input's base name with
    its extension replaced by ``_weighted_cleaned.geojson``. The input size,
    output size and the reduction between them are printed.

    Args:
        input_path: Path of the GeoJSON file to read.
        output_dir: Directory that receives the weighted file.
    """
    # Load the dataset
    print(f"Loading dataset from {input_path}...")
    gdf = gpd.read_file(input_path)
    print("Dataset loaded successfully.")

    # Measure initial file size
    initial_size = os.path.getsize(input_path) / (1024 * 1024)  # Convert to MB
    print(f"Initial file size: {initial_size:.2f} MB")

    # Apply weights
    gdf_weighted = apply_weights_to_airports_dataset(gdf)

    # Construct output path. splitext swaps only the trailing extension, so a
    # ".geojson" occurring earlier in the name is left untouched and an input
    # with a different extension no longer keeps its own name unchanged.
    stem, _ = os.path.splitext(os.path.basename(input_path))
    output_path = os.path.join(output_dir, f"{stem}_weighted_cleaned.geojson")

    # Save the weighted dataset
    print(f"Saving weighted dataset to {output_path}...")
    gdf_weighted.to_file(output_path, driver='GeoJSON')

    # Measure final file size
    final_size = os.path.getsize(output_path) / (1024 * 1024)  # Convert to MB
    print(f"Final file size: {final_size:.2f} MB")

    # Calculate file size reduction (guarding against an empty input file)
    reduction = initial_size - final_size
    reduction_percentage = (reduction / initial_size) * 100 if initial_size > 0 else 0
    print(f"File size reduced by: {reduction:.2f} MB ({reduction_percentage:.2f}% reduction).")

# Define paths: the cleaned input file and the directory that collects the
# weighted outputs (both are absolute paths).
input_path = "/geoJSON/cleaned/Aviation_Facilities_Cleaned.geojson"
output_dir = "/geoJSON/cleaned_weighted"

# Ensure output directory exists; exist_ok=True keeps a re-run of this cell
# from failing when the directory is already there.
os.makedirs(output_dir, exist_ok=True)

# Process and save the dataset
process_and_save_weighted_geojson(input_path, output_dir)


### Bridges
**Dataset: National Bridge Inventory -** https://hub.arcgis.com/datasets/fedmaps::national-bridge-inventory-3/about

In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import time
import os

# Step 1: Define the weight schemas

# Define qualitative attribute weights
# Score per STATE_CODE_001 value.
# NOTE(review): the keys are strings and California is written '6', not '06';
# confirm this matches how the dataset stores the state code.
state_code_weights = {
    '48': 0.400,  # Texas
    '39': 0.250,  # Ohio
    '17': 0.200,  # Illinois
    '6': 0.150,   # California
}

# Score per RECORD_TYPE_005A value (integer key).
record_type_weights = {1: 1.000}

# Score per ROUTE_PREFIX_005B value (integer keys).
route_prefix_weights = {
    4: 0.350,  # State Highway
    3: 0.300,  # Interstate Highway
    5: 0.200,  # U.S. Highway
    1: 0.150,  # Local Road
}

# Score per SERVICE_LEVEL_005C value (integer keys).
service_level_weights = {
    1: 0.500,  # Mainline
    0: 0.300,  # None
    7: 0.200,  # Frontage Road
}

# Score per DIRECTION_005E value (integer keys).
direction_weights = {
    0: 0.500,  # Unknown
    1: 0.200,  # One-way
    2: 0.200,  # Two-way
}

# Score per UNDCLRENCE_EVAL_069 value (string keys).
undclrenc_eval_weights = {
    'N': 0.400,  # Not applicable
    '3': 0.300,  # Fair
    '4': 0.300,  # Poor
}

# Score per BRIDGE_CONDITION value.
bridge_condition_weights = {
    'F': 0.400,  # Fair
    'G': 0.400,  # Good
    'P': 0.200,  # Poor
}

# Share of the row weight contributed by each qualitative attribute.
qualitative_importance_grades = {
    'STATE_CODE_001': 0.100,
    'RECORD_TYPE_005A': 0.100,
    'ROUTE_PREFIX_005B': 0.200,
    'SERVICE_LEVEL_005C': 0.150,
    'DIRECTION_005E': 0.100,
    'UNDCLRENCE_EVAL_069': 0.100,
    'BRIDGE_CONDITION': 0.250,
}

# Multiplier applied to each scaled quantitative attribute.
quantitative_weights = {
    'MIN_VERT_CLR_010': 0.150,
    'ADT_029': 0.200,
    'YEAR_BUILT_027': 0.150,
    'YEAR_ADT_030': 0.100,
    'PERCENT_ADT_TRUCK_109': 0.100,
    'DESIGN_LOAD_031': 0.150,
    'DECK_COND_058': 0.200,
    'SUPERSTRUCTURE_COND_059': 0.200,
    'SUBSTRUCTURE_COND_060': 0.150,
    'CHANNEL_COND_061': 0.150,
    'CULVERT_COND_062': 0.150,
    'OPERATING_RATING_064': 0.200,
    'INVENTORY_RATING_066': 0.150,
    'STRUCTURAL_EVAL_067': 0.200,
    'DECK_GEOMETRY_EVAL_068': 0.150,
    'POSTING_EVAL_070': 0.100,
    'WATERWAY_EVAL_071': 0.150,
    'APPR_ROAD_EVAL_072': 0.150,
    'DATE_OF_INSPECT_090': 0.200,
    'FRACTURE_LAST_DATE_093A': 0.150,
    'UNDWATER_LAST_DATE_093B': 0.150,
    'SPEC_LAST_DATE_093C': 0.150,
    'LOWEST_RATING': 0.200,
    'DECK_AREA': 0.150,
}

# Step 2: Create a function to convert specific columns to numeric

def convert_columns_to_numeric(gdf, columns):
    """Coerce the named columns of ``gdf`` to numeric values in place.

    Args:
        gdf: Frame whose columns are rewritten.
        columns: Iterable of column names to convert.

    Returns:
        The same ``gdf`` object. Entries that cannot be parsed as numbers
        become NaN because of ``errors='coerce'``.
    """
    for name in columns:
        coerced = pd.to_numeric(gdf[name], errors='coerce')
        gdf[name] = coerced
    return gdf

# Step 3: Calculate the weight for each row

def calculate_bridge_weight(row, max_values):
    """Return the combined qualitative and quantitative weight of one bridge.

    Args:
        row: Record (e.g. a DataFrame row) carrying the qualitative code
            fields; quantitative fields absent from it count as 0.
        max_values: Mapping from quantitative field name to the value used as
            that field's divisor.

    Returns:
        The sum of all parts, clamped at 0.0 and rounded to 4 decimal places.
    """
    # (field name, lookup table) pairs, in the order their parts are summed.
    # A code without a table entry scores 0.0.
    qualitative_fields = (
        ('STATE_CODE_001', state_code_weights),
        ('RECORD_TYPE_005A', record_type_weights),
        ('ROUTE_PREFIX_005B', route_prefix_weights),
        ('SERVICE_LEVEL_005C', service_level_weights),
        ('DIRECTION_005E', direction_weights),
        ('UNDCLRENCE_EVAL_069', undclrenc_eval_weights),
        ('BRIDGE_CONDITION', bridge_condition_weights),
    )
    qualitative_total = 0.0
    for field, table in qualitative_fields:
        qualitative_total += table.get(row[field], 0.0) * qualitative_importance_grades[field]

    quantitative_total = 0
    for field, weight in quantitative_weights.items():
        # A divisor that is missing or not greater than zero is replaced by 1.
        divisor = max_values.get(field, 0)
        if not divisor > 0:
            divisor = 1
        value = row.get(field, 0)
        # Only strictly positive values contribute.
        if value > 0:
            quantitative_total += (value / divisor) * weight

    combined = qualitative_total + quantitative_total
    return round(max(0.0, combined), 4)

# Step 4: Apply the weights to the dataset

def apply_weights_to_bridges_dataset(gdf):
    """Score every bridge and return only the weight and geometry columns.

    Args:
        gdf: Frame of bridge records. Its quantitative columns are coerced to
            numeric and a 'Weight' column is added, both in place.

    Returns:
        ``gdf`` restricted to the 'Weight' and 'geometry' columns.
    """
    print("Starting to calculate weights for each bridge...")
    started = time.time()

    # Columns that must be numeric before they can be scaled.
    numeric_columns = [
        'MIN_VERT_CLR_010',
        'ADT_029',
        'YEAR_BUILT_027',
        'YEAR_ADT_030',
        'PERCENT_ADT_TRUCK_109',
        'DESIGN_LOAD_031',
        'DECK_COND_058',
        'SUPERSTRUCTURE_COND_059',
        'SUBSTRUCTURE_COND_060',
        'CHANNEL_COND_061',
        'CULVERT_COND_062',
        'OPERATING_RATING_064',
        'INVENTORY_RATING_066',
        'STRUCTURAL_EVAL_067',
        'DECK_GEOMETRY_EVAL_068',
        'POSTING_EVAL_070',
        'WATERWAY_EVAL_071',
        'APPR_ROAD_EVAL_072',
        'DATE_OF_INSPECT_090',
        'FRACTURE_LAST_DATE_093A',
        'UNDWATER_LAST_DATE_093B',
        'SPEC_LAST_DATE_093C',
        'LOWEST_RATING',
        'DECK_AREA',
    ]
    gdf = convert_columns_to_numeric(gdf, numeric_columns)

    # Column-wide maxima serve as the divisors when scaling each row.
    max_values = {}
    for column in numeric_columns:
        max_values[column] = gdf[column].max()

    # Score every record row by row.
    gdf['Weight'] = gdf.apply(calculate_bridge_weight, axis=1, max_values=max_values)

    elapsed = time.time() - started
    print(f"Weights calculated. Time elapsed: {elapsed:.2f} seconds.")

    # Everything except the score and the shape is dropped.
    return gdf[['Weight', 'geometry']]

# Step 5: Process and save the dataset with weights

def process_and_save_weighted_geojson(input_path, output_dir):
    """Load a GeoJSON file, weight every bridge, and save the slimmed result.

    The output is written into ``output_dir`` under the input's base name with
    its extension replaced by ``_weighted_cleaned.geojson``. The input size,
    output size and the reduction between them are printed.

    Args:
        input_path: Path of the GeoJSON file to read.
        output_dir: Directory that receives the weighted file.
    """
    # Load the dataset
    print(f"Loading dataset from {input_path}...")
    gdf = gpd.read_file(input_path)
    print("Dataset loaded successfully.")

    # Measure initial file size
    initial_size = os.path.getsize(input_path) / (1024 * 1024)  # Convert to MB
    print(f"Initial file size: {initial_size:.2f} MB")

    # Apply weights
    gdf_weighted = apply_weights_to_bridges_dataset(gdf)

    # Construct output path. splitext swaps only the trailing extension, so a
    # ".geojson" occurring earlier in the name is left untouched and an input
    # with a different extension no longer keeps its own name unchanged.
    stem, _ = os.path.splitext(os.path.basename(input_path))
    output_path = os.path.join(output_dir, f"{stem}_weighted_cleaned.geojson")

    # Save the weighted dataset
    print(f"Saving weighted dataset to {output_path}...")
    gdf_weighted.to_file(output_path, driver='GeoJSON')

    # Measure final file size
    final_size = os.path.getsize(output_path) / (1024 * 1024)  # Convert to MB
    print(f"Final file size: {final_size:.2f} MB")

    # Calculate file size reduction (guarding against an empty input file)
    reduction = initial_size - final_size
    reduction_percentage = (reduction / initial_size) * 100 if initial_size > 0 else 0
    print(f"File size reduced by: {reduction:.2f} MB ({reduction_percentage:.2f}% reduction).")

# Define paths: the cleaned input file and the directory that collects the
# weighted outputs (both are absolute paths).
input_path = "/geoJSON/cleaned/bridge/Bridges_Cleaned.geojson"
output_dir = "/geoJSON/cleaned_weighted"

# Ensure output directory exists; exist_ok=True keeps a re-run of this cell
# from failing when the directory is already there.
os.makedirs(output_dir, exist_ok=True)

# Process and save the dataset
process_and_save_weighted_geojson(input_path, output_dir)


### Ports
**Dataset: Principal Ports -** https://data-usdot.opendata.arcgis.com/datasets/usdot::principal-ports/about

In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import time
import os

# Step 1: Define the weight schemas

# Define qualitative attribute weights
# Score per TYPE code.
type_weights = {'C': 0.600, 'I': 0.300, 'L': 0.100}

# Share of the row weight contributed by each qualitative attribute.
qualitative_importance_grades = {'TYPE': 0.500}

# Multiplier applied to each scaled quantitative attribute.
quantitative_weights = {
    'RANK': 0.100,
    'TOTAL': 0.200,
    'DOMESTIC': 0.150,
    'FOREIGN_': 0.200,
    **dict.fromkeys(('IMPORTS', 'EXPORTS'), 0.175),
}

# Step 2: Calculate the weight for each row

def calculate_port_weight(row, max_rank, max_total, max_domestic, max_foreign, max_imports, max_exports):
    """Return the combined qualitative and quantitative weight of one port.

    Args:
        row: Record (e.g. a DataFrame row) carrying 'TYPE' and the six
            quantitative fields.
        max_rank, max_total, max_domestic, max_foreign, max_imports,
        max_exports: Divisors for the matching fields; each is floored at 1.

    Returns:
        The sum of all parts, clamped at 0.0 and rounded to 4 decimal places.
    """
    # NOTE(review): RANK is scaled like the volume fields, so a numerically
    # larger rank earns a larger share. Confirm that is intended if rank 1
    # denotes the top port.
    scaled_fields = (
        ('RANK', max_rank),
        ('TOTAL', max_total),
        ('DOMESTIC', max_domestic),
        ('FOREIGN_', max_foreign),
        ('IMPORTS', max_imports),
        ('EXPORTS', max_exports),
    )

    # A TYPE code without a table entry scores 0.0.
    combined = type_weights.get(row['TYPE'], 0.0) * qualitative_importance_grades['TYPE']

    for field, ceiling in scaled_fields:
        value = row[field]
        # Null values contribute nothing.
        if pd.isna(value):
            combined = combined + 0
        else:
            combined = combined + (value / max(1, ceiling)) * quantitative_weights[field]

    return round(max(0.0, combined), 4)

# Step 3: Apply the weights to the dataset
def apply_weights_to_ports_dataset(gdf):
    """Add a 'Weight' column to the ports frame and return a slim copy.

    The NaN-ignoring maximum of every quantitative column is computed once
    and handed to ``calculate_port_weight`` for each row. The 'Weight' column
    is written onto the frame that was passed in; the returned frame holds
    only 'Weight' and 'geometry'. Progress and elapsed time are printed.
    """
    print("Starting to calculate weights for each port...")
    start_time = time.time()

    # Column maxima, in the positional order calculate_port_weight expects
    scaling_columns = ('RANK', 'TOTAL', 'DOMESTIC', 'FOREIGN_', 'IMPORTS', 'EXPORTS')
    column_maxima = tuple(np.nanmax(gdf[column]) for column in scaling_columns)

    # Row-wise weight computation
    gdf['Weight'] = gdf.apply(calculate_port_weight, axis=1, args=column_maxima)

    elapsed_time = time.time() - start_time
    print(f"Weights calculated. Time elapsed: {elapsed_time:.2f} seconds.")

    # Everything except the weight and the geometry is dropped
    return gdf[['Weight', 'geometry']]

# Step 4: Process and save the dataset with weights
def process_and_save_weighted_geojson(input_path, output_dir):
    """Load the cleaned ports layer, weight it, and write the slim result.

    Reads ``input_path`` with geopandas, runs
    ``apply_weights_to_ports_dataset`` on it and writes the outcome as
    GeoJSON into ``output_dir``. The output file name is the input base name
    with ".geojson" replaced by "_weighted_cleaned.geojson". Progress and the
    on-disk size before and after are printed; nothing is returned.

    Args:
        input_path: Path of the cleaned GeoJSON file to read.
        output_dir: Directory receiving the weighted file (not created here).
    """
    bytes_per_mb = 1024 * 1024

    print(f"Loading dataset from {input_path}...")
    source_gdf = gpd.read_file(input_path)
    print("Dataset loaded successfully.")

    # On-disk size of the input, in MB
    initial_size = os.path.getsize(input_path) / bytes_per_mb
    print(f"Initial file size: {initial_size:.2f} MB")

    weighted_gdf = apply_weights_to_ports_dataset(source_gdf)

    # Output name is derived from the input name
    weighted_name = os.path.basename(input_path).replace(".geojson", "_weighted_cleaned.geojson")
    output_path = os.path.join(output_dir, weighted_name)

    print(f"Saving weighted dataset to {output_path}...")
    weighted_gdf.to_file(output_path, driver='GeoJSON')

    # On-disk size of the output, in MB
    final_size = os.path.getsize(output_path) / bytes_per_mb
    print(f"Final file size: {final_size:.2f} MB")

    # Size difference; the percentage falls back to 0 for an empty input file
    reduction = initial_size - final_size
    if initial_size > 0:
        reduction_percentage = (reduction / initial_size) * 100
    else:
        reduction_percentage = 0
    print(f"File size reduced by: {reduction:.2f} MB ({reduction_percentage:.2f}% reduction).")

# Cleaned Principal Ports layer and the folder for weighted outputs
input_path = "/geoJSON/cleaned/Principal_Ports_Cleaned.geojson"
output_dir = "/geoJSON/cleaned_weighted"

# Create the output folder when missing (no error if it already exists)
os.makedirs(name=output_dir, exist_ok=True)

# Load, weight and save the layer
process_and_save_weighted_geojson(input_path=input_path, output_dir=output_dir)


### Railroads
**Dataset: North American Rail Lines (NTAD) -** https://hub.arcgis.com/documents/DCP::north-american-rail-lines-ntad/about

In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import time
import os

# Step 1: Define the weight schemas

# Category weights per qualitative attribute.
# FRADISTRCT is keyed by integer district codes plus an 'Other' catch-all.
fradistrct_weights = {
    99: 0.300,
    3: 0.200,
    2: 0.150,
    "Other": 0.350,
}

rrowner1_weights = dict(UP=0.350, CN=0.350, BNSF=0.300)

passngr_weights = dict(A=0.400, C=0.250, V=0.200, Other=0.150)

net_weights = dict(M=0.350, O=0.300, Y=0.250, Other=0.100)

timezone_weights = dict(C=0.350, E=0.300, P=0.200, Other=0.150)

# Share of the score granted to each qualitative attribute
qualitative_importance_grades = dict(
    FRADISTRCT=0.200,
    RROWNER1=0.300,
    PASSNGR=0.200,
    NET=0.150,
    TIMEZONE=0.150,
)

# Weights for the numeric attributes
quantitative_weights = dict(TRACKS=0.400, MILES=0.350, KM=0.250)

# Step 2: Calculate the weight for each row

def calculate_rail_weight(row, max_tracks, max_miles, max_km):
    """Compute the importance weight of a single rail-line record.

    Five categorical fields are looked up in their weight tables and scaled
    by their importance grades; three numeric fields are divided by the
    supplied maxima and scaled by ``quantitative_weights``.

    Args:
        row: Mapping-like record (e.g. a pandas Series) for one rail line.
        max_tracks: Scaling denominator for TRACKS.
        max_miles: Scaling denominator for MILES.
        max_km: Scaling denominator for KM.

    Returns:
        The combined weight, never negative, rounded to 4 decimal places.
    """
    def category_share(field, table, fallback):
        # A field absent from the row is looked up as 'Other'
        return table.get(row.get(field, 'Other'), fallback) * qualitative_importance_grades[field]

    def numeric_share(field, maximum):
        # Missing/NaN and non-positive values contribute nothing
        amount = row.get(field)
        if not pd.notna(amount):
            amount = 0
        if amount > 0:
            return (amount / maximum) * quantitative_weights[field]
        return 0

    # Qualitative part; unknown owners score 0.0, the other tables fall back
    # to their 'Other' entry
    fradistrct_share = category_share('FRADISTRCT', fradistrct_weights, fradistrct_weights['Other'])
    owner_share = category_share('RROWNER1', rrowner1_weights, 0.0)
    passenger_share = category_share('PASSNGR', passngr_weights, passngr_weights['Other'])
    net_share = category_share('NET', net_weights, net_weights['Other'])
    timezone_share = category_share('TIMEZONE', timezone_weights, timezone_weights['Other'])

    # Quantitative part
    tracks_share = numeric_share('TRACKS', max_tracks)
    miles_share = numeric_share('MILES', max_miles)
    km_share = numeric_share('KM', max_km)

    combined = (
        fradistrct_share
        + owner_share
        + passenger_share
        + net_share
        + timezone_share
        + tracks_share
        + miles_share
        + km_share
    )

    # Clamp at zero and round to 4 decimal places
    return round(max(0.0, combined), 4)

# Step 3: Apply the weights to the dataset

def apply_weights_to_railroads_dataset(gdf):
    """Add a 'Weight' column to the rail frame and return a slim copy.

    The NaN-ignoring maxima of TRACKS, MILES and KM are computed once and
    passed to ``calculate_rail_weight`` for each row. The 'Weight' column is
    written onto the frame that was passed in; the returned frame holds only
    'Weight' and 'geometry'. Progress and elapsed time are printed.
    """
    print("Starting to calculate weights for each railroad line...")
    start_time = time.time()

    # Column maxima, in the positional order calculate_rail_weight expects
    column_maxima = tuple(np.nanmax(gdf[column]) for column in ('TRACKS', 'MILES', 'KM'))

    # Row-wise weight computation
    gdf['Weight'] = gdf.apply(calculate_rail_weight, axis=1, args=column_maxima)

    elapsed_time = time.time() - start_time
    print(f"Weights calculated. Time elapsed: {elapsed_time:.2f} seconds.")

    # Everything except the weight and the geometry is dropped
    return gdf[['Weight', 'geometry']]

# Step 4: Process and save the dataset with weights

def process_and_save_weighted_geojson(input_path, output_dir):
    """Load the cleaned rail layer, weight it, and write the slim result.

    Reads ``input_path`` with geopandas, runs
    ``apply_weights_to_railroads_dataset`` on it and writes the outcome as
    GeoJSON into ``output_dir``. The output file name is the input base name
    with ".geojson" replaced by "_weighted_cleaned.geojson". Progress and the
    on-disk size before and after are printed; nothing is returned.

    Args:
        input_path: Path of the cleaned GeoJSON file to read.
        output_dir: Directory receiving the weighted file (not created here).
    """
    bytes_per_mb = 1024 * 1024

    print(f"Loading dataset from {input_path}...")
    source_gdf = gpd.read_file(input_path)
    print("Dataset loaded successfully.")

    # On-disk size of the input, in MB
    initial_size = os.path.getsize(input_path) / bytes_per_mb
    print(f"Initial file size: {initial_size:.2f} MB")

    weighted_gdf = apply_weights_to_railroads_dataset(source_gdf)

    # Output name is derived from the input name
    weighted_name = os.path.basename(input_path).replace(".geojson", "_weighted_cleaned.geojson")
    output_path = os.path.join(output_dir, weighted_name)

    print(f"Saving weighted dataset to {output_path}...")
    weighted_gdf.to_file(output_path, driver='GeoJSON')

    # On-disk size of the output, in MB
    final_size = os.path.getsize(output_path) / bytes_per_mb
    print(f"Final file size: {final_size:.2f} MB")

    # Size difference; the percentage falls back to 0 for an empty input file
    reduction = initial_size - final_size
    if initial_size > 0:
        reduction_percentage = (reduction / initial_size) * 100
    else:
        reduction_percentage = 0
    print(f"File size reduced by: {reduction:.2f} MB ({reduction_percentage:.2f}% reduction).")

# Cleaned rail-lines layer and the folder for weighted outputs
input_path = "/geoJSON/cleaned/Rail_Lines_Cleaned.geojson"
output_dir = "/geoJSON/cleaned_weighted"

# Create the output folder when missing (no error if it already exists)
os.makedirs(name=output_dir, exist_ok=True)

# Load, weight and save the layer
process_and_save_weighted_geojson(input_path=input_path, output_dir=output_dir)


### Spaceports

In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import time
import os

# Step 1: Define the weight schemas

# Category weights for the "License" property
license_weights = {
    "FAA": 0.600,
    "FAA and Federal": 0.150,
    "Federal": 0.150,
    "Private Exclusive Use": 0.100,
}

# Category weights for the "Launch_Type" property
launch_type_weights = {
    "Horizontal": 0.350,
    "Vertical": 0.250,
    "Vertical and Horizontal": 0.150,
    "Not Specified": 0.100,
    "Orbital Reentry": 0.050,
    "Horizontal and Orbital Reentry": 0.050,
    "Horizontal and Vertical": 0.050,
}

# Share of the score granted to each qualitative property
qualitative_importance_grades = dict(License=0.500, Launch_Type=0.500)

# Step 2: Calculate the weight for each row
def calculate_spaceport_weight(row):
    """Compute the importance weight of a single spaceport record.

    'License' and 'Launch_Type' are looked up in their weight tables and
    scaled by their importance grades. A field that is absent or null is
    treated as 'Not Specified'; a value not present in its table scores 0.0.

    Args:
        row: Mapping-like record (e.g. a pandas Series) for one spaceport.

    Returns:
        The combined weight, never negative, rounded to 4 decimal places.
    """
    def value_or_unspecified(field):
        candidate = row.get(field, 'Not Specified')
        if pd.notna(candidate):
            return candidate
        return 'Not Specified'

    license_value = value_or_unspecified('License')
    launch_value = value_or_unspecified('Launch_Type')

    license_share = license_weights.get(license_value, 0.0) * qualitative_importance_grades['License']
    launch_share = launch_type_weights.get(launch_value, 0.0) * qualitative_importance_grades['Launch_Type']

    # Clamp at zero and round to 4 decimal places
    return round(max(0.0, license_share + launch_share), 4)

# Step 3: Apply the weights to the dataset
def apply_weights_to_spaceports_dataset(gdf):
    """Add a 'Weight' column to the spaceports frame and return a slim copy.

    ``calculate_spaceport_weight`` is applied to every row. The 'Weight'
    column is written onto the frame that was passed in; the returned frame
    holds only 'Weight' and 'geometry'. Progress and elapsed time are printed.
    """
    kept_columns = ['Weight', 'geometry']

    print("Starting to calculate weights for each spaceport...")
    start_time = time.time()

    # Row-wise weight computation
    gdf['Weight'] = gdf.apply(calculate_spaceport_weight, axis=1)

    elapsed_time = time.time() - start_time
    print(f"Weights calculated. Time elapsed: {elapsed_time:.2f} seconds.")

    return gdf[kept_columns]

# Step 4: Process and save the dataset with weights
def process_and_save_weighted_geojson(input_path, output_dir):
    """Load the cleaned spaceports layer, weight it, and write the slim result.

    Reads ``input_path`` with geopandas, runs
    ``apply_weights_to_spaceports_dataset`` on it and writes the outcome as
    GeoJSON into ``output_dir``. The output file name is the input base name
    with ".geojson" replaced by "_weighted_cleaned.geojson". Progress and the
    on-disk size before and after are printed; nothing is returned.

    Args:
        input_path: Path of the cleaned GeoJSON file to read.
        output_dir: Directory receiving the weighted file (not created here).
    """
    bytes_per_mb = 1024 * 1024

    print(f"Loading dataset from {input_path}...")
    source_gdf = gpd.read_file(input_path)
    print("Dataset loaded successfully.")

    # On-disk size of the input, in MB
    initial_size = os.path.getsize(input_path) / bytes_per_mb
    print(f"Initial file size: {initial_size:.2f} MB")

    weighted_gdf = apply_weights_to_spaceports_dataset(source_gdf)

    # Output name is derived from the input name
    weighted_name = os.path.basename(input_path).replace(".geojson", "_weighted_cleaned.geojson")
    output_path = os.path.join(output_dir, weighted_name)

    print(f"Saving weighted dataset to {output_path}...")
    weighted_gdf.to_file(output_path, driver='GeoJSON')

    # On-disk size of the output, in MB
    final_size = os.path.getsize(output_path) / bytes_per_mb
    print(f"Final file size: {final_size:.2f} MB")

    # Size difference; the percentage falls back to 0 for an empty input file
    reduction = initial_size - final_size
    if initial_size > 0:
        reduction_percentage = (reduction / initial_size) * 100
    else:
        reduction_percentage = 0
    print(f"File size reduced by: {reduction:.2f} MB ({reduction_percentage:.2f}% reduction).")

# Cleaned spaceports layer and the folder for weighted outputs
input_path = "/geoJSON/cleaned/Spaceports_Cleaned.geojson"
output_dir = "/geoJSON/cleaned_weighted"

# Create the output folder when missing (no error if it already exists)
os.makedirs(name=output_dir, exist_ok=True)

# Load, weight and save the layer
process_and_save_weighted_geojson(input_path=input_path, output_dir=output_dir)


### Roads
Transportation - https://hub.arcgis.com/maps/fedmaps::transportation-1/about

#### Primary Roads
**Dataset: Primary_Roads -** https://hub.arcgis.com/datasets/fedmaps::transportation-1/about?layer=2

In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import time
import os

# Step 1: Define the weight schemas

# Category weights per qualitative attribute; 'Others' is the catch-all entry
pretypeabrv_weights = {
    "I-": 0.400,
    "US Hwy": 0.300,
    "State Rte": 0.150,
    "State Hwy": 0.100,
    "Others": 0.050,
}

rttyp_weights = dict(I=0.400, M=0.300, U=0.200, S=0.100, Others=0.050)

predirabrv_weights = dict(W=0.400, S=0.300, N=0.200, E=0.100, Others=0.050)

# Single-entry tables: only the listed value scores
prequal_weights = {"20.0": 1.000}

prequalabrv_weights = dict(Old=1.000)

# Share of the score granted to each qualitative attribute
qualitative_importance_grades = dict(
    PRETYPEABRV=0.250,
    RTTYP=0.250,
    PREDIRABRV=0.250,
    PREQUAL=0.125,
    PREQUALABRV=0.125,
)

# Weights for the attributes that are scaled by their column maximum
quantitative_weights = dict(SUFDIR=0.333, PRETYP=0.333, PREDIR=0.333)

# Step 2: Calculate the weight for each row

def calculate_road_weight(row, max_sufdir, max_pretyp, max_predir):
    """Compute the importance weight of a single primary-road record.

    Five categorical fields are looked up in their weight tables and scaled
    by their importance grades; SUFDIR, PRETYP and PREDIR are divided by the
    supplied maxima and scaled by ``quantitative_weights``.

    Missing numeric values are now detected with ``pd.isna``. The earlier
    truthiness test let NaN through (NaN is truthy), which turned the whole
    sum into NaN; ``max(0.0, nan)`` then silently collapsed the row's weight
    to 0.0 and discarded its categorical contribution.

    Args:
        row: Mapping-like record (e.g. a pandas Series) for one road.
        max_sufdir: Scaling denominator for SUFDIR.
        max_pretyp: Scaling denominator for PRETYP.
        max_predir: Scaling denominator for PREDIR.

    Returns:
        The combined weight, never negative, rounded to 4 decimal places.
    """
    def category_share(field, table):
        # Absent or unlisted values fall back to the table's 'Others' entry
        return table.get(row.get(field, 'Others'), table['Others']) * qualitative_importance_grades[field]

    def scaled_share(field, maximum):
        value = row.get(field)
        # None, NaN, 0 and '' all contribute nothing
        if pd.isna(value) or not value:
            return 0
        # A missing or zero maximum cannot be used as a denominator
        if pd.isna(maximum) or not maximum:
            return 0
        return (value / maximum) * quantitative_weights[field]

    pretypeabrv_weight = category_share('PRETYPEABRV', pretypeabrv_weights)
    rttyp_weight = category_share('RTTYP', rttyp_weights)
    predirabrv_weight = category_share('PREDIRABRV', predirabrv_weights)

    # These two tables have no catch-all entry: anything unlisted scores 0.0
    prequal_weight = prequal_weights.get(row.get('PREQUAL', 0), 0.0) * qualitative_importance_grades['PREQUAL']
    prequalabrv_weight = prequalabrv_weights.get(row.get('PREQUALABRV', 0), 0.0) * qualitative_importance_grades['PREQUALABRV']

    sufdir_weight = scaled_share('SUFDIR', max_sufdir)
    pretyp_weight = scaled_share('PRETYP', max_pretyp)
    predir_weight = scaled_share('PREDIR', max_predir)

    # Combine all contributions
    total_weight = (
        pretypeabrv_weight
        + rttyp_weight
        + predirabrv_weight
        + prequal_weight
        + prequalabrv_weight
        + sufdir_weight
        + pretyp_weight
        + predir_weight
    )

    # Clamp at zero and round to 4 decimal places
    return round(max(0.0, total_weight), 4)

# Step 3: Apply the weights to the dataset
def apply_weights_to_roads_dataset(gdf):
    """Add a 'Weight' column to the roads frame and return a slim copy.

    The maxima of SUFDIR, PRETYP and PREDIR are computed once (1 is used for
    a column that is entirely null) and passed to ``calculate_road_weight``
    for each row. The 'Weight' column is written onto the frame that was
    passed in; the returned frame holds only 'Weight' and 'geometry'.
    Progress and elapsed time are printed.
    """
    def column_max(column):
        # An all-null column has no usable maximum; scale by 1 instead
        if gdf[column].isna().all():
            return 1
        return gdf[column].max()

    print("Starting to calculate weights for each road...")
    start_time = time.time()

    # Column maxima, in the positional order calculate_road_weight expects
    column_maxima = tuple(column_max(column) for column in ('SUFDIR', 'PRETYP', 'PREDIR'))

    # Row-wise weight computation
    gdf['Weight'] = gdf.apply(calculate_road_weight, axis=1, args=column_maxima)

    elapsed_time = time.time() - start_time
    print(f"Weights calculated. Time elapsed: {elapsed_time:.2f} seconds.")

    # Everything except the weight and the geometry is dropped
    return gdf[['Weight', 'geometry']]

# Step 4: Process and save the dataset with weights
def process_and_save_weighted_geojson(input_path, output_dir):
    """Load the cleaned roads layer, weight it, and write the slim result.

    Reads ``input_path`` with geopandas, runs
    ``apply_weights_to_roads_dataset`` on it and writes the outcome as
    GeoJSON into ``output_dir``. The output file name is the input base name
    with ".geojson" replaced by "_weighted_cleaned.geojson". Progress and the
    on-disk size before and after are printed; nothing is returned.

    Args:
        input_path: Path of the cleaned GeoJSON file to read.
        output_dir: Directory receiving the weighted file (not created here).
    """
    bytes_per_mb = 1024 * 1024

    print(f"Loading dataset from {input_path}...")
    source_gdf = gpd.read_file(input_path)
    print("Dataset loaded successfully.")

    # On-disk size of the input, in MB
    initial_size = os.path.getsize(input_path) / bytes_per_mb
    print(f"Initial file size: {initial_size:.2f} MB")

    weighted_gdf = apply_weights_to_roads_dataset(source_gdf)

    # Output name is derived from the input name
    weighted_name = os.path.basename(input_path).replace(".geojson", "_weighted_cleaned.geojson")
    output_path = os.path.join(output_dir, weighted_name)

    print(f"Saving weighted dataset to {output_path}...")
    weighted_gdf.to_file(output_path, driver='GeoJSON')

    # On-disk size of the output, in MB
    final_size = os.path.getsize(output_path) / bytes_per_mb
    print(f"Final file size: {final_size:.2f} MB")

    # Size difference; the percentage falls back to 0 for an empty input file
    reduction = initial_size - final_size
    if initial_size > 0:
        reduction_percentage = (reduction / initial_size) * 100
    else:
        reduction_percentage = 0
    print(f"File size reduced by: {reduction:.2f} MB ({reduction_percentage:.2f}% reduction).")

# Cleaned primary-roads layer and the folder for weighted outputs
input_path = "/geoJSON/cleaned/Primary_Roads_Cleaned.geojson"
output_dir = "/geoJSON/cleaned_weighted"

# Create the output folder when missing (no error if it already exists)
os.makedirs(name=output_dir, exist_ok=True)

# Load, weight and save the layer
process_and_save_weighted_geojson(input_path=input_path, output_dir=output_dir)


#### Secondary Roads
**Dataset: Secondary_Roads_72_1k_scale -** https://hub.arcgis.com/datasets/fedmaps::transportation-1/about?layer=6

In [None]:
def calculate_road_importance(roads):
    """Score each road from its route type and its relative length.

    A road's score is the weight of its 'Route Type Code' (0.05 when the code
    is not listed) plus up to 0.6 for its 'Shape Length' relative to the
    longest road in the input. The sum is capped at 1.0 and rounded to three
    decimal places.

    Empty input now yields an empty list instead of raising ``ValueError``,
    and a longest length of zero no longer raises ``ZeroDivisionError``: the
    length component is simply 0 for every road in that case.

    Args:
        roads: Iterable of mappings, each with the keys 'Route Type Code' and
            'Shape Length'. Any iterable is accepted, including a generator.

    Returns:
        A list of scores, one per road, in input order.

    Raises:
        KeyError: If a road lacks one of the two required keys.
    """
    # Weights for the route type codes, with 'S' representing secondary roads
    route_type_weights = {'S': 0.4, 'M': 0.3, 'U': 0.2, 'C': 0.1, 'O': 0.05, 'I': 0.05}
    default_route_weight = 0.05
    length_share = 0.6

    # Materialise once: the input is traversed twice (maximum, then scoring)
    roads = list(roads)
    if not roads:
        return []

    max_shape_length = max(road['Shape Length'] for road in roads)

    importance_scores = []
    for road in roads:
        route_type_score = route_type_weights.get(road['Route Type Code'], default_route_weight)

        # Normalise the length to the longest road; skip when there is no
        # positive maximum to divide by
        if max_shape_length > 0:
            normalized_length_score = (road['Shape Length'] / max_shape_length) * length_share
        else:
            normalized_length_score = 0.0

        total_score = route_type_score + normalized_length_score
        importance_scores.append(round(min(1.0, total_score), 3))

    return importance_scores

# Example usage: two sample roads (extend the list with further entries as needed)
roads = [
    {"Route Type Code": "S", "Shape Length": 458.81718699964},
    {"Route Type Code": "M", "Shape Length": 8888.27937627825},
]

# Score the sample roads and show the result
importance_scores = calculate_road_importance(roads)
print(importance_scores)


## Waste

### Solid Waste Landfill Facilities
**Dataset: Solid Waste Landfill Facilities -** https://hub.arcgis.com/datasets/155761d340764921ab7fb2e88257bd97_0/about

In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import time
import os

# Step 1: Define the weight schemas

# Category weights per qualitative attribute
facility_type_weights = {"Solid Waste": 1.000}

facility_status_weights = {
    "Inactive": 0.200,
    "Active": 0.300,
    "Pre-Authorized": 0.200,
    "Closed, No Gw Monitoring": 0.150,
    "Nfa, No Further Action": 0.100,
    "Other Statuses": 0.050,
}

ownership_weights = {
    "Private": 0.400,
    "County": 0.300,
    "Municipal": 0.200,
    "Other Ownership Types": 0.100,
}

district_weights = {
    "SED (Southeast District)": 0.250,
    "NED (Northeast District)": 0.250,
    "NWD (Northwest District)": 0.200,
    "CD (Central District)": 0.200,
    "Other Districts": 0.100,
}

# Share of the score granted to each qualitative attribute
qualitative_importance_grades = dict(
    FACILITY_TYPE=0.400,
    FACILITY_STATUS=0.300,
    OWNERSHIP=0.200,
    DISTRICT=0.100,
)

# Step 2: Calculate the weight for each row

def calculate_facility_weight(row):
    """Compute the importance weight of a single landfill-facility record.

    Each of the four categorical fields is looked up in its weight table and
    scaled by its importance grade. A field that is absent from the row, or
    whose value is not a key of its table, contributes 0.0.

    Args:
        row: Mapping-like record (e.g. a pandas Series) for one facility.

    Returns:
        The combined weight, never negative, rounded to 4 decimal places.
    """
    # NOTE(review): unmatched values score 0.0, so any catch-all entries in
    # the tables only apply when the field literally holds that label -
    # confirm this is intended.
    field_tables = (
        ('FACILITY_TYPE', facility_type_weights),
        ('FACILITY_STATUS', facility_status_weights),
        ('OWNERSHIP', ownership_weights),
        ('DISTRICT', district_weights),
    )

    # Accumulate the four contributions in field order
    total_weight = 0.0
    for field, table in field_tables:
        share = table.get(row.get(field, ''), 0.0) * qualitative_importance_grades[field]
        total_weight = total_weight + share

    # Clamp at zero and round to 4 decimal places
    return round(max(0.0, total_weight), 4)

# Step 3: Apply the weights to the dataset

def apply_weights_to_facilities_dataset(gdf):
    """Add a 'Weight' column to the facilities frame and return a slim copy.

    ``calculate_facility_weight`` is applied to every row. The 'Weight'
    column is written onto the frame that was passed in; the returned frame
    holds only 'Weight' and 'geometry'. Progress and elapsed time are printed.
    """
    kept_columns = ['Weight', 'geometry']

    print("Starting to calculate weights for each facility...")
    start_time = time.time()

    # Row-wise weight computation
    gdf['Weight'] = gdf.apply(calculate_facility_weight, axis=1)

    elapsed_time = time.time() - start_time
    print(f"Weights calculated. Time elapsed: {elapsed_time:.2f} seconds.")

    return gdf[kept_columns]

# Step 4: Process and save the dataset with weights

def process_and_save_weighted_geojson(input_path, output_dir):
    """Load the cleaned facilities layer, weight it, and write the slim result.

    Reads ``input_path`` with geopandas, runs
    ``apply_weights_to_facilities_dataset`` on it and writes the outcome as
    GeoJSON into ``output_dir``. The output file name is the input base name
    with ".geojson" replaced by "_weighted_cleaned.geojson". Progress and the
    on-disk size before and after are printed; nothing is returned.

    Args:
        input_path: Path of the cleaned GeoJSON file to read.
        output_dir: Directory receiving the weighted file (not created here).
    """
    bytes_per_mb = 1024 * 1024

    print(f"Loading dataset from {input_path}...")
    source_gdf = gpd.read_file(input_path)
    print("Dataset loaded successfully.")

    # On-disk size of the input, in MB
    initial_size = os.path.getsize(input_path) / bytes_per_mb
    print(f"Initial file size: {initial_size:.2f} MB")

    weighted_gdf = apply_weights_to_facilities_dataset(source_gdf)

    # Output name is derived from the input name
    weighted_name = os.path.basename(input_path).replace(".geojson", "_weighted_cleaned.geojson")
    output_path = os.path.join(output_dir, weighted_name)

    print(f"Saving weighted dataset to {output_path}...")
    weighted_gdf.to_file(output_path, driver='GeoJSON')

    # On-disk size of the output, in MB
    final_size = os.path.getsize(output_path) / bytes_per_mb
    print(f"Final file size: {final_size:.2f} MB")

    # Size difference; the percentage falls back to 0 for an empty input file
    reduction = initial_size - final_size
    if initial_size > 0:
        reduction_percentage = (reduction / initial_size) * 100
    else:
        reduction_percentage = 0
    print(f"File size reduced by: {reduction:.2f} MB ({reduction_percentage:.2f}% reduction).")

# Cleaned solid-waste facilities layer and the folder for weighted outputs
input_path = "/geoJSON/cleaned/Solid_Waste_Facilities_Cleaned.geojson"
output_dir = "/geoJSON/cleaned_weighted"

# Create the output folder when missing (no error if it already exists)
os.makedirs(name=output_dir, exist_ok=True)

# Load, weight and save the layer
process_and_save_weighted_geojson(input_path=input_path, output_dir=output_dir)


## Water

### Aquifers
**Dataset: Aquifers -** https://hifld-geoplatform.hub.arcgis.com/datasets/geoplatform::aquifers/about

In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import time
import os
from tqdm import tqdm

# Step 1: Define the weight schemas

# Category weights keyed by the ROCK_NAME label
rock_name_weights = {
    'Other rocks': 0.200,
    'Unconsolidated sand and gravel aquifers': 0.250,
    'Carbonate-rock aquifers': 0.200,
    'Semiconsolidated sand aquifers': 0.150,
    'Igneous and metamorphic-rock aquifers': 0.100,
    'Sandstone aquifers': 0.050,
    'Sandstone and carbonate-rock aquifers': 0.050,
}

# Category weights keyed by the integer ROCK_TYPE code
rock_type_weights = {
    999: 0.200,
    100: 0.250,
    400: 0.200,
    200: 0.150,
    600: 0.100,
    300: 0.050,
    500: 0.050,
}

# Category weights keyed by the integer AQ_CODE
aq_code_weights = {
    999: 0.400,
    201: 0.300,
    609: 0.200,
    104: 0.100,
}

# Share of the score granted to each qualitative property
qualitative_importance_grades = dict(ROCK_NAME=0.350, ROCK_TYPE=0.350, AQ_CODE=0.300)

# Step 2: Calculate the weight for each row

def calculate_aquifer_weight(row):
    """Compute the importance weight of a single aquifer record.

    ROCK_NAME, ROCK_TYPE and AQ_CODE are looked up in their weight tables and
    scaled by their importance grades. An absent or null ROCK_NAME is treated
    as "Other rocks"; an absent or null ROCK_TYPE or AQ_CODE is treated as
    999. A value that is not a key of its table scores 0.0.

    Args:
        row: Mapping-like record (e.g. a pandas Series) for one aquifer.

    Returns:
        The combined weight, never negative, rounded to 4 decimal places.
    """
    def value_or(field, fallback):
        candidate = row.get(field)
        if pd.notna(candidate):
            return candidate
        return fallback

    rock_name_share = (
        rock_name_weights.get(value_or('ROCK_NAME', "Other rocks"), 0.0)
        * qualitative_importance_grades['ROCK_NAME']
    )
    rock_type_share = (
        rock_type_weights.get(value_or('ROCK_TYPE', 999), 0.0)
        * qualitative_importance_grades['ROCK_TYPE']
    )
    aq_code_share = (
        aq_code_weights.get(value_or('AQ_CODE', 999), 0.0)
        * qualitative_importance_grades['AQ_CODE']
    )

    # Clamp at zero and round to 4 decimal places
    return round(max(0.0, rock_name_share + rock_type_share + aq_code_share), 4)

# Step 3: Apply the weights to the dataset

def apply_weights_to_aquifers_dataset(gdf):
    """Add a 'Weight' column to the aquifers frame and return a slim copy.

    ``calculate_aquifer_weight`` is applied to every row. The 'Weight' column
    is written onto the frame that was passed in; the returned frame holds
    only 'Weight' and 'geometry'. Progress and elapsed time are printed.
    """
    kept_columns = ['Weight', 'geometry']

    print("Starting to calculate weights for each aquifer...")
    start_time = time.time()

    # Row-wise weight computation
    gdf['Weight'] = gdf.apply(calculate_aquifer_weight, axis=1)

    elapsed_time = time.time() - start_time
    print(f"Weights calculated. Time elapsed: {elapsed_time:.2f} seconds.")

    return gdf[kept_columns]

# Step 4: Process and save the dataset with weights

def process_and_save_weighted_geojson(input_path, output_dir):
    """Load the cleaned aquifers layer, weight it, and write the slim result.

    Reads ``input_path`` with geopandas, runs
    ``apply_weights_to_aquifers_dataset`` on it and writes the outcome as
    GeoJSON into ``output_dir``. The output file name is the input base name
    with ".geojson" replaced by "_weighted_cleaned.geojson". Progress and the
    on-disk size before and after are printed; nothing is returned.

    Args:
        input_path: Path of the cleaned GeoJSON file to read.
        output_dir: Directory receiving the weighted file (not created here).
    """
    bytes_per_mb = 1024 * 1024

    print(f"Loading dataset from {input_path}...")
    source_gdf = gpd.read_file(input_path)
    print("Dataset loaded successfully.")

    # On-disk size of the input, in MB
    initial_size = os.path.getsize(input_path) / bytes_per_mb
    print(f"Initial file size: {initial_size:.2f} MB")

    weighted_gdf = apply_weights_to_aquifers_dataset(source_gdf)

    # Output name is derived from the input name
    weighted_name = os.path.basename(input_path).replace(".geojson", "_weighted_cleaned.geojson")
    output_path = os.path.join(output_dir, weighted_name)

    print(f"Saving weighted dataset to {output_path}...")
    weighted_gdf.to_file(output_path, driver='GeoJSON')

    # On-disk size of the output, in MB
    final_size = os.path.getsize(output_path) / bytes_per_mb
    print(f"Final file size: {final_size:.2f} MB")

    # Size difference; the percentage falls back to 0 for an empty input file
    reduction = initial_size - final_size
    if initial_size > 0:
        reduction_percentage = (reduction / initial_size) * 100
    else:
        reduction_percentage = 0
    print(f"File size reduced by: {reduction:.2f} MB ({reduction_percentage:.2f}% reduction).")

# Cleaned aquifers layer and the folder for weighted outputs
input_path = "/geoJSON/cleaned/Aquifers_Cleaned.geojson"
output_dir = "/geoJSON/cleaned_weighted"

# Create the output folder when missing (no error if it already exists)
os.makedirs(name=output_dir, exist_ok=True)

# Load, weight and save the layer
process_and_save_weighted_geojson(input_path=input_path, output_dir=output_dir)


### Dams
**Dataset: National Inventory of Dams (NID) -** https://hifld-geoplatform.hub.arcgis.com/apps/geoplatform::national-inventory-of-dams-nid/about

In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import time
import os
from tqdm import tqdm

# Step 1: Define the weight schemas

# Category weights per qualitative attribute
owner_type_weights = {
    "Private": 0.300,
    "Local Government": 0.200,
    "State": 0.150,
    "Federal": 0.150,
    "Other": 0.200,
}

purpose_weights = {
    "Recreation": 0.300,
    "Flood Risk Reduction": 0.250,
    "Fire Protection, Stock, Or Small Fish Pond": 0.150,
    "Other": 0.150,
    "Other Types": 0.150,
}

# The key here is the string "None", not the None object
spillway_type_weights = {
    "Uncontrolled": 0.500,
    "Controlled": 0.300,
    "None": 0.200,
}

hazard_potential_weights = {
    "Low": 0.400,
    "High": 0.300,
    "Significant": 0.200,
    "Not Available / Undetermined": 0.100,
}

dam_type_weights = dict(Earth=0.500, Gravity=0.200, Concrete=0.200, Other=0.100)

# Share of the score granted to each qualitative property
qualitative_importance_grades = dict(
    PRIMARY_OWNER_TYPE=0.200,
    PRIMARY_PURPOSE=0.250,
    SPILLWAY_TYPE=0.150,
    HAZARD_POTENTIAL=0.300,
    PRIMARY_DAM_TYPE=0.100,
)

# Weights for the numeric attributes
quantitative_weights = dict(
    DAM_HEIGHT=0.200,
    NID_STORAGE=0.150,
    MAX_STORAGE=0.150,
    NUMBER_ASSOCIATED_STRUCTURES=0.100,
    DISTANCE=0.100,
    HYDRAULIC_HEIGHT=0.100,
    STRUCTURAL_HEIGHT=0.100,
    DAM_LENGTH=0.150,
    DAM_VOLUME=0.200,
    NORMAL_STORAGE=0.150,
    SURFACE_AREA=0.150,
    DRAINAGE_AREA=0.150,
    MAX_DISCHARGE=0.200,
    SPILLWAY_WIDTH=0.100,
    NUMBER_OF_LOCKS=0.100,
    LENGTH_OF_LOCKS=0.100,
    WIDTH_OF_LOCKS=0.100,
)

# Step 2: Calculate the weight for each row, ignoring non-existent or null values

def calculate_dam_weight(row, max_values):
    """Compute the importance weight of a single dam record.

    Five categorical fields are looked up in their weight tables and scaled
    by their importance grades. Thirteen numeric fields are divided by their
    dataset-wide maximum (floored at 1) and scaled by ``quantitative_weights``;
    a null numeric value contributes 0.

    Args:
        row: Mapping-like record (e.g. a pandas Series) for one dam.
        max_values: Mapping from numeric field name to its dataset maximum.

    Returns:
        The combined weight, never negative, rounded to 4 decimal places.
    """
    grades = qualitative_importance_grades

    # NOTE(review): only PRIMARY_PURPOSE falls back to its table's 'Other'
    # entry; unlisted values of the other four fields score 0.0 - confirm
    # that this asymmetry is intended.
    total_weight = (
        owner_type_weights.get(row.get('PRIMARY_OWNER_TYPE'), 0.0) * grades['PRIMARY_OWNER_TYPE']
        + purpose_weights.get(row.get('PRIMARY_PURPOSE', 'Other'), purpose_weights['Other']) * grades['PRIMARY_PURPOSE']
        + spillway_type_weights.get(row.get('SPILLWAY_TYPE', 'None'), 0.0) * grades['SPILLWAY_TYPE']
        + hazard_potential_weights.get(row.get('HAZARD_POTENTIAL', 'Not Available / Undetermined'), 0.0) * grades['HAZARD_POTENTIAL']
        + dam_type_weights.get(row.get('PRIMARY_DAM_TYPE', 'Other'), 0.0) * grades['PRIMARY_DAM_TYPE']
    )

    # Numeric fields, added in this fixed order
    scaled_fields = (
        'DAM_HEIGHT',
        'NID_STORAGE',
        'MAX_STORAGE',
        'NUMBER_ASSOCIATED_STRUCTURES',
        'DISTANCE',
        'HYDRAULIC_HEIGHT',
        'STRUCTURAL_HEIGHT',
        'DAM_LENGTH',
        'NORMAL_STORAGE',
        'SURFACE_AREA',
        'DRAINAGE_AREA',
        'MAX_DISCHARGE',
        'SPILLWAY_WIDTH',
    )
    for field in scaled_fields:
        value = row[field]
        if pd.isna(value):
            share = 0
        else:
            share = (value / max(1, max_values[field])) * quantitative_weights[field]
        total_weight = total_weight + share

    # Clamp at zero and round to 4 decimal places
    return round(max(0.0, total_weight), 4)

# Step 3: Apply the weights to the dataset

def apply_weights_to_dams_dataset(gdf):
    """Score every dam in ``gdf`` and store the result in a 'Weight' column.

    The maximum of each quantitative column is computed once up front and
    handed to ``calculate_dam_weight`` through the ``max_values`` keyword, so
    the per-row function does not have to rescan the columns.

    Args:
        gdf: Table of dams containing every column listed in
            ``quantitative_fields`` plus whatever ``calculate_dam_weight``
            reads from a row.

    Returns:
        The same ``gdf`` object, modified in place with the new 'Weight' column.
    """
    quantitative_fields = (
        'DAM_HEIGHT',
        'NID_STORAGE',
        'MAX_STORAGE',
        'NUMBER_ASSOCIATED_STRUCTURES',
        'DISTANCE',
        'HYDRAULIC_HEIGHT',
        'STRUCTURAL_HEIGHT',
        'DAM_LENGTH',
        'NORMAL_STORAGE',
        'SURFACE_AREA',
        'DRAINAGE_AREA',
        'MAX_DISCHARGE',
        'SPILLWAY_WIDTH',
    )

    print("Starting to calculate weights for each dam...")
    started_at = time.time()

    # One pass per column; missing values are ignored when taking the maximum.
    max_values = {
        field: gdf[field].max(skipna=True) for field in quantitative_fields
    }

    # Row-wise scoring; extra keyword arguments are forwarded to the scorer.
    gdf['Weight'] = gdf.apply(calculate_dam_weight, axis=1, max_values=max_values)

    print(f"Weights calculated. Time elapsed: {time.time() - started_at:.2f} seconds.")

    return gdf

# Step 4: Process and save the dataset with weights

def process_and_save_weighted_geojson(input_path, output_path):
    """Load the dams dataset, weight it, and write a reduced GeoJSON file.

    Only the 'Weight' and 'geometry' columns are written to ``output_path``.
    File sizes before and after are printed in MB together with the
    difference between them.

    Args:
        input_path: Path of the dataset to read with ``gpd.read_file``.
        output_path: Path of the GeoJSON file to write. Its parent directory
            is not created here.
    """
    bytes_per_mb = 1024 * 1024

    print(f"Loading dataset from {input_path}...")
    dams = gpd.read_file(input_path)
    print("Dataset loaded successfully.")

    initial_size = os.path.getsize(input_path) / bytes_per_mb
    print(f"Initial file size: {initial_size:.2f} MB")

    weighted_dams = apply_weights_to_dams_dataset(dams)

    print(f"Saving weighted dataset to {output_path}...")
    kept_columns = ['Weight', 'geometry']
    weighted_dams[kept_columns].to_file(output_path, driver='GeoJSON')

    final_size = os.path.getsize(output_path) / bytes_per_mb
    print(f"Final file size: {final_size:.2f} MB")

    reduction = initial_size - final_size
    # Guard the percentage against an empty input file.
    if initial_size > 0:
        reduction_percentage = (reduction / initial_size) * 100
    else:
        reduction_percentage = 0
    print(f"File size reduced by: {reduction:.2f} MB ({reduction_percentage:.2f}% reduction).")

# Define paths
input_path = "/geoJSON/cleaned/Dams_Cleaned.geojson"
# NOTE: despite its name, output_dir holds the full path of the output FILE
# in this cell; it is passed as the output_path argument below.
output_dir = "/geoJSON/cleaned_weighted/Dams_Weighted.geojson"

# Ensure output directory exists (the parent directory of the output file)
os.makedirs(os.path.dirname(output_dir), exist_ok=True)

# Process and save the dataset
process_and_save_weighted_geojson(input_path, output_dir)


### USACE Owned & Operated Reservoirs
**Dataset: US Army Corps of Engineers (USACE) Owned and Operated Reservoirs -** https://hifld-geoplatform.hub.arcgis.com/datasets/geoplatform::us-army-corps-of-engineers-usace-owned-and-operated-reservoirs/about

In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import time
import os

# Step 1: Define the weight schemas
#
# Each table below maps a value of one attribute column to a score.
# calculate_reservoir_weight looks the row's value up with .get(..., 0.0),
# so any value that is not listed here (including a missing one) scores 0.0.

# Define qualitative attribute weights

# Scores keyed by the value of the 'DISTRICT' column.
district_weights = {
    'Huntington District': 0.200,
    'Mobile District': 0.200,
    'Tulsa District': 0.180,
    'New England District': 0.160,
    'Omaha District': 0.160
}

# Scores keyed by the value of the 'DIST_SYM' column.
# These appear to be the abbreviations of the districts above (same scores in
# the same order) - TODO confirm.
dist_sym_weights = {
    'LRH': 0.200,
    'SAM': 0.200,
    'SWT': 0.180,
    'NAE': 0.160,
    'NWO': 0.160
}

# Scores keyed by the value of the 'DIVISION' column.
division_weights = {
    'Southwestern Division': 0.250,
    'Great Lakes and Ohio River Division': 0.240,
    'Northwestern Division': 0.200,
    'North Atlantic Division': 0.160,
    'South Atlantic Division': 0.150,
    'South Pacific Division': 0.120,
    'Mississippi Valley Division': 0.080
}

# Scores keyed by the value of the 'DRY' column.
# NOTE(review): the lookup is case-sensitive, so 'No' (0.800) and 'NO' (0.050)
# are scored very differently although they look like the same answer -
# confirm this is intended.
dry_weights = {
    'No': 0.800,
    'Yes': 0.150,
    'NO': 0.050
}

# Define overall importance grades for each qualitative property
# Each attribute's score is multiplied by its grade before the four products
# are summed; the grades add up to 1.0.
qualitative_importance_grades = {
    'DISTRICT': 0.250,
    'DIST_SYM': 0.250,
    'DIVISION': 0.300,
    'DRY': 0.200
}

# Step 2: Calculate the weight for each row
def calculate_reservoir_weight(row):
    """Return the total qualitative weight of one reservoir record.

    For each of the four attributes the row's value is looked up in the
    matching score table (0.0 when the value is absent or not listed) and
    multiplied by that attribute's entry in ``qualitative_importance_grades``.
    The four products are added together.

    Args:
        row: Mapping-like record exposing ``.get(column, default)``.

    Returns:
        The sum, floored at 0.0 and rounded to 4 decimal places.
    """
    # (column name, score table) pairs, in the order they are accumulated.
    score_tables = (
        ('DISTRICT', district_weights),
        ('DIST_SYM', dist_sym_weights),
        ('DIVISION', division_weights),
        ('DRY', dry_weights),
    )

    total = 0.0
    for column, table in score_tables:
        score = table.get(row.get(column, ''), 0.0)
        total += score * qualitative_importance_grades[column]

    # Never report a negative weight.
    floored = total if total > 0.0 else 0.0
    return round(floored, 4)

# Step 3: Apply the weights to the dataset
def apply_weights_to_reservoirs_dataset(gdf):
    """Score every reservoir in ``gdf`` and return only the weight and geometry.

    A 'Weight' column is added to ``gdf`` itself by applying
    ``calculate_reservoir_weight`` to each row; the elapsed time is printed.

    Args:
        gdf: Table of reservoirs that has a 'geometry' column.

    Returns:
        The 'Weight' and 'geometry' columns of ``gdf``.
    """
    print("Starting to calculate weights for each reservoir...")
    started_at = time.time()

    # Row-wise scoring.
    gdf['Weight'] = gdf.apply(calculate_reservoir_weight, axis=1)

    print(f"Weights calculated. Time elapsed: {time.time() - started_at:.2f} seconds.")

    # Drop every attribute column except the two needed downstream.
    kept_columns = ['Weight', 'geometry']
    return gdf[kept_columns]

# Step 4: Process and save the dataset with weights
def process_and_save_weighted_geojson(input_path, output_dir):
    """Load the reservoirs dataset, weight it, and save it into ``output_dir``.

    The output file is named after the input file: its extension is dropped
    and ``_weighted_cleaned.geojson`` is appended to the remaining stem.
    File sizes before and after are printed in MB together with the
    difference between them.

    Args:
        input_path: Path of the dataset to read with ``gpd.read_file``.
        output_dir: Directory that receives the weighted GeoJSON file; it is
            created if it does not exist.
    """
    # Load the dataset
    print(f"Loading dataset from {input_path}...")
    gdf = gpd.read_file(input_path)
    print("Dataset loaded successfully.")

    # Measure initial file size
    initial_size = os.path.getsize(input_path) / (1024 * 1024)  # Convert to MB
    print(f"Initial file size: {initial_size:.2f} MB")

    # Apply weights
    gdf_weighted = apply_weights_to_reservoirs_dataset(gdf)

    # Construct output path.
    # splitext is used instead of str.replace(".geojson", ...): replace() would
    # rewrite every occurrence of ".geojson" in the name and, worse, leave the
    # name unchanged for any other extension, so the output could end up with
    # the same file name as the input.
    stem, _extension = os.path.splitext(os.path.basename(input_path))
    output_file_name = f"{stem}_weighted_cleaned.geojson"
    output_path = os.path.join(output_dir, output_file_name)

    # Ensure output directory exists
    os.makedirs(output_dir, exist_ok=True)

    # Save the weighted dataset
    print(f"Saving weighted dataset to {output_path}...")
    gdf_weighted.to_file(output_path, driver='GeoJSON')

    # Measure final file size
    final_size = os.path.getsize(output_path) / (1024 * 1024)  # Convert to MB
    print(f"Final file size: {final_size:.2f} MB")

    # Calculate file size reduction (percentage guarded against an empty input file)
    reduction = initial_size - final_size
    reduction_percentage = (reduction / initial_size) * 100 if initial_size > 0 else 0
    print(f"File size reduced by: {reduction:.2f} MB ({reduction_percentage:.2f}% reduction).")

# Define paths
input_path = "/geoJSON/cleaned/USACE_Reservoirs_Cleaned.geojson"
# In this cell output_dir is a directory; the output file name is derived
# from input_path inside process_and_save_weighted_geojson.
output_dir = "/geoJSON/cleaned_weighted"

# Process and save the dataset
# (writes /geoJSON/cleaned_weighted/USACE_Reservoirs_Cleaned_weighted_cleaned.geojson)
process_and_save_weighted_geojson(input_path, output_dir)
