# Import Required Packages

In [3]:
import numpy as np
import geopandas as gpd
import sys
import socket 
# The BRAILS++ directory is appended to sys.path so that `brails` can be
# imported below. The directory is chosen from the machine's hostname.
# NOTE(review): both branches are machine-specific absolute paths; the
# notebook will not find `brails` on any other machine unless they are edited.
if 'cluster' in socket.gethostname():
    # Hostname contains 'cluster': use the copy under /resnick/groups.
    sys.path.append('/resnick/groups/enceladus/jyzhao/Scalable_GPs_jz/create_building_inventory/BrailsPlusPlus')
else:
    # Any other hostname: use the copy under the local Desktop mount point.
    sys.path.append('/Users/jinyanzhao/Desktop/Caltech/Permian_Basin/CyberShake/RemoteMountPoint/BrailsPlusPlus')
from brails.utils import Importer

# Define Location Specific Parameters

In [4]:
# Load the region-of-interest file and take the exterior ring of its first
# geometry as the boundary.
region_gdf = gpd.read_file('./small_risk_region.geojson')
exterior_coords = region_gdf.geometry[0].exterior.coords

# Flatten the ring into a single tuple of Python floats, each vertex's values
# laid out consecutively; this tuple is the "data" entry handed to the
# RegionBoundary class further down.
boundary_polygon = tuple(np.array(exterior_coords).ravel().tolist())

# Path of the final inventory file written at the end of the notebook.
INVENTORY_OUTPUT = 'cybershake_buildings.geojson'
# Passed as `n_possible_worlds` to the imputers and HAZUS inferers below.
NO_POSSIBLE_WORLDS = 1

# Create an Importer Object to Pull In Required BRAILS Modules

In [5]:
# Every BRAILS class used in this notebook is looked up by name through this
# object via importer.get_class("<ClassName>").
importer = Importer()

# Define the Region Object for the Region of Interest

In [6]:
# Describe the region of interest as a polygon whose vertices are given by the
# flattened coordinate tuple built earlier.
region_data = dict(type="locationPolygon", data=boundary_polygon)

# Look up the RegionBoundary class by name and instantiate it; the resulting
# object is what the NSI parser and footprint scraper take as input.
region_boundary_class = importer.get_class("RegionBoundary")
region_boundary_object = region_boundary_class(region_data)

# Get Raw NSI Data for the Defined Region

In [7]:
# Look up the NSI parser class by name and create a parser instance.
nsi_class = importer.get_class('NSI_Parser')
nsi = nsi_class()
# Fetch the raw NSI points for the region with the polygon-based query.
# (An alternative call, nsi.get_raw_data(region_boundary_object), was tried
# here previously and is not used.)
# NOTE: `nsi_inventory` is rebound later by the footprint-merging cell, so
# this raw inventory is only available until that cell runs.
nsi_inventory = nsi.get_raw_data_polygon(region_boundary_object)


INFO:root:
Getting National Structure Inventory (NSI) building data for the entered location...



Found a total of 3539 building points in NSI that are within the entered region of interest


In [8]:
# Save the raw NSI inventory to disk. The return value is bound to `_`
# because it is not used afterwards.
_ = nsi_inventory.write_to_geojson(
    output_file='./NSI_inventory.geojson')

GEOJSON {'type': 'FeatureCollection', 'generated': '2025-07-06 23:37:45.196484', 'brails_version': 'NA', 'crs': {'type': 'name', 'properties': {'name': 'urn:ogc:def:crs:OGC:1.3:CRS84'}}, 'features': [{'type': 'Feature', 'properties': {'fd_id': 591606194, 'bid': '8553QPX3+QR4-2-3-3-3', 'occtype': 'RES3B', 'st_damcat': 'RES', 'bldgtype': 'C', 'found_type': 'S', 'cbfips': '060375436031017', 'pop2amu65': 2, 'pop2amo65': 1, 'pop2pmu65': 2, 'pop2pmo65': 1, 'sqft': 2631.25, 'num_story': 1, 'ftprntid': '06037_2647467', 'ftprntsrc': 'NGA', 'students': 0, 'found_ht': 0.5, 'val_struct': 517694.167, 'val_cont': 258847.083, 'val_vehic': 81000, 'source': 'X', 'med_yr_blt': 1973, 'firmzone': None, 'o65disable': 0.25, 'u65disable': 0.03, 'x': -118.2954197, 'y': 33.799385634, 'ground_elv': 60.67241151977539, 'ground_elv_m': 18.492950439453125, 'type': 'building'}, 'geometry': {'type': 'Point', 'coordinates': [-118.29542, 33.799386]}}, {'type': 'Feature', 'properties': {'fd_id': 591025551, 'bid': '8553Q

# Get FEMA USA Footprints Data for the Defined Region

In [None]:
# Look up the USA Structures footprint scraper by name and configure it with
# 'ft' as its length setting.
scraper_class = importer.get_class('USA_FootprintScraper')
scraper = scraper_class(dict(length='ft'))

# Retrieve the building footprints for the region of interest.
scraper_inventory = scraper.get_footprints(region_boundary_object)

# Create a Baseline Inventory by Merging NSI Raw Data and USA Structures Footprints

In [7]:
# Query NSI for the footprints held in `scraper_inventory`, requesting the
# extended feature set. "ft" is presumably the length unit, matching the
# 'ft' given to the footprint scraper -- TODO confirm.
# NOTE: this rebinds `nsi_inventory`, replacing the raw NSI inventory fetched
# earlier; everything downstream works on the merged inventory.
# NOTE(review): the recorded outputs disagree (3539 raw points earlier vs
# 147204 matched points here) and the execution counts are out of order, so
# the saved outputs come from different runs. Re-run with Restart & Run All.
nsi_inventory = nsi.get_filtered_data_given_inventory(
    scraper_inventory, "ft", get_extended_features=True)


Getting National Structure Inventory (NSI) building data for the entered location...
Found a total of 147204 building points in NSI that match the footprint data.


# Fill Gaps in the Baseline Inventory by Imputing Missing Values

In [8]:
# Look up the KNN imputer class by name; the same class object is reused for
# the second imputation pass later in the notebook.
knn_imputer_class = importer.get_class("KnnImputer")

# Impute missing values in the merged inventory, leaving the location and
# identifier features ('lat', 'lon', 'fd_id') out of the imputation.
imputer = knn_imputer_class(
    nsi_inventory,
    n_possible_worlds=NO_POSSIBLE_WORLDS,
    exclude_features=['lat', 'lon', 'fd_id'],
)
imputed_inventory = imputer.impute()

  bldg_properties_df = bldg_properties_df.replace("NA", np.nan, inplace=False)


Missing percentages among 159898 assets
buildingheight: 12.55%
erabuilt: 7.90%
numstories: 7.77%
roofshape: 99.88%
fparea: 7.94%
repaircost: 7.94%
constype: 7.94%
occupancy: 7.94%
found_ht: 7.94%
splitlevel: 7.94%
basement: 27.92%
Primitive imputation done.
Running the main imputation. This may take a while.
Enumerating clusters: 20 among 320
Enumerating clusters: 40 among 320
Enumerating clusters: 60 among 320
Enumerating clusters: 80 among 320
Enumerating clusters: 100 among 320
Enumerating clusters: 120 among 320
Enumerating clusters: 140 among 320
Enumerating clusters: 160 among 320
Enumerating clusters: 180 among 320
Enumerating clusters: 200 among 320
Enumerating clusters: 220 among 320
Enumerating clusters: 240 among 320
Enumerating clusters: 260 among 320
Enumerating clusters: 280 among 320
Enumerating clusters: 300 among 320
Enumerating clusters: 320 among 320
Done imputation. It took 0.17 mins


# Generate Synthetic Income Data for Input into HAZUS Rulesets
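This step is necessary because income is currently a required input for the HAZUS rulesets in BRAILS++. The values generated below are synthetic: one income per building is sampled from a lognormal distribution centered on the California state average.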

In [9]:
CA_AVG = 78672  # state average
CA_STD_DEV = CA_AVG*0.5  # 50% cov
# Fixed seed so that Restart & Run All reproduces the same synthetic incomes.
INCOME_SEED = 42

# Step 1: Calculate the parameters of the underlying normal distribution,
# i.e. the (mu, sigma) for which the lognormal has mean CA_AVG and standard
# deviation CA_STD_DEV.
mu = np.log(CA_AVG**2 /
            np.sqrt(CA_STD_DEV**2 + CA_AVG**2))
sigma = np.sqrt(np.log(1 + (CA_STD_DEV**2 / CA_AVG**2)))

# Step 2: Generate the lognormal sample using the parameters of the normal
# distribution. Exactly one income value is stored per asset, so a single
# vectorized draw of one value per asset replaces a per-asset RNG call.
income_rng = np.random.default_rng(INCOME_SEED)
income_assets = list(imputed_inventory.inventory.values())
income_samples = income_rng.lognormal(
    mean=mu, sigma=sigma, size=len(income_assets))
for asset, income in zip(income_assets, income_samples):
    asset.add_features({"Income": income})

# Define the Feature Keys Used by the HAZUS Rulesets to Produce R2D-Compatible Data

In [10]:
# The names of NEW keys to be inferred. They are passed to the HAZUS inferer
# as `structureType_key` and `replacementCost_key`.
STRUCTURE_TYPE_KEY = 'StructureTypeHazus'  # instead of "constype" from NSI
REPLACEMENT_COST_KEY = 'ReplacementCostHazus'  # instead of NSI "repaircost"

# The names of existing keys to be used as "predictors". Each one is passed
# to the HAZUS inferer through the matching `*_key` argument.
YEAR_BUILT_KEY = 'erabuilt'
OCCUPANCY_CLASS_KEY = 'occupancy'
INCOME_KEY = 'Income'  # the synthetic feature added in the income cell
NUMBER_OF_STORIES_KEY = 'numstories'
# NOTE(review): the imputation log lists this feature as `fparea`, not
# 'fpAreas' -- confirm this key matches the inventory's feature name.
PLAN_AREA_KEY = 'fpAreas'
SPLIT_LEVEL_KEY = 'splitlevel'

# Run HAZUS Rulesets to Infer R2D-Required Data

In [11]:
# Look up the HAZUS earthquake inferer class by name.
infer_features_for_hazuseq = importer.get_class("HazusInfererEarthquake")

# Configure the inferer on the imputed inventory. The key arguments tell it
# which inventory features to read as predictors and under which names to
# store the newly inferred features.
inferer = infer_features_for_hazuseq(
    input_inventory=imputed_inventory,
    n_possible_worlds=NO_POSSIBLE_WORLDS,
    # Existing features used as predictors
    yearBuilt_key=YEAR_BUILT_KEY,
    occupancyClass_key=OCCUPANCY_CLASS_KEY,
    numberOfStories_key=NUMBER_OF_STORIES_KEY,
    income_key=INCOME_KEY,
    splitLevel_key=SPLIT_LEVEL_KEY,
    planArea_key=PLAN_AREA_KEY,
    # New features to be inferred
    structureType_key=STRUCTURE_TYPE_KEY,
    replacementCost_key=REPLACEMENT_COST_KEY,
    clean_features=False,
)
hazus_inferred_inventory = inferer.infer()

>> Step1 : Checking if OccupancyClass (occupancy) exist.
>> Step2-1 : Checking if StructureType (StructureTypeHazus) and ReplacementCost (ReplacementCostHazus) exist
>> Step2-2 : Inferring {'ReplacementCostHazus', 'StructureTypeHazus'}




GarageType info not found in the inventory. Making inference using Hazus 6.
ConstructionClass info not found in the inventory. Making inference using Hazus 6.
Done inference. It took 0.36 mins
>> Step3-1 : Checking if BuildingRise (BuildingRise), DesignLevel (DesignLevel) and FoundationType (FoundationType) exist
>> Step3-2 : Inferring {'DesignLevel', 'FoundationType', 'BuildingRise'}




The feature StructureTypeHazus is missing in many buildings including:  [18, 133, 164, 244, 281, 291, 297, 298, 318, 320]
>> Step4 : Changing feature names to what R2D (pelicun) can recognize
Done inference. It took 0.61 mins




The warning message above shows that inferring the HAZUS StructureType was not possible for several of the provided structural types (e.g., West Coast-IND1-mid_rise-pre_1950) using the inference rulesets in BRAILS++, because some of the provided structural types do not exist in HAZUS's inventory definition. The failed inference has also led to missing DesignLevel values in the inventory produced here. Below, imputation is used to estimate the HAZUS structural types that could not be inferred, and the inferer is then run again to estimate DesignLevel.

# Re-run Imputation to Fill Values That Cannot be Inferred by HAZUS Rulesets

In [12]:
# Second imputation pass, this time on the HAZUS-inferred inventory, to fill
# the values the rulesets could not infer.
# NOTE(review): unlike the first pass, no `exclude_features` is given here,
# and the recorded output lists lon, lat and fd_id among the features with
# missing values -- confirm that imputing those three is intended.
imputer = knn_imputer_class(
    hazus_inferred_inventory,
    n_possible_worlds=NO_POSSIBLE_WORLDS,
)
hazus_inferred_inventory_imputed = imputer.impute()

Missing percentages among 159898 assets
lon: 7.94%
lat: 7.94%
fd_id: 7.94%
StructureType: 0.14%
Primitive imputation done.
Running the main imputation. This may take a while.
Enumerating clusters: 20 among 320
Enumerating clusters: 40 among 320
Enumerating clusters: 60 among 320
Enumerating clusters: 80 among 320
Enumerating clusters: 100 among 320
Enumerating clusters: 120 among 320
Enumerating clusters: 140 among 320
Enumerating clusters: 160 among 320
Enumerating clusters: 180 among 320
Enumerating clusters: 200 among 320
Enumerating clusters: 220 among 320
Enumerating clusters: 240 among 320
Enumerating clusters: 260 among 320
Enumerating clusters: 280 among 320
Enumerating clusters: 300 among 320
Enumerating clusters: 320 among 320
Done imputation. It took 0.03 mins


In [None]:
# We will keep the StructureType we just imputed and re-run the inferer on the
# imputed inventory so that the remaining features (DesignLevel, per the
# recorded output) can be inferred.
Infer_features_for_HazusDL = importer.get_class("HazusInfererEarthquake")
inferer = Infer_features_for_HazusDL(
    input_inventory=hazus_inferred_inventory_imputed,
    n_possible_worlds=NO_POSSIBLE_WORLDS,
    # Same year-built key as the first inference run, taken from the shared
    # constant instead of a repeated string literal.
    yearBuilt_key=YEAR_BUILT_KEY,
    # The structure type is read under the name 'StructureType', which is how
    # the imputation log lists the feature after the first inference run.
    structureType_key='StructureType',
    clean_features=False)
hazus_inventory_final = inferer.infer()

>> Step1 : Checking if OccupancyClass (OccupancyClass) exist.
>> Step2-1 : Checking if StructureType (StructureType) and ReplacementCost (ReplacementCost) exist
>> Step3-1 : Checking if BuildingRise (BuildingRise), DesignLevel (DesignLevel) and FoundationType (FoundationType) exist
>> Step3-2 : Inferring {'DesignLevel'}
>> Step4 : Changing feature names to what R2D (pelicun) can recognize
Done inference. It took 0.27 mins


# Change Attribute Names To Make Them Compatible with R2D

In [14]:
# Rename the remaining inventory features to the target attribute names.
# The source names come from the key constants defined earlier so they stay
# in sync with the names passed to the inferer.
hazus_inventory_final.change_feature_names({
    YEAR_BUILT_KEY: 'YearBuilt',
    'lat': 'Latitude',
    'lon': 'Longitude',
    PLAN_AREA_KEY: 'PlanArea',
    NUMBER_OF_STORIES_KEY: 'NumberOfStories',
})

# Give every asset a sequential integer "id" starting at 0, in the
# inventory's iteration order.
for counter, val in enumerate(hazus_inventory_final.inventory.values()):
    val.add_features({"id": counter})

# Write the Created Inventory in a GeoJSON File

In [15]:
# Write the final inventory to the path configured in INVENTORY_OUTPUT. The
# return value is bound to `_` because it is not used afterwards.
_ = hazus_inventory_final.write_to_geojson(
    output_file=INVENTORY_OUTPUT)