# Pull Relevannt Building Parquet Files from NREL API
Loads CSV of building IDs relevant for the county and pulls them in from the NREL API

In [2]:
import os
import pandas as pd

housing_type = "single-family-detached"
scenario = "scenario-baseline"

county_buildings = pd.read_csv(f"alameda/{scenario}/{housing_type}/step0_out_suitable_buildings.csv")

county_buildings.head()
county_buildings[0:1]["in.geometry_building_type_recs"]

0    Single-Family Detached
Name: in.geometry_building_type_recs, dtype: object

In [3]:
import boto3
from botocore.client import Config
from botocore import UNSIGNED

# Initialize S3 client
s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED))

# Function to download a specific file from S3
def download_parquet_file(bucket_name, s3_key, output_dir):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    
    output_file = os.path.join(output_dir, os.path.basename(s3_key))
    
    try:
        s3.download_file(Bucket=bucket_name, Key=s3_key, Filename=output_file)
        print(f"Downloaded {s3_key} to {output_file}")
    except Exception as e:
        print(f"Error downloading {s3_key}: {e}")

# List of specific building IDs to retrieve
building_ids = county_buildings.bldg_id.to_list()

print(building_ids)

# S3 bucket and prefix info
bucket_name = "oedi-data-lake"
prefix = "nrel-pds-building-stock/end-use-load-profiles-for-us-building-stock/2024/resstock_amy2018_release_2/timeseries_individual_buildings/by_state/upgrade=0/state=CA/"
upgrade = "-0"

# Directory to save downloaded files
output_dir = f"alameda/{scenario}/{housing_type}/step1_buildings"

# Download each specified Parquet file
for bldg_id in building_ids:
    s3_key = f"{prefix}{bldg_id}{upgrade}.parquet"
    download_parquet_file(bucket_name, s3_key, output_dir)

[2917, 3783, 3978, 12375, 12817, 16654, 19942, 20171, 21271, 22651, 23998, 24009, 24965, 25603, 30895, 48010, 49023, 50131, 53397, 58487, 69496, 70747, 71566, 81709, 85060, 87583, 91263, 91372, 98684, 101074, 102183, 102995, 103887, 106487, 109755, 110109, 112476, 123048, 130268, 131135, 133398, 134736, 134743, 137911, 138268, 138592, 140195, 141014, 143679, 145345, 158068, 158511, 163180, 164417, 166605, 172899, 174948, 182725, 187405, 190101, 190928, 193068, 198641, 199571, 200436, 213797, 215363, 217317, 218193, 220272, 224379, 234229, 235943, 238642, 244261, 245409, 250302, 252078, 253939, 257973, 258121, 260612, 265117, 265132, 268025, 268153, 276403, 276525, 281460, 281738, 285514, 286880, 288115, 289220, 297134, 309622, 314412, 315255, 317340, 319847, 324309, 326905, 328642, 334330, 337416, 346688, 348840, 351538, 353630, 360015, 363027, 364036, 365599, 365929, 367108, 367361, 369721, 370551, 375860, 376428, 377764, 381468, 381953, 382908, 385471, 388011, 390895, 392319, 396456,

In [6]:
# Count to make sure # downloaded === # queried
output_dir = f"alameda/{scenario}/{housing_type}/step1_buildings"

num_building_ids = len(building_ids)
downloaded_file_count = len([f for f in os.listdir(output_dir) if os.path.isfile(os.path.join(output_dir, f))])

print(num_building_ids == downloaded_file_count)
print(f"Number of building ids in CSV: {num_building_ids}")
print(f"Number of files in directory: {downloaded_file_count}")

True
Number of building ids in CSV: 178
Number of files in directory: 178
