In [1]:
import pandas as pd
import os
import requests
from io import StringIO
from datetime import datetime

In [2]:
# Numeric state code (string, no zero padding) -> two-letter postal abbreviation.
# The codes appear to be US Census state FIPS codes; 50 states plus DC.
# Entries are listed alphabetically by abbreviation, and that insertion order is
# the order in which code iterating over this mapping will visit the states.
state_abv = dict([
    ("2", "AK"), ("1", "AL"), ("5", "AR"), ("4", "AZ"), ("6", "CA"), ("8", "CO"),
    ("9", "CT"), ("11", "DC"), ("10", "DE"), ("12", "FL"), ("13", "GA"), ("15", "HI"),
    ("19", "IA"), ("16", "ID"), ("17", "IL"), ("18", "IN"), ("20", "KS"), ("21", "KY"),
    ("22", "LA"), ("25", "MA"), ("24", "MD"), ("23", "ME"), ("26", "MI"), ("27", "MN"),
    ("29", "MO"), ("28", "MS"), ("30", "MT"), ("37", "NC"), ("38", "ND"), ("31", "NE"),
    ("33", "NH"), ("34", "NJ"), ("35", "NM"), ("32", "NV"), ("36", "NY"), ("39", "OH"),
    ("40", "OK"), ("41", "OR"), ("42", "PA"), ("44", "RI"), ("45", "SC"), ("46", "SD"),
    ("47", "TN"), ("48", "TX"), ("49", "UT"), ("51", "VA"), ("50", "VT"), ("53", "WA"),
    ("55", "WI"), ("54", "WV"), ("56", "WY"),
])

In [3]:
# Four-state subset (AK, AL, AR, AZ) in the same code -> abbreviation format as
# the full state mapping; presumably meant for quick trial runs.
test_state_abv = dict(zip(
    ("2", "1", "5", "4"),
    ("AK", "AL", "AR", "AZ"),
))

In [4]:
# ComStock building type names, lowercase with no separators.
# NOTE(review): the building types that appear in the saved output paths are
# CamelCase (e.g. FullServiceRestaurant), so comparing against this list would
# need case-insensitive matching -- confirm before using it as a filter.
comstock_buildings = [
    "quickservicerestaurant",
    "fullservicerestaurant",
    "smalloffice",
    "mediumoffice",
    "largeoffice",
    "warehouse",
    "smallhotel",
    "largehotel",
    "outpatient",
    "hospital",
    "secondaryschool",
    "primaryschool",
    "retailstandalone",
    "retailstripmall",
]

In [5]:
# One-element building type list; presumably a reduced set for trial runs.
test_comstock_buildings = ["smalloffice"]

In [6]:
# Upgrade IDs to download, kept as strings.
# Alternative (currently disabled) upgrade sets:
#   upgrades = ['17', '18']
#   upgrades = ['5', '6', '7', '8', '9', '10', '15']
test_upgrades = ["17"]

In [7]:
# Columns to keep from each downloaded results CSV, in output order.
columns = (
    # upgrade / building identification and basic attributes
    [
        "upgrade",
        "in.comstock_building_type",
        "in.state",
        "applicability",
        "in.sqft",
        "in.year_built",
    ]
    # energy-code fields, one per building system plus original construction
    + [
        "in.energy_code_followed_during_last_ext_lighting_replacement",
        "in.energy_code_followed_during_last_hvac_replacement",
        "in.energy_code_followed_during_last_int_equipment_replacement",
        "in.energy_code_followed_during_last_roof_replacement",
        "in.energy_code_followed_during_last_svc_water_htg_replacement",
        "in.energy_code_followed_during_last_walls_replacement",
        "in.energy_code_followed_during_original_building_construction",
    ]
    # percent-savings fields for electricity, natural gas and site energy
    + [
        "calc.percent_savings.electricity.total.energy_consumption_intensity..percent",
        "calc.percent_savings.electricity.total.energy_consumption..percent",
        "calc.percent_savings.natural_gas.total.energy_consumption_intensity..percent",
        "calc.percent_savings.natural_gas.total.energy_consumption..percent",
        "calc.percent_savings.site_energy.total.energy_consumption..percent",
        "calc.percent_savings.site_energy.total.energy_consumption_intensity..percent",
    ]
)

In [8]:
# URL template for one state's results CSV for one upgrade.
# Placeholders: {STATE} = two-letter state abbreviation, {up} = upgrade number.
# Example of a filled-in URL (STATE=CO, up=18):
# https://oedi-data-lake.s3.amazonaws.com/nrel-pds-building-stock/end-use-load-profiles-for-us-building-stock/2023/comstock_amy2018_release_2/metadata_and_annual_results/by_state/state=CO/csv/CO_upgrade18_metadata_and_annual_results.csv
base_url = (
    "https://oedi-data-lake.s3.amazonaws.com/nrel-pds-building-stock/"
    "end-use-load-profiles-for-us-building-stock/2023/comstock_amy2018_release_2/"
    "metadata_and_annual_results/by_state/state={STATE}/csv/"
    "{STATE}_upgrade{up}_metadata_and_annual_results.csv"
)

In [9]:
# For every upgrade in `test_upgrades` and every state in `state_abv`:
#   1. download that state's results CSV (URL built from `base_url`),
#   2. split the rows by 'in.comstock_building_type',
#   3. keep only the fields listed in `columns`,
#   4. write one CSV per building type to
#      annual_data/<upgrade>/<building type>/<STATE>/<STATE>_df.csv
# A failure for one state is reported and the loop moves on to the next state.

# (connect, read) timeouts in seconds. requests waits indefinitely unless a
# timeout is passed, so a stalled connection would otherwise hang the notebook.
REQUEST_TIMEOUT = (10, 300)

for upgrade in test_upgrades:
    up = f"{int(upgrade):02}"  # zero-pad to two digits, e.g. '5' -> '05'

    for state in state_abv.values():
        state_code = state.upper()

        url = base_url.format(up=up, STATE=state_code)
        print(f"url: {url}")

        # --- Download -------------------------------------------------------
        try:
            response = requests.get(url, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as e:
            # Network-level problems only (DNS, connection, timeout, ...).
            print(f"Error downloading data for {state_code}: {e}")
            continue

        if response.status_code != 200:
            print(f"Failed to download data for {state_code}: HTTP {response.status_code}")
            continue

        print(f"DOWNLOADING upgrade: {up}, state: {state}")

        # --- Parse, split by building type, save ----------------------------
        # Kept deliberately broad so one bad file does not abort the whole run,
        # but reported as a processing error rather than a download error.
        try:
            csv_content = StringIO(response.content.decode('utf-8'))
            df = pd.read_csv(csv_content, low_memory=False)

            # Fail with a clear message instead of a bare KeyError when the
            # file does not contain every expected field.
            missing = [col for col in columns if col not in df.columns]
            if missing:
                print(f"Skipping {state_code}: CSV is missing expected columns: {missing}")
                continue

            for building_type, group in df.groupby('in.comstock_building_type'):
                # Drop rows where every selected column is NaN.
                filtered_df = group[columns].dropna(how='all')

                directory = f'annual_data/{up}/{building_type}/{state_code}'
                os.makedirs(directory, exist_ok=True)

                # Never overwrite an earlier download: if the default file
                # already exists, write a timestamped sibling instead.
                file_path = f'{directory}/{state_code}_df.csv'
                if os.path.exists(file_path):
                    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
                    file_path = f'{directory}/{state_code}_df_{timestamp}.csv'

                filtered_df.to_csv(file_path, index=False)
                print(f'Saved {file_path}')
        except Exception as e:
            print(f"Error processing data for {state_code}: {e}")
            

url: https://oedi-data-lake.s3.amazonaws.com/nrel-pds-building-stock/end-use-load-profiles-for-us-building-stock/2023/comstock_amy2018_release_2/metadata_and_annual_results/by_state/state=AK/csv/AK_upgrade17_metadata_and_annual_results.csv
DOWNLOADING upgrade: 17, state: AK
Saved annual_data/17/FullServiceRestaurant/AK/AK_df.csv
Saved annual_data/17/Hospital/AK/AK_df.csv
Saved annual_data/17/LargeHotel/AK/AK_df.csv
Saved annual_data/17/LargeOffice/AK/AK_df.csv
Saved annual_data/17/MediumOffice/AK/AK_df.csv
Saved annual_data/17/Outpatient/AK/AK_df.csv
Saved annual_data/17/PrimarySchool/AK/AK_df.csv
Saved annual_data/17/QuickServiceRestaurant/AK/AK_df.csv
Saved annual_data/17/RetailStandalone/AK/AK_df.csv
Saved annual_data/17/RetailStripmall/AK/AK_df.csv
Saved annual_data/17/SecondarySchool/AK/AK_df.csv
Saved annual_data/17/SmallHotel/AK/AK_df.csv
Saved annual_data/17/SmallOffice/AK/AK_df.csv
Saved annual_data/17/Warehouse/AK/AK_df.csv
url: https://oedi-data-lake.s3.amazonaws.com/nrel-pd