# Build load profile for Alameda from a series of parquet files

In [26]:
import os
import pandas as pd

In [30]:
# Folder holding one parquet file per building for county G0600010
# (single_family_detached), and one sample file from it used to inspect the schema.
directory = "/Users/ana/Documents/Berkeley/CCAI/backend/adapters/nrel/03_county_individual_building_aggregates/counties/G0600010/single_family_detached/buildings"
filepath = "/Users/ana/Documents/Berkeley/CCAI/backend/adapters/nrel/03_county_individual_building_aggregates/counties/G0600010/single_family_detached/buildings/855-9.parquet"

# Load the sample building.
building = pd.read_parquet(filepath)

# Names of the electricity output columns present in the sample file
# (uncomment the print to list them).
out_columns = list(
    filter(lambda name: name.startswith('out.electricity.'), building.columns)
)
# print(out_columns)

# Notebook display: first rows of the sample building.
building.head()

# building.head().to_csv("building_preview.csv")

Check for missing timestamps

In [35]:
# Compare the timestamps actually present against a gap-free hourly range
# spanning the same first and last timestamp.
# Needs `all_data`, which is built by the aggregation cell further down.
observed = all_data['timestamp']
expected_timestamps = pd.date_range(
    start=observed.min(),
    end=observed.max(),
    freq='H',
)

# Hours of the expected range that never occur in the data.
missing_timestamps = expected_timestamps.difference(observed.unique())

if missing_timestamps.empty:
    print("No missing timestamps.")
else:
    print("Missing timestamps found:")
    print(missing_timestamps)

No missing timestamps.


### List of end uses to aggregate

In [36]:
# Column names grouped by end use.
#
# Each end use has an `energy_consumption` column and a matching
# `energy_consumption_intensity` column. Only the `energy_consumption` columns
# are active in every list, so a group can be summed without mixing the two
# kinds of quantity; the `_intensity` names are kept as comments for reference.

# Space heating, including its fans/pumps and heat-pump backup.
heating = [
    'out.electricity.heating.energy_consumption',
    # 'out.electricity.heating.energy_consumption_intensity',
    'out.electricity.heating_fans_pumps.energy_consumption',
    # 'out.electricity.heating_fans_pumps.energy_consumption_intensity',
    'out.electricity.heating_hp_bkup.energy_consumption',
    # 'out.electricity.heating_hp_bkup.energy_consumption_intensity',
]

# Space cooling, including its fans/pumps.
cooling = [
    'out.electricity.cooling.energy_consumption',
    # 'out.electricity.cooling.energy_consumption_intensity',
    'out.electricity.cooling_fans_pumps.energy_consumption',
    # 'out.electricity.cooling_fans_pumps.energy_consumption_intensity',
]

cooking = [
    'out.electricity.range_oven.energy_consumption',
    # 'out.electricity.range_oven.energy_consumption_intensity',
]

hot_water = [
    'out.electricity.hot_water.energy_consumption',
    # 'out.electricity.hot_water.energy_consumption_intensity',
]

appliances = [
    'out.electricity.ceiling_fan.energy_consumption',
    # 'out.electricity.ceiling_fan.energy_consumption_intensity',
    'out.electricity.clothes_dryer.energy_consumption',
    # 'out.electricity.clothes_dryer.energy_consumption_intensity',
    'out.electricity.clothes_washer.energy_consumption',
    # 'out.electricity.clothes_washer.energy_consumption_intensity',
    'out.electricity.dishwasher.energy_consumption',
    # 'out.electricity.dishwasher.energy_consumption_intensity',
    'out.electricity.lighting_exterior.energy_consumption',
    # 'out.electricity.lighting_exterior.energy_consumption_intensity',
    'out.electricity.lighting_garage.energy_consumption',
    # 'out.electricity.lighting_garage.energy_consumption_intensity',
    'out.electricity.lighting_interior.energy_consumption',
    # 'out.electricity.lighting_interior.energy_consumption_intensity',
    'out.electricity.mech_vent.energy_consumption',
    # 'out.electricity.mech_vent.energy_consumption_intensity',
    'out.electricity.refrigerator.energy_consumption',
    # 'out.electricity.refrigerator.energy_consumption_intensity',
]

# Remaining end uses. The `_intensity` columns are commented out here as well,
# matching the other lists, so that adding `misc` to an aggregate cannot
# silently add intensity values to consumption values.
misc = [
    'out.electricity.heating_hp_bkup_fa.energy_consumption',
    # 'out.electricity.heating_hp_bkup_fa.energy_consumption_intensity',
    'out.electricity.freezer.energy_consumption',
    # 'out.electricity.freezer.energy_consumption_intensity',
    'out.electricity.permanent_spa_heat.energy_consumption',
    # 'out.electricity.permanent_spa_heat.energy_consumption_intensity',
    'out.electricity.permanent_spa_pump.energy_consumption',
    # 'out.electricity.permanent_spa_pump.energy_consumption_intensity',
    'out.electricity.plug_loads.energy_consumption',
    # 'out.electricity.plug_loads.energy_consumption_intensity',
    'out.electricity.pool_heater.energy_consumption',
    # 'out.electricity.pool_heater.energy_consumption_intensity',
    'out.electricity.pool_pump.energy_consumption',
    # 'out.electricity.pool_pump.energy_consumption_intensity',
    'out.electricity.well_pump.energy_consumption',
    # 'out.electricity.well_pump.energy_consumption_intensity',
]

# Dataset-provided net and total columns.
total = [
    'out.electricity.net.energy_consumption',
    # 'out.electricity.net.energy_consumption_intensity',
    'out.electricity.total.energy_consumption',
    # 'out.electricity.total.energy_consumption_intensity',
]

In [41]:
# Build the county-average hourly load profile: read every building parquet file
# in `directory`, average each selected end use across buildings per timestamp,
# add a per-hour total, and write the result to CSV.

# End uses included in the average. `misc` and `total` are not part of this
# list, so `total_load` below is the sum of the selected end uses only.
end_uses = heating + cooling + cooking + hot_water + appliances
required_columns = ['timestamp'] + end_uses

# Collect one frame per building and concatenate once after the loop; calling
# pd.concat on every iteration re-copies all rows accumulated so far.
building_frames = []
skipped_files = []

# sorted() makes the read order (and the row order of all_data) reproducible;
# os.listdir returns entries in arbitrary order.
for file_name in sorted(os.listdir(directory)):
    if not file_name.endswith('.parquet'):
        continue

    data = pd.read_parquet(os.path.join(directory, file_name))

    # Only buildings that have the timestamp and every selected end use are used.
    if not all(col in data.columns for col in required_columns):
        skipped_files.append(file_name)
        continue

    # Keep only the columns of interest; copy so the assignment below does not
    # write into a slice of the frame that was read.
    data = data[required_columns].copy()
    data['timestamp'] = pd.to_datetime(data['timestamp'])
    building_frames.append(data)

if skipped_files:
    print(f"Skipped {len(skipped_files)} parquet file(s) missing required columns.")

# Fail with a clear message instead of a KeyError on 'timestamp' further down.
if not building_frames:
    raise ValueError(
        f"No parquet file in {directory!r} contains 'timestamp' and all selected end-use columns."
    )

all_data = pd.concat(building_frames, axis=0, ignore_index=True)

# Mean of each end use across all buildings, per timestamp.
average_profile = all_data.groupby('timestamp')[end_uses].mean()

# Hourly range covering the full span of the data.
full_year = pd.date_range(
    start=all_data['timestamp'].min(),
    end=all_data['timestamp'].max(),
    freq='H',
)

# One row per hour in the range; hours absent from the data become NaN rows.
average_profile = average_profile.reindex(full_year)

# Total load across the selected end uses per hour. sum() skips NaN, so an hour
# with no data at all gets a total_load of 0.0 rather than NaN.
average_profile['total_load'] = average_profile[end_uses].sum(axis=1)

# Turn the hourly index back into a 'timestamp' column.
average_profile = average_profile.rename_axis('timestamp').reset_index()

average_profile.to_csv("/Users/ana/Documents/Berkeley/CCAI/backend/adapters/sam/alameda/load/county_average_loads.csv", index=False)

average_profile.head()

Unnamed: 0,timestamp,out.electricity.heating.energy_consumption,out.electricity.heating_fans_pumps.energy_consumption,out.electricity.heating_hp_bkup.energy_consumption,out.electricity.cooling.energy_consumption,out.electricity.cooling_fans_pumps.energy_consumption,out.electricity.range_oven.energy_consumption,out.electricity.hot_water.energy_consumption,out.electricity.ceiling_fan.energy_consumption,out.electricity.clothes_dryer.energy_consumption,out.electricity.clothes_washer.energy_consumption,out.electricity.dishwasher.energy_consumption,out.electricity.lighting_exterior.energy_consumption,out.electricity.lighting_garage.energy_consumption,out.electricity.lighting_interior.energy_consumption,out.electricity.mech_vent.energy_consumption,out.electricity.refrigerator.energy_consumption,total_load
0,2018-01-01 01:00:00,0.500631,0.014459,0.0,0.009541,0.00141,0.006262,0.09923,0.010721,0.033139,0.001877,0.012123,0.024615,0.002369,0.169697,0.002828,0.059664,0.948566
1,2018-01-01 02:00:00,0.567418,0.016746,0.000566,0.008303,0.000934,0.006393,0.073164,0.007262,0.025877,0.0,0.002738,0.021213,0.00127,0.097295,0.001598,0.057172,0.887951
2,2018-01-01 03:00:00,0.605246,0.017754,0.004049,0.007533,0.000713,0.0,0.059803,0.005139,0.021951,0.001525,0.0,0.01482,0.000475,0.034434,0.00123,0.05332,0.827992
3,2018-01-01 04:00:00,0.969902,0.030697,0.0,0.006713,0.000156,0.000123,0.017689,0.00418,0.0,0.000508,0.001721,0.010664,0.000361,0.023451,0.001598,0.051975,1.119738
4,2018-01-01 05:00:00,1.029107,0.03223,0.0,0.007557,0.000803,0.0,0.020197,0.003885,0.0,0.0,0.003525,0.010664,0.000336,0.021852,0.002828,0.050779,1.183762
