In [1]:
import pandas as pd

def aggregate_monthly_data(spreadsheet_name, timestamp_column='timestamp', static_columns=None):
    """Aggregate a time-series CSV into one row per calendar month.

    Every column whose name contains ``'out.'`` is summed within each month.
    The "static" descriptive columns are represented by the first value
    found in each month.

    Parameters
    ----------
    spreadsheet_name : str or path-like
        Location of the CSV file; passed straight to ``pandas.read_csv``.
    timestamp_column : str, default 'timestamp'
        Name of the column holding the timestamps used for monthly binning.
    static_columns : str or sequence of str, optional
        Columns carried over with their first value per month. When omitted,
        the five columns this function has always used are taken:
        ``upgrade``, ``in.ashrae_iecc_climate_zone_2004``,
        ``in.comstock_building_type``, ``models_used`` and
        ``floor_area_represented``.

    Returns
    -------
    pandas.DataFrame
        One row per month with a fresh integer index. Columns are the
        timestamp column (holding the month-end label of each bin), then the
        static columns, then the summed ``out.`` columns.

    Raises
    ------
    KeyError
        If the timestamp column or any static column is absent from the file.
    """
    if static_columns is None:
        static_columns = [
            'upgrade',
            'in.ashrae_iecc_climate_zone_2004',
            'in.comstock_building_type',
            'models_used',
            'floor_area_represented',
        ]
    elif isinstance(static_columns, str):
        # A lone name must not be exploded into single characters by list().
        static_columns = [static_columns]
    else:
        static_columns = list(static_columns)

    data = pd.read_csv(spreadsheet_name)

    # Report every absent column at once instead of failing on the first one.
    missing = [name for name in [timestamp_column, *static_columns] if name not in data.columns]
    if missing:
        raise KeyError(f"{spreadsheet_name}: missing required column(s): {missing}")

    # Parse the timestamps and index by them; reassignment instead of
    # inplace=True keeps each step re-runnable.
    data[timestamp_column] = pd.to_datetime(data[timestamp_column])
    data = data.set_index(timestamp_column)

    # An offset object gives the same month-end bins as the 'M' string alias,
    # but keeps working on pandas versions where that alias was renamed 'ME'.
    # NOTE(review): a reading stamped exactly at midnight on the first of a
    # month is binned into that new month. If the timestamps mark the END of
    # each interval (the 13-row outputs further down this notebook hint at
    # this), the last interval of every month spills into the next one --
    # confirm against the data source.
    month_end = pd.offsets.MonthEnd()

    monthly_energy = data.filter(like='out.').resample(month_end).sum()
    monthly_static = data[static_columns].resample(month_end).first()

    # Static columns first, energy sums after; the timestamp becomes a
    # regular column again.
    return pd.concat([monthly_static, monthly_energy], axis=1).reset_index()



In [2]:
# Example usage: build the monthly tables for the baseline file and the LED file.
baseline_data, LED_data = 'up00-1a-smalloffice.csv', 'up05-1a-smalloffice.csv'
baseline_monthly_df, LED_monthly_df = map(aggregate_monthly_data, (baseline_data, LED_data))



In [5]:
# Uncomment either line to display that frame's monthly electricity totals.
#baseline_monthly_df['out.electricity.total.energy_consumption.kwh']
#LED_monthly_df['out.electricity.total.energy_consumption.kwh']

0     7.227191e+07
1     7.818826e+07
2     7.880258e+07
3     8.732114e+07
4     9.296077e+07
5     9.963560e+07
6     1.081672e+08
7     1.121194e+08
8     1.045128e+08
9     1.060286e+08
10    8.894233e+07
11    7.779602e+07
12    2.294802e+04
Name: out.electricity.total.energy_consumption.kwh, dtype: float64

In [9]:
# Row-by-row difference between the baseline and LED monthly electricity totals.
baseline_monthly_df['out.electricity.total.energy_consumption.kwh'].sub(
    LED_monthly_df['out.electricity.total.energy_consumption.kwh']
)

0     5.335784e+06
1     4.807729e+06
2     5.186511e+06
3     5.061891e+06
4     5.594590e+06
5     5.210383e+06
6     5.458849e+06
7     5.694990e+06
8     5.041298e+06
9     5.633974e+06
10    5.298605e+06
11    5.034486e+06
12    1.199397e+03
Name: out.electricity.total.energy_consumption.kwh, dtype: float64

In [8]:
# Monthly values of the savings column in the LED table.
LED_monthly_df.loc[:, 'out.electricity.total.energy_consumption.kwh.savings']

0     5.335784e+06
1     4.807729e+06
2     5.186511e+06
3     5.061891e+06
4     5.594590e+06
5     5.210383e+06
6     5.458849e+06
7     5.694990e+06
8     5.041298e+06
9     5.633974e+06
10    5.298605e+06
11    5.034486e+06
12    1.199397e+03
Name: out.electricity.total.energy_consumption.kwh.savings, dtype: float64