In [1]:
import pandas as pd
import json
from green_city.utils import span

In [2]:
def rename_cols(s):
    """Turn a raw column header into a short snake_case column name.

    The header is lower-cased and then run through a fixed list of literal
    substring substitutions (spaces and slashes become underscores, brackets
    are dropped, long phrases are abbreviated, unit suffixes get their
    capitalisation back, and forecast-horizon prefixes are moved behind
    ``pred``).

    Args:
        s: The original column name (a string).

    Returns:
        The normalised column name.
    """
    # Applied strictly top to bottom: later rules rely on the output of
    # earlier ones (e.g. 'prediction' -> 'pred' must run before '6h_pred',
    # and '_kw' must be handled before the more general '_w').
    substitutions = (
        (' ', '_'),
        ('[', ''),
        (']', ''),
        ('/', '_'),
        ('relative_humidity_%', 'hum'),
        ('_kw', '_kW'),
        ('_w', '_W'),
        ('prediction', 'pred'),
        ('temperature_c', 'temp'),
        ('radiation_', ''),
        ('drybulb_', ''),
        ('_status', ''),
        ('6h_pred', 'pred_6h'),
        ('12h_pred', 'pred_12h'),
        ('24h_pred', 'pred_24h'),
        # do we even need this column?
        ('average_unmet_cooling_setpoint_difference_c', 'avg_unmet_cooling_temp'),
    )

    renamed = s.lower()
    for old, new in substitutions:
        renamed = renamed.replace(old, new)
    return renamed

In [3]:
building_nr = 4

# 1. Load the schema JSON and pull out this building's PV nominal power.
with open("../data/citylearn_challenge_2021/schema.json") as schema_file:
    schema = json.load(schema_file)

building_properties = schema['buildings'][f'Building_{building_nr}']
pv_nominal_power_kW = building_properties['pv']['attributes']['nominal_power']

# 2. Load the weather table and the building table.
weather = pd.read_csv("../data/citylearn_challenge_2021/weather.csv")
building = pd.read_csv(f"../data/citylearn_challenge_2021/Building_{building_nr}.csv")

# The two tables are joined purely by row position, so they must have the same
# number of rows. Check this BEFORE concatenating: pd.concat(axis=1) outer-joins
# on the index, so the combined frame is always as long as the longer input and
# a check made afterwards would silently accept a shorter building file
# (padding the missing rows with NaN).
assert len(building) == len(weather), (
    f"row count mismatch: building has {len(building)} rows, weather has {len(weather)}"
)
building = pd.concat([building, weather], axis=1)

# 3. Build the preprocessed frame.
building = (
    building
    .drop(columns=["Heating Load [kWh]"])
    .assign(
        # NOTE(review): `span` comes from green_city.utils; presumably it returns
        # one timestamp per row for this date range -- confirm its frequency.
        datetime=span('2008-01-02', '2011-12-31'),
        # Day Type 8 is treated as a holiday.
        holiday=lambda x: x["Day Type"] == 8,
    )
    # pandas' dt.weekday counts Monday=0 ... Sunday=6, so Monday-Friday is
    # `weekday < 5` (the previous `1 <= weekday <= 5` selected Tuesday-Saturday).
    # Holidays never count as workdays.
    .assign(workday=lambda x: (x.datetime.dt.weekday < 5) & (x["Day Type"] != 8))
    .drop(columns=["Month", "Hour", "Day Type"])
    .set_index("datetime")
    .rename(columns=rename_cols)
    # Scale the per-kW generation column by the installed PV power and divide
    # by 1000 (W -> kW, going by the column names).
    .assign(solar_generation_kW=lambda x: x.solar_generation_W_kW * pv_nominal_power_kW / 1000)
)

In [4]:
# Write the preprocessed frame (including its datetime index) to a per-building CSV.
preprocessed_path = f"../data/preprocessed/Building_{building_nr}.csv"
building.to_csv(preprocessed_path)