In [2]:
import pandas as pd
import numpy as np
import json
from green_city.utils import span

In [3]:
def rename_cols(s):
    """Turn a raw CSV column header into the short name used in this notebook.

    The header is lower-cased and then a fixed list of substring replacements
    is applied one after another. The order matters: later replacements
    operate on the result of earlier ones (e.g. '[W/kW]' first becomes
    '_w_kw', then '_w_kW', then '_W_kW').

    Parameters
    ----------
    s : str
        Original column header, e.g. 'Outdoor Drybulb Temperature [C]'.

    Returns
    -------
    str
        The normalised name, e.g. 'outdoor_temp'.
    """
    # (old, new) pairs, applied strictly top to bottom.
    substitutions = (
        (' ', '_'),
        ('[', ''),
        (']', ''),
        ('/', '_'),
        ('relative_humidity_%', 'hum'),
        ('_kw', '_kW'),
        ('_w', '_W'),
        ('prediction', 'pred'),
        ('temperature_c', 'temp'),
        ('radiation_', ''),
        ('drybulb_', ''),
        ('_status', ''),
        # move the forecast horizon behind the 'pred' prefix
        ('6h_pred', 'pred_6h'),
        ('12h_pred', 'pred_12h'),
        ('24h_pred', 'pred_24h'),
        # do we even need this column?
        ('average_unmet_cooling_setpoint_difference_c', 'avg_unmet_cooling_temp'),
    )

    renamed = s.lower()
    for old, new in substitutions:
        renamed = renamed.replace(old, new)
    return renamed

In [4]:
# Preprocess the selected buildings: merge each building's CSV with the weather
# data, derive calendar flags, PV generation, heat-pump electricity use and the
# net load, then write one CSV per building to ../data/preprocessed/.
building_nrs = [4, 5]

# Heat-pump (cooling) model parameters; they are the same for every building.
eta_hpc = 0.15  # technical efficiency coefficient 0.2 ... 0.3 typically
temp_target_c = 5  # target temperature for hp cooling, typically 7 ... 10 °C, this equals approx. the temperature of the chilled water storage
T_th = 273.15  # thermodynamic temperature in Kelvin [K] corresponding to 0 °C

# Inputs shared by all buildings are read once instead of once per iteration.
with open("../data/citylearn_challenge_2021/schema.json") as schema_file:
    schema = json.load(schema_file)
weather = pd.read_csv("../data/citylearn_challenge_2021/weather.csv")

for building_nr in building_nrs:
    building_properties = schema['buildings'][f'Building_{building_nr}']
    pv_nominal_power_kW = building_properties['pv']['attributes']['nominal_power']

    building_raw = pd.read_csv(f"../data/citylearn_challenge_2021/Building_{building_nr}.csv")
    # Check the row counts BEFORE concatenating: concat(axis=1) aligns on the
    # index and pads the shorter frame with NaN, so a check on the combined
    # frame would not notice a building file that is shorter than the weather file.
    assert len(building_raw) == len(weather), (
        f"Building_{building_nr}.csv has {len(building_raw)} rows, "
        f"weather.csv has {len(weather)}"
    )

    building = (
        pd.concat([building_raw, weather], axis=1)
        .drop(columns=["Heating Load [kWh]"])
        .assign(
            # span() is a project helper; presumably it yields one timestamp
            # per row for the given date range -- TODO confirm.
            datetime=span('2008-01-02', '2011-12-31'),
            holiday=lambda x: x["Day Type"] == 8,
        )
        # NOTE(review): pandas' dt.weekday is Monday=0 ... Sunday=6, so the
        # range 1..5 selects Tuesday-Saturday. Kept as-is because the
        # timestamps come from span() rather than from the data; confirm
        # whether Monday-Friday (0..4) was intended.
        .assign(workday=lambda x: (x.datetime.dt.weekday >= 1) & (x.datetime.dt.weekday <= 5) & (x["Day Type"] != 8))
        .drop(columns=["Month", "Hour", "Day Type"])
        .set_index("datetime")
        .rename(columns=rename_cols)
        # specific generation [W per kW installed] * installed power [kW] / 1000 -> kW
        .assign(solar_generation_kW=lambda x: x.solar_generation_W_kW * pv_nominal_power_kW / 1000)
    )

    ##### heat pump electric consumption
    # COP = efficiency * T_target / (T_outdoor - T_target), temperatures in Kelvin.
    # NOTE(review): for outdoor_temp <= temp_target_c the denominator is zero or
    # negative, so cop_c becomes infinite or negative; behaviour kept unchanged.
    cop_c = eta_hpc * (temp_target_c + T_th) / ((building['outdoor_temp'] + T_th) - (temp_target_c + T_th))
    # Electric energy consumption of the hp for cooling, rounded to 3 decimals
    building['electric_load_hp_kWh'] = (building['cooling_load_kWh'] / cop_c).round(3)

    # Net load = all electric consumers minus PV generation. The PV column
    # created in the chain above is 'solar_generation_kW'; the former lookup of
    # 'solar_generation_kWh' raised a KeyError because no such column exists.
    # NOTE(review): this adds a kW column to kWh columns -- numerically the
    # same only for hourly rows; confirm the time step.
    building['net_load_kWh'] = (
        building['equipment_electric_power_kWh']
        + building['dhw_heating_kWh']
        + building['electric_load_hp_kWh']
        - building['solar_generation_kW']
    )

    building.to_csv(f"../data/preprocessed/Building_{building_nr}.csv")

### Check all building CSV files for columns that contain NaNs or only a single value (such as all zeros)

In [None]:
def analyze_all_buildings(building_nrs=range(1, 10),
                          data_dir="../data/citylearn_challenge_2021",
                          expected_shape=(35040, 12)):
    """Summarise suspicious columns across the building CSV files.

    For every building file, each column is reduced to its unique values.
    The unique values are kept in the result only when the column is constant
    (exactly one unique value) or contains at least one missing value;
    otherwise the cell is an empty string.

    Parameters
    ----------
    building_nrs : iterable of int, default range(1, 10)
        Building numbers to load (``Building_<nr>.csv``).
    data_dir : str, default "../data/citylearn_challenge_2021"
        Directory that contains the building CSV files.
    expected_shape : tuple of int, default (35040, 12)
        Shape every building file must have.

    Returns
    -------
    pandas.DataFrame
        One row per building number, one column per CSV column.

    Raises
    ------
    AssertionError
        If a building file does not have ``expected_shape``.
    """
    def flag_uniques(values):
        # pd.isna also copes with non-numeric (e.g. string) columns, where
        # np.isnan would raise a TypeError.
        if len(values) == 1 or pd.isna(values).any():
            return values
        return ""

    unique_entries = {}
    for building_nr in building_nrs:
        df = pd.read_csv(f"{data_dir}/Building_{building_nr}.csv")
        assert df.shape == expected_shape, (
            f"Building_{building_nr}.csv has shape {df.shape}, expected {expected_shape}"
        )
        unique_entries[building_nr] = pd.Series(
            {col: flag_uniques(df[col].unique()) for col in df.columns}
        )
    print(f"all building's csv files are of shape {expected_shape}.")

    comparison_df = pd.DataFrame(list(unique_entries.values()), index=list(unique_entries))

    return comparison_df
        
# Run the check on the source CSVs; as the last expression of the cell, the returned table is displayed.
analyze_all_buildings()