In [13]:
import pandas as pd
import numpy as np
import json
from green_city.utils import span

In [14]:
def rename_cols(s):
    """Turn a raw CSV column header into the short snake_case name used in this notebook.

    The header is lower-cased and then run through a fixed list of substring
    substitutions. The substitutions are applied strictly in the listed order,
    and later rules rely on the output of earlier ones (for example 'kWh' only
    exists after '_kw' has been rewritten to '_kW').

    Parameters
    ----------
    s : str
        Original column header, e.g. 'Equipment Electric Power [kWh]'.

    Returns
    -------
    str
        The renamed column, e.g. 'equipment_electric_power_kW'.
    """
    # (old, new) pairs; order is significant.
    substitutions = (
        (' ', '_'),
        ('[', ''),
        (']', ''),
        ('/', '_'),
        ('relative_humidity_%', 'hum'),
        ('_kw', '_kW'),
        ('_w', '_W'),
        ('prediction', 'pred'),
        ('temperature_c', 'temp'),
        ('radiation_', ''),
        ('drybulb_', ''),
        ('_status', ''),
        ('6h_pred', 'pred_6h'),
        ('12h_pred', 'pred_12h'),
        ('24h_pred', 'pred_24h'),
        ('kWh', 'kW'),
        # open question from the original author: do we even need this column?
        ('average_unmet_cooling_setpoint_difference_c', 'avg_unmet_cooling_temp'),
    )

    renamed = s.lower()
    for old, new in substitutions:
        renamed = renamed.replace(old, new)
    return renamed

In [28]:
# Numbers of the buildings whose raw CSV files are preprocessed below.
building_nrs = [1,2,3,4,5,6,7,8,9]

##### heat pump model parameters (identical for every building, so defined once)
eta_hpc = 0.15  # technical efficiency coefficient 0.2 ... 0.3 typically
temp_target_c = 5  # target temperature for hp cooling, typically 7 ... 10 °C, this equals approx. the temperature of the chilled water storage
T_th = 273.15  # thermodynamic temperature in Kelvin [K] corresponding to 0 °C

# The schema and the weather table are shared by all buildings: read them once
# instead of re-reading both files on every loop iteration.
with open("../data/schema.json") as schema_file:
    schema = json.load(schema_file)
weather = pd.read_csv("../data/citylearn_challenge_2021/weather.csv")

for building_nr in building_nrs:
    #1. look up the nominal PV power of this building in the schema
    building_properties = schema['buildings'][f'Building_{building_nr}']
    pv_nominal_power_kW = building_properties['pv']['attributes']['nominal_power']

    #2. load the building's time series and attach the weather columns
    building_raw = pd.read_csv(f"../data/citylearn_challenge_2021/Building_{building_nr}.csv")
    # Compare row counts BEFORE concatenating: pd.concat(axis=1) pads the shorter
    # frame with NaN rows, so a check on the concatenated frame cannot detect a
    # building file that is shorter than the weather file.
    assert len(building_raw) == len(weather), (
        f"Building_{building_nr}.csv has {len(building_raw)} rows but weather.csv has {len(weather)}"
    )
    building = pd.concat([building_raw, weather], axis=1)

    #3. derive calendar flags, index by datetime, rename columns, scale PV generation
    building = (building
        .drop(columns=["Heating Load [kWh]"])
        .assign(
            # presumably `span` returns one timestamp per row between the two dates -- TODO confirm
            datetime = span('2008-01-02', '2011-12-31'),
            holiday = lambda x: x["Day Type"] == 8)
        # NOTE(review): pandas' dt.weekday is 0 for Monday, so 1..5 selects
        # Tuesday..Saturday of the assigned datetime. Confirm this is intended
        # (e.g. to compensate for the shifted start date) and not an off-by-one.
        .assign(workday = lambda x: (x.datetime.dt.weekday >= 1) & (x.datetime.dt.weekday <= 5) & (x["Day Type"] != 8) )
        .drop(columns=["Month", "Hour", "Day Type"])
        .set_index("datetime")
        .rename(columns=rename_cols)
        # generation is given in W per kW of installed PV; scale by the nominal power and convert W -> kW
        .assign(solar_generation_kW = lambda x: x.solar_generation_W_kW * pv_nominal_power_kW * 0.001)
        .assign(solar_generation_kW = lambda x: x.solar_generation_kW.fillna(0))
    )

    #4. heat pump electric consumption for cooling
    # Coefficient of performance: efficiency factor times the ratio of the target
    # temperature [K] to the temperature lift (outdoor - target).
    # NOTE(review): cop_c is infinite when outdoor_temp equals temp_target_c and
    # negative when it is lower, which yields a zero / negative electric load for
    # those hours -- confirm this is acceptable for the analysis.
    cop_c = eta_hpc * (temp_target_c + T_th)/ ((building['outdoor_temp'] + T_th) - (temp_target_c + T_th))
    # electric consumption of the heat pump for cooling [kW]
    building['electric_load_hp_kW'] = (building['cooling_load_kW'] / cop_c).round(3)

    #5. net load = all electric consumers minus PV generation
    building['net_load_kW'] = (
        building['equipment_electric_power_kW']
        + building['dhw_heating_kW']
        + building['electric_load_hp_kW']
        - building['solar_generation_kW']
    ).round(3)

    #6. write the preprocessed table (datetime index included)
    building.to_csv(f"../data/preprocessed/Building_{building_nr}.csv")

### Check all building CSV files for columns that contain NaNs or only a single value (such as all zeros)

In [None]:
def analyze_all_buildings(data_dir="../data/citylearn_challenge_2021", building_nrs=range(1, 10), expected_shape=(35040, 12)):
    """Summarise, per building CSV, which columns are constant or contain NaNs.

    For every building file the unique values of each column are computed.
    A column is reported (its array of unique values is kept) when it has
    exactly one unique value or when any unique value is missing; all other
    columns are represented by an empty string.

    Parameters
    ----------
    data_dir : str
        Directory containing the files ``Building_<nr>.csv``.
    building_nrs : iterable of int
        Building numbers to analyse.
    expected_shape : tuple of int
        Shape ``(rows, columns)`` every building file must have.

    Returns
    -------
    pandas.DataFrame
        One row per building number, one column per CSV column; each cell is
        either the array of unique values (suspicious column) or ``""``.

    Raises
    ------
    AssertionError
        If a building file does not have ``expected_shape``.
    """
    expected_shape = tuple(expected_shape)

    def col_str(values):
        # pd.isna also accepts non-numeric values; np.isnan raises a TypeError
        # as soon as a column holds strings.
        if len(values) == 1 or pd.isna(values).any():
            return values
        return ""

    unique_entries = {}
    for building_nr in building_nrs:
        df = pd.read_csv(f"{data_dir}/Building_{building_nr}.csv")
        assert df.shape == expected_shape, (
            f"Building_{building_nr}.csv has shape {df.shape}, expected {expected_shape}"
        )
        unique_entries[building_nr] = pd.Series({col: col_str(df[col].unique()) for col in df.columns})
    print(f"all building's csv files are of shape {expected_shape}.")

    # one row per building, indexed by building number
    comparison_df = pd.DataFrame(list(unique_entries.values()), index=list(unique_entries.keys()))

    return comparison_df
        
# Run the check; as the last expression of the cell, the returned comparison table is displayed.
analyze_all_buildings()

In [26]:
# Read back the preprocessed CSV of building 6 to inspect what was written.
# No index column is specified, so 'datetime' is loaded as an ordinary column.
df = pd.read_csv('../data/preprocessed/Building_6.csv')
# Display the first 20 rows.
df.head(20)

Unnamed: 0,datetime,daylight_savings,indoor_temp,avg_unmet_cooling_temp,indoor_hum,equipment_electric_power_kW,dhw_heating_kW,cooling_load_kW,solar_generation_W_kW,outdoor_temp,...,pred_12h_diffuse_solar_W_m2,pred_24h_diffuse_solar_W_m2,pred_6h_direct_solar_W_m2,pred_12h_direct_solar_W_m2,pred_24h_direct_solar_W_m2,holiday,workday,solar_generation_kW,electric_load_hp_kW,net_load_kW
0,2008-01-02 00:00:00,0,22.8,0.0,53.12,10.3,2.04,2.46,0.0,9.4,...,170.84,0.0,-0.0,5.32,0.0,True,False,0.0,0.259,12.599
1,2008-01-02 01:00:00,0,22.54,0.0,51.02,7.6,2.52,1.36,0.0,7.61,...,112.24,0.0,1.11,2.04,-0.0,True,False,0.0,0.085,10.205
2,2008-01-02 02:00:00,0,22.64,0.0,48.25,11.9,0.36,2.07,0.0,8.11,...,65.93,0.0,1.74,1.04,0.0,True,False,0.0,0.154,12.414
3,2008-01-02 03:00:00,0,22.47,0.0,47.55,7.0,1.68,1.27,0.0,8.36,...,38.59,-0.0,1.09,0.99,0.0,True,False,0.0,0.102,8.782
4,2008-01-02 04:00:00,0,22.51,0.0,46.37,10.1,0.0,1.61,0.0,8.78,...,15.92,0.0,1.78,0.84,0.0,True,False,0.0,0.146,10.246
5,2008-01-02 05:00:00,0,22.62,0.0,45.61,13.3,1.2,2.34,0.0,9.25,...,0.0,-0.0,4.83,0.0,-0.0,True,False,0.0,0.238,14.738
6,2008-01-02 06:00:00,0,22.62,0.0,45.76,11.9,8.88,2.19,0.0,9.78,...,0.0,0.0,5.26,0.0,0.0,True,False,0.0,0.251,21.031
7,2008-01-02 07:00:00,0,22.77,0.0,45.54,18.8,19.68,4.92,0.0,10.0,...,-0.0,20.07,2.05,-0.0,3.16,True,False,0.0,0.59,39.07
8,2008-01-02 08:00:00,0,22.66,0.0,45.76,13.2,12.96,2.21,28.2155,10.0,...,0.0,61.04,1.05,0.0,5.64,True,False,0.56431,0.265,25.86069
9,2008-01-02 09:00:00,0,22.64,0.0,45.63,13.2,13.68,2.55,45.8566,10.5,...,-0.0,134.76,0.99,-0.0,12.92,True,False,0.917132,0.336,26.298868
