# Twomes data extraction and backup

This JupyterLab notebook can be used to download raw data from a Twomes database (see also [more information on how to set up a Twomes server](https://github.com/energietransitie/twomes-backoffice-configuration#jupyterlab)).

Don't forget to install the requirements listed in [requirements.txt](../requirements.txt) first!



## Setting the stage

First, several imports and variables need to be defined.


### Imports and generic settings

In [None]:
from datetime import datetime, timedelta
import pytz
import pylab as plt

import pandas as pd
import numpy as np

# usually, two decimals suffice for displaying DataFrames (NB internally, precision may be higher)
pd.options.display.precision = 2

import sys
sys.path.append('../data/')

%load_ext autoreload
import gc

from measurements import Measurements

from tqdm.notebook import tqdm


import logging

# Write log records of level INFO and above to the file 'log.txt'.
# Each record starts with a 'YYYY-MM-DD HH:MM:SS' timestamp, followed by the level name
# (left-aligned, padded to 8 characters) and the message.
logging.basicConfig(
    filename='log.txt',
    level=logging.INFO,
    format='%(asctime)s %(levelname)-8s %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
)

### Defining which homes, which period 

- which `homes` 
- what the location and timezone of those homes are (currently, we only support one location and timezone for a batch of homes) 
- from which `start_day` to which `end_day` 

In [None]:
# Location: center of the Assendorp neighbourhood in Zwolle
lat = 52.50655
lon = 6.09961

# Timezones (both are passed on to Measurements.get_raw_measurements)
timezone_database = 'UTC'
timezone_homes = 'Europe/Amsterdam'

# Timezone object used to make the period boundaries below timezone-aware
tz_homes = pytz.timezone(timezone_homes)

# Period: maximum period of data collection
first_day = tz_homes.localize(datetime(2021, 10, 25))
last_day = tz_homes.localize(datetime(2022, 5, 8))

# Period: shorter period with suitable weather and lots of homes with measurements.
# first_day = tz_homes.localize(datetime(2022, 1, 3))
# last_day = tz_homes.localize(datetime(2022, 1, 31))


# Homes: full set (23 homes)
homes_all = [
    803422, 805164, 809743, 811308, 815925, 817341,
    822479, 829947, 830088, 831062, 839440, 845966,
    845997, 846697, 857477, 864296, 873985, 879481,
    881611, 886307, 895671, 897349, 899510,
]

# Homes: subset of homes (the first three of the full set)
homes_3 = homes_all[:3]

# Homes: single home (virtual home)
homes_single = [886307]

# Homes: single home for gap assessment
# homes = [803422]

# Properties: a single one
# The renamed property uses `temp_in__degC` (double underscore between name and unit),
# i.e. exactly the name that the limited and full property sets give to 'roomTemp'.
twomes_single_property_rename = {
    'roomTemp' : 'temp_in__degC'
}
twomes_single_property = list(twomes_single_property_rename.keys())

# dtype per renamed property; the key must match the rename target above
twomes_single_property_type = {
    'temp_in__degC' : 'float32'
}


# Properties: limited set
# One row per property: (Twomes property name, renamed property name, dtype)
_limited_property_rows = [
    ('eMeterReadingSupplyLow',  'e_use_lo_cum__kWh',       'float64'),
    ('eMeterReadingSupplyHigh', 'e_use_hi_cum__kWh',       'float64'),
    ('eMeterReadingReturnLow',  'e_ret_lo_cum__kWh',       'float64'),
    ('eMeterReadingReturnHigh', 'e_ret_hi_cum__kWh',       'float64'),
    ('eMeterReadingTimestamp',  'e_timestamp__YYMMDDhhmX', 'str'),
    ('gMeterReadingTimestamp',  'g_timestamp__YYMMDDhhmX', 'str'),
    ('gMeterReadingSupply',     'g_use_cum__m3',           'float64'),
    ('roomTemp',                'temp_in__degC',           'float32'),
    ('roomSetpointTemp',        'temp_set__degC',          'float32'),
]

# Twomes property name -> renamed property name
twomes_limited_properties_rename = {
    source: target for source, target, _dtype in _limited_property_rows
}
# Twomes property names, in table order
twomes_limited_properties = [source for source, _target, _dtype in _limited_property_rows]

# Renamed property name -> dtype
twomes_limited_properties_types = {
    target: dtype for _source, target, dtype in _limited_property_rows
}

# Properties: full set
# Every Twomes property name is listed exactly once. Python keeps only the last value of a
# key that is repeated in a dict literal, so repeated entries would silently mask each other.
# NOTE(review): 'roomTemp' and 'heartbeat' used to be repeated in this listing, presumably
# because more than one device type reports them -- confirm.
twomes_full_properties_rename = {
    # electricity and gas meter readings
    'eMeterReadingSupplyLow' : 'e_use_lo_cum__kWh',
    'eMeterReadingSupplyHigh' : 'e_use_hi_cum__kWh',
    'eMeterReadingReturnLow' : 'e_ret_lo_cum__kWh',
    'eMeterReadingReturnHigh' : 'e_ret_hi_cum__kWh',
    'eMeterReadingTimestamp' : 'e_timestamp__YYMMDDhhmX',
    'gMeterReadingTimestamp' : 'g_timestamp__YYMMDDhhmX',
    'gMeterReadingSupply' : 'g_use_cum__m3',
    # room temperature and setpoint
    'roomTemp' : 'temp_in__degC',
    'roomSetpointTemp' : 'temp_set__degC',
    # CO2, humidity and the accompanying room temperature
    'CO2concentration' : 'co2__ppm',
    'humidity' : 'rel_humidity__0',
    # NOTE(review): 'roomTempCO2' is mapped to the same target name as 'roomTemp';
    # confirm that merging both sources into `temp_in__degC` is intended.
    'roomTempCO2' : 'temp_in__degC',
    # boiler temperatures
    'boilerTemp1' : 'temp1__degC',
    'boilerTemp2' : 'temp2__degC',
    'heartbeat' : 'heartbeat',
    # boiler status and modulation
    'isCentralHeatingModeOn' : 'ch__bool',
    'isDomesticHotWaterModeOn' : 'dhw__bool',
    'isBoilerFlameOn' : 'flame__bool',
    'maxModulationLevel' : 'mod_max__0',
    'maxBoilerCap' : 'cap_max__kW',
    'minModulationLevel' : 'mod_min__0',
    'relativeModulationLevel' : 'mod__0',
    'boilerMaxSupplyTemp' : 'temp_ch_max__degC',
    'boilerSupplyTemp' : 'temp_sup__degC',
    'boilerReturnTemp' : 'temp_ret__degC',
    # presence detection
    'listRSSI' : 'presence__dBm_csv'
}
twomes_full_properties = list(twomes_full_properties_rename.keys())

# dtype per renamed property: one entry for every distinct rename target above
twomes_full_properties_types = {
    'e_use_lo_cum__kWh' : 'float64',
    'e_use_hi_cum__kWh' : 'float64',
    'e_ret_lo_cum__kWh' : 'float64',
    'e_ret_hi_cum__kWh' : 'float64',
    'e_timestamp__YYMMDDhhmX' : 'str',
    'g_timestamp__YYMMDDhhmX' : 'str',
    'g_use_cum__m3' : 'float64',
    'temp_in__degC' : 'float32',
    'temp_set__degC' : 'float32',
    'co2__ppm' : 'float32',
    'rel_humidity__0' : 'float32',
    'temp1__degC' : 'float32',
    'temp2__degC' : 'float32',
    'heartbeat' : 'Int16',
    'ch__bool' : 'Int8',
    'dhw__bool' : 'Int8',
    'flame__bool' : 'Int8',
    'mod_max__0' : 'Int8',
    'cap_max__kW' : 'Int8',
    'mod_min__0' : 'Int8',
    'mod__0' : 'Int8',
    'temp_ch_max__degC' : 'float32',
    'temp_sup__degC' : 'float32',
    'temp_ret__degC' : 'float32',
    'presence__dBm_csv' : 'str'
}


## Getting measurements: 1 property, 1 home

In [None]:
%%time 
%autoreload 2
# Raw measurements: one property, one home, maximum period.
# NOTE(review): the per-argument notes are inferred from the variable names;
# confirm against Measurements.get_raw_measurements.
df_meas = Measurements.get_raw_measurements(
    homes_single,                     # homes to fetch
    first_day,                        # start of the period
    last_day,                         # end of the period
    twomes_single_property,           # Twomes property names to fetch
    twomes_single_property_rename,    # Twomes property name -> renamed property name
    timezone_database,
    timezone_homes,
)

In [None]:
df_meas.info()

In [None]:
df_meas

## Get measurements for more properties for 3 homes

In [None]:
%%time 
# Raw measurements: limited property set, three homes, maximum period.
# NOTE(review): the per-argument notes are inferred from the variable names;
# confirm against Measurements.get_raw_measurements.
df_meas = Measurements.get_raw_measurements(
    homes_3,                             # homes to fetch
    first_day,                           # start of the period
    last_day,                            # end of the period
    twomes_limited_properties,           # Twomes property names to fetch
    twomes_limited_properties_rename,    # Twomes property name -> renamed property name
    timezone_database,
    timezone_homes,
)

In [None]:
df_meas.info()

In [None]:
df_meas

## Get all measurements for all homes and write to parquet file(s)

### Get measurements for all properties for 23 homes

In [None]:
%%time 
# Raw measurements: full property set, all homes, maximum period.
# NOTE(review): the per-argument notes are inferred from the variable names;
# confirm against Measurements.get_raw_measurements.
df_meas = Measurements.get_raw_measurements(
    homes_all,                        # homes to fetch
    first_day,                        # start of the period
    last_day,                         # end of the period
    twomes_full_properties,           # Twomes property names to fetch
    twomes_full_properties_rename,    # Twomes property name -> renamed property name
    timezone_database,
    timezone_homes,
)

In [None]:
df_meas.info()

In [None]:
df_meas

### Writing raw measurements to a parquet file

In [None]:
%%time 
# Store all raw measurements, index included, in a single parquet file.
df_meas.to_parquet(
    'twomes_raw_measurements.parquet',
    engine='pyarrow',
    index=True,
)

### Write raw measurements per home to parquet files

In [None]:
%%time 
# One parquet file per home id found in the 'id' level of the index;
# drop_level=False keeps that level in the per-home file.
home_ids = list(df_meas.index.unique(level='id'))
for home_id in tqdm(home_ids):
    (
        df_meas
        .xs(home_id, drop_level=False)
        .to_parquet(f'{home_id}_raw_measurements.parquet', index=True, engine='pyarrow')
    )

## Put properties in separate columns, apply types and write parquet file(s)

In [None]:
# Unstacking the entire Twomes dataset at once takes about 32 GB of memory, so it is done
# home by home instead; release the combined measurements first.
del df_meas

# Earlier cells displayed `df_meas`, so IPython's output cache (`Out[...]`, `_`, `__`, `_<n>`)
# still references those DataFrames and `del` alone cannot free them. Flush the output cache
# as well; this call is equivalent to the line magic `%reset -f out`.
get_ipython().run_line_magic('reset', '-f out')
gc.collect()

### Writing raw properties per home to a parquet file

In [None]:
%%time
# Convert the raw measurements to properties home by home and write one parquet file per home.
# The per-home results are collected in a list and concatenated once at the end: calling
# pd.concat inside the loop copies everything gathered so far again on every iteration.
df_prop_homes = []

for home_id in tqdm(homes_all):
    try:
        df_meas_home = pd.read_parquet(f'{home_id}_raw_measurements.parquet', engine='pyarrow')
    except FileNotFoundError:
        # Per-home measurement files are only written for home ids that occur in the raw
        # measurements, so a home in `homes_all` without any data has no file to read.
        # Log it and carry on instead of aborting the whole run.
        logging.warning('No raw measurements file for home %s; skipping', home_id)
        continue

    df_prop_home = Measurements.to_properties(df_meas_home, twomes_full_properties_types)
    df_prop_home.to_parquet(f'{home_id}_raw_properties.parquet', index=True, engine='pyarrow')
    df_prop_homes.append(df_prop_home)

# pd.concat raises on an empty list; fall back to an empty DataFrame when nothing was converted
df_prop = pd.concat(df_prop_homes) if df_prop_homes else pd.DataFrame()

# Drop the extra references so that only `df_prop` keeps the combined data alive
del df_prop_homes

In [None]:
df_prop.info()

In [None]:
df_prop

### Writing raw properties to a parquet file

In [None]:
%%time 
# Store the properties of all homes, index included, in a single parquet file.
df_prop.to_parquet(
    'twomes_raw_properties.parquet',
    engine='pyarrow',
    index=True,
)