# Twomes data extraction and backup

This JupyterLab notebook can be used to download raw data from a Twomes database (see also [more information on how to set up a Twomes server](https://github.com/energietransitie/twomes-backoffice-configuration#jupyterlab)).

Don't forget to install the requirements listed in [requirements.txt](../requirements.txt) first!



## Setting the stage

First, several imports and variables need to be defined.


### Imports and generic settings

In [None]:
from datetime import datetime, timedelta
import pytz
import math
import pylab as plt

import pandas as pd
import numpy as np

import sys
sys.path.append('../data/')
sys.path.append('../view/')
sys.path.append('../analysis/')

%load_ext autoreload

%matplotlib widget
from plotter import Plot
from filewriter import ExcelWriter as ex

from measurements import Measurements
from tqdm.notebook import tqdm


import logging
# Send log records of level INFO and above to 'log.txt'; every record starts
# with a timestamp followed by the level name padded to 8 characters.
logging.basicConfig(
    filename='log.txt',
    level=logging.INFO,
    format='%(asctime)s %(levelname)-8s %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
)

### Defining which homes, which period 

- which homes (`homes_all`, `homes_3` or `homes_single`)
- what the location and timezone of those homes are (currently, we only support one location and timezone for a batch of homes)
- from which `first_day` to which `last_day`

In [None]:
# Location: centre of the Assendorp neighbourhood in Zwolle
lat, lon = 52.50655, 6.09961

# Timezones: one for the database, one for the homes
timezone_database = 'UTC'
timezone_homes = 'Europe/Amsterdam'
tz_homes = pytz.timezone(timezone_homes)

# The maximum period for data collection
first_day = tz_homes.localize(datetime(2021, 10, 25))
last_day = tz_homes.localize(datetime(2022, 5, 8))

# # Alternatively, you may want to test things on a shorter period only
# # (3 to 31 January 2022). This is a period with suitable weather and
# # lots of homes with measurements.
# first_day = tz_homes.localize(datetime(2022, 1, 3))
# last_day = tz_homes.localize(datetime(2022, 1, 31))

# The full set of homes
homes_all = [
    803422, 805164, 809743, 811308, 815925, 817341,
    822479, 829947, 830088, 831062, 839440, 845966,
    845997, 846697, 857477, 864296, 873985, 879481,
    881611, 886307, 895671, 897349, 899510,
]

# A subset of three homes
homes_3 = [803422, 805164, 809743]

# A single home, for virtual homes
homes_single = [886307]

# A single home, for gap assessment
# homes = [803422]

## Getting a single property of raw data

In [None]:
# Extract only a single property (roomTemp); the key is the property name,
# the value a dtype name (presumably applied by Measurements -- TODO confirm).
twomes_single_type_dict = {'roomTemp': 'float32'}


In [None]:
%%time 
%autoreload 2
df = Measurements.get_raw_homes_data(homes_single,
                                     first_day, last_day,
                                     twomes_single_type_dict,
                                     timezone_database, timezone_homes)


### Plotting a single property (e.g. roomTemp)

In [None]:
# Plot the single extracted property (e.g. roomTemp): first one plot of the
# complete DataFrame, then one plot per device type found in the index.

devices = list(df.index.unique(level='device_type'))
property_name = str(list(twomes_single_type_dict)[0])

Plot.temperature_and_power_one_home_plot(
    f'{property_name} in {homes_single}',
    df,
    temp_plot_dict={property_name: 'r'},
)

for device_type in devices:
    Plot.temperature_and_power_one_home_plot(
        f'{device_type}:{property_name} in {homes_single}',
        df.loc[homes_single, :, device_type],
        temp_plot_dict={property_name: 'r'},
    )

## Getting more properties and writing to a parquet file

In [None]:
# The full set of properties to extract. Keys are property names; values are
# dtype names (presumably applied by Measurements -- TODO confirm).
twomes_full_type_dict = {
    'heartbeat': 'Int16',
    'eMeterReadingReturnHigh': 'float32',
    'eMeterReadingReturnLow': 'float32',
    'eMeterReadingSupplyHigh': 'float32',
    'eMeterReadingSupplyLow': 'float32',
    'eMeterReadingTimestamp': 'str',
    'gMeterReadingSupply': 'float32',
    'gMeterReadingTimestamp': 'str',
    'listRSSI': 'str',
    'boilerTemp1': 'float32',
    'boilerTemp2': 'float32',
    'roomTemp': 'float32',
    'boilerSupplyTemp': 'float32',
    'isBoilerFlameOn': 'Int8',
    'isCentralHeatingModeOn': 'Int8',
    'isDomesticHotWaterModeOn': 'Int8',
    'maxModulationLevel': 'Int8',
    'roomSetpointTemp': 'float32',
    'minModulationLevel': 'Int8',
    'boilerReturnTemp': 'float32',
    'relativeModulationLevel': 'Int8',
    'boilerMaxSupplyTemp': 'float32',
    'CO2concentration': 'Int16',
    'relativeHumidity': 'float32',
    'roomTemp2': 'float32',
}


In [None]:
# A more minimal set of properties to extract, as done in earlier queries.
# Keys are property names; values are dtype names.
twomes_limited_type_dict = {
    'eMeterReadingReturnHigh': 'float32',
    'eMeterReadingReturnLow': 'float32',
    'eMeterReadingSupplyHigh': 'float32',
    'eMeterReadingSupplyLow': 'float32',
    'eMeterReadingTimestamp': 'str',
    'gMeterReadingSupply': 'float32',
    'gMeterReadingTimestamp': 'str',
    'roomTemp': 'float32',
    'roomSetpointTemp': 'float32',
}


In [None]:
%%time 
df_data_homes = Measurements.get_raw_homes_data(homes_all,
                                                first_day, last_day,
                                                twomes_limited_type_dict,
                                                timezone_database, timezone_homes)



In [None]:
%%time 
df_data_homes.to_parquet('homes_all_twomes_limited_type_dict.parquet', index=True, engine='pyarrow')

In [None]:
%%time 
df_data_homes = Measurements.get_raw_homes_data(homes_all,
                                                first_day, last_day,
                                                twomes_full_type_dict,
                                                timezone_database, timezone_homes)



In [None]:
# Print the summary that `info()` produces for the fetched data.
df_data_homes.info()

In [None]:
%%time 
df_data_homes.to_parquet('homes_all_twomes_full_type_dict.parquet', index=True, engine='pyarrow')

In [None]:
# Display the fetched data as the output of this cell.
df_data_homes

In [None]:
# Display the descriptive statistics that `describe()` computes for the fetched data.
df_data_homes.describe()

### Write individual data for individual homes to parquet files

In [None]:
%%time 
for home_id in tqdm(homes_all):
    filename = f'{home_id}-rawdata_{first_day.isoformat()}-{(last_day+timedelta(days=1)+ timedelta(hours=1)).isoformat()}.parquet'
    df_data_homes.loc[home_id].to_parquet(filename, index=True, engine='pyarrow')


### Write raw data to a CSV file

In [None]:
%%time 
%autoreload 2
df = Measurements.get_raw_measurements(homes_all,
                                     first_day, last_day,
                                     twomes_full_type_dict,
                                     timezone_database, timezone_homes)


In [None]:
%%time 
for home_id in tqdm(list(df.index.unique(level='home_id'))):
    filename = f'{home_id}-rawdata_{first_day.isoformat()}-{(last_day+timedelta(days=1)+ timedelta(hours=1)).isoformat()}.csv'
    df_write = df.copy(deep=True).reset_index([0,2,3])
    df_write['unix_time'] = df_write.index.map(pd.Timestamp.timestamp).astype(int)
    # df_write['unix_time'] = df_write['unix_time'].astype(int)
    df_write = (df_write
                .sort_values('unix_time')
                .reset_index(drop=True)
                [['home_id', 'unix_time', 'device_type','property', 'value', 'unit']])
    df_write.index.name = '#'
    df_write.to_csv(filename)
    del(df_write)
