In [None]:
from datetime import datetime, timedelta
import pytz
import math
import pylab as plt

import pandas as pd
import numpy as np

import sys
sys.path.append('../data/')
sys.path.append('../view/')
sys.path.append('../analysis/')

%load_ext autoreload

%matplotlib widget
from plotter import Plot
from filewriter import ExcelWriter as ex

from extractor import WeatherExtractor, Extractor, Period

from inversegreyboxmodel import Learner

# --- Preprocessing defaults -------------------------------------------------
# Multiplier of the standard deviation: values further out than this many
# standard deviations are removed as outliers during preprocessing.
n_std_outliers = 3.0
# Upsampling interval that is applied before interpolation is done.
up_intv = '5min'
# Maximum number of consecutive NaNs to fill (one per upsampling interval), i.e.
# valid measurement values up to (11 + 1) * 5 min = 1 hour apart are bridged by
# interpolation, but larger gaps are not.
gap_n_intv = 11
# Interval on which interpolation is done during preprocessing.
sampling_interval = '15min'
# Default moving horizon duration (presumably in days, given the `_d` suffix).
moving_horizon_duration_d = 7

# --- Location: centre of the Assendorp neighbourhood in Zwolle --------------
lat = 52.50655
lon = 6.09961

# --- Timezone of the homes --------------------------------------------------
timezone_homes = 'Europe/Amsterdam'

# TODO: get the list of pseudonyms, (valid) dates and reference parameters from an Excel file?
# Ultimately these are the minimum and maximum dates of the analysis; both are
# localized to the timezone of the homes so they are timezone-aware.
start = pytz.timezone(timezone_homes).localize(datetime(year=2021, month=10, day=21))
end = pytz.timezone(timezone_homes).localize(datetime(year=2022, month=5, day=12))
# Pseudonyms of all homes in the study, in their original (unsorted) order.
homes = [
    815925, 817341, 857477, 873985, 879481, 895671,
    864296, 845997, 881611, 822479, 831062, 830088,
    811308, 809743, 829947, 803422, 886307, 846697,
    845966, 839440, 805164, 899510, 897349,
]
# The same pseudonyms in ascending order (despite the name, this is not `homes` reversed).
reversedhomes = sorted(homes)

In [None]:
# Display `start` to check that the datetime object is properly timezone-aware
# (an aware datetime shows its tzinfo in the output).
start

In [None]:
# Display `end` to check that the datetime object is properly timezone-aware
# (an aware datetime shows its tzinfo in the output).
end

In [None]:
%%time 
%autoreload 2
# get geospatially interpolated weather from KNMI
# for Twomes, the Weather for all all homes studies can be approached by a single location
# get the dataframe only once for all homes to save time

df_weather = WeatherExtractor.get_interpolated_weather_nl(start, end, lat, lon, timezone_homes, sampling_interval)

In [None]:
# Display the weather dataframe obtained from WeatherExtractor in the previous cell
df_weather

In [None]:
%%time 
# get interpolated data from the Twomes database and combine with weather data already obtained


# Optionally: oveerride homes and dates for which to get data
homes_to_extract =  [803422, 805164, 817341, 886307, 873985]
# homes_to_extract =  homes

start_extraction = pytz.timezone(timezone_homes).localize(datetime(2022, 1, 3))
end_extraction = pytz.timezone(timezone_homes).localize(datetime(2022, 1, 6))
# start_extraction = start
# end_extraction = end

df_data_homes = Extractor.get_preprocessed_homes_data(homes_to_extract, start_extraction, end_extraction, n_std_outliers, up_intv, gap_n_intv, sampling_interval, timezone_homes, df_weather)

In [None]:
# Display the dataframe returned by Extractor.get_preprocessed_homes_data
# to see what the extracted and interpolated data looks like
df_data_homes

In [None]:
%%time 
%autoreload 2

filename_prefix = datetime.now().astimezone(pytz.timezone('Europe/Amsterdam')).replace(microsecond=0).isoformat().replace(":","")
ex.write(df_data_homes, str('{0}-data_homes-{1}-{2}.xlsx'.format(filename_prefix, start.isoformat(),end.isoformat())))

In [None]:
# Plot the temperature data in the weather dataframe; the first argument is the figure title

Plot.weather_and_other_temperatures('Weather in Assendorp, Zwolle', df_weather)

# N.B. The resulting figure below can be manipulated interactively; hover with the mouse for tips & tricks

In [None]:
# how to select data from a single home
# df_data_homes[df_data_homes['homepseudonym'] == 817341]

In [None]:
%%time 
%autoreload 2

# homes with most data
# homes_to_analyse = [886307, 873985, 817341]
# start_analysis_period = pytz.timezone(timezone_homes).localize(datetime(2021, 12, 20))
# end_analysis_period = pytz.timezone(timezone_homes).localize(datetime(2022, 1, 18))
# date and times of seemingly valid periods for short set of 3 
# 886307	2021-12-19 18:55	2022-03-17 09:37
# 873985	2021-12-20 12:54	2022-03-17 17:54
# 817341	2022-01-14 20:27	2022-03-17 18:15
    
# homes_to_analyze = [817341]
homes_to_analyse = homes_to_extract
# homes_to_analyse = homes

# use comment/ncomment lines below if you want the default values for full analysis
start_analysis_period = start_extraction
end_analysis_period = end_extraction
# start_analysis_period = start
# end_analysis_period = end

# Use one of the lines below to set the moving horizon duration used for analysis 
moving_horizon_duration_d_analysis = 2
# moving_horizon_duration_d_analysis = moving_horizon_duration_d

# learn the model parameters and write rerults an intermediate results to excel files
df_results = Learner.learn_home_parameter_moving_horizon(df_data_homes, 
                                                         n_std_outliers, up_intv, gap_n_intv, sampling_interval, 
                                                         moving_horizon_duration_d_analysis,
                                                         homes_to_analyze,
                                                         start_analysis_period, end_analysis_period, 
                                                         showdetails=False, A_m2=6.0)

In [None]:
# Show the results returned by Learner.learn_home_parameter_moving_horizon
df_results

In [None]:
# plot temperature data of multiple homes from an array 
# %autoreload 2
# for home_id in homes:
#     df_data_one_home = df_data_homes[df_data_homes['homepseudonym'] == home_id]
#     Plot.weather_and_other_temperatures(home_id, df_data_one_home, [('indoor_temp_degC','r'),('indoor_setpoint_temp_degC','g')])

# # N.B. The resulting figure below can be manipulated interactively; hover with mouse for tips & tricks


In [None]:
#plot a series of weeks for all homes
# plt.ioff()

# for home_id in homes:
#     df_data_one_home = df_data_homes[df_data_homes['homepseudonym'] == home_id]
#     for moving_horizon_start in pd.date_range(start=start, end=end, inclusive='left', freq='7D'):
#         moving_horizon_end = min(end, moving_horizon_start + timedelta(days=7))
#         df_moving_horizon = df_data_one_home[moving_horizon_start:moving_horizon_end]
#         Plot.weather_and_other_temperatures(home_id, df_moving_horizon, ['indoor_temp_degC'])

# plt.ion()  
    