In [47]:
import os
from dateutil.tz import gettz
import pandas as pd

# Dataset locations, resolved against the notebook's working directory.
# The components are joined with os.path.join so the separator is correct on
# every OS; hard-coded "\\" separators are only treated as directory
# separators on Windows (on POSIX the whole string becomes one file name).
DATASETS_DIR = os.path.join("..", "shared_storage", "datasets")

# Raw meter database of the apartment house.
SRC_HOUSE_DATASET_PATH = os.path.abspath(
    os.path.join(DATASETS_DIR, "src", "apartment_house", "1.db")
)
# Target location for the preprocessed heating-circuit data of the house.
PREPROCESSED_HOUSE_DATASET_PATH = os.path.abspath(
    os.path.join(DATASETS_DIR, "preprocessed", "apartment_house_heating_circuit", "1.pickle")
)

# Raw meter database of the boiler.
SRC_BOILER_DATASET_PATH = os.path.abspath(
    os.path.join(DATASETS_DIR, "src", "boilers", "144.db")
)

# Analysis window: midnight 2021-09-04 to midnight 2022-05-01, both expressed
# in the Asia/Yekaterinburg timezone.
start_timestamp = pd.Timestamp(
    year=2021, month=9, day=4, hour=0, minute=0, second=0,
    tz=gettz("Asia/Yekaterinburg"),
)
end_timestamp = pd.Timestamp(
    year=2022, month=5, day=1, hour=0, minute=0, second=0,
    tz=gettz("Asia/Yekaterinburg"),
)

# Step of the regular time grid used by the timestamp rounding/interpolation
# algorithms created later in the notebook.
time_tick = pd.Timedelta(minutes=3)

In [56]:
from boiler.constants import column_names
from boiler.data_processing.timestamp_parsing_algorithm import SimpleTimestampParsingAlgorithm

import sqlite3


def parse_from_conn(con):
    """Load the forward-temperature series of the "OV" service from a meter DB.

    Reads the whole ``meter_measurements`` table through ``con``, keeps the
    rows whose ``service`` column equals ``"OV"``, converts ``t1`` to float,
    parses ``d_timestamp`` with ``SimpleTimestampParsingAlgorithm`` configured
    for the Asia/Yekaterinburg timezone, and renames both columns to the
    project-wide names.

    Args:
        con: Open database connection accepted by ``pandas.read_sql`` (the
            notebook passes a ``sqlite3`` connection). The connection is not
            closed here; the caller owns it.

    Returns:
        pandas.DataFrame with exactly two columns:
        ``column_names.TIMESTAMP`` and ``column_names.FORWARD_TEMP``.
    """
    measurements = pd.read_sql("SELECT * FROM meter_measurements", con)

    # Select rows and columns in a single .loc step and take an explicit copy:
    # the column assignments below then write to an independent frame instead
    # of a slice of `measurements` (which can raise SettingWithCopyWarning).
    df = measurements.loc[measurements["service"] == "OV", ["d_timestamp", "t1"]].copy()

    # Vectorised float conversion; missing values (None) become NaN.
    df["t1"] = df["t1"].astype(float)

    # Two textual timestamp layouts are accepted: "YYYY-MM-DD hh:mm" followed
    # by seven more characters, and "DD.MM.YYYY h:mm" / "DD.MM.YYYY hh:mm".
    timestamp_parser = SimpleTimestampParsingAlgorithm(
        datetime_patterns=(
            r"(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})\s(?P<hours>\d{2}):(?P<minutes>\d{2}).{7}",
            r"(?P<day>\d{2})\.(?P<month>\d{2})\.(?P<year>\d{4})\s(?P<hours>\d{1,2}):(?P<minutes>\d{2})"
        ),
        timezone=gettz("Asia/Yekaterinburg")
    )
    df["d_timestamp"] = df["d_timestamp"].apply(timestamp_parser.parse_datetime)

    df = df.rename(columns={"t1": column_names.FORWARD_TEMP, "d_timestamp": column_names.TIMESTAMP})

    return df

In [57]:
from boiler.data_processing.beetween_filter_algorithm import LeftClosedTimestampFilterAlgorithm
from boiler.data_processing.timestamp_interpolator_algorithm import TimestampInterpolationAlgorithm
from boiler.data_processing.timestamp_round_algorithm import NearestTimestampRoundAlgorithm
from boiler.data_processing.value_interpolation_algorithm import \
    LinearInsideValueInterpolationAlgorithm, \
    LinearOutsideValueInterpolationAlgorithm

# Shared preprocessing components; the same instances are applied to both the
# house and the boiler datasets in the cells below.

# Used to cut a frame down to the [start_timestamp, end_timestamp) window —
# NOTE(review): left-closed/right-open is inferred from the class name; confirm.
timestamp_filter = LeftClosedTimestampFilterAlgorithm()
# Built with time_tick; presumably snaps timestamps to the nearest multiple of
# time_tick — TODO confirm against the boiler library.
timestamp_rounder = NearestTimestampRoundAlgorithm(time_tick)
# Receives the rounder and time_tick; its process_df is called with the window
# bounds — presumably it inserts rows for missing grid timestamps; confirm.
timestamp_interpolator = TimestampInterpolationAlgorithm(timestamp_rounder, time_tick)
# Value interpolators applied to the forward-temperature column
# ("outside" first, then "inside" in the dataset cells).
inside_interpolator = LinearInsideValueInterpolationAlgorithm()
outside_interpolator = LinearOutsideValueInterpolationAlgorithm()

In [58]:
# Load the raw house measurements. sqlite3 connections are not closed by
# garbage collection at a predictable time (and `with con:` only manages the
# transaction), so close the connection explicitly once the data is in memory.
con = sqlite3.connect(SRC_HOUSE_DATASET_PATH)
try:
    house_df = parse_from_conn(con)
finally:
    con.close()

# Keep only samples inside the analysis window.
house_df = timestamp_filter.filter_df_by_min_max_timestamp(house_df, start_timestamp, end_timestamp)
# Round timestamps with the time_tick-based rounder, then keep the first
# sample of every group that collapsed onto the same rounded timestamp.
house_df[column_names.TIMESTAMP] = timestamp_rounder.round_series(house_df[column_names.TIMESTAMP])
house_df = house_df.drop_duplicates(subset=[column_names.TIMESTAMP])
# Presumably re-indexes the frame onto the full regular time grid of the
# window, leaving gaps in the temperature column — TODO confirm.
house_df = timestamp_interpolator.process_df(house_df, start_timestamp, end_timestamp)
# Fill temperature gaps: the "outside" interpolator runs first, then "inside".
house_df[column_names.FORWARD_TEMP] = outside_interpolator.interpolate_series(house_df[column_names.FORWARD_TEMP])
house_df[column_names.FORWARD_TEMP] = inside_interpolator.interpolate_series(house_df[column_names.FORWARD_TEMP])

house_df.head()

Unnamed: 0,timestamp,forward_temp
0,2021-09-04 00:00:00+05:00,24.84
1,2021-09-04 00:03:00+05:00,24.82
2,2021-09-04 00:06:00+05:00,24.84
3,2021-09-04 00:09:00+05:00,24.83
4,2021-09-04 00:12:00+05:00,24.82


In [61]:
# Load the raw boiler measurements. sqlite3 connections are not closed by
# garbage collection at a predictable time (and `with con:` only manages the
# transaction), so close the connection explicitly once the data is in memory.
con = sqlite3.connect(SRC_BOILER_DATASET_PATH)
try:
    boiler_df = parse_from_conn(con)
finally:
    con.close()

# Keep only samples inside the analysis window.
boiler_df = timestamp_filter.filter_df_by_min_max_timestamp(boiler_df, start_timestamp, end_timestamp)
# Round timestamps with the time_tick-based rounder, then keep the first
# sample of every group that collapsed onto the same rounded timestamp.
boiler_df[column_names.TIMESTAMP] = timestamp_rounder.round_series(boiler_df[column_names.TIMESTAMP])
boiler_df = boiler_df.drop_duplicates(subset=[column_names.TIMESTAMP])
# Presumably re-indexes the frame onto the full regular time grid of the
# window, leaving gaps in the temperature column — TODO confirm.
boiler_df = timestamp_interpolator.process_df(boiler_df, start_timestamp, end_timestamp)
# Fill temperature gaps: the "outside" interpolator runs first, then "inside".
boiler_df[column_names.FORWARD_TEMP] = outside_interpolator.interpolate_series(boiler_df[column_names.FORWARD_TEMP])
boiler_df[column_names.FORWARD_TEMP] = inside_interpolator.interpolate_series(boiler_df[column_names.FORWARD_TEMP])

boiler_df.head()

Unnamed: 0,timestamp,forward_temp
0,2021-09-04 00:00:00+05:00,68.300003
1,2021-09-04 00:03:00+05:00,68.290001
2,2021-09-04 00:06:00+05:00,68.089996
3,2021-09-04 00:09:00+05:00,68.040001
4,2021-09-04 00:12:00+05:00,68.349998


In [85]:
from boiler.timedelta.calculators.algo.std_var_time_delta_calculation_algorithm import StdVarTimeDeltaCalculationAlgorithm
from matplotlib import pyplot as plt

# Forward-temperature series of both objects as plain NumPy arrays.
boiler_temp = boiler_df[column_names.FORWARD_TEMP].to_numpy()
house_temp = house_df[column_names.FORWARD_TEMP].to_numpy()

# Lag between the boiler and house series, in samples of the arrays above.
calculator = StdVarTimeDeltaCalculationAlgorithm()
lag = calculator.find_lag(boiler_temp, house_temp)
print(lag)

# Shift the series against each other by `lag` samples so that element i of
# both arrays refers to the same heat-carrier portion: drop the last `lag`
# boiler samples and the first `lag` house samples.
# The end index is computed explicitly because `boiler_temp[:-lag]` is an
# EMPTY array when lag == 0 (-0 == 0), which would silently discard all data.
# NOTE(review): assumes find_lag never returns a negative value — confirm.
moved_boiler_temp = boiler_temp[:len(boiler_temp) - lag]
moved_house_temp = house_temp[lag:]

9


In [None]:
# Sizes (in samples) of the training and test parts; both are equal.
# At the 3-minute time_tick, 14400 samples span 30 days — presumably the
# intended split length; TODO confirm.
train_count = test_count = 14400
