In [None]:
"""
this script injects the data obtained via csv download on the fluvius website into 
the home assistant database statistics
"""

### Power sensor

In [None]:
import pandas
import datetime
# Time zone in which the timestamps of the Fluvius export are interpreted
# (they are localized to this zone and then converted to UTC when loading).
LOCAL_TIMEZONE = "Europe/Brussels" 

# Location of the Fluvius quarter-hour totals CSV export that is read below.
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
path_flub = "/home/jan/code_projects/huis-monitoring/data/Elektriciteit-541448860018673818-20201018-20210418-Kwartiertotalen.csv"


In [46]:
# get export/import from fluvius -> all meter stuff could come from fluvius
df_orig_kwartier = pandas.read_csv(path_flub, delimiter=";")

# Combine the end date and end time of every interval into one parseable string.
df_orig_kwartier = df_orig_kwartier.assign(
    datetime=df_orig_kwartier["Tot Datum"] + "T" + df_orig_kwartier["Tot Tijdstip"]
)

# Volumes use a decimal comma; missing volumes are counted as 0.
df_orig_kwartier["Volume"] = (
    df_orig_kwartier["Volume"]
    .str.replace(",", ".", regex=False)
    .astype(float)
    .fillna(0)
)

# The export is in long format: one row per interval and register. Split it per
# register so that both volumes can be put side by side.
afname_rows = df_orig_kwartier.loc[df_orig_kwartier["Register"].str.contains("Afname")]
injectie_rows = df_orig_kwartier.loc[
    df_orig_kwartier["Register"].str.contains("Injectie")
]

# The two registers are paired purely by position, so make sure they really
# describe the same intervals instead of silently combining shifted rows.
if not afname_rows["datetime"].reset_index(drop=True).equals(
    injectie_rows["datetime"].reset_index(drop=True)
):
    raise ValueError(
        "'Afname' and 'Injectie' rows of the Fluvius export do not cover the same "
        "timestamps, so they cannot be aligned by position"
    )

df_new = pandas.DataFrame(
    {
        "Afname": afname_rows["Volume"].to_list(),
        "Injectie": injectie_rows["Volume"].to_list(),
        # Timestamps are taken explicitly from the "Injectie" rows (previously this
        # depended on the loop variable leaking out of the column loop).
        "datetime": injectie_rows["datetime"].to_list(),
    }
)

# Parse as local wall-clock time (DST-ambiguous hours are inferred from the row
# order) and convert to UTC.
df_new["datetime"] = (
    pandas.to_datetime(df_new["datetime"], format="%d/%m/%YT%H:%M:%S")
    .dt.tz_localize(LOCAL_TIMEZONE, ambiguous="infer")
    .dt.tz_convert("UTC")
)
df_new



Unnamed: 0,Afname,Injectie,datetime
0,0.000,0.0,2020-10-17 22:15:00+00:00
1,0.072,0.0,2020-10-17 22:30:00+00:00
2,0.043,0.0,2020-10-17 22:45:00+00:00
3,0.031,0.0,2020-10-17 23:00:00+00:00
4,0.045,0.0,2020-10-17 23:15:00+00:00
...,...,...,...
17467,0.001,0.0,2021-04-17 21:00:00+00:00
17468,0.001,0.0,2021-04-17 21:15:00+00:00
17469,0.000,0.0,2021-04-17 21:30:00+00:00
17470,0.000,0.0,2021-04-17 21:45:00+00:00


In [47]:
# Rename the timestamp column to "start"; the statistics cells further down
# select and filter on that column name.
df_new = df_new.rename(columns={"datetime": "start"})

# Drop the timezone while keeping the UTC wall-clock values. The guard keeps the
# cell re-runnable: calling tz_convert on an already tz-naive column raises a
# TypeError.
if df_new["start"].dt.tz is not None:
    df_new["start"] = df_new["start"].dt.tz_convert(None)


In [49]:
# Source frames keyed by originator name (the key a Sensor refers to via `originator`).
df_dic = {"Power Sensor": df_new}

In [50]:
# Turn the per-interval volumes into running totals. The totals start at the first
# interval's volume; no end-of-series offset is subtracted here.
power_sensor_df = df_dic["Power Sensor"]
for volume_col, total_col in (
    ("Afname", "Negative active energy"),
    ("Injectie", "Positive active energy"),
):
    power_sensor_df[total_col] = power_sensor_df[volume_col].cumsum()
power_sensor_df

Unnamed: 0,Afname,Injectie,start,Negative active energy,Positive active energy
0,0.000,0.0,2020-10-17 22:15:00,0.000,0.000
1,0.072,0.0,2020-10-17 22:30:00,0.072,0.000
2,0.043,0.0,2020-10-17 22:45:00,0.115,0.000
3,0.031,0.0,2020-10-17 23:00:00,0.146,0.000
4,0.045,0.0,2020-10-17 23:15:00,0.191,0.000
...,...,...,...,...,...
180106,0.000,0.0,2022-10-22 21:35:00,2458.979,15278.582
180107,0.000,0.0,2022-10-22 21:40:00,2458.979,15278.582
180108,0.000,0.0,2022-10-22 21:45:00,2458.979,15278.582
180109,0.000,0.0,2022-10-22 21:50:00,2458.979,15278.582


### Inject into HASS statistics


In [51]:
import sqlite3

import pandas

# Open the Home Assistant SQLite database; the path is relative to the working directory.
path = "home-assistant_v2.db"
con = sqlite3.connect(path)


In [52]:
from dataclasses import dataclass
from typing import Optional


@dataclass
class Sensor:
    """Describes one sensor whose statistics are to be backfilled.

    Attributes:
        name: value matched against ``statistic_id`` in the ``statistics_meta`` table.
        id: row id of the sensor in ``statistics_meta``; ``None`` until it has been
            looked up from the database.
        columns: columns of the originator frame that are added up to form the
            sensor state.
        originator: key of the source frame in ``df_dic``.
        offset: only referenced by commented-out code in this notebook
            (NOTE(review): presumably a correction subtracted from the sum -- confirm).
        daily_reset: not read by the code visible in this notebook.
    """

    name: str
    # Built with None and filled in afterwards, hence Optional rather than plain int.
    id: Optional[int]
    columns: list
    originator: str
    offset: float = 0
    daily_reset: bool = False


# Sensors to backfill, as (statistic id, "Power Sensor" columns that make up the state).
_SENSOR_SPECS = (
    ("sensor.power_meter_exported", ["Positive active energy"]),
    ("sensor.power_meter_consumption", ["Negative active energy"]),
)

# `id` starts out as None; it is filled in from statistics_meta afterwards.
sensors = [
    Sensor(spec_name, id=None, columns=spec_columns, originator="Power Sensor")
    for spec_name, spec_columns in _SENSOR_SPECS
]

In [53]:
# Look up the statistics_meta row id of every sensor by its statistic_id.
for sensor in sensors:
    meta_rows = pandas.read_sql_query(
        # Bound parameter instead of string formatting: no quoting problems.
        "select id FROM statistics_meta where statistic_id = ?;",
        con,
        params=(sensor.name,),
    )
    if meta_rows.empty:
        # Same exception type as the former `.loc[0, 'id']` on an empty result,
        # but with a message that names the missing sensor.
        raise KeyError(f"no statistics_meta row for statistic_id {sensor.name!r}")
    # Store a plain int (not numpy.int64) so the id can be bound as an SQL parameter.
    sensor.id = int(meta_rows.loc[0, "id"])

In [54]:
def prepare_drop_insert(sensor, table, connection=None, frames=None):
    """Replace all rows of ``table`` for one sensor with backfilled statistics.

    The backfilled frame supplies ``sum``; ``state`` is shifted so that it matches
    the state already stored in ``statistics`` at the most recent timestamp both
    have in common. Rows already in ``table`` that are newer than the backfilled
    data are kept, with their ``sum`` corrected by the same shift.

    Args:
        sensor: object with ``id``, ``columns`` and ``originator`` attributes.
        table: table to rewrite. ``"statistics"`` keeps only rows on the full hour;
            any other table name keeps only rows from the last 30 days.
        connection: sqlite3 connection; defaults to the notebook-level ``con``.
        frames: mapping of originator name to a frame with a UTC-naive ``start``
            column; defaults to the notebook-level ``df_dic``.

    Returns:
        The frame that was written to ``table``, sorted by ``start``.

    Raises:
        ValueError: if ``sensor.id`` has not been resolved yet.
        IndexError: if the backfilled data and the stored statistics have no
            timestamp in common, so no offset can be derived.
    """
    # Fall back to the notebook globals so existing two-argument calls keep working.
    if connection is None:
        connection = con
    if frames is None:
        frames = df_dic
    if sensor.id is None:
        raise ValueError("sensor.id is not set; look it up in statistics_meta first")
    # Plain int: numpy integers cannot be bound as sqlite3 parameters.
    sensor_id = int(sensor.id)

    df = frames[sensor.originator].copy(deep=True)

    # state can be the sum of multiple columns
    df["state"] = 0
    for col in sensor.columns:
        df["state"] = df["state"] + df[col]

    # filter on time conditions
    if table == "statistics":
        keep = df["start"].dt.minute == 0
    else:
        # "start" holds UTC-naive timestamps, so the cutoff must be UTC as well
        # (a local-time "now" would shift the window by the UTC offset).
        utc_now = pandas.Timestamp.now(tz="UTC").tz_convert(None)
        keep = df["start"] > (utc_now - datetime.timedelta(days=30))
    # Explicit copy: new columns are assigned to this selection below.
    df = df.loc[keep, ["start", "state"]].copy()

    # read what is currently in the db
    # (a table name cannot be a bound parameter, hence the f-string)
    existing_df = pandas.read_sql_query(f"select * FROM {table};", connection)
    # An empty table has no max id; start numbering at 1 in that case.
    next_row_index = 1 if existing_df.empty else int(existing_df["id"].max()) + 1

    # find offset
    oldstat_df = pandas.read_sql_query(
        "select * FROM statistics where metadata_id = ?;",
        connection,
        params=(sensor_id,),
    )
    oldstat_df["start"] = pandas.to_datetime(oldstat_df["start"])

    common_starts = set(oldstat_df["start"]) & set(df["start"])
    if not common_starts:
        raise IndexError(
            f"no common timestamp between the backfill data and the existing "
            f"statistics of metadata_id {sensor_id}; cannot derive the offset"
        )
    common_dt = max(common_starts)
    old_val = oldstat_df.loc[oldstat_df["start"] == common_dt, "state"].iloc[0]
    new_val = df.loc[df["start"] == common_dt, "state"].iloc[0]
    print(common_dt, old_val, new_val)

    # find most recent data
    recent_old_stat_df = existing_df[
        (existing_df["metadata_id"] == sensor_id)
        & (pandas.to_datetime(existing_df["start"]) > df["start"].max())
    ].copy(deep=True)
    # correct sum of most recent data
    recent_old_stat_df["sum"] = recent_old_stat_df["state"] + new_val - old_val

    # prepare the data, that we want to insert, into the correct table format
    df["metadata_id"] = sensor_id
    # id needs to be unique, so for the values that i will be inserting,
    #  i will start from the max id that is currently in the table
    df["id"] = range(next_row_index, next_row_index + len(df))
    # hass works with UTC internally, so all timestamps need to be converted to UTC
    df["created"] = pandas.Timestamp.now(tz="UTC").tz_convert(None)
    # sum is the backfilled running total; state is that total shifted so that it
    # equals the stored state at the common timestamp
    df["sum"] = df["state"]
    df["state"] = df["sum"] + (old_val - new_val)

    # concat new historical statistics with updated old statistics
    df = pandas.concat([df, recent_old_stat_df], ignore_index=True)

    df["start"] = pandas.to_datetime(df["start"])
    df["created"] = pandas.to_datetime(df["created"])
    df = df.sort_values("start")

    # drop all existing rows with same metadata_id from the sensor
    # because i will recreate all statistics for that sensor
    # instead of updating the sum value which one would do if you would like
    # to keep the existing statistics
    #
    # Delete and insert share one transaction: if the insert fails, the delete
    # is rolled back instead of leaving the sensor without statistics.
    try:
        connection.execute(
            f"DELETE FROM {table} WHERE metadata_id = ?;", (sensor_id,)
        )
        # insert new data into table
        df.to_sql(table, connection, schema=None, if_exists="append", index=False)
        connection.commit()
    except Exception:
        connection.rollback()
        raise
    return df


In [55]:
# Rewrite both statistics tables for every configured sensor; `test` keeps the
# frame written by the last call for inspection.
for sensor in sensors:
    print(sensor.name)
    for table in ["statistics", "statistics_short_term"]:
        print(table)
        test = prepare_drop_insert(sensor=sensor, table=table)

sensor.power_meter_exported
statistics
2022-10-22 21:00:00 13637.8 15278.582000000308
statistics_short_term
2022-10-22 21:00:00 13637.8 15278.582000000308
sensor.power_meter_consumption
statistics
2022-10-22 21:00:00 1356.23 2458.9790000001194
statistics_short_term
2022-10-22 21:00:00 1356.23 2458.9790000001194
sensor.solar_energy_riemann
statistics
2022-10-22 21:00:00 1495.9113 21831.75099999923
statistics_short_term
2022-10-22 21:00:00 1495.9112999999998 21831.75099999923
sensor.battery_total_discharge
statistics
2022-10-22 21:00:00 2490.79 1883.4625000000112
statistics_short_term
2022-10-22 21:00:00 2490.79 1883.4625000000112
sensor.battery_total_charge
statistics
2022-10-22 21:00:00 2663.07 2877.8425000005905
statistics_short_term
2022-10-22 21:00:00 2663.07 2877.8425000005905
