In [1]:
"""
This script merges the statistics of 2 sensors into 1.
It does this for multiple sensors, because I had added almost the same sensors via 2 different integrations.

Script can also be used to recover statistics from corrupted database


Before executing the script, stop your Home Assistant service:
ssh jan@192.168.1.25
sudo systemctl stop home-assistant@homeassistant.service 

Don't forget to start afterwards:
sudo systemctl start home-assistant@homeassistant.service 
"""

'\nThis scripts merges the statistics of 2 sensors into 1\nIt does this for multiple sensors because i had added almost the same sensors via 2 different integrations\n'

# Parameters to change

In [1]:
# IP address of your Home Assistant installation; change this to match your setup.
hass_ip = "192.168.1.25"

In [3]:
# Sensor pairs to merge: the statistics recorded under each key (source sensor)
# are merged into the sensor named by the corresponding value (target sensor).
mapping = {
    'sensor.nibe_93714_44270': 'sensor.calc_cooling_supply_s1_44270',
    'sensor.nibe_93714_40013': 'sensor.bt7_hw_top_40013',
    'sensor.nibe_93714_40015': 'sensor.eb100_ep14_bt10_brine_in_temp_40015',
    'sensor.nibe_93714_40004': 'sensor.bt1_outdoor_temperature_40004',
    'sensor.nibe_93714_40012': 'sensor.eb100_ep14_bt3_return_temp_40012',
    'sensor.nibe_93714_40018': 'sensor.eb100_ep14_bt14_hot_gas_temp_40018',
    'sensor.nibe_93714_40017': 'sensor.eb100_ep14_bt12_condensor_out_40017',
    'sensor.nibe_93714_40033': 'sensor.bt50_room_temp_s1_40033',
    'sensor.nibe_93714_40067': 'sensor.bt1_average_40067',
    'sensor.nibe_93714_40016': 'sensor.eb100_ep14_bt11_brine_out_temp_40016',
    'sensor.nibe_93714_40014': 'sensor.bt6_hw_load_40014',
    'sensor.nibe_93714_43009': 'sensor.calc_supply_s1_43009',
    'sensor.nibe_93714_40019': 'sensor.eb100_ep14_bt15_liquid_line_40019',
    'sensor.nibe_93714_40008': 'sensor.bt2_supply_temp_s1_40008',
    'sensor.nibe_93714_40022': 'sensor.eb100_ep14_bt17_suction_40022',
}

In [None]:
# Local copy of the Home Assistant database that receives the merged statistics.
target_db_path = "home-assistant_v2.db"

# Default: both sensors of each pair live in the same database.
source_db_path = target_db_path
# Alternative: read the source statistics from a separate database file, e.g.
# source_db_path = "home-assistant_v2.db.corrupt.2023-03-12T03:12:13.457493+00:00"


# Retrieve database from home assistant server

In [4]:
# Download the database from the Home Assistant host into the current directory.
# Prerequisite: add your public key to the homeassistant account to make scp possible.
! scp homeassistant@{hass_ip}:/home/homeassistant/.homeassistant/home-assistant_v2.db .
# Keep a copy of the downloaded file in ./backup before anything is modified.
! mkdir -p backup
! cp home-assistant_v2.db backup # make a backup

home-assistant_v2.db                          100%   97MB  11.6MB/s   00:08    


# Connect to DB

In [2]:
import pandas
import sqlite3
import pandas
# Render DataFrame.plot() calls with plotly instead of the default backend.
pandas.options.plotting.backend = "plotly"

# Open the target database; when source and target are the same file, both
# names share one connection object, otherwise the source gets its own.
con_target = sqlite3.connect(target_db_path)
con_source = (
    con_target
    if source_db_path == target_db_path
    else sqlite3.connect(source_db_path)
)


In [3]:
# in case you want to merge all sensors from source_db into target_db
# mapping = {sens:sens for sens in pandas.read_sql("""select statistic_id from statistics_meta""", con_target)['statistic_id'].values}

# Merge

In [8]:
def merge(source_sensor, target_sensor, table='statistics'):
    """Merge the statistics history of ``source_sensor`` into ``target_sensor``.

    Rows of the source sensor that are older than the oldest row of the target
    sensor are read from ``con_source``, relabelled with the target's
    ``metadata_id`` and written to ``table`` in ``con_target`` together with
    the target's existing rows. All rows of the source sensor are then removed
    from ``table`` in ``con_source``.

    The function relies on the notebook globals ``con_source`` and
    ``con_target`` (sqlite3 connections) and publishes the intermediate frames
    as the globals ``source_df`` and ``target_df`` for interactive inspection.

    Parameters
    ----------
    source_sensor : str
        ``statistic_id`` of the sensor whose history is merged away.
    target_sensor : str
        ``statistic_id`` of the sensor that receives the history.
    table : str, default 'statistics'
        Name of the statistics table to operate on. It is interpolated into
        the SQL text (table names cannot be bound as parameters), so it must
        be a plain identifier.

    Returns
    -------
    pandas.DataFrame
        The merged rows exactly as they were written to the target table.

    Raises
    ------
    ValueError
        If ``table`` is not a plain identifier.
    KeyError
        If either sensor is not present in ``statistics_meta``.
    """
    # Kept for backward compatibility: other cells may inspect these frames.
    global source_df, target_df

    if not str(table).isidentifier():
        raise ValueError(f"invalid table name: {table!r}")

    print(f"source: {source_sensor}, target: {target_sensor}")

    # Look the target sensor up by exact match. LIKE must not be used here:
    # '_' is a single-character wildcard in LIKE, and every sensor id contains
    # underscores, so a similarly named sensor could be picked by mistake.
    target_meta = pandas.read_sql_query(
        "select id, has_sum FROM statistics_meta where statistic_id = ?;",
        con_target,
        params=(target_sensor,),
    )
    if target_meta.empty:
        raise KeyError(f"target sensor {target_sensor!r} not found in statistics_meta")
    # Plain Python int: sqlite3 cannot bind numpy integer types as parameters.
    target_sensor_id = int(target_meta.loc[0, 'id'])
    has_sum = target_meta.loc[0, 'has_sum']
    has_sum = bool(has_sum) if pandas.notna(has_sum) else False

    # read data from target sensor
    target_df = pandas.read_sql_query(
        f"select * FROM {table} where metadata_id = ?;",
        con_target,
        params=(target_sensor_id,),
    )
    print(f"length of existing statistics for target sensor: {len(target_df)}")

    # read data from source sensor
    source_meta = pandas.read_sql_query(
        "select id FROM statistics_meta where statistic_id = ?;",
        con_source,
        params=(source_sensor,),
    )
    if source_meta.empty:
        raise KeyError(f"source sensor {source_sensor!r} not found in statistics_meta")
    source_sensor_id = int(source_meta.loc[0, 'id'])

    if target_df.empty:
        # No target history yet, so there is no cutoff: take every source row.
        source_df = pandas.read_sql_query(
            f"select * FROM {table} where metadata_id = ?;",
            con_source,
            params=(source_sensor_id,),
        )
    else:
        # Only source rows older than the first target row are merged.
        min_dt = target_df["start"].min()
        if hasattr(min_dt, "item"):
            min_dt = min_dt.item()  # numpy scalar -> native Python value for binding
        source_df = pandas.read_sql_query(
            f"select * FROM {table} where metadata_id = ? and start < ?;",
            con_source,
            params=(source_sensor_id, min_dt),
        )
    print(f"length of existing statistics for source sensor: {len(source_df)}")

    # Shift the source's cumulative sum so that its most recent record is 0;
    # this assumes the first record of the target continues from the last
    # record of the source.
    if has_sum and not source_df.empty:
        source_offset = source_df.loc[source_df['start'] == source_df['start'].max(), "sum"].max()
        source_df['sum'] = source_df['sum'] - source_offset

    # concat new historical statistics with existing target statistics;
    # a stable sort keeps the de-duplication below deterministic
    df = pandas.concat([source_df, target_df], ignore_index=True).sort_values("start", kind="stable")

    # set metadata_id to the one of the target sensor
    df["metadata_id"] = target_sensor_id

    # deduplicate timestamp in case of non-clean merge
    df = df.drop_duplicates(subset=['start'], keep='first')

    # Assign temporary unique ids above the current maximum; COALESCE covers
    # an empty table, where max(id) would be NULL.
    max_id = int(
        pandas.read_sql_query(
            f"select coalesce(max(id), 0) as max_id FROM {table};", con_target
        ).loc[0, 'max_id']
    )
    df['id'] = range(max_id + 1, max_id + len(df) + 1)

    # Replace the rows atomically: nothing is committed until the insert has
    # succeeded, so a failure leaves both databases untouched.
    shared_connection = con_source is con_target
    try:
        # drop all existing rows of the target sensor
        con_target.execute(
            f"DELETE FROM {table} WHERE metadata_id = ?;", (target_sensor_id,)
        )
        # drop all existing rows of the source sensor
        con_source.execute(
            f"DELETE FROM {table} WHERE metadata_id = ?;", (source_sensor_id,)
        )
        # insert new data into table of target db
        df.to_sql(
            table, con_target, schema=None, if_exists="append", index=False,
        )
        con_target.commit()
        if not shared_connection:
            con_source.commit()
    except Exception:
        con_target.rollback()
        if not shared_connection:
            con_source.rollback()
        raise
    return df


In [1]:
from IPython.display import clear_output
# Run the merge for every configured sensor pair, once for the long-term
# table and once for the short-term table.
for source_sensor, target_sensor in mapping.items():
    for table in ["statistics", "statistics_short_term"]:
        print(table)
        test = merge(source_sensor, target_sensor, table=table)
        # Optional visual check of each merged result (uncomment to use):
        # fig=test[['state','sum']].plot()
        # fig=test[['mean']].plot()
        # fig.show()
        # input("Press to continue")
        # clear_output(wait=True)
    

# Reindex entire table

In [10]:
# Renumber the primary keys of both statistics tables so the ids are
# contiguous again (1..N) after the temporary ids assigned during the merge.
for table in ("statistics", "statistics_short_term"):
    # Read in id order so the renumbering preserves the existing row order.
    df = pandas.read_sql_query(
        f"select * FROM {table} ORDER BY id;", con_target
    )

    # reindex
    df['id'] = range(1, len(df) + 1)

    # Replace the whole table content in one transaction: the DELETE is only
    # committed together with the re-insert, so a failing insert cannot leave
    # the table empty.
    try:
        # drop ALL existing rows of the table
        con_target.execute(f"DELETE FROM {table};")

        # insert the renumbered data into the table
        df.to_sql(
            table, con_target, schema=None, if_exists="append", index=False,
        )
        con_target.commit()
    except Exception:
        con_target.rollback()
        raise

In [12]:
# Release the database handles. When source and target share one connection
# object, a single close is enough (a second close would be a no-op).
con_target.close()
if con_source is not con_target:
    con_source.close()

# Copy db back to home assistant server

In [13]:
# Upload the modified database back to the Home Assistant host, overwriting the
# file it was downloaded from. As noted in the notebook header, start the
# Home Assistant service again afterwards.
! scp home-assistant_v2.db homeassistant@{hass_ip}:/home/homeassistant/.homeassistant/

home-assistant_v2.db                          100%  276MB  11.9MB/s   00:23    
