In [None]:
! pip install psycopg2
! pip install cachetools
! pip install pandas
! pip install seaborn

import psycopg2
import psycopg2.extras

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import heapq as hq

from cachetools import cached, TTLCache

import time

In [None]:
# Default size (width, height) applied to every figure drawn in this notebook.
sns.set(
    rc={
        'figure.figsize': (15, 10),
    }
)

In [None]:
def create_connection():
    """Open a new psycopg2 connection to the sensor database.

    Connection settings are read from the standard libpq environment
    variables (PGDATABASE, PGUSER, PGPASSWORD, PGHOST) so that credentials
    do not have to live in the notebook. When a variable is not set, the
    notebook's original development value is used as the default.

    Returns:
        A new psycopg2 connection. The caller is responsible for closing it.
    """
    # Local import so this cell does not depend on editing the import cell.
    import os

    dbname = os.environ.get('PGDATABASE', 'postgres')
    user = os.environ.get('PGUSER', 'postgres')
    password = os.environ.get('PGPASSWORD', 'postgres')
    host = os.environ.get('PGHOST', '192.168.11.2')
    return psycopg2.connect(dbname=dbname, user=user, password=password, host=host)

In [None]:
def fetch_devices(connection, sensor):
	"""Return the distinct device ids stored in the ``sensor_data_<sensor>`` table.

	Args:
		connection: open database connection providing ``cursor()``.
		sensor: table-name suffix; it is interpolated directly into the SQL
			text, so it must be a trusted identifier.

	Returns:
		A list with the first column (``device_id``) of every returned row.
	"""
	query = f"""SELECT DISTINCT device_id FROM sensor_data_{sensor}"""
	with connection.cursor() as cur:
		cur.execute(query)
		rows = cur.fetchall()
	return [record[0] for record in rows]

def fetch(connection, sensor, signal, device_id, include_timestamp=False):
	"""Load the last day of one signal for one device into a DataFrame.

	Args:
		connection: open database connection providing ``cursor()``.
		sensor: suffix of the ``sensor_data_<sensor>`` table to read.
		signal: column to select; it is exposed under the name ``signal``.
		device_id: device to filter on (passed as a bound query parameter).
		include_timestamp: when true, ``received_at`` is selected as well.

	Note:
		``sensor`` and ``signal`` are interpolated directly into the SQL
		text, so they must be trusted identifiers.

	Returns:
		A DataFrame indexed by ``id`` with a ``signal`` column and, if
		requested, a ``received_at`` column. Rows are ordered by
		``received_at`` ascending.
	"""
	column_names = ['id', 'signal'] + (['received_at'] if include_timestamp else [])
	with connection.cursor() as cur:
		query = f"""SELECT id, {signal} as signal {', received_at ' if include_timestamp else ''} 
			FROM sensor_data_{sensor}
			WHERE device_id = %s AND received_at > now() - interval '1 day'
			ORDER BY received_at ASC"""
		cur.execute(query, (device_id,))
		records = cur.fetchall()
	return pd.DataFrame.from_records(records, index=['id'], columns=column_names)

def fetch_scd30_ppm():
	"""Fetch the last day of SCD30 ``ppm`` readings of device ``'zero'``.

	Returns:
		The DataFrame produced by ``fetch`` (timestamps included).
	"""
	connection = create_connection()
	try:
		# psycopg2's `with connection` only ends the transaction
		# (commit/rollback); it does not close the connection, so close it
		# explicitly to avoid leaking one connection per call.
		with connection:
			return fetch(connection, 'scd30', 'ppm', 'zero', include_timestamp=True)
	finally:
		connection.close()

def fetch_sgp40_voc():
	"""Fetch the last day of SGP40 ``voc`` readings of device ``'zero'``.

	Returns:
		The DataFrame produced by ``fetch`` (timestamps included).
	"""
	connection = create_connection()
	try:
		# psycopg2's `with connection` only ends the transaction
		# (commit/rollback); it does not close the connection, so close it
		# explicitly to avoid leaking one connection per call.
		with connection:
			return fetch(connection, 'sgp40', 'voc', 'zero', include_timestamp=True)
	finally:
		connection.close()

In [None]:
def remove_old_data(connection, sensor):
	"""Delete rows older than one day from the ``sensor_data_<sensor>`` table.

	Args:
		connection: open database connection providing ``cursor()``. No
			commit is issued here; that is left to the caller.
		sensor: table-name suffix; interpolated directly into the SQL text,
			so it must be a trusted identifier.
	"""
	statement = f"""DELETE FROM sensor_data_{sensor}
		WHERE received_at < now() - interval '1 day'"""
	with connection.cursor() as cur:
		cur.execute(statement)

def truncate_weights(connection, sensor, signal):
	"""Empty the ``weights_<sensor>_<signal>`` table.

	Args:
		connection: open database connection providing ``cursor()``. No
			commit is issued here; that is left to the caller.
		sensor: first part of the table-name suffix.
		signal: second part of the table-name suffix.

	Both name parts are interpolated directly into the SQL text, so they
	must be trusted identifiers.
	"""
	table = f"weights_{sensor}_{signal}"
	with connection.cursor() as cur:
		cur.execute(f"""TRUNCATE {table}""")

def update_weight(connection, sensor, signal, series):
	"""Bulk-insert ``(id, weight)`` rows into the ``weights_<sensor>_<signal>`` table.

	Args:
		connection: open database connection providing ``cursor()``. No
			commit is issued here; that is left to the caller.
		sensor: first part of the table-name suffix.
		signal: second part of the table-name suffix.
		series: mapping-like object whose ``items()`` yields ``(id, weight)``
			pairs, e.g. a pandas Series indexed by row id.
	"""
	insert_sql = f"""INSERT INTO weights_{sensor}_{signal} (id, weight) VALUES %s"""
	rows = list(series.items())
	with connection.cursor() as cur:
		psycopg2.extras.execute_values(cur, insert_sql, rows)

In [None]:
def calculate_weights(data, ratio = 1):
    """Score the samples of a signal by how much they shape its outline.

    The signal is z-score normalised and approximated by a polyline that
    starts as the single chord from the first to the last sample. The
    pending segment with the highest priority is taken repeatedly; its
    interior sample that lies furthest (vertically) from the segment's chord
    is selected, that distance becomes the sample's weight, and the segment
    is split at the sample. Both halves are queued with the error of the
    split that created them.

    Args:
        data: 1-D array-like of numeric samples.
        ratio: fraction of the samples that may be selected, the two
            endpoints included. At least 10 samples are always allowed.

    Returns:
        A float ndarray of the same length as ``data``. Selected samples
        hold their distance from the chord; all other samples, and both
        endpoints, hold 0.
    """
    y = np.asarray(data, dtype=float)
    n = len(y)
    weights = np.zeros(n)

    if n < 3:
        # Without an interior sample there is nothing to score; this also
        # avoids indexing into an empty range for n == 0 or n == 1.
        return weights

    if np.ptp(y) == 0:
        # A constant signal has zero standard deviation; the z-score would
        # be 0/0 and poison every weight with NaN. Treat it as flat instead.
        y = np.zeros(n)
    else:
        y = (y - y.mean()) / y.std()

    x = np.arange(n)

    # The two endpoints count as already selected.
    processed = 2
    limit = max(10, int(n * ratio))

    # heapq is a min-heap, so errors are stored negated to pop the largest.
    queue = [(0, (0, n - 1))]

    while queue and processed < limit:
        _, (left, right) = hq.heappop(queue)

        if right - left == 1:
            # Adjacent samples: the segment has no interior left to split.
            continue

        y_range = y[left:right + 1]
        x_range = x[left:right + 1]

        # Chord through the first and last sample of the segment.
        x1, y1, x2, y2 = x_range[0], y_range[0], x_range[-1], y_range[-1]
        a = (y2 - y1) / (x2 - x1)
        b = -x1 * (y2 - y1) / (x2 - x1) + y1
        y_hat = a*x_range + b

        # Vertical distance to the chord, interior samples only.
        diff = np.abs(y_range - y_hat)[1:-1]

        i = int(np.argmax(diff))
        error = diff[i]
        i += left + 1  # back to an index into the whole signal

        weights[i] = error
        hq.heappush(queue, (-error, (left, i)))
        hq.heappush(queue, (-error, (i, right)))
        processed += 1

    return weights

In [None]:
def calculate_weights_for_series(series, ratio=0.1):
    """Compute min-max scaled weights for a pandas Series.

    The raw weights come from ``calculate_weights``; they are rescaled to
    the range [0, 1] and the first and last sample are forced to weight 1.
    The time spent in ``calculate_weights`` is printed.

    Args:
        series: pandas Series of numeric samples.
        ratio: forwarded to ``calculate_weights``.

    Returns:
        A Series of weights sharing the index of ``series``. An empty input
        yields an empty Series.
    """
    data = series.to_numpy()
    if data.size == 0:
        # Nothing to score; min() and the endpoint assignments below would
        # raise on an empty array.
        return pd.Series(index=series.index, data=data.astype(float))

    # perf_counter is monotonic, which makes it the right clock for durations.
    start = time.perf_counter()
    weight = calculate_weights(data, ratio)
    end = time.perf_counter()
    print("weight calculation took ", end-start)

    weight = np.asarray(weight, dtype=float)
    # np.ptp() rather than ndarray.ptp(): the method was removed in NumPy 2.0.
    span = np.ptp(weight)
    weight = weight - weight.min()
    if span > 0:
        # When all weights are equal the range is 0; skip the division so the
        # result is 0 instead of NaN.
        weight = weight / span
    weight[0] = 1
    weight[-1] = 1

    return pd.Series(index=series.index, data=weight)

In [None]:
def process_weights():
    """Recompute and store the weights of every sensor signal.

    For each sensor: rows older than one day are deleted and the device ids
    are listed. For each signal of that sensor: the weights table is
    truncated, then for every device the last day of data is fetched, scored
    and the strictly positive weights are inserted.

    All of this happens in a single transaction which is committed when the
    loop finishes and rolled back if an exception is raised.
    """
    sensors = {'scd30': ['ppm', 'temperature', 'humidity'],'sgp40': ['voc']}
    connection = create_connection()
    try:
        # psycopg2's `with connection` commits on success and rolls back on
        # error, but it does not close the connection; that is done in the
        # `finally` below.
        with connection:
            for sensor, signals in sensors.items():
                remove_old_data(connection, sensor)
                devices = fetch_devices(connection, sensor)
                for signal in signals:
                    truncate_weights(connection, sensor, signal)
                    for device in devices:
                        df = fetch(connection, sensor, signal, device)
                        print(sensor, signal, device, len(df))
                        if df.empty:
                            # No recent rows for this device: nothing to score.
                            continue

                        signal_values = df['signal'].astype(float)
                        weights = calculate_weights_for_series(signal_values)
                        weights = weights[weights > 0]
                        update_weight(connection, sensor, signal, weights)
    finally:
        connection.close()

In [None]:
# Reference series: the last day of SCD30 ppm readings.
original = fetch_scd30_ppm()
original['signal'] = original['signal'].astype(float)
original['type'] = 'original'

# Reduced series: keep only the samples that received a positive weight.
# Building it with assign/loc yields a fresh frame, so adding columns does not
# raise pandas' SettingWithCopyWarning the way filter-then-assign does.
weights = calculate_weights_for_series(original['signal'], ratio=0.05)
local = (
    original
    .assign(weight=weights)
    .loc[lambda frame: frame['weight'] > 0]
    .assign(type='smart')
)
print(len(local.index), len(original.index))

# Stack both series so seaborn can colour them by their 'type' label.
graph = pd.concat([original, local], ignore_index=True)

sns.lineplot(data=graph, x='received_at', y='signal', hue='type')

In [None]:
# Distribution of the weights of the samples kept in `local`.
sns.histplot(
    data=local,
    x="weight",
)

In [None]:
# Writes to the database: deletes sensor rows older than one day, truncates
# the weights tables and inserts freshly computed weights.
process_weights()