In [None]:
! pip install psycopg2
! pip install cachetools
! pip install pandas
! pip install seaborn

import heapq as hq
import os
from contextlib import closing

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import psycopg2
import seaborn as sns

from cachetools import cached, TTLCache

In [None]:
# Device whose rows the query functions select (bound to `device_id = %s`).
device_id = "zero"
# Default figure size for every plot, set through seaborn's rc override.
sns.set(rc={'figure.figsize':(15, 10)})
# Matplotlib-only alternative: plt.rcParams["figure.figsize"] = (15, 10)

In [None]:
def create_connection():
    """Open a new psycopg2 connection to the sensor database.

    Connection settings are read from the environment so credentials do not
    have to be edited into the notebook. Each setting falls back to the value
    that used to be hard-coded when its variable is unset:

    * ``SENSOR_DB_NAME``     (default ``'postgres'``)
    * ``SENSOR_DB_USER``     (default ``'postgres'``)
    * ``SENSOR_DB_PASSWORD`` (default ``'postgres'``)
    * ``SENSOR_DB_HOST``     (default ``'192.168.11.11'``)

    Returns:
        Whatever ``psycopg2.connect`` returns. The caller owns the connection
        and is responsible for closing it.
    """
    return psycopg2.connect(
        dbname=os.environ.get('SENSOR_DB_NAME', 'postgres'),
        user=os.environ.get('SENSOR_DB_USER', 'postgres'),
        password=os.environ.get('SENSOR_DB_PASSWORD', 'postgres'),
        host=os.environ.get('SENSOR_DB_HOST', '192.168.11.11'),
    )

In [None]:
@cached(cache=TTLCache(maxsize=32, ttl=60))
def fetch_scd30():
    """Return the last day's ``(ppm, received_at)`` rows for ``device_id``.

    Rows come from ``sensor_data_scd30``, filtered on the module-level
    ``device_id`` and ordered by ``received_at`` ascending.

    The result is memoised by the ``TTLCache`` decorator (``ttl=60``). The
    function takes no arguments, so a change to ``device_id`` is not picked
    up until the cached entry expires.

    Returns:
        list: The rows returned by ``cursor.fetchall()``.
    """
    # psycopg2's own `with connection:` only ends the transaction, so
    # closing() is what actually releases the connection. Exactly one
    # connection is opened per call.
    with closing(create_connection()) as connection:
        with connection.cursor() as cursor:
            cursor.execute("""SELECT ppm, received_at
				FROM sensor_data_scd30
				WHERE device_id = %s AND received_at > now() - interval '1 day'
                ORDER BY received_at ASC""", (device_id,))
            return cursor.fetchall()

@cached(cache=TTLCache(maxsize=32, ttl=60))
def fetch_sgp40():
    """Return the last day's ``(voc, received_at)`` rows for ``device_id``.

    Rows come from ``sensor_data_sgp40``, filtered on the module-level
    ``device_id`` and ordered by ``received_at`` ascending.

    The result is memoised by the ``TTLCache`` decorator (``ttl=60``). The
    function takes no arguments, so a change to ``device_id`` is not picked
    up until the cached entry expires.

    Returns:
        list: The rows returned by ``cursor.fetchall()``.
    """
    # psycopg2's own `with connection:` only ends the transaction, so
    # closing() is what actually releases the connection. Exactly one
    # connection is opened per call.
    with closing(create_connection()) as connection:
        with connection.cursor() as cursor:
            cursor.execute("""SELECT voc, received_at
				FROM sensor_data_sgp40
				WHERE device_id = %s AND received_at > now() - interval '1 day'
                ORDER BY received_at ASC""", (device_id,))
            return cursor.fetchall()

In [None]:
def fetch_scd30_batch():
    """Return the next batch of SCD30 rows that still need weights.

    Selects ``(id, ppm, temperature, humidity)`` from ``sensor_data_scd30``
    for the module-level ``device_id``, ordered by ``id`` ascending and
    limited to 1000 rows. The batch starts at the highest ``id`` that already
    has a non-null, non-zero ``weight``, or at ``id >= 0`` when no such row
    exists.

    Returns:
        list: The rows returned by ``cursor.fetchall()``.
    """
    # psycopg2's own `with connection:` only ends the transaction, so
    # closing() is what actually releases the connection. Exactly one
    # connection is opened per call.
    with closing(create_connection()) as connection:
        with connection.cursor() as cursor:
            cursor.execute("""SELECT id, ppm, temperature, humidity
							FROM sensor_data_scd30
							WHERE device_id = %s AND id >= COALESCE(
								(SELECT id
								FROM sensor_data_scd30
								WHERE device_id = %s AND weight IS NOT NULL AND weight != 0
								ORDER BY id DESC
								LIMIT 1), 0)
							ORDER BY id ASC
							LIMIT 1000""", (device_id, device_id))
            return cursor.fetchall()

In [None]:
def fetch_scd30_smart():
    """Return the 100 highest-weighted SCD30 readings from the last day.

    The inner query picks, for the module-level ``device_id``, the ids of the
    100 rows with the largest non-null ``weight`` received within the last
    day. The outer query returns their ``(ppm, received_at)`` ordered by
    ``received_at`` ascending.

    Returns:
        list: The rows returned by ``cursor.fetchall()``.
    """
    # psycopg2's own `with connection:` only ends the transaction, so
    # closing() is what actually releases the connection. Exactly one
    # connection is opened per call.
    with closing(create_connection()) as connection:
        with connection.cursor() as cursor:
            cursor.execute("""SELECT ppm, received_at
				FROM sensor_data_scd30
				WHERE id IN (SELECT id FROM sensor_data_scd30 WHERE device_id = %s AND received_at > now() - interval '1 day' AND weight IS NOT NULL ORDER BY weight DESC LIMIT 100) 
                ORDER BY received_at ASC""", (device_id,))
            return cursor.fetchall()

In [None]:
def update_scd30_weight(series):
    """Store per-row weights in ``sensor_data_scd30``.

    A row is only updated when its stored weight is NULL or smaller than the
    new one, so weights never decrease. All updates are sent on a single
    connection and committed together.

    Args:
        series: pandas Series whose index holds row ids and whose values are
            the weights to store.
    """
    sql = """UPDATE sensor_data_scd30
            SET weight = %s
            WHERE id = %s AND (weight IS NULL OR weight < %s)
            """
    # psycopg2's own `with connection:` does not close the connection, so
    # closing() is used to release it. Exactly one connection is opened.
    with closing(create_connection()) as connection:
        with connection.cursor() as cursor:
            # Series.iteritems() was removed in pandas 2.0; items() is the
            # equivalent that works on both old and new versions.
            for row_id, weight in series.items():
                cursor.execute(sql, (weight, row_id, weight))
        connection.commit()

In [None]:
def calculate_weights_for_dataframe(df, columns):
    """Add a ``weight`` column to ``df`` in place.

    ``calculateWeights`` is run on each listed column separately; a row's
    final weight is the largest of its per-column weights.

    Args:
        df: DataFrame to annotate; it is modified in place.
        columns: Names of the columns to weight.
    """
    per_column = [calculateWeights(df[name].to_numpy()) for name in columns]
    # One column per input series, then the row-wise maximum.
    df['weight'] = np.stack(per_column, axis=1).max(axis=1)

In [None]:
def process_weights(min_batch_size=100):
    """Weight SCD30 rows batch by batch and store the weights in the database.

    Each pass fetches a batch with ``fetch_scd30_batch``, computes one weight
    per row from the ``ppm``, ``temperature`` and ``humidity`` columns via
    ``calculate_weights_for_dataframe``, and writes the result back with
    ``update_scd30_weight``.

    The loop ends when a batch is smaller than ``min_batch_size``, or when the
    same batch is returned twice in a row.

    Args:
        min_batch_size: Smallest batch that is still processed. Defaults to
            100, the previously hard-coded threshold.
    """
    value_columns = ['ppm', 'temperature', 'humidity']
    previous_records = None
    while True:
        records = fetch_scd30_batch()

        # Too little data left to weight; checked before any pandas work.
        if len(records) < min_batch_size:
            break

        # Getting the identical batch again means the previous pass changed
        # nothing the batch query depends on, and the computation is
        # deterministic, so continuing would loop forever.
        if records == previous_records:
            break
        previous_records = records

        df = pd.DataFrame.from_records(records, index=['id'], columns=['id'] + value_columns)
        df = df.astype({column: float for column in value_columns})

        calculate_weights_for_dataframe(df, value_columns)

        update_scd30_weight(df.weight)

In [None]:
# NOTE(review): calculateWeights is defined in a later cell. On a fresh kernel
# this call raises NameError as soon as a batch is large enough to be weighted;
# run the cell that defines calculateWeights first.
process_weights()

In [None]:
def calculateWeights(data):
    """Score each sample by its deviation from a recursive piecewise-linear fit.

    The series is standardised (zero mean, unit standard deviation) and then
    split repeatedly. For a segment ``[left, right]``, the interior sample
    that lies farthest (vertically) from the straight line through the two end
    samples receives that distance as its weight and becomes the split point
    for the two sub-segments. Splitting continues until every segment is just
    two adjacent samples, so every interior sample is assigned exactly once.
    The outcome does not depend on the order in which segments are handled.

    Args:
        data: One-dimensional numeric array or sequence.

    Returns:
        np.ndarray: Float array with one weight per sample, in input order.
        The first and last samples always have weight 0. Inputs with fewer
        than three samples, and constant inputs (standard deviation 0), yield
        all zeros.
    """
    y = np.asarray(data, dtype=float)
    n = len(y)
    weights = np.zeros(n)

    # Nothing to split: there is no interior sample.
    if n < 3:
        return weights

    # A constant series has no deviation from any chord. Return zeros rather
    # than dividing by zero and spreading NaN through the weights.
    spread = y.std()
    if spread == 0:
        return weights
    y = (y - y.mean()) / spread

    pending = [(0, n - 1)]
    while pending:
        left, right = pending.pop()

        # Adjacent samples: no interior point left to score.
        if right - left == 1:
            continue

        segment = y[left:right + 1]
        slope = (segment[-1] - segment[0]) / (right - left)
        chord = segment[0] + slope * np.arange(right - left + 1)

        # Endpoints lie on the chord by construction; only the interior counts.
        deviation = np.abs(segment - chord)[1:-1]
        offset = int(np.argmax(deviation))
        split = left + 1 + offset

        weights[split] = deviation[offset]
        pending.append((left, split))
        pending.append((split, right))

    return weights

In [None]:
# Every reading from the last day, labelled for the plot legend.
original = pd.DataFrame.from_records(fetch_scd30(), columns=['ppm', 'receivedAt'])
original.ppm = original.ppm.astype(float)
original['type'] = 'original'

# Rows selected by the database from the stored weights.
approximation = pd.DataFrame.from_records(fetch_scd30_smart(), columns=['ppm', 'receivedAt'])
approximation.ppm = approximation.ppm.astype(float)
approximation['type'] = 'approximation'

# Weights recomputed in the notebook from ppm only, keeping rows above 0.1.
# The copy inherits type == 'original', so it is relabelled here; otherwise
# these rows merge into the 'original' hue and never show as their own line.
local = original.copy()
calculate_weights_for_dataframe(local, ['ppm'])
local = local[local.weight > 0.1].assign(type='local')
print(len(local.index))

graph = pd.concat([original, approximation, local], ignore_index=True)

sns.lineplot(data=graph, x='receivedAt', y='ppm', hue='type')