In [None]:
! pip install psycopg2
! pip install cachetools
! pip install pandas
! pip install seaborn

import psycopg2

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import heapq as hq

from cachetools import cached, TTLCache

In [None]:
# Identifier of the sensor whose rows the fetch_* queries below select.
device_id = "zero"

# Default (width, height) for every figure drawn in this notebook.
sns.set(rc={"figure.figsize": (15, 10)})

In [None]:
def create_connection():
    """Open and return a new psycopg2 connection to the sensor database.

    Connection settings are taken from the standard libpq environment
    variables (``PGDATABASE``, ``PGUSER``, ``PGPASSWORD``, ``PGHOST``) so that
    credentials do not have to live in the notebook. Any variable that is not
    set falls back to the value this notebook has always used.

    Returns:
        A new, open psycopg2 connection. The caller is responsible for
        closing it.
    """
    # Local import keeps this cell self-contained; `os` is standard library.
    import os

    return psycopg2.connect(
        dbname=os.environ.get('PGDATABASE', 'postgres'),
        user=os.environ.get('PGUSER', 'postgres'),
        password=os.environ.get('PGPASSWORD', 'postgres'),
        host=os.environ.get('PGHOST', '192.168.11.11'),
    )

In [None]:
@cached(cache=TTLCache(maxsize=32, ttl=60))
def fetch_scd30():
	"""Fetch the last hour of SCD30 readings for the global ``device_id``.

	Returns:
		A list of ``(ppm, received_at)`` rows ordered by ``received_at``
		ascending, as returned by ``cursor.fetchall()``.

	Notes:
		The result is memoised by the ``TTLCache`` decorator (``ttl=60``).
		The function takes no arguments, so ``device_id`` is not part of the
		cache key: changing it does not invalidate an already cached result.
	"""
	# Open exactly one connection and always close it. psycopg2's
	# `with connection:` only ends the transaction; it does not close the
	# connection, hence the explicit try/finally.
	connection = create_connection()
	try:
		with connection, connection.cursor() as cursor:
			cursor.execute("""SELECT ppm, received_at
				FROM sensor_data_scd30
				WHERE device_id = %s AND received_at > now() - interval '1 hour'
                ORDER BY received_at ASC""", (device_id,))
			return cursor.fetchall()
	finally:
		connection.close()

@cached(cache=TTLCache(maxsize=32, ttl=60))
def fetch_sgp40():
	"""Fetch the last day of SGP40 readings for the global ``device_id``.

	Returns:
		A list of ``(voc, received_at)`` rows ordered by ``received_at``
		ascending, as returned by ``cursor.fetchall()``.

	Notes:
		The result is memoised by the ``TTLCache`` decorator (``ttl=60``).
		The function takes no arguments, so ``device_id`` is not part of the
		cache key: changing it does not invalidate an already cached result.
	"""
	# Open exactly one connection and always close it. psycopg2's
	# `with connection:` only ends the transaction; it does not close the
	# connection, hence the explicit try/finally.
	connection = create_connection()
	try:
		with connection, connection.cursor() as cursor:
			cursor.execute("""SELECT voc, received_at
				FROM sensor_data_sgp40
				WHERE device_id = %s AND received_at > now() - interval '1 day'
                ORDER BY received_at ASC""", (device_id,))
			return cursor.fetchall()
	finally:
		connection.close()

In [None]:
# Load the SCD30 rows as (ppm, received_at) tuples. fetch_scd30 is wrapped in a
# TTLCache (ttl=60), so re-running this cell may return the cached result.
data = fetch_scd30()

In [None]:
# Turn the fetched (ppm, received_at) tuples into a frame with columns
# 'ppm' and 'receivedAt' (note the camelCase rename of received_at).
df = pd.DataFrame.from_records(data, columns=['ppm', 'receivedAt'])

In [None]:
# Cast the ppm column to float before the numeric processing below.
df['ppm'] = df['ppm'].astype(float)

In [None]:
# Line chart of the raw ppm readings over time.
df.plot(x='receivedAt', y='ppm', kind='line')

In [None]:
def calculateWeights(data):
    """Score every sample by how much it matters to the shape of the series.

    The series is split recursively. For a segment ``[left, right]`` a
    straight line is drawn between its two end samples, the interior sample
    with the largest absolute vertical distance to that line becomes the split
    point, and that distance is stored as the sample's weight. Both halves are
    then processed the same way until no segment has interior samples left.

    Args:
        data: One-dimensional sequence of numeric samples (list, NumPy array
            or anything ``np.asarray(..., dtype=float)`` accepts). Samples are
            treated as evenly spaced, with x positions ``0 .. len(data) - 1``.

    Returns:
        A float ``numpy.ndarray`` with one weight per sample, in input order.
        The first and last sample always have weight 0. Inputs with fewer
        than three samples yield an all-zero array (empty for empty input).
    """
    y = np.asarray(data, dtype=float)
    n = len(y)
    weights = np.zeros(n)

    # With fewer than three samples there is no interior point to score.
    if n < 3:
        return weights

    x = np.arange(n)

    # Each segment is handled independently of the others, so the processing
    # order does not influence the result; a plain stack is sufficient.
    pending = [(0, n - 1)]
    while pending:
        left, right = pending.pop()

        # Adjacent samples: nothing in between to evaluate.
        if right - left < 2:
            continue

        x_range = x[left:right + 1]
        y_range = y[left:right + 1]

        # Line through the segment's end points: y_hat = a * x + b.
        x1, y1, x2, y2 = x_range[0], y_range[0], x_range[-1], y_range[-1]
        a = (y2 - y1) / (x2 - x1)
        b = -x1 * (y2 - y1) / (x2 - x1) + y1
        y_hat = a * x_range + b

        # Deviation of the interior samples only; the end points lie on the line.
        diff = np.abs(y_range - y_hat)[1:-1]

        offset = np.argmax(diff)
        split = left + 1 + offset  # translate back to an index into `y`
        weights[split] = diff[offset]

        pending.append((left, split))
        pending.append((split, right))

    return weights

In [None]:
# Extract the ppm readings as a NumPy array and compute one weight per reading.
y = df.ppm.to_numpy()
weights = calculateWeights(y)

In [None]:
# Attach the weights to the frame as a new 'weight' column (this mutates `df` in place),
# then show summary statistics of the numeric columns.
df['weight'] = weights
df.describe()

In [None]:
# Full series, labelled so it can be told apart in the combined plot.
original = df.assign(type='original')

# Simplified series: only the readings whose weight exceeds 1.
# Note: the first and last reading carry weight 0 and are therefore dropped here too.
approximation = df.loc[df['weight'] > 1].assign(type='approximation')

# Stack both series into one long-form frame so seaborn can colour the lines by `type`.
graph = pd.concat([original, approximation], ignore_index=True)

sns.lineplot(x='receivedAt', y='ppm', hue='type', data=graph)