In [None]:
! pip install psycopg2
! pip install sklearn

import psycopg2

import numpy as np

from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split, ParameterGrid
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression



In [None]:
# Value bound to the `device_id = %s` filter in every sensor_calibration_data query in this notebook.
device_id = "iskra"

In [None]:
def create_connection():
    """Open a new psycopg2 connection to the calibration database.

    Connection settings can be overridden through the standard libpq
    environment variables (``PGDATABASE``, ``PGUSER``, ``PGPASSWORD``,
    ``PGHOST``) so credentials do not have to live in the notebook. When a
    variable is not set, the previously hard-coded value is used.

    Returns:
        A new, open psycopg2 connection. The caller owns it and must close it.
    """
    import os  # local import: keeps this cell self-contained

    return psycopg2.connect(
        dbname=os.environ.get('PGDATABASE', 'postgres'),
        user=os.environ.get('PGUSER', 'postgres'),
        password=os.environ.get('PGPASSWORD', 'postgres'),
        host=os.environ.get('PGHOST', '192.168.11.17'),
    )

In [None]:
def fetch_aggregated_calibration_data(ignore_outliers = True):
    """Load the mean r0 per (temperature, humidity) pair for ``device_id``.

    Args:
        ignore_outliers: when true (the default) rows with
            ``is_outlier = true`` are excluded before averaging.

    Returns:
        Float ndarray of shape ``(n, 3)`` with columns temperature, humidity,
        mean r0. An empty result has shape ``(0, 3)`` so column slicing still
        works.
    """
    # The interpolated fragment is one of two constants picked by a boolean,
    # never caller-supplied text; the device id itself is bound as a parameter.
    outlier_filter = 'AND is_outlier = false' if ignore_outliers else ''
    query = f"""SELECT temperature, humidity, AVG(r0) as r0
        FROM sensor_calibration_data
        WHERE device_id = %s {outlier_filter}
        GROUP BY temperature, humidity"""

    # Open exactly one connection. psycopg2's `with connection` only ends the
    # transaction, so the connection is closed explicitly in `finally`.
    connection = create_connection()
    try:
        with connection, connection.cursor() as cursor:
            cursor.execute(query, (device_id,))
            rows = cursor.fetchall()
    finally:
        connection.close()
    return np.array(rows, dtype=float).reshape(-1, 3)

In [None]:
def fetch_calibration_data(ignore_outliers = True):
    """Load the raw (temperature, humidity, r0) rows for ``device_id``.

    Args:
        ignore_outliers: when true (the default) rows with
            ``is_outlier = true`` are excluded.

    Returns:
        Float ndarray of shape ``(n, 3)`` with columns temperature, humidity,
        r0. An empty result has shape ``(0, 3)`` so column slicing still works.
    """
    # The interpolated fragment is one of two constants picked by a boolean,
    # never caller-supplied text; the device id itself is bound as a parameter.
    outlier_filter = 'AND is_outlier = false' if ignore_outliers else ''
    query = f"""SELECT temperature, humidity, r0
        FROM sensor_calibration_data
        WHERE device_id = %s {outlier_filter}"""

    # Open exactly one connection. psycopg2's `with connection` only ends the
    # transaction, so the connection is closed explicitly in `finally`.
    connection = create_connection()
    try:
        with connection, connection.cursor() as cursor:
            cursor.execute(query, (device_id,))
            rows = cursor.fetchall()
    finally:
        connection.close()
    return np.array(rows, dtype=float).reshape(-1, 3)

In [None]:
def invalidate_out_of_bounds_values():
    """Flag physically implausible rows of ``device_id`` as outliers.

    A row is flagged (``is_outlier = true``) when its temperature is outside
    [-40, 80], its humidity is outside [0, 100], or its uptime is below
    '20 minutes'.
    """
    sqlCommand = """
    UPDATE sensor_calibration_data
    SET is_outlier = true
    WHERE device_id = %s AND
    ( temperature < -40 OR temperature > 80 OR humidity < 0 OR humidity > 100 OR uptime < '20 minutes' )
    """
    # Open exactly one connection. `with connection` commits on success and
    # rolls back on error but does not close, hence the explicit `finally`.
    connection = create_connection()
    try:
        with connection:
            with connection.cursor() as cursor:
                cursor.execute(sqlCommand, (device_id,))
    finally:
        connection.close()

def invalidate_ids(ids):
    """Flag the given row ids of ``device_id`` as outliers.

    Args:
        ids: iterable of ``sensor_calibration_data.id`` values. An empty
            iterable is a no-op (it would otherwise render ``IN ()``, which
            is a SQL syntax error).
    """
    id_tuple = tuple(ids)  # psycopg2 adapts a tuple to a parenthesised SQL list
    if not id_tuple:
        return

    sqlCommand = """
    UPDATE sensor_calibration_data
    SET is_outlier = true
    WHERE device_id = %s AND id IN %s
    """
    # Open exactly one connection. `with connection` commits on success and
    # rolls back on error but does not close, hence the explicit `finally`.
    connection = create_connection()
    try:
        with connection:
            with connection.cursor() as cursor:
                cursor.execute(sqlCommand, (device_id, id_tuple))
    finally:
        connection.close()

def invalidate_loners():
    """Flag isolated readings of ``device_id`` as outliers.

    Rows are numbered in ``received_at`` order. A row is a "loner" when
    neither the row just before it nor the row just after it lies within
    5 seconds of it. Such rows get ``is_outlier = true``.
    """
    # row_number() must carry its own ORDER BY: window functions are evaluated
    # before the query-level ORDER BY, so `OVER ()` would number rows in scan
    # order and "adjacent" row ids would not be adjacent in time. `id` breaks
    # ties between equal timestamps.
    sqlCommand = """
    WITH data AS (
        SELECT row_number() OVER (ORDER BY received_at, id) row_id, id, received_at
        FROM sensor_calibration_data
        WHERE device_id = %s
    ),
    tails AS (
        SELECT n.id
        FROM data n
        LEFT JOIN data p ON (n.row_id = p.row_id + 1 AND n.received_at - p.received_at < interval '5 second')
        WHERE p.id IS NULL
    ),
    heads AS (
        SELECT p.id
        FROM data p
        LEFT JOIN data n ON (n.row_id = p.row_id + 1 AND n.received_at - p.received_at < interval '5 second')
        WHERE n.id IS NULL
    ),
    loners AS (
        SELECT t.id
        FROM tails t
        JOIN heads h ON (t.id = h.id)
    )
    UPDATE sensor_calibration_data
    SET is_outlier = true
    WHERE id IN (SELECT id FROM loners)
    """
    # Open exactly one connection. `with connection` commits on success and
    # rolls back on error but does not close, hence the explicit `finally`.
    connection = create_connection()
    try:
        with connection:
            with connection.cursor() as cursor:
                cursor.execute(sqlCommand, (device_id,))
    finally:
        connection.close()

def fetch_dr0(ignore_outliers = True):
    """Compute the rate of change of r0 between time-adjacent readings.

    Rows of ``device_id`` are numbered in ``received_at`` order. Each pair of
    consecutive rows less than 5 seconds apart yields one result row.

    Args:
        ignore_outliers: when true (the default) rows with
            ``is_outlier = true`` are excluded before pairing.

    Returns:
        Float ndarray of shape ``(n, 3)`` with columns previous-row id,
        next-row id, and dr0 (r0 difference divided by the gap in seconds).
        An empty result has shape ``(0, 3)``.
    """
    # The interpolated fragment is one of two constants picked by a boolean,
    # never caller-supplied text; the device id itself is bound as a parameter.
    outlier_filter = 'AND is_outlier = false' if ignore_outliers else ''
    # row_number() must carry its own ORDER BY: window functions are evaluated
    # before the query-level ORDER BY, so `OVER ()` would number rows in scan
    # order and consecutive row ids would not be consecutive in time.
    # NOTE(review): two consecutive rows with identical received_at would make
    # the divisor 0 - confirm timestamps are unique per device.
    sqlCommand = f"""
    WITH data AS (
        SELECT row_number() OVER (ORDER BY received_at, id) row_id, id, r0, received_at
        FROM sensor_calibration_data
        WHERE device_id = %s {outlier_filter}
    )
    SELECT p.id, n.id, (n.r0 - p.r0)/(EXTRACT(EPOCH FROM (n.received_at - p.received_at))) as dr0
    FROM data n
    JOIN data p ON (n.row_id = p.row_id + 1 AND n.received_at - p.received_at < interval '5 second')
    """
    # Open exactly one connection. psycopg2's `with connection` only ends the
    # transaction, so the connection is closed explicitly in `finally`.
    connection = create_connection()
    try:
        with connection, connection.cursor() as cursor:
            cursor.execute(sqlCommand, (device_id,))
            rows = cursor.fetchall()
    finally:
        connection.close()
    return np.array(rows, dtype=float).reshape(-1, 3)

def get_outliers():
    """Find row ids involved in an abnormal jump of r0.

    The dr0 values from ``fetch_dr0()`` are standardised. Every pair whose
    standardised dr0 lies more than 5 standard deviations from the mean
    contributes both of its row ids.

    Returns:
        Set of int row ids; empty when there are no dr0 pairs or no outliers.
    """
    data = fetch_dr0()
    if data.size == 0:
        # Nothing to standardise; column slicing and the scaler would raise.
        return set()

    dr0 = data[:, 2].reshape(-1, 1)  # the scaler expects a 2-D column
    dr0_std = StandardScaler().fit_transform(dr0).reshape(-1)

    outliers_mask = np.abs(dr0_std) > 5
    # Columns 0 and 1 hold the ids of the two rows forming each pair.
    outliers_id = data[outliers_mask, :2].astype(int).flatten().tolist()

    return set(outliers_id)

def invalidate_outliers():
    """Flag every row id reported by ``get_outliers()`` as an outlier.

    Does nothing when no outliers are found.
    """
    flagged_ids = get_outliers()
    if not flagged_ids:
        return
    invalidate_ids(flagged_ids)

def clean_up():
    """Run the three invalidation passes in order.

    Order: out-of-bounds values, then dr0 outliers, then loners.
    """
    passes = (
        invalidate_out_of_bounds_values,
        invalidate_outliers,
        invalidate_loners,
    )
    for run_pass in passes:
        run_pass()

In [None]:
# Flag rows whose temperature, humidity or uptime fall outside the accepted ranges.
invalidate_out_of_bounds_values()

In [None]:
# Flag rows involved in an r0 jump more than 5 standard deviations from the mean dr0.
invalidate_outliers()

In [None]:
# Flag readings that have no neighbouring reading within 5 seconds.
invalidate_loners()

In [None]:
# Runs the three invalidation passes in sequence: out-of-bounds, dr0 outliers, loners.
clean_up()

In [None]:
import matplotlib.pyplot as plt

# Distribution of the standardised r0 rate of change over non-outlier rows.
data = fetch_dr0(ignore_outliers=True)
x = data[:,2].reshape(-1, 1)

scaler = StandardScaler()
x = scaler.fit_transform(x)

# Create the figure only after the data is available, so a failed fetch does
# not leave an empty figure behind.
fig, ax = plt.subplots(figsize=(20, 15))

# the histogram of the data
n, bins, patches = ax.hist(x, 30, density=True, facecolor='g', alpha=0.75)

ax.set_title(f"Standardised dr0 distribution for device '{device_id}'")
ax.set_xlabel("standardised dr0 (z-score)")
ax.set_ylabel("density")
ax.grid(True)
plt.show()

In [None]:
def buildModel(degree, train):
    """Fit a standardise -> polynomial features -> linear regression pipeline.

    Args:
        degree: polynomial degree handed to ``PolynomialFeatures``.
        train: 2-D array whose last column is the target and whose remaining
            columns are the features.

    Returns:
        The fitted pipeline.
    """
    features, target = train[:, :-1], train[:, -1]
    steps = (StandardScaler(), PolynomialFeatures(degree), LinearRegression())
    return make_pipeline(*steps).fit(features, target)

In [None]:
def runExperiment(degree, aggregated_data, ignore_outliers, random_state=42):
    """Fit a model of the given degree and score it on a held-out split.

    Args:
        degree: polynomial degree passed to ``buildModel``.
        aggregated_data: when true use the per-(temperature, humidity)
            averages, otherwise the raw rows.
        ignore_outliers: forwarded to the fetch function.
        random_state: seed for the train/test split. A fixed default keeps
            scores reproducible and gives every degree the same split of the
            same data; pass ``None`` for a fresh random split on every call.

    Returns:
        Tuple ``(train_score, test_score)`` from the pipeline's ``score``.
    """
    if aggregated_data:
        data = fetch_aggregated_calibration_data(ignore_outliers)
    else:
        data = fetch_calibration_data(ignore_outliers)

    train, test = train_test_split(data, random_state=random_state)
    pipe = buildModel(degree, train)

    # The last column is the target; everything before it is a feature.
    train_score = pipe.score(train[:,:-1], train[:,-1])
    test_score = pipe.score(test[:,:-1], test[:,-1])
    return (train_score, test_score)

In [None]:
param_grid = {
    'degree': [1, 2, 3, 4, 5],
    'aggregated_data': [True],
    'ignore_outliers': [True, False],
}

# Score every parameter combination, then list them best test score first.
results = sorted(
    ((test_case, runExperiment(**test_case)) for test_case in ParameterGrid(param_grid)),
    key=lambda p: p[1][1],
    reverse=True,
)
for result in results:
    print(result)

In [None]:
# Aggregated (temperature, humidity, mean r0) rows with outliers excluded (the default); reused by the cells below.
plot_data = fetch_aggregated_calibration_data()

In [None]:
# Fit a degree-2 model on every aggregated row (no held-out split here).
model = buildModel(2, plot_data)
# Spot check: predicted r0 for temperature 23.7 and humidity 85.4 (same column order as the training features).
model.predict(np.array([[23.7, 85.4]]))

In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
from matplotlib.ticker import LinearLocator
import numpy as np

fig, ax = plt.subplots(subplot_kw={"projection": "3d"})
fig.set_size_inches(20, 15)

# Make data: a regular grid over the first feature (10..49) and the second (20..99).
X = np.arange(10, 50, 1)
Y = np.arange(20, 100, 1)
X, Y = np.meshgrid(X, Y)

# One (temperature, humidity) row per grid node, in the feature order the model was trained on.
XYpairs = np.dstack([X, Y]).reshape(-1, 2)
Z = model.predict(XYpairs)
Z = Z.reshape(X.shape)

# Plot the surface.
surf = ax.plot_surface(X, Y, Z, cmap=cm.coolwarm,
                       linewidth=0, antialiased=True)

ax.set_title("Predicted r0 over temperature and humidity")
ax.set_xlabel("temperature")
ax.set_ylabel("humidity")
ax.set_zlabel("predicted r0")

# Add a color bar which maps values to colors.
fig.colorbar(surf, shrink=0.5, aspect=5)

plt.show()

In [None]:
# Columns of plot_data: temperature, humidity, mean r0.
zdata = plot_data[:,-1]
xdata = plot_data[:, 0]
ydata = plot_data[:, 1]
plt.scatter(xdata, ydata, c=zdata, cmap=cm.coolwarm)
plt.title("Mean r0 per (temperature, humidity) pair")
plt.xlabel("temperature")
plt.ylabel("humidity")
plt.colorbar(label="mean r0");