In [29]:
! pip install psycopg2
! pip install sklearn

import psycopg2

import numpy as np

from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split, ParameterGrid
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression





In [None]:
# Identifier of the device whose rows are read from sensor_calibration_data;
# the fetch_* helpers pass it as the `device_id = %s` query parameter.
device_id = "iskra"

In [None]:
def create_connection():
    """Open a new psycopg2 connection to the calibration database.

    Connection settings are taken from the conventional PostgreSQL environment
    variables (PGDATABASE, PGUSER, PGPASSWORD, PGHOST) so that credentials do
    not have to live in the notebook. When a variable is not set, the value
    this notebook used previously is kept as the fallback.

    Returns:
        A new, open psycopg2 connection. The caller is responsible for
        closing it.
    """
    import os

    return psycopg2.connect(
        dbname=os.environ.get('PGDATABASE', 'postgres'),
        user=os.environ.get('PGUSER', 'postgres'),
        password=os.environ.get('PGPASSWORD', 'postgres'),
        host=os.environ.get('PGHOST', '192.168.11.17'),
    )

In [None]:
def fetch_aggregated_calibration_data():
    """Fetch the mean r0 per (temperature, humidity) pair for the configured device.

    The device is selected through the module-level ``device_id``.

    Returns:
        A float numpy array with one row per (temperature, humidity) group and
        the columns temperature, humidity, AVG(r0). An empty result is
        returned with shape (0, 3) so column slicing keeps working.
    """
    query = (
        "SELECT temperature, humidity, AVG(r0) as r0 "
        "FROM sensor_calibration_data "
        "WHERE device_id = %s "
        "GROUP BY temperature, humidity "
    )
    # Open exactly one connection. psycopg2's `with connection:` only ends the
    # transaction, it does not close the connection, hence the explicit close().
    connection = create_connection()
    try:
        with connection:
            with connection.cursor() as cursor:
                cursor.execute(query, (device_id,))
                rows = cursor.fetchall()
    finally:
        connection.close()
    return np.array(rows, dtype=float).reshape(-1, 3)

In [None]:
def fetch_calibration_data():
    """Fetch the raw calibration samples for the configured device.

    Only rows with ``r0 > 40`` are returned (the filter is part of the query;
    the reason for the cut-off is not documented in this notebook). The device
    is selected through the module-level ``device_id``.

    Returns:
        A float numpy array with the columns temperature, humidity, r0.
        An empty result is returned with shape (0, 3).
    """
    query = (
        "SELECT temperature, humidity, r0 "
        "FROM sensor_calibration_data "
        "WHERE r0 > 40 AND device_id = %s "
    )
    # Open exactly one connection. psycopg2's `with connection:` only ends the
    # transaction, it does not close the connection, hence the explicit close().
    connection = create_connection()
    try:
        with connection:
            with connection.cursor() as cursor:
                cursor.execute(query, (device_id,))
                rows = cursor.fetchall()
    finally:
        connection.close()
    return np.array(rows, dtype=float).reshape(-1, 3)

In [None]:
def fetch_calibration_data_with_id():
    """Fetch the raw calibration samples together with their row ids.

    Same selection as ``fetch_calibration_data`` (``r0 > 40`` for the
    module-level ``device_id``), with the table's ``id`` as the first column
    so individual rows can be matched against the outlier ids.

    Returns:
        A float numpy array with the columns id, temperature, humidity, r0.
        An empty result is returned with shape (0, 4).
    """
    query = (
        "SELECT id, temperature, humidity, r0 "
        "FROM sensor_calibration_data "
        "WHERE r0 > 40 AND device_id = %s "
    )
    # Open exactly one connection. psycopg2's `with connection:` only ends the
    # transaction, it does not close the connection, hence the explicit close().
    connection = create_connection()
    try:
        with connection:
            with connection.cursor() as cursor:
                cursor.execute(query, (device_id,))
                rows = cursor.fetchall()
    finally:
        connection.close()
    return np.array(rows, dtype=float).reshape(-1, 4)

def fetch_r0_change_data():
    """Fetch the rate of change of r0 between consecutive samples of the device.

    Rows of the module-level ``device_id`` are numbered in ``received_at``
    order and each row is joined with its successor. For every pair that is
    less than one minute apart, the r0 difference divided by the elapsed
    seconds is returned together with the id of the earlier row.

    Returns:
        A float numpy array with the columns id, dr0. An empty result is
        returned with shape (0, 2).
    """
    # The numbering must be ordered inside the window itself: a bare
    # `row_number() OVER ()` has no defined order, so "row_id + 1" would not be
    # guaranteed to be the next sample in time.
    # Pairs with identical timestamps are skipped because they would divide by
    # zero in the dr0 expression.
    sql_command = """
        WITH data AS (
            SELECT row_number() OVER (ORDER BY received_at) row_id,
                   id, temperature, humidity, r0, received_at
            FROM sensor_calibration_data
            WHERE device_id = %s
        )
        SELECT p.id,
               (n.r0 - p.r0)/(EXTRACT(EPOCH FROM (n.received_at - p.received_at))) as dr0
        FROM data n
        JOIN data p ON (n.row_id = p.row_id + 1)
        WHERE n.received_at - p.received_at < interval '1 minute'
          AND n.received_at > p.received_at;
    """
    # Open exactly one connection. psycopg2's `with connection:` only ends the
    # transaction, it does not close the connection, hence the explicit close().
    connection = create_connection()
    try:
        with connection:
            with connection.cursor() as cursor:
                cursor.execute(sql_command, (device_id,))
                rows = cursor.fetchall()
    finally:
        connection.close()
    return np.array(rows, dtype=float).reshape(-1, 2)

def get_outliers(threshold=5):
    """Return the ids of samples whose r0 rate of change is an extreme value.

    The dr0 column from ``fetch_r0_change_data`` is standardised with
    ``StandardScaler`` and every row whose standardised value lies more than
    ``threshold`` away from zero (in either direction) is reported.

    Args:
        threshold: Cut-off on the absolute standardised dr0. Defaults to 5,
            the value that was previously hard-coded.

    Returns:
        A 1-D integer numpy array of row ids; empty when there is no data or
        nothing exceeds the threshold.
    """
    data = fetch_r0_change_data()
    # StandardScaler refuses to fit on zero samples, so bail out early.
    if data.size == 0:
        return np.array([], dtype=int)
    r0_change = data[:, 1].reshape(-1, 1)
    r0_change_standard = StandardScaler().fit_transform(r0_change).ravel()
    outliers_mask = np.abs(r0_change_standard) > threshold
    return data[outliers_mask, 0].astype(int)

def clean_up_db():
    """Delete the rows reported by ``get_outliers`` from sensor_calibration_data.

    Does nothing when there are no outliers. The delete is committed before
    the connection is closed.
    """
    outliers = tuple(get_outliers().tolist())
    # An empty tuple would be rendered as `id in ()`, which is invalid SQL.
    if not outliers:
        return
    # Open exactly one connection. `with connection:` commits on success and
    # rolls back on error, but does not close the connection.
    connection = create_connection()
    try:
        with connection:
            with connection.cursor() as cursor:
                cursor.execute(
                    "DELETE "
                    "FROM sensor_calibration_data "
                    "WHERE id in %s",
                    (outliers,),
                )
    finally:
        connection.close()


def fetch_clean_calibration_data():
    """Return the calibration samples with the outlier rows removed.

    Rows from ``fetch_calibration_data_with_id`` whose id is reported by
    ``get_outliers`` are dropped, and the id column is stripped from the
    result, leaving every column after the first.
    """
    outlier_ids = get_outliers()
    samples = fetch_calibration_data_with_id()
    # True for every row whose id is NOT among the outliers.
    keep = np.isin(samples[:, 0], outlier_ids, invert=True)
    return samples[keep, 1:]

In [None]:
# Number of rows get_outliers() currently flags for the configured device.
len(get_outliers())

In [None]:
#if len(get_outliers()) > 0:
#    clean_up_db()

In [None]:
import matplotlib.pyplot as plt

# Histogram of the r0 rate of change (second column of the fetched data)
# after dropping the extreme values.
data = fetch_r0_change_data()

# Standardise the values so the cut-off below is expressed in standard deviations.
scaler = StandardScaler()
x = scaler.fit_transform(data[:, 1].reshape(-1, 1))

# Flag everything more than 5 standard deviations from the mean, then go back
# to the raw (unscaled) values and keep only the unflagged ones.
mask = np.abs(x) > 5
x = data[:, 1].reshape(-1, 1)[~mask]

# Density-normalised histogram of the remaining values.
n, bins, patches = plt.hist(x, density=True, facecolor='g', alpha=0.75)

plt.grid(True)
plt.show()

In [None]:
def aggregateData(train):
    """Average the third column over rows sharing the same first two columns.

    Args:
        train: Iterable of 3-item rows ``(t, h, r0)``.

    Returns:
        A float numpy array with one ``(t, h, mean r0)`` row per distinct
        ``(t, h)`` pair, in order of first appearance.
    """
    buckets = {}
    for temperature, humidity, reading in train:
        buckets.setdefault((temperature, humidity), []).append(reading)

    rows = []
    for (temperature, humidity), readings in buckets.items():
        rows.append((temperature, humidity, sum(readings) / len(readings)))
    return np.array(rows, dtype=float)

In [None]:
def buildModel(degree, train):
    """Fit a polynomial regression pipeline on ``train``.

    The pipeline standardises the inputs, expands them with
    ``PolynomialFeatures(degree)`` and fits a ``LinearRegression``.

    Args:
        degree: Degree passed to ``PolynomialFeatures``.
        train: 2-D array; the last column is the target, all preceding
            columns are the features.

    Returns:
        The fitted pipeline.
    """
    features, target = train[:, :-1], train[:, -1]
    steps = (StandardScaler(), PolynomialFeatures(degree), LinearRegression())
    model = make_pipeline(*steps)
    model.fit(features, target)
    return model

In [None]:
def runExperiment(degree = 2, aggregate_train_data = False, clean_data = False, random_state = 42):
    """Train and score one polynomial model on the calibration data.

    Args:
        degree: Polynomial degree passed to ``buildModel``.
        aggregate_train_data: When true, the fetched data is averaged per
            (temperature, humidity) with ``aggregateData`` before the split.
        clean_data: When true, use ``fetch_clean_calibration_data`` (outliers
            removed) instead of ``fetch_calibration_data``.
        random_state: Seed forwarded to ``train_test_split``. A fixed default
            keeps the split, and therefore the scores, reproducible between
            runs so that different settings can be compared. Pass ``None`` for
            a different random split on every call.

    Returns:
        ``(train_score, test_score)`` as returned by ``pipe.score`` on the
        train and test partitions.
    """
    data = fetch_clean_calibration_data() if clean_data else fetch_calibration_data()
    if aggregate_train_data:
        data = aggregateData(data)
    train, test = train_test_split(data, random_state=random_state)
    pipe = buildModel(degree, train)
    train_score = pipe.score(train[:, :-1], train[:, -1])
    test_score = pipe.score(test[:, :-1], test[:, -1])
    return (train_score, test_score)

In [None]:
# Run one experiment per parameter combination and list the outcomes,
# best test score first.
param_grid = {
    'degree': [1, 2, 3],
    'aggregate_train_data': [True],
    'clean_data': [True, False],
}

results = sorted(
    ((test_case, runExperiment(**test_case)) for test_case in ParameterGrid(param_grid)),
    key=lambda entry: entry[1][1],  # entry = (params, (train_score, test_score))
    reverse=True,
)
for result in results:
    print(result)

In [None]:
# Per-(temperature, humidity) averaged r0 values; reused by the model fit and
# the plots in the cells that follow.
plot_data = fetch_aggregated_calibration_data()
#model = buildModel(2, aggregateData(plot_data))
#model = buildModel(10, plot_data)

In [None]:
# Fit a degree-2 model on the aggregated data and show its prediction for one
# sample point; the two inputs follow the column order of plot_data
# (temperature, humidity).
model = buildModel(2, plot_data)
model.predict(np.array([[23.7, 85.4]]))

In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
from matplotlib.ticker import LinearLocator
import numpy as np

# 3-D surface of the model's prediction over a regular grid of inputs.
fig, ax = plt.subplots(figsize=(20, 15), subplot_kw={"projection": "3d"})

# Grid of input values: first model input on X, second on Y.
X, Y = np.meshgrid(np.arange(10, 50, 1), np.arange(20, 100, 1))
print(X.shape)

# Flatten the grid into (n_points, 2) rows for predict(), then fold the
# predictions back into the grid shape.
XYpairs = np.column_stack([X.ravel(), Y.ravel()])
print(XYpairs.shape)
Z = model.predict(XYpairs).reshape(X.shape)

# Draw the surface and a colour bar mapping values to colours.
surf = ax.plot_surface(X, Y, Z, cmap=cm.coolwarm,
                       linewidth=0, antialiased=True)
fig.colorbar(surf, shrink=0.5, aspect=5)

# Optional tweaks kept for reference:
#ax.set_zlim(-1.01, 1.01)
#ax.zaxis.set_major_locator(LinearLocator(10))
#ax.scatter(plot_data[:,0], plot_data[:,1], plot_data[:,2], cmap=cm.coolwarm )

plt.show()

In [None]:
# Scatter of the aggregated samples: first two columns as x/y position,
# last column as the colour.
xdata, ydata, zdata = plot_data[:, 0], plot_data[:, 1], plot_data[:, -1]
plt.scatter(xdata, ydata, c=zdata, cmap=cm.coolwarm);