# CROP 30 MHz Data Health Check
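This notebook checks the health of the data from the 30 MHz sensors by plotting the readings over time and counting the entries recorded per day, so that gaps in the data are easy to spot.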

In [1]:
# Optional one-off setup: uncomment to install the third-party packages imported below.
#!pip3 install psycopg2
#!pip3 install plotly

In [2]:
import os
from datetime import datetime, timedelta
import psycopg2
import pandas as pd
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go


Parameters

In [3]:
# Connection settings for the CROP PostgreSQL database.
# Every value can be overridden through an environment variable; the non-secret
# settings fall back to the test-server values so the notebook still runs
# without extra configuration.
crop_host = os.environ.get(
    "CROP_DB_HOST", "cropapptestsqlserver.postgres.database.azure.com"
)
crop_port = os.environ.get("CROP_DB_PORT", "5432")
crop_dbname = os.environ.get("CROP_DB_NAME", "app_db")
crop_user = os.environ.get("CROP_DB_USER", "cropdbadmin@cropapptestsqlserver")

# SECURITY: the password must never be stored in the notebook itself.
# Export CROP_DB_PASSWORD before starting Jupyter. There is deliberately no
# default, so the value is None when the variable is unset.
# NOTE(review): with password=None the driver presumably falls back to libpq's
# own lookup (PGPASSWORD / ~/.pgpass) - confirm for the deployed psycopg2 version.
# Any password that was ever committed with this notebook should be treated as
# compromised and rotated.
crop_password = os.environ.get("CROP_DB_PASSWORD")

In [4]:
# Open one PostgreSQL connection using the connection settings defined in the
# parameters cell; the queries in this notebook run over this connection.
conn = psycopg2.connect(
    host=crop_host,
    port=crop_port,
    dbname=crop_dbname,
    user=crop_user,
    password=crop_password,
)

# Cursor bound to that connection, available for running SQL directly.
cur = conn.cursor()

### Sensor Types

All sensor types

In [5]:
# Load the whole sensor_types table into a DataFrame and display it.
sql_command = "SELECT * FROM sensor_types"
df_sensors = pd.read_sql(sql=sql_command, con=conn)
df_sensors

Unnamed: 0,id,sensor_type,source,origin,frequency,data,description,time_created,time_updated
0,1,Advanticsys,Raspberry Pis,Cambridge,Every 30 mins,"Temperature, humidity, CO2",https://www.advanticsys.com/,2020-05-13 16:22:39.864533,
1,2,Stark,Energy meter,web scraping,Daily,"Timestamp,Energy(kW/half hour)",https://id.stark.co.uk/,2020-05-14 15:44:05.860386,
2,4,TinyTag,TinyTag sensors,Manual download csv,15 min,Temperature,https://www.micronmeters.com/section/tinytag-p...,2020-08-04 11:28:39.864705,
3,3,30MHz T&RH,30MHz Zensie,Zensie API,5 mins,"Temperature, relative humidity",https://api.30mhz.com/api/swagger#/check,2020-07-27 08:36:05.860386,
4,11,30MHz Weather,30MHz Zensie,Zensie API,60 mins,"temperature,rain_probability,relative_humidity...",https://api.30mhz.com/api/stats/check/i-b9bf54...,2021-05-04 00:00:00.000000,


### Sensors

All sensors

In [6]:
# Load every row of the sensors table, ordered by last_updated (descending),
# into a DataFrame and display it.
sql_command = "SELECT * FROM sensors ORDER by last_updated DESC"
df_sensors = pd.read_sql(sql=sql_command, con=conn)
df_sensors

Unnamed: 0,id,type_id,device_id,time_created,time_updated,name,last_updated
0,1,1,1,2020-05-13 16:22:39.888959,NaT,RasPi 1,NaT
1,2,1,2,2020-05-13 16:22:39.888959,NaT,RasPi 2,NaT
2,3,1,4,2020-05-13 16:22:39.888959,NaT,RasPi 4,NaT
3,4,1,5,2020-05-13 16:22:39.888959,NaT,RasPi 5,NaT
4,5,1,6,2020-05-13 16:22:39.888959,NaT,RasPi 6,NaT
5,6,1,7,2020-05-13 16:22:39.888959,NaT,RasPi 7,NaT
6,7,1,8,2020-05-13 16:22:39.888959,NaT,RasPi 8,NaT
7,45,4,631155,2020-08-04 13:52:21.224656,NaT,Lisa,NaT
8,41,4,631162,2020-08-04 12:26:21.681331,NaT,William,NaT
9,46,4,631378,2020-08-04 14:04:09.010796,NaT,Tony,NaT


### 30 MHz Data Access

All 30 MHz data for the selected time window: the 19 days leading up to `dt_to` (set in the next cell).

In [7]:
# Reporting window: dt_to is a fixed end date and dt_from lies 19 days before it.
# To report on the most recent data instead, use the line below for dt_to.
#dt_to = datetime.now()
dt_to = datetime(year=2021, month=6, day=30)
dt_from = dt_to - timedelta(days=19)

In [8]:
# Select every T&RH reading with dt_from <= timestamp < dt_to, together with
# the name of the sensor that produced it.
# Only the two tables that are actually needed are joined: listing an extra
# table such as sensor_types in FROM without a join condition cross-joins it
# with the result, repeating every reading once per sensor type row.
# dt_from / dt_to are datetimes created in this notebook (not user input), so
# they are formatted straight into the statement text.
sql_command = """
SELECT sensors.name, zensie_trh_data.*
FROM zensie_trh_data
JOIN sensors ON sensors.id = zensie_trh_data.sensor_id
WHERE zensie_trh_data.timestamp >= '%s'
  AND zensie_trh_data.timestamp < '%s'
""" % (dt_from, dt_to)

In [9]:
# Run the readings query, keeping the untouched result in df_30MHz_raw.
df_30MHz_raw = pd.read_sql(sql=sql_command, con=conn)

# Working copy with fully identical rows collapsed to their first occurrence.
df_30MHz = df_30MHz_raw.drop_duplicates(keep="first")

Get individual dataframes per sensor

In [10]:
# Split the readings by sensor name: each sN is a boolean row mask and each
# df_* holds the rows of df_30MHz selected by that mask.
s1 = df_30MHz["name"].eq("Farm_T/RH_16B4")
s2 = df_30MHz["name"].eq("Farm_T/RH_29B2")
s3 = df_30MHz["name"].eq("FARM_T/RH_1B2")
s4 = df_30MHz["name"].eq("R&D_T/RH_1")

df_16B4 = df_30MHz.loc[s1]
df_29B2 = df_30MHz.loc[s2]
df_1B2 = df_30MHz.loc[s3]
df_RD = df_30MHz.loc[s4]


### Plots to visualise gaps

In [14]:
# Temperature readings of each sensor in its own subplot row, drawn as small
# markers so that periods without data show up as visible gaps.
sensor_traces = [
    ("MHz30_16B4", df_16B4),
    ("MHz30_29B2", df_29B2),
    ("MHz30_1B2", df_1B2),
    ("MHz30_R&D", df_RD),
]

fig = make_subplots(rows=len(sensor_traces), cols=1)

# add_trace(..., row=, col=) is the supported replacement for the deprecated
# Figure.append_trace.
for row_idx, (trace_name, sensor_frame) in enumerate(sensor_traces, start=1):
    fig.add_trace(
        go.Scatter(
            x=sensor_frame["timestamp"],
            y=sensor_frame["temperature"],
            name=trace_name,
            mode="markers",
        ),
        row=row_idx,
        col=1,
    )

fig.update_traces(marker=dict(size=2))
fig.update_layout(title_text="30 MHz temperature readings per sensor")
fig.show()

In [12]:
# Count how many readings sensor 16B4 recorded on each calendar day; a day
# with a low count points to missing data.
df = df_16B4[['timestamp', 'temperature', 'humidity']].copy()

# Truncate each timestamp to its calendar date (vectorised, no per-row string
# formatting) and store it as a midnight datetime for plotting.
df['Day'] = pd.to_datetime(df['timestamp'].dt.date)

# count() reports the number of non-null values per column for every day.
df_cnt = df.groupby('Day').count().reset_index()

plot = px.scatter(
    df_cnt,
    x="Day",
    y="temperature",
    height=400,
    labels={
        "Day": "Date",
        "temperature": "Sensor entries count",
    },
    title="Count Sensor Entries per day",
)
plot.update_traces(marker=dict(size=4))
plot.show()
