# CROP Data Health Check
This notebook checks the health of data from the 30MHz sensors.

In [None]:
!pip3 install psycopg2
!pip3 install plotly

In [None]:
import os
from datetime import datetime, timedelta
import psycopg2
import pandas as pd
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go


Parameters

In [None]:
# Database connection settings.
# Each value is read from an environment variable (CROP_HOST, CROP_PORT,
# CROP_DBNAME, CROP_USER, CROP_PASSWORD) so that credentials do not have to be
# typed into, and saved with, the notebook. A variable that is not set falls
# back to the empty string.
crop_host = os.environ.get("CROP_HOST", "")
crop_port = os.environ.get("CROP_PORT", "")
crop_dbname = os.environ.get("CROP_DBNAME", "")
crop_user = os.environ.get("CROP_USER", "")
crop_password = os.environ.get("CROP_PASSWORD", "")

In [None]:
# Open the database connection with the crop_* settings defined above.
conn = psycopg2.connect(
    host=crop_host,
    port=crop_port,
    dbname=crop_dbname,
    user=crop_user,
    password=crop_password,
)

# Cursor bound to that connection.
cur = conn.cursor()

### Sensor Types

All sensor types

In [None]:
# Load every row of the sensor_types table.
# The frame gets its own name (df_sensor_types) so it is not silently replaced
# when the sensors table is loaded into df_sensors further down.
sql_command = """SELECT * FROM sensor_types"""
df_sensor_types = pd.read_sql(sql_command, conn)
df_sensor_types

### Sensors

All sensors

In [None]:
# Load the whole sensors table, sorted by last_updated in descending order.
sql_command = """SELECT * FROM sensors ORDER by last_updated DESC"""
df_sensors = pd.read_sql(sql=sql_command, con=conn)
df_sensors

### 30 MHz Data Access

All 30 MHz data for the last 365 days (counted back from the time the notebook is run)

In [None]:
# Query window: the 365 days that end at the moment this cell runs.
dt_to = datetime.now()
dt_from = dt_to - timedelta(days=365)

In [None]:
# Readings with dt_from <= timestamp < dt_to, each tagged with its sensor's name.
# Only sensors and zensie_trh_data are joined: no selected column or filter
# uses sensor_types, and listing it in FROM without a join condition would
# repeat every reading once per sensor_types row (and return nothing at all
# if that table were empty).
sql_command = """
SELECT sensors.name, zensie_trh_data.*
FROM zensie_trh_data
JOIN sensors ON sensors.id = zensie_trh_data.sensor_id
WHERE zensie_trh_data.timestamp >= '%s'
  AND zensie_trh_data.timestamp < '%s'
""" % (dt_from, dt_to)

In [None]:
# Run the query built above, then drop rows that are exact copies of an
# earlier row (the first occurrence is the one kept).
df_30MHz_raw = pd.read_sql(sql=sql_command, con=conn)
df_30MHz = df_30MHz_raw.drop_duplicates(keep="first")

Get individual dataframes per sensor

In [None]:
# Split the combined readings into one frame per sensor by matching on the
# sensor name. Each s* is the boolean row mask for one sensor.
s1 = df_30MHz["name"].eq("Farm_T/RH_16B4")
df_16B4 = df_30MHz.loc[s1]

s2 = df_30MHz["name"].eq("Farm_T/RH_29B2")
df_29B2 = df_30MHz.loc[s2]

# NOTE(review): this name is spelled "FARM" in upper case, unlike the two
# above; the comparison is case-sensitive, so confirm it matches the database.
s3 = df_30MHz["name"].eq("FARM_T/RH_1B2")
df_1B2 = df_30MHz.loc[s3]

s4 = df_30MHz["name"].eq("R&D_T/RH_1")
df_RD = df_30MHz.loc[s4]


### Plots to visualise gaps

In [None]:
# One temperature-vs-time scatter per sensor, stacked vertically so that gaps
# in the readings can be compared across sensors.
# Each entry is (legend label, per-sensor frame); list order is top-to-bottom
# subplot order.
sensor_traces = [
    ("MHz30_16B4", df_16B4),
    ("MHz30_29B2", df_29B2),
    ("MHz30_1B2", df_1B2),
    ("MHz30_R&D", df_RD),
]

fig = make_subplots(rows=len(sensor_traces), cols=1)

# add_trace with row/col replaces the deprecated append_trace.
for subplot_row, (trace_label, sensor_df) in enumerate(sensor_traces, start=1):
    fig.add_trace(
        go.Scatter(
            x=sensor_df["timestamp"],
            y=sensor_df["temperature"],
            name=trace_label,
            mode="markers",
        ),
        row=subplot_row,
        col=1,
    )

# Small markers keep dense stretches readable and make empty stretches stand out.
fig.update_traces(marker=dict(size=2))
fig.update_layout(title_text="Temperature readings per sensor")
fig.show()

In [None]:
# Count how many readings the 16B4 frame holds for each calendar day; a low
# point, or a day with no point at all, indicates a gap in the data.
df = df_16B4[["timestamp", "temperature", "humidity"]].copy()

# Reduce each timestamp to its calendar date element-wise, then convert back
# to datetimes so the x axis is plotted as a date axis.
df["Day"] = pd.to_datetime(df["timestamp"].apply(lambda ts: ts.date()))

# count() tallies the non-null values of every column per day, so the
# "temperature" column of df_cnt is the number of temperature readings that day.
df_cnt = df.groupby(["Day"]).count().reset_index()

plot = px.scatter(
    df_cnt,
    x="Day",
    y="temperature",
    height=400,
    labels={
        "Day": "Date",
        "temperature": "Sensor entries count",
    },
    title="Count Sensor Entries per day",
)
plot.update_traces(marker=dict(size=4))
plot.show()
