In [None]:
#-----------------------------------------------------------------------------------------------------
# Optional connection to PyVmMonitor for profiling
#-----------------------------------------------------------------------------------------------------
# The profiler's public API is loaded from a local install directory rather than from
# site-packages. That directory is machine-specific, so a missing install is reported
# and skipped instead of aborting a full "Restart Kernel -> Run All".
import sys

PYVMMONITOR_API_DIR = 'D:\\Program Files\\Brainwy\\PyVmMonitor 1.1.2\\public_api'
if PYVMMONITOR_API_DIR not in sys.path:
    # Guarded so that re-running this cell does not stack duplicate entries.
    sys.path.append(PYVMMONITOR_API_DIR)

try:
    import pyvmmonitor
except ImportError:
    print("PyVmMonitor public API not found in '{}': profiling disabled.".format(PYVMMONITOR_API_DIR))
else:
    # Only reached when the import succeeded.
    pyvmmonitor.connect()

In [9]:
# Libraries Imports
#-----------------------------------------------------------------------------------------------------
from configparser import ConfigParser
from sqlalchemy import create_engine
from datetime import datetime
import pandas as pd
import numpy as np
from datetime import date, timedelta

# Load connection configuration file
#-----------------------------------------------------------------------------------------------------
config_filename = "./conf/GetWeather.ini"

# Read the .ini file.
# ConfigParser.read() does not raise when the file is missing or unreadable: it
# returns the list of files it actually parsed. The parser object itself is always
# truthy (it always holds the DEFAULT section), so the returned list is the only
# reliable success indicator.
config_object = ConfigParser()
loaded_files = config_object.read(config_filename)
if not loaded_files:
    raise FileNotFoundError(
        "Error while loading configuration !!! Could not read '{}'".format(config_filename)
    )

# Connection settings: the [INFO] section supplies 'user', 'password' and 'host'.
cfg = config_object['INFO']

# Create the SQLAlchemy engine used by the query cells (MySQL through PyMySQL)
#-----------------------------------------------------------------------------------------------------
# NOTE(review): user and password are inserted verbatim into the URL; characters such as
# '@', ':' or '/' in the password would need URL-encoding - confirm the credentials are URL-safe.
db_connection_str = "mysql+pymysql://{}:{}@{}/WeatherDB".format(cfg['user'],cfg['password'],cfg['host'])
db_connection = create_engine(db_connection_str)

In [None]:
#----------------------------------------------------------------------------------
# Table : RawRecords
# Query : Number of records per location and per calendar day, ordered by day
#----------------------------------------------------------------------------------
raw_data = pd.read_sql(
    sql=(
        "SELECT date(date_timestamp) AS dte, Locations.name, count(date_timestamp) AS nb_records\n"
        "    FROM RawRecords\n"
        "    INNER JOIN Locations ON Locations.id = RawRecords.id_location\n"
        "    WHERE id_location IN (SELECT id FROM Locations)\n"
        "    GROUP BY id_location, date(date_timestamp) \n"
        "    ORDER BY date(date_timestamp);"
    ),
    con=db_connection,
)
pdata = pd.DataFrame(data=raw_data)
# Show at most the first 100 rows of the result.
pdata.head(n=100)

In [None]:
#----------------------------------------------------------------------------------
# Table : RawRecords
# Query : Count records per location and per month
#         (dte = earliest recorded day of each location/month group)
#----------------------------------------------------------------------------------
# dte is aggregated with MIN(): a bare date(date_timestamp) is not determined by the
# GROUP BY columns, so MySQL either rejects the query (ONLY_FULL_GROUP_BY) or returns
# an arbitrary day of the month.
# "MONTH(...) IS NOT NULL" keeps exactly the rows the former always-true
# "MONTH(...) IN (SELECT MONTH(...) ...)" filter kept; the former
# "id_location IN (SELECT id FROM Locations)" filter is already implied by the INNER JOIN.
# NOTE(review): MONTH() alone merges the same month of different years - add
# YEAR(date_timestamp) to GROUP BY / ORDER BY if that is not intended.
raw_data = pd.read_sql(
    """
    SELECT MIN(DATE(date_timestamp)) AS dte,
           Locations.name AS LName,
           COUNT(date_timestamp) AS nb_records
    FROM RawRecords
    INNER JOIN Locations ON Locations.id = RawRecords.id_location
    WHERE MONTH(date_timestamp) IS NOT NULL
    GROUP BY id_location, MONTH(date_timestamp)
    ORDER BY id_location, MONTH(date_timestamp);
    """,
    con=db_connection
)
pdata = pd.DataFrame(raw_data)
pdata.head(100)

In [None]:
#----------------------------------------------------------------------------------
# Table : RawRecords
# Query : Count all records
#----------------------------------------------------------------------------------
raw_data = pd.read_sql(
    """
    SELECT COUNT(*) AS totalRecs
    FROM RawRecords;
    """,
    con=db_connection
)
pdata = pd.DataFrame(raw_data)
# Single-row result (column totalRecs): end the cell with the frame so the count
# is actually displayed, like the other query cells do.
pdata

In [None]:
#----------------------------------------------------------------------------------
# Table : RawRecords
# Query : Record count per location, largest count first
#----------------------------------------------------------------------------------
raw_data = pd.read_sql(
    sql=(
        " SELECT Locations.name as name, COUNT(*) as nbRecords\n"
        "        FROM RawRecords\n"
        "        INNER JOIN Locations ON RawRecords.id_location = Locations.id\n"
        "        GROUP BY id_location\n"
        "        ORDER BY nbRecords DESC;"
    ),
    con=db_connection,
)
pdata = pd.DataFrame(data=raw_data)
# Top 10 locations by number of records.
pdata.head(n=10)

In [None]:
#----------------------------------------------------------------------------------
# Table : RawRecords
# Query : Daily record count for every location, ordered by day
#----------------------------------------------------------------------------------
daily_count_lines = [
    " SELECT date(date_timestamp) AS dte, Locations.name, count(date_timestamp) AS nb_records",
    "    FROM RawRecords",
    "    INNER JOIN Locations ON Locations.id = RawRecords.id_location",
    "    WHERE id_location IN (SELECT id FROM Locations)",
    "    GROUP BY id_location, date(date_timestamp) ",
    "    ORDER BY date(date_timestamp);",
]
raw_data = pd.read_sql("\n".join(daily_count_lines), con=db_connection)
pdata = pd.DataFrame(raw_data)
# NOTE(review): 1009 looks like it may be a typo for 100 or 1000 - confirm the intended row limit.
pdata.head(1009)

In [None]:
#----------------------------------------------------------------------------------
# Table : RawRecords
# Query : List records created by locations and creation dates grouped
#         by locations + month (dte = earliest recorded day of each group)
#----------------------------------------------------------------------------------
# MIN() makes dte deterministic: date(date_timestamp) on its own is not fixed by
# GROUP BY id_location, month(...), so MySQL would either raise an error
# (ONLY_FULL_GROUP_BY) or pick an arbitrary day.
# The month filter below is equivalent to the previous self-referencing IN (...)
# subquery, which was true for every row with a non-NULL month. The previous
# "id_location IN (SELECT id FROM Locations)" filter is implied by the INNER JOIN.
# NOTE(review): grouping on MONTH() only mixes identical months of different years;
# add YEAR(date_timestamp) if the data spans several years and that is unwanted.
raw_data = pd.read_sql(
    """
    SELECT MIN(DATE(date_timestamp)) AS dte,
           Locations.name AS LName,
           COUNT(date_timestamp) AS nb_records
    FROM RawRecords
    INNER JOIN Locations ON Locations.id = RawRecords.id_location
    WHERE MONTH(date_timestamp) IS NOT NULL
    GROUP BY id_location, MONTH(date_timestamp)
    ORDER BY id_location, MONTH(date_timestamp);
    """,
    con=db_connection
)
pdata = pd.DataFrame(raw_data)
pdata.head(100)

In [None]:
#----------------------------------------------------------------------------------
# Table : RawRecords
# Select MIN,MAX,AVG of temp / humidity / pressure and the summed rain_1h,
# grouped by location + calendar day
#----------------------------------------------------------------------------------
# Grouping and ordering use DATE(date_timestamp), the same expression that is selected
# as dte. The previous GROUP BY month(), day() did not determine dte (same day/month
# of different years fell into one group) and ORDER BY used the raw, non-grouped
# date_timestamp; MySQL rejects both under ONLY_FULL_GROUP_BY and otherwise returns
# arbitrary values.
# The former "id_location IN (SELECT id FROM Locations)" filter is implied by the INNER JOIN.
# 273.15 is subtracted from the temp aggregates (presumably Kelvin -> Celsius - confirm units).
# NOTE(review): round(sum(rain_1h)) has no alias, so the DataFrame column is named after
# the expression text; it is left as-is to keep the existing column label.
raw_data = pd.read_sql(
    """
    SELECT DATE(date_timestamp) AS dte, Locations.name AS LName,
    (round(min(temp),2) - 273.15) as minTemp, (round(max(temp),2) - 273.15) as maxTemp, (round(avg(temp), 2) - 273.15) as avgTemp,
    round(min(humidity),2) as minHumidity, round(max(humidity),2) as maxHumidity, round(avg(humidity), 2) as avgHumidity,
    round(min(pressure),2) as minPressure, round(max(pressure),2) as maxPressure, round(avg(pressure), 2) as avgPressure,
    round(sum(rain_1h))
    FROM RawRecords
    INNER JOIN Locations ON Locations.id = RawRecords.id_location
    GROUP BY id_location, DATE(date_timestamp)
    ORDER BY id_location, DATE(date_timestamp);
    """,
    con=db_connection
)
pdata = pd.DataFrame(raw_data)
pdata.head(100)

In [None]:
#----------------------------------------------------------------------------------
# Table : RawRecords
# Select MIN,MAX,AVG of temp / humidity / pressure grouped by location + month
# (dte = earliest recorded day of each location/month group)
#----------------------------------------------------------------------------------
# dte is wrapped in MIN() because date(date_timestamp) is not determined by
# GROUP BY id_location, month(...): MySQL rejects the bare column under
# ONLY_FULL_GROUP_BY and otherwise returns an arbitrary day of the group.
# "MONTH(...) IS NOT NULL" selects the same rows as the previous always-true
# "MONTH(...) IN (SELECT MONTH(...) FROM RawRecords ...)" filter without scanning the
# table a second time; the previous "id_location IN (SELECT id FROM Locations)" filter
# is implied by the INNER JOIN.
# 273.15 is subtracted from the temp aggregates (presumably Kelvin -> Celsius - confirm units).
# NOTE(review): MONTH() alone merges the same month of different years - add
# YEAR(date_timestamp) to GROUP BY / ORDER BY if that is not intended.
raw_data = pd.read_sql(
    """
    SELECT MIN(DATE(date_timestamp)) AS dte, Locations.name AS LName,
    (round(min(temp),2) - 273.15) as minTemp, (round(max(temp),2) - 273.15) as maxTemp, (round(avg(temp), 2) - 273.15) as avgTemp,
    round(min(humidity),2) as minHumidity, round(max(humidity),2) as maxHumidity, round(avg(humidity), 2) as avgHumidity,
    round(min(pressure),2) as minPressure, round(max(pressure),2) as maxPressure, round(avg(pressure), 2) as avgPressure
    FROM RawRecords
    INNER JOIN Locations ON Locations.id = RawRecords.id_location
    WHERE MONTH(date_timestamp) IS NOT NULL
    GROUP BY id_location, MONTH(date_timestamp)
    ORDER BY id_location, MONTH(date_timestamp);
    """,
    con=db_connection
)
pdata = pd.DataFrame(raw_data)
pdata.head(100)
