In [1]:
# Libraries Imports
#-----------------------------------------------------------------------------------------------------
from configparser import ConfigParser
from datetime import date, timedelta
from urllib.parse import quote

import pandas as pd
from sqlalchemy import create_engine


In [2]:
# Load connection configuration file
#-----------------------------------------------------------------------------------------------------
config_filename = "./GetWeather.ini"

# Read the .ini file.
# ConfigParser.read() silently skips files it cannot open and returns the list of
# files it actually parsed, so that list is what has to be checked: the parser
# object itself is always truthy (it always contains the DEFAULT section).
config_object = ConfigParser()
loaded_files = config_object.read(config_filename)
if not loaded_files:
    raise FileNotFoundError(
        "Error while loading configuration !!! File not found or unreadable: {}".format(config_filename)
    )

# Settings section used by the following cells (raises KeyError if [INFO] is absent)
cfg = config_object['INFO']

In [3]:
# Establish connection with DB (using sqlalchemy)
#-----------------------------------------------------------------------------------------------------
# Percent-encode the credentials before placing them in the URL: characters such as
# '@', ':', '/' or '#' inside a user name or password would otherwise be parsed as
# URL delimiters and produce a wrong (or unparsable) connection string.
# safe="" makes quote() escape '/' as well. The host is left untouched so that a
# "host:port" value keeps working.
db_connection_str = "mysql+pymysql://{}:{}@{}/WeatherDB".format(
    quote(cfg['user'], safe=""),
    quote(cfg['password'], safe=""),
    cfg['host'],
)
db_connection = create_engine(db_connection_str)

In [10]:
# Number of records per location, largest count first
#-----------------------------------------------------------------------------------------------------
records_per_location_sql = """SELECT Locations.name as name, COUNT(*) as nbRecords
FROM Records
INNER JOIN Locations ON Records.id_location = Locations.id
GROUP BY id_location
ORDER BY nbRecords DESC;"""

raw_data = pd.read_sql(records_per_location_sql, con=db_connection)
pdata = pd.DataFrame(raw_data)

# Show the first rows as a quick check that the query returned something
pdata.head(10)


Unnamed: 0,name,nbRecords
0,Hérémence,1883
1,Geneva,1709
2,Lausanne,1708
3,Sion,1561
4,Evolène,1186


In [None]:
# Get average values on all records per locations
#-----------------------------------------------------------------------------------------------------
# NOTE(review): the temperature offset was 271.15; the Kelvin -> Celsius offset is
# 273.15, so the previous value reported temperatures 2 degrees too warm.
# Assumes Records.temp is stored in Kelvin -- confirm against the ingestion code.
raw_data = pd.read_sql(
"""SELECT 
	    id_location, 
        Locations.name,
	    ROUND(AVG(pressure),2) AS avgPress,
	    ROUND(AVG(temp - 273.15),2) AS avgTemp,
	    ROUND(AVG(humidity),2) AS avgHumidity
    FROM Records
    INNER JOIN Locations on id_location = Locations.id
    GROUP BY id_location;""", con=db_connection)
pdata = pd.DataFrame(raw_data)

# Display header just to be sure we have something
pdata.head(10)


In [None]:
# Get minimum values per locations
#-----------------------------------------------------------------------------------------------------
# NOTE(review): the temperature offset was 271.15; the Kelvin -> Celsius offset is
# 273.15, so the previous value reported temperatures 2 degrees too warm.
# Assumes Records.temp is stored in Kelvin -- confirm against the ingestion code.
# The statement is now terminated by a single ';' (the original ';;' sent an extra,
# empty statement to the server).
raw_data = pd.read_sql(
""" SELECT 
	    id_location, 
        Locations.name,
	    ROUND(MIN(pressure),2) AS MINPress,
	    ROUND(MIN(temp - 273.15),2) AS MINTemp,
	    ROUND(MIN(humidity),2) AS MINHumidity
    FROM Records
    Inner join Locations on id_location = Locations.id
    group by id_location;""", con=db_connection)
pdata = pd.DataFrame(raw_data)

# Display header just to be sure we have something
pdata.head(10)

In [None]:
# Hourly statistics for one location on one given day
#-----------------------------------------------------------------------------------------------------
my_date = '2021-08-10'
loc = 6

# Build the stored-procedure call first, then hand it to pandas
hourly_stats_call = """ call Get_Hourly_Stats('{}',{});""".format(my_date, loc)
raw_data = pd.read_sql(hourly_stats_call, con=db_connection)
pdata = pd.DataFrame(raw_data)

# Show up to 30 rows as a quick check that the procedure returned something
pdata.head(30)

In [7]:
# Get hourly stats for a given range of day(s) and a single location
#-----------------------------------------------------------------------------------------------------
def daterange(start_date, end_date):
    """Lazily yield each day from start_date up to, but not including, end_date.

    Yields nothing when end_date is on or before start_date.
    """
    span_in_days = int((end_date - start_date).days)
    offset = 0
    while offset < span_in_days:
        yield start_date + timedelta(offset)
        offset += 1

# Work variables for the aggregation
start_date = date(2021,1,1)
end_date = date(2022,1,1)   # not included: daterange() stops the day before
loc = 6
nb_days = 0                 # number of days queried (was `iter`, which shadowed the builtin)
daily_frames = []           # one DataFrame per queried day

for my_date in daterange(start_date, end_date):
    nb_days += 1
    query = "call Get_Hourly_Stats('{}',{});".format(my_date, loc)
    raw_data = pd.read_sql(query, con=db_connection)
    daily_frames.append(raw_data)

# Concatenate once at the end instead of growing the frame inside the loop:
# DataFrame.append copied the whole frame on every iteration and no longer exists
# in pandas 2.x. ignore_index=True gives the result one continuous 0..n-1 index
# instead of repeating each day's own index.
if daily_frames:
    pdata = pd.concat(daily_frames, ignore_index=True)
else:
    # Empty date range: keep the original fallback of an empty frame
    pdata = pd.DataFrame([])

# Report the number of rows (the original printed the whole shape tuple here)
print("{} aggregate(s) created for a period of {} day(s).".format(len(pdata), nb_days))

(286, 22) aggregate(s) created for a period of 365 day(s).


In [8]:
# Quick look at pdata: its type on one line, its (rows, columns) shape on the next
print(type(pdata), pdata.shape, sep="\n")

<class 'pandas.core.frame.DataFrame'>
(286, 22)


In [9]:
# reset_index() returns a new frame and leaves pdata untouched, so the result has to
# be assigned for pdata to actually get a continuous 0..n-1 index.
# Re-running this cell is harmless: resetting an already-reset index changes nothing.
pdata = pdata.reset_index(drop=True)
pdata

Unnamed: 0,DATE(date_timestamp),Hours,nbRec,Hour,minTemp,maxTemp,avgTemp,minFeelsLike,maxFeelsLike,avgFeelsLike,...,avgPressure,minHumidity,maxHumidity,avgHumidity,avgWindSpeed,gust,avgWindDir,avgCloudsCover,sumRain,sumSnow
0,2021-06-10,08:00 - 09:00,6,0,15.37,16.85,15.80,14.57,16.04,15.01,...,1022.00,62.0,69.0,66.67,1.03,1.03,0.00,20.0,0.0,0.0
1,2021-06-10,09:00 - 10:00,7,0,17.53,18.48,18.07,16.77,17.65,17.28,...,1021.43,55.0,61.0,57.86,0.51,0.51,0.00,20.0,0.0,0.0
2,2021-06-10,10:00 - 11:00,6,0,18.48,20.01,19.14,17.71,19.34,18.41,...,1021.00,55.0,57.0,56.00,1.28,1.54,0.00,20.0,0.0,0.0
3,2021-06-10,11:00 - 12:00,6,0,20.05,21.08,20.44,19.23,20.28,19.60,...,1020.50,45.0,49.0,46.83,1.80,2.06,105.00,20.0,0.0,0.0
4,2021-06-10,12:00 - 13:00,7,0,21.39,22.42,21.78,20.57,21.70,21.01,...,1020.00,44.0,45.0,44.29,2.57,2.57,234.29,20.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
281,2021-08-24,21:00 - 22:00,1,0,14.75,14.75,14.75,13.89,13.89,13.89,...,1017.00,68.0,68.0,68.00,5.66,5.66,250.00,40.0,0.0,0.0
282,2021-08-24,22:00 - 23:00,1,0,14.12,14.12,14.12,13.20,13.20,13.20,...,1018.00,68.0,68.0,68.00,4.12,4.12,240.00,20.0,0.0,0.0
283,2021-08-28,21:00 - 22:00,1,0,12.76,12.76,12.76,11.47,11.47,11.47,...,1017.00,59.0,59.0,59.00,1.54,1.54,0.00,75.0,0.0,0.0
284,2021-08-28,22:00 - 23:00,1,0,12.22,12.22,12.22,10.77,10.77,10.77,...,1017.00,55.0,55.0,55.00,1.03,1.03,0.00,75.0,0.0,0.0
