In [None]:
from dotenv import load_dotenv

import pandas as pd

from database.postgresql_functools import PostgreSQLManager

# Read key/value pairs from a local .env file into the process environment.
# NOTE(review): presumably this supplies the database settings that
# PostgreSQLManager reads — confirm against database/postgresql_functools.
load_dotenv()

In [None]:
# Project-local database helper; its `.engine` attribute is what the query
# cells below hand to pandas as the connection.
postgres = PostgreSQLManager()

In [None]:
# Daily observations per city, with columns renamed to the study's schema.
# "Date" is assembled in SQL as a 'year-month-day' string; the same expression
# is used by the other query cells so the frames can be merged on it.
query = """
    SELECT 
        EXTRACT(YEAR FROM daily_weather.date) || '-' || EXTRACT(MONTH FROM daily_weather.date) || '-' || EXTRACT(DAY FROM daily_weather.date) AS "Date",
        city.name AS "Location",
        daily_weather.min_temp AS "MinTemp",
        daily_weather.max_temp AS "MaxTemp",
        daily_weather.rainfall AS "Rainfall",
        daily_weather.cloudiness AS "Cloudiness",
        daily_weather.pressure AS "Pressure",
        daily_weather.humidity AS "Humidity",
        daily_weather.wind_gust_speed AS "WindSpeed",
        daily_weather.wind_gust_dir AS "WindDir"
    FROM 
        daily_weather
    JOIN 
        city ON daily_weather.city_id = city.id
    ORDER BY 
        city.name, daily_weather.date
"""
df_daily = (
    pd.read_sql_query(sql=query, con=postgres.engine)
    # Keep only the first row for each (Date, Location) pair.
    .drop_duplicates(subset=['Date', 'Location'])
    # The query selects no evaporation or sunshine values, so both columns
    # are filled with the constant -1.
    # NOTE(review): presumably a "missing" sentinel so the columns line up
    # with the Australian dataset — confirm downstream code treats -1 as such.
    .assign(Evaporation=-1, Sunshine=-1)
)
df_daily.head()

In [None]:
# A day counts as rainy when Rainfall is at least 1; a missing Rainfall
# compares as False and therefore yields 'No'.
df_daily['RainToday'] = df_daily['Rainfall'].ge(1).map({True: 'Yes', False: 'No'})

# RainTomorrow is the following row's RainToday *within the same Location*.
# Shifting the whole frame at once would give the last day of one city the
# first day of the next city as its "tomorrow".
# The rows come back ordered by city and date, so the next row of a group is
# the next recorded day for that city.
# NOTE(review): this assumes there are no gaps in the daily series — confirm.
next_day_rain = df_daily.groupby('Location')['RainToday'].shift(-1)

# The last recorded day of each city has no following row; it falls back to
# 'No', the same fallback the frame's final row had before.
df_daily['RainTomorrow'] = next_day_rain.eq('Yes').map({True: 'Yes', False: 'No'})
df_daily.head()

In [None]:
# Morning snapshot per city, exposed under the "...9am" column names.
# NOTE(review): the rows are selected where the timestamp hour is 17 —
# presumably weather.date is stored in UTC and 17:00 corresponds to 9am local
# time for these cities; confirm the offset.
query = """
    SELECT 
        EXTRACT(YEAR FROM weather.date) || '-' || EXTRACT(MONTH FROM weather.date) || '-' || EXTRACT(DAY FROM weather.date) AS "Date",
        city.name AS "Location",
        weather.temp AS "Temp9am",
        weather.cloudiness AS "Cloud9am",
        weather.pressure AS "Pressure9am",
        weather.humidity AS "Humidity9am",
        weather.wind_gust_speed AS "WindSpeed9am",
        weather.wind_gust_dir AS "WindDir9am"
    FROM 
        weather
    JOIN 
        city ON weather.city_id = city.id
    WHERE 
        EXTRACT(HOUR FROM weather.date) = 17
    ORDER BY 
        city.name, weather.date
"""
# Load the rows and keep the first one per (Date, Location) pair.
df_9am = pd.read_sql_query(sql=query, con=postgres.engine).drop_duplicates(
    subset=['Date', 'Location']
)
print(df_9am.shape)
df_9am

In [None]:
# Afternoon snapshot per city, exposed under the "...3pm" column names.
# NOTE(review): the rows are selected where the timestamp hour is 23 —
# presumably weather.date is stored in UTC and 23:00 corresponds to 3pm local
# time for these cities; confirm the offset.
query = """
    SELECT 
        EXTRACT(YEAR FROM weather.date) || '-' || EXTRACT(MONTH FROM weather.date) || '-' || EXTRACT(DAY FROM weather.date) AS "Date",
        city.name AS "Location",
        weather.temp AS "Temp3pm",
        weather.cloudiness AS "Cloud3pm",
        weather.pressure AS "Pressure3pm",
        weather.humidity AS "Humidity3pm",
        weather.wind_gust_speed AS "WindSpeed3pm",
        weather.wind_gust_dir AS "WindDir3pm"
    FROM 
        weather
    JOIN 
        city ON weather.city_id = city.id
    WHERE 
        EXTRACT(HOUR FROM weather.date) = 23
    ORDER BY 
        city.name, weather.date
"""
# Load the rows and keep the first one per (Date, Location) pair.
df_3pm = pd.read_sql_query(sql=query, con=postgres.engine).drop_duplicates(
    subset=['Date', 'Location']
)
print(df_3pm.shape)
df_3pm

In [None]:
# Pair each morning snapshot with the afternoon snapshot of the same day and
# city. The join is inner, so a (Date, Location) present in only one of the
# two frames is dropped.
df_3_9 = df_9am.merge(df_3pm, how='inner', on=['Date', 'Location'])
print(df_3_9.shape)
df_3_9

In [None]:
# Attach the 9am/3pm snapshots to the daily rows. The join is inner, so only
# (Date, Location) pairs present in both frames are kept.
df_open = df_daily.merge(df_3_9, how='inner', on=['Date', 'Location'])
print(df_open.shape)
df_open

In [None]:
# Australian reference dataset, renamed to the same column names as the
# frames built from the daily_weather / weather tables.
# The ORDER BY is required: without it the database may return rows in any
# order, and the RainTomorrow computation below depends on each row being
# followed by the next date of the same location.
query = """
    SELECT 
        EXTRACT(YEAR FROM date) || '-' || EXTRACT(MONTH FROM date) || '-' || EXTRACT(DAY FROM date) AS "Date",
        location AS "Location",
        min_temp AS "MinTemp",
        max_temp AS "MaxTemp",
        rainfall AS "Rainfall",
        evaporation AS "Evaporation",
        sunshine AS "Sunshine",
        wind_gust_dir AS "WindDir",
        wind_gust_speed AS "WindSpeed",
        temp_9am AS "Temp9am",
        humidity_9am AS "Humidity9am",
        cloud_9am AS "Cloud9am",
        wind_dir_9am AS "WindDir9am",
        wind_speed_9am AS "WindSpeed9am",
        pressure_9am AS "Pressure9am",
        temp_3pm AS "Temp3pm",
        humidity_3pm AS "Humidity3pm",
        cloud_3pm AS "Cloud3pm",
        wind_dir_3pm AS "WindDir3pm",
        wind_speed_3pm AS "WindSpeed3pm",
        pressure_3pm AS "Pressure3pm"
    FROM 
        australian_meteorology_weather
    ORDER BY 
        australian_meteorology_weather.location, australian_meteorology_weather.date
"""
df_aus = pd.read_sql_query(query, postgres.engine)

# A day counts as rainy when Rainfall is at least 1; a missing Rainfall
# compares as False and therefore yields 'No'.
df_aus['RainToday'] = df_aus['Rainfall'].ge(1).map({True: 'Yes', False: 'No'})

# RainTomorrow is the following row's RainToday within the same Location, so
# one location's last day never borrows another location's first day.
# NOTE(review): this assumes one row per location per day with no gaps —
# confirm against the table.
next_day_rain = df_aus.groupby('Location')['RainToday'].shift(-1)

# The last row of each location has no following day and falls back to 'No'.
df_aus['RainTomorrow'] = next_day_rain.eq('Yes').map({True: 'Yes', False: 'No'})
df_aus.head()

In [None]:
# Show the merged daily + 9am/3pm frame again for comparison with df_aus.
df_open

In [None]:
# Show the Australian frame for comparison with df_open.
df_aus

In [None]:
# Stack both datasets into one study frame. Columns are aligned by name, and
# a column present in only one input is filled with NaN for the other's rows
# (df_aus has no "Cloudiness", "Pressure" or "Humidity" daily columns).
# ignore_index=True gives the result a fresh 0..n-1 index; keeping the
# original indexes would leave the same labels in both halves of the frame.
df = pd.concat([df_open, df_aus], ignore_index=True)
df

In [None]:
# Write the combined frame to weather_study.csv in the current working
# directory, without the index column.
df.to_csv('weather_study.csv', index=False)