In [None]:
from pathlib import Path
from dotenv import load_dotenv
import os

import pandas as pd

from database.postgresql_functools import PostgreSQLManager

# Pull variables from a .env file into the process environment.
# NOTE(review): presumably this supplies the database connection settings — confirm.
load_dotenv()

# Parent of the (symlink-resolved) current working directory; used further down
# as the base directory for the CSV export.
dir_path = Path.cwd().resolve().parent

In [None]:
# Project database helper. The cells below only use its `.engine` attribute,
# which is handed to pandas as the SQL connection.
# NOTE(review): presumably configured from the environment loaded above — confirm.
postgres = PostgreSQLManager()

In [None]:
# Daily observations per city.
# "Date" is built by concatenating the EXTRACT-ed year/month/day with '-'
# (presumably not zero-padded, since EXTRACT yields numbers — confirm). It is
# the merge key, together with "Location", for the 9am/3pm frames, so its
# format must stay identical to the one used in those queries.
daily_weather_query = """
SELECT 
    EXTRACT(YEAR FROM dw.date) || '-' || EXTRACT(MONTH FROM dw.date) || '-' || EXTRACT(DAY FROM dw.date) AS "Date",
    c.name AS "Location",
    dw.min_temp AS "MinTemp",
    dw.max_temp AS "MaxTemp",
    dw.rainfall AS "Rainfall",
    dw.wind_gust_speed AS "WindGustSpeed",
    dw.wind_gust_dir AS "WindGustDir"
FROM 
    daily_weather dw
JOIN 
    city c ON dw.city_id = c.id
ORDER BY 
    c.name, dw.date
"""

df_daily = pd.read_sql_query(daily_weather_query, postgres.engine)
# Keep the first row for each (Date, Location) pair.
df_daily = df_daily.drop_duplicates(subset=['Date', 'Location'])

# This source has no evaporation/sunshine columns; -1 is written as a placeholder.
df_daily['Evaporation'], df_daily['Sunshine'] = -1, -1

# A day is flagged as rainy when Rainfall >= 1.
df_daily['RainToday'] = df_daily['Rainfall'].apply(lambda x: 'Yes' if x >= 1 else 'No')

# RainTomorrow is the RainToday value of the next row *of the same city*.
# Shifting inside each Location group prevents the last day of one city from
# picking up the first day of the next city (rows are ordered by city, then date).
# The last row of each city has no successor and falls back to 'No'.
# NOTE(review): "next row" is only "next day" if the dates have no gaps — confirm.
next_row_rain = df_daily.groupby('Location')['RainToday'].shift(-1)
df_daily['RainTomorrow'] = next_row_rain.apply(lambda x: 'Yes' if x == 'Yes' else 'No')

In [None]:
# Both snapshot queries share one shape: only the column-name suffix and the
# hour filter differ, so they are rendered from a single template.
# Note that the "WindSpeed*" / "WindDir*" columns are fed from the gust columns.
_SNAPSHOT_QUERY_TEMPLATE = """
SELECT 
    EXTRACT(YEAR FROM w.date) || '-' || EXTRACT(MONTH FROM w.date) || '-' || EXTRACT(DAY FROM w.date) AS "Date",
    c.name AS "Location",
    w.temp AS "Temp{suffix}",
    w.cloudiness AS "Cloud{suffix}",
    w.pressure AS "Pressure{suffix}",
    w.humidity AS "Humidity{suffix}",
    w.wind_gust_speed AS "WindSpeed{suffix}",
    w.wind_gust_dir AS "WindDir{suffix}"
FROM 
    weather w
JOIN 
    city c ON w.city_id = c.id
WHERE 
    EXTRACT(HOUR FROM w.date) = {hour}
ORDER BY 
    c.name, w.date
"""

# Stored hour 17 feeds the "9am" columns and stored hour 23 the "3pm" columns.
# NOTE(review): presumably the timestamps are stored with an offset from local
# time — confirm against how the `weather` table is populated.
weather_9am_query = _SNAPSHOT_QUERY_TEMPLATE.format(suffix='9am', hour=17)
weather_3pm_query = _SNAPSHOT_QUERY_TEMPLATE.format(suffix='3pm', hour=23)


def _load_snapshot(query):
    """Run `query` and keep the first row for each (Date, Location) pair."""
    snapshot = pd.read_sql_query(query, postgres.engine)
    return snapshot.drop_duplicates(subset=['Date', 'Location'])


df_9am = _load_snapshot(weather_9am_query)
df_3pm = _load_snapshot(weather_3pm_query)

In [None]:
# Pair each 9am snapshot with the 3pm snapshot of the same date and city;
# inner join, so days missing either snapshot are dropped.
df_3_9 = df_9am.merge(df_3pm, how='inner', on=['Date', 'Location'])

In [None]:
# Attach the 9am/3pm snapshot columns to the daily rows; inner join, so only
# (Date, Location) pairs present in both frames are kept.
df_open = df_daily.merge(df_3_9, how='inner', on=['Date', 'Location'])

In [None]:
# Historical Australian observations, renamed to the same column names as the
# frame built from the daily/9am/3pm tables so the two can be concatenated.
# The ORDER BY is required: RainTomorrow below is derived from the *next row*,
# and without an explicit ordering the database may return rows in any order.
aus_weather_query = """
SELECT 
    EXTRACT(YEAR FROM date) || '-' || EXTRACT(MONTH FROM date) || '-' || EXTRACT(DAY FROM date) AS "Date",
    location AS "Location",
    min_temp AS "MinTemp",
    max_temp AS "MaxTemp",
    rainfall AS "Rainfall",
    evaporation AS "Evaporation",
    sunshine AS "Sunshine",
    wind_gust_dir AS "WindGustDir",
    wind_gust_speed AS "WindGustSpeed",
    temp_9am AS "Temp9am",
    humidity_9am AS "Humidity9am",
    cloud_9am AS "Cloud9am",
    wind_dir_9am AS "WindDir9am",
    wind_speed_9am AS "WindSpeed9am",
    pressure_9am AS "Pressure9am",
    temp_3pm AS "Temp3pm",
    humidity_3pm AS "Humidity3pm",
    cloud_3pm AS "Cloud3pm",
    wind_dir_3pm AS "WindDir3pm",
    wind_speed_3pm AS "WindSpeed3pm",
    pressure_3pm AS "Pressure3pm"
FROM 
    australian_meteorology_weather
ORDER BY 
    location, date
"""

df_aus = pd.read_sql_query(aus_weather_query, postgres.engine)

# A day is flagged as rainy when Rainfall >= 1.
df_aus['RainToday'] = df_aus['Rainfall'].apply(lambda x: 'Yes' if x >= 1 else 'No')

# RainTomorrow is the RainToday value of the next row *of the same location*.
# Shifting inside each Location group stops the last day of one location from
# picking up the first day of another. The last row of each location has no
# successor and falls back to 'No'.
# NOTE(review): "next row" is only "next day" if the dates have no gaps — confirm.
next_row_rain = df_aus.groupby('Location')['RainToday'].shift(-1)
df_aus['RainTomorrow'] = next_row_rain.apply(lambda x: 'Yes' if x == 'Yes' else 'No')

In [None]:
# Stack both sources row-wise (columns are aligned by name) and renumber the index.
df_final = pd.concat(objs=[df_open, df_aus], axis=0, ignore_index=True)

In [None]:
# Export the combined dataset to <dir_path>/dataCsv/weather_study.csv.
csv_dir = os.path.join(dir_path, 'dataCsv')
csv_path = os.path.join(csv_dir, 'weather_study.csv')

# to_csv does not create missing directories, so make sure the target exists;
# otherwise a fresh checkout fails only at the very last step.
os.makedirs(csv_dir, exist_ok=True)

df_final.to_csv(csv_path, index=False)