In [23]:
import os

from dagster import op, Out, In, get_dagster_logger, job
from pymongo import MongoClient, errors
import openmeteo_requests
import requests_cache
import pandas as pd
from retry_requests import retry
from sqlalchemy import create_engine

log = get_dagster_logger()

# Open-Meteo API client: HTTP responses go through a requests_cache session
# named ".cache" (expire_after=-1), wrapped with retry-on-error
# (retries=5, backoff_factor=0.2).
cache_session = requests_cache.CachedSession(".cache", expire_after=-1)
retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
openmeteo = openmeteo_requests.Client(session=retry_session)

# Database connection strings. The defaults target databases on 127.0.0.1;
# set POSTGRES_CONNECT / MONGO_CONNECT in the environment to point elsewhere
# without editing this cell, e.g. the alternate hosts:
#   POSTGRES_CONNECT="postgresql://dap:dap@postgres_database:5432/projectdb"
#   MONGO_CONNECT="mongodb://dap:dap@mongodb_database"
postgres_connect = os.environ.get(
    "POSTGRES_CONNECT", "postgresql://dap:dap@127.0.0.1:5432/projectdb"
)
mongo_connect = os.environ.get("MONGO_CONNECT", "mongodb://dap:dap@127.0.0.1")



In [26]:
@op()
def transform_weather() -> pd.DataFrame:
    """Load weather documents from MongoDB and keep only the 2023 records.

    Reads every document of ``weather_collection`` in the ``projectdb_mongo``
    database (reached through the module-level ``mongo_connect`` URI), parses
    the ``date`` field with ``pd.to_datetime`` and keeps the rows whose date
    lies in the half-open interval [2023-01-01, 2024-01-01).

    Returns:
        pd.DataFrame: one row per retained document, with the original index
        labels preserved and ``date`` converted to datetimes.

    Raises:
        KeyError: if the loaded frame has no ``date`` column (for example
            when the collection returns no documents).
    """
    # Use the client as a context manager so it is closed even if the query
    # fails; the cursor is fully materialised into a list inside the block.
    with MongoClient(mongo_connect) as client:
        weather_collection = client["projectdb_mongo"]["weather_collection"]
        weather_df = pd.DataFrame(list(weather_collection.find({})))

    weather_df['date'] = pd.to_datetime(weather_df['date'])

    # Filtering data from 2023-01-01 to 2023-12-31
    in_2023 = (weather_df['date'] >= '2023-01-01') & (weather_df['date'] < '2024-01-01')

    # Return an explicit copy so callers get a frame independent of the
    # unfiltered one and can modify it freely.
    return weather_df[in_2023].copy()

In [28]:
# Per-column count of null entries in the frame returned by transform_weather()
missing_data = transform_weather().isna().sum()
missing_data


_id                     0
date                    0
temperature_2m          0
relative_humidity_2m    0
dew_point_2m            0
apparent_temperature    0
precipitation           0
rain                    0
snowfall                0
weather_code            0
cloud_cover             0
wind_speed_10m          0
wind_direction_10m      0
is_day                  0
sunshine_duration       0
dtype: int64

In [27]:
# Calls the op directly (which queries MongoDB) and renders the returned frame.
display(transform_weather())

Unnamed: 0,_id,date,temperature_2m,relative_humidity_2m,dew_point_2m,apparent_temperature,precipitation,rain,snowfall,weather_code,cloud_cover,wind_speed_10m,wind_direction_10m,is_day,sunshine_duration
1,1672531200,2023-01-01 00:00:00,6.27,91.387459,4.97,2.430736,0.0,0.0,0.0,3.0,100.000000,17.935081,218.480225,0.0,0.0
2,1672534800,2023-01-01 01:00:00,5.57,90.385803,4.12,1.473115,0.1,0.1,0.0,51.0,6.900000,18.504139,217.092911,0.0,0.0
3,1672538400,2023-01-01 02:00:00,4.72,92.905891,3.67,0.670981,0.0,0.0,0.0,2.0,55.500000,17.566378,225.830231,0.0,0.0
4,1672542000,2023-01-01 03:00:00,4.92,93.571838,3.97,1.040413,0.0,0.0,0.0,3.0,100.000000,16.808571,223.264328,0.0,0.0
5,1672545600,2023-01-01 04:00:00,5.47,91.976448,4.27,1.724712,0.0,0.0,0.0,2.0,61.500000,16.299694,223.210114,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8755,1704045600,2023-12-31 18:00:00,6.82,87.689705,4.92,1.515806,0.2,0.2,0.0,51.0,100.000000,27.908709,263.333435,0.0,0.0
8756,1704049200,2023-12-31 19:00:00,7.12,85.905251,4.92,1.778767,0.1,0.1,0.0,51.0,100.000000,28.162956,265.601379,0.0,0.0
8757,1704052800,2023-12-31 20:00:00,6.97,86.189529,4.82,1.755062,0.2,0.2,0.0,51.0,93.299995,27.153164,263.911560,0.0,0.0
8758,1704056400,2023-12-31 21:00:00,6.57,87.056412,4.57,1.318548,0.0,0.0,0.0,2.0,66.899994,27.047956,260.036255,0.0,0.0
