In [33]:
import os

import motor.motor_asyncio
import pandas as pd
from motor.motor_asyncio import AsyncIOMotorClient
from pymongo.server_api import ServerApi

%autoawait asyncio

In [34]:
# Connection string for the MongoDB cluster. It is read from the MONGODB_URI
# environment variable when that is set, so real credentials do not have to be
# committed with the notebook. The literal is kept only as a fallback for
# backward compatibility; its password field appears to be a redacted
# placeholder, so it will presumably not authenticate as written.
uri = os.environ.get(
    "MONGODB_URI",
    "mongodb+srv://scientificprogramming:***REMOVED***@scientificprogramming.nzfrli0.mongodb.net/test",
)

# Asynchronous Motor client, created with server API version '1'.
DBclient = AsyncIOMotorClient(uri, server_api=ServerApi('1'))

# Database `data` and its `openweather` collection; `collection` is the default
# argument of the extract_* coroutines defined in the following cells.
db = DBclient.data
collection = db.openweather

In [35]:
def _mean_hourly_precipitation(entries) -> float:
    """Collapse one day's pushed precipitation sub-documents into a per-hour mean.

    Each entry is expected to map a duration key to an amount (presumably
    OpenWeather-style keys such as ``"1h"`` -- TODO confirm against the stored
    documents). Every amount is multiplied by the number of hours encoded in
    its key, the products are summed, and the sum is divided by 24.
    """
    total = 0
    for entry in entries:
        if not entry:
            # Null or empty sub-documents carry no precipitation.
            continue
        for duration, amount in entry.items():
            total += amount * int(duration.strip('h'))
    return total / 24


async def extract_data_daily(collection=collection) -> pd.DataFrame:
    """Extract per-day aggregates of the weather observations.

    The aggregation pipeline:

    * derives ``date`` from the first 10 characters of ``dt``;
    * computes ``sunhours`` as the ``$dateDiff`` between ``sunrise`` and
      ``sunset`` -- note the unit is ``minute``, so despite the name the
      column holds minutes of daylight;
    * adds 10000 to any ``temp`` below 200 (NOTE(review): looks like a repair
      for mis-stored readings -- confirm the intent);
    * groups by ``date`` and averages ``sunhours``, ``temp``, ``uvi``,
      ``wind_speed`` and ``clouds``, keeps min/max of ``temp`` and collects
      the raw ``rain`` / ``snow`` sub-documents.

    The collected rain and snow sub-documents are then reduced to a per-hour
    mean for the day (see ``_mean_hourly_precipitation``).

    Args:
        collection: Object whose ``aggregate(pipeline)`` can be iterated with
            ``async for``; defaults to the notebook-level ``collection``.

    Returns:
        DataFrame sorted by a UTC-localized ``date`` index with columns
        ``sunhours``, ``avg_temp``, ``min_temp``, ``max_temp``, ``uvi``,
        ``wind_speed``, ``clouds``, ``rain`` and ``snow``. The three
        temperature columns have 273 subtracted (presumably Kelvin to an
        approximate Celsius value).
    """
    pipeline = [
        {
            '$addFields': {
                'date': {'$substr': ['$dt', 0, 10]},
                'sunhours': {
                    '$dateDiff': {
                        'startDate': '$sunrise',
                        'endDate': '$sunset',
                        'unit': 'minute'
                    }
                },
                'temp': {
                    '$cond': [
                        {'$gte': ['$temp', 200]},
                        '$temp',
                        {'$add': ['$temp', 10000]}
                    ]
                }
            }
        }, {
            '$group': {
                '_id': '$date',
                'sunhours': {'$avg': '$sunhours'},
                'avg_temp': {'$avg': '$temp'},
                'min_temp': {'$min': '$temp'},
                'max_temp': {'$max': '$temp'},
                'uvi': {'$avg': '$uvi'},
                'wind_speed': {'$avg': '$wind_speed'},
                'clouds': {'$avg': '$clouds'},
                'rain': {'$push': '$rain'},
                'snow': {'$push': '$snow'}
            }
        }
    ]

    results = []
    async for day in collection.aggregate(pipeline):
        # Replace the pushed lists with a single number per day. The snow value
        # is computed from the snow amounts themselves (the previous version
        # accidentally reused the last rain amount here).
        day['rain'] = _mean_hourly_precipitation(day['rain'])
        day['snow'] = _mean_hourly_precipitation(day['snow'])
        results.append(day)

    df = pd.DataFrame(results)
    df = df.set_index("_id")
    # The group key is a YYYY-MM-DD style string; turn it into a UTC DatetimeIndex.
    df = df.set_index(pd.to_datetime(df.index).tz_localize("UTC").rename("date"))
    df = df.sort_index()

    df["avg_temp"] -= 273
    df["min_temp"] -= 273
    df["max_temp"] -= 273

    return df

In [36]:
async def extract_heatingdemand(collection=collection) -> pd.DataFrame:
    """Extract the daily mean shortfall of the temperature below 288.

    For every observation the heating demand is ``288 - temp`` when ``temp``
    is at most 288 and 0 otherwise; the original notebook describes this
    threshold as 14°C expressed in Kelvin. Observations are grouped by the
    first 10 characters of ``dt`` and the demand is averaged per group.

    Args:
        collection: Object whose ``aggregate(pipeline)`` can be iterated with
            ``async for``; defaults to the notebook-level ``collection``.

    Returns:
        DataFrame with a single ``avg_demand`` column, sorted by a
        UTC-localized ``date`` index.
    """
    # Stage 1: day key plus the temperature adjustment (values below 200 get
    # 10000 added -- NOTE(review): reason not visible here, confirm).
    normalise_stage = {
        '$addFields': {
            'date': {'$substr': ['$dt', 0, 10]},
            'temp': {
                '$cond': [
                    {'$gte': ['$temp', 200]},
                    '$temp',
                    {'$add': ['$temp', 10000]},
                ]
            },
        }
    }
    # Stage 2: per-observation demand, never negative.
    demand_stage = {
        '$addFields': {
            'heatingdemand': {
                '$cond': {
                    'if': {'$lte': ['$temp', 288]},
                    'then': {'$subtract': [288, '$temp']},
                    'else': 0,
                }
            }
        }
    }
    # Stage 3: average the demand for each day.
    daily_mean_stage = {
        '$group': {
            '_id': '$date',
            'avg_demand': {'$avg': '$heatingdemand'},
        }
    }
    pipeline = [normalise_stage, demand_stage, daily_mean_stage]

    records = [doc async for doc in collection.aggregate(pipeline)]

    frame = pd.DataFrame(records).set_index("_id")
    utc_dates = pd.to_datetime(frame.index).tz_localize("UTC").rename("date")
    return frame.set_index(utc_dates).sort_index()

In [37]:
async def extract_windpower(collection=collection) -> pd.DataFrame:
    """Extract the daily mean of the squared wind speed.

    The notebook uses ``wind_speed ** 2`` as its stand-in for wind power.
    Observations are grouped by the first 10 characters of ``dt`` and the
    squared speed is averaged per group.

    Args:
        collection: Object whose ``aggregate(pipeline)`` can be iterated with
            ``async for``; defaults to the notebook-level ``collection``.

    Returns:
        DataFrame with a single ``windpower`` column, sorted by a
        UTC-localized ``date`` index.
    """
    # Stage 1: derive the day key from the timestamp field.
    day_key_stage = {
        '$addFields': {
            'date': {'$substr': ['$dt', 0, 10]},
        }
    }
    # Stage 2: average wind_speed squared within each day.
    squared_speed_stage = {
        '$group': {
            '_id': '$date',
            'windpower': {
                '$avg': {'$pow': ['$wind_speed', 2]},
            },
        }
    }
    pipeline = [day_key_stage, squared_speed_stage]

    records = [doc async for doc in collection.aggregate(pipeline)]

    frame = pd.DataFrame(records).set_index("_id")
    utc_dates = pd.to_datetime(frame.index).tz_localize("UTC").rename("date")
    return frame.set_index(utc_dates).sort_index()

In [38]:
df_daily = await(extract_data_daily(collection))
df_heatingdemand = await(extract_heatingdemand(collection))
df_windpower = await(extract_windpower(collection))

In [41]:
# Show the daily wind-power frame as this cell's output.
df_windpower

Unnamed: 0_level_0,windpower
date,Unnamed: 1_level_1
2023-03-30 00:00:00+00:00,4.127036
2023-03-31 00:00:00+00:00,15.749086
2023-04-01 00:00:00+00:00,18.008197
2023-04-02 00:00:00+00:00,8.844671
2023-04-03 00:00:00+00:00,17.628465
2023-04-04 00:00:00+00:00,16.69763
2023-04-05 00:00:00+00:00,6.471008
2023-04-06 00:00:00+00:00,5.409244
2023-04-07 00:00:00+00:00,5.069079
2023-04-08 00:00:00+00:00,7.277141
