In [2]:
import os

import pandas as pd
from motor.motor_asyncio import AsyncIOMotorClient
from pymongo.server_api import ServerApi



In [3]:
# Connection string: read from the MONGODB_URI environment variable when it is set, so
# credentials do not have to live in the notebook. The literal is only a fallback that
# keeps existing setups working; prefer exporting MONGODB_URI and dropping it.
uri = os.environ.get(
    "MONGODB_URI",
    "mongodb+srv://scientificprogramming:***REMOVED***@scientificprogramming.nzfrli0.mongodb.net/test",
)
# Async MongoDB client, requesting server API version '1'.
DBclient = AsyncIOMotorClient(uri, server_api=ServerApi('1'))
# Database `data`, collection `wetter2`: the default collection of the extract_* functions.
db = DBclient.data
collection = db.wetter2

In [15]:
async def extract_data_daily(collection=collection) -> pd.DataFrame:
    """Extract per-day aggregates of the main weather measurements.

    Documents are grouped by the first 10 characters of their ``datetime``
    field (the calendar date) and aggregated over everything that falls on
    that date.

    Parameters
    ----------
    collection
        Collection object whose ``aggregate(pipeline)`` result can be consumed
        with ``async for``. Defaults to the notebook-level ``collection``.

    Returns
    -------
    pd.DataFrame
        One row per date, sorted ascending, indexed by a UTC-localized
        ``DatetimeIndex`` named ``date``. Columns:

        * ``avg_temp`` / ``min_temp`` / ``max_temp`` -- mean, minimum and
          maximum of ``temp_C``
        * ``rain`` -- mean (not sum) of ``rain_mm``
        * ``wind_speed`` -- mean of ``wind_kmh`` divided by 3.6, i.e.
          converted from km/h to m/s
        * ``clouds`` -- mean of ``cloud_percent``

        If the aggregation yields no documents, an empty frame with the same
        columns and index type is returned instead of raising ``KeyError``.
    """
    pipeline = [
        # Derive the grouping key: the date part of the timestamp.
        {'$addFields': {'date': {'$substr': ['$datetime', 0, 10]}}},
        {
            '$group': {
                '_id': '$date',
                'avg_temp': {'$avg': '$temp_C'},
                'min_temp': {'$min': '$temp_C'},
                'max_temp': {'$max': '$temp_C'},
                'rain': {'$avg': '$rain_mm'},
                'wind_speed': {'$avg': '$wind_kmh'},
                'clouds': {'$avg': '$cloud_percent'},
            }
        },
    ]

    results = [doc async for doc in collection.aggregate(pipeline)]

    if not results:
        # pd.DataFrame([]) has no "_id" column, so set_index would raise;
        # hand back an empty frame with the documented shape instead.
        columns = ["avg_temp", "min_temp", "max_temp", "rain", "wind_speed", "clouds"]
        empty_index = pd.DatetimeIndex([], tz="UTC", name="date")
        return pd.DataFrame(columns=columns, index=empty_index, dtype="float64")

    df = pd.DataFrame(results).set_index("_id")
    # The group keys are date strings; turn them into a tz-aware index.
    df.index = pd.to_datetime(df.index).tz_localize("UTC").rename("date")
    df = df.sort_index()
    # 1 m/s = 3.6 km/h: report the mean wind speed in m/s.
    df["wind_speed"] = df["wind_speed"] / 3.6

    return df

In [16]:
# Run the daily aggregation on the default collection and display the result.
# NOTE(review): the saved output below shows a `temp_C` column, while the pipeline
# names that aggregate `avg_temp` -- the output looks stale; re-run to confirm.
df = await extract_data_daily()
df

Unnamed: 0_level_0,temp_C,min_temp,max_temp,rain,wind_speed,clouds
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-01-01 00:00:00+00:00,-5.450073,-22.5,3.1,0.135892,1.965115,68.975146
2021-01-02 00:00:00+00:00,-5.371462,-19.9,4.2,0.072484,2.147537,60.567610
2021-01-03 00:00:00+00:00,-5.525786,-23.2,3.5,0.107154,1.369213,78.767296
2021-01-04 00:00:00+00:00,-6.982469,-24.3,2.9,0.004638,1.580953,45.385220
2021-01-05 00:00:00+00:00,-7.139230,-25.6,1.7,0.004796,1.475432,52.488994
...,...,...,...,...,...,...
2023-04-20 00:00:00+00:00,3.027055,-8.5,25.1,0.550228,1.530996,77.523973
2023-04-21 00:00:00+00:00,5.021347,-6.5,22.9,0.525228,1.807379,65.139269
2023-04-22 00:00:00+00:00,6.496007,-7.8,20.7,0.209606,2.035156,55.269097
2023-04-23 00:00:00+00:00,7.318287,-7.6,19.2,0.681134,1.632877,77.428819


In [19]:
async def extract_heatingdemand(collection=collection, base_temp_c: float = 14) -> pd.DataFrame:
    """Extract the daily average heating demand.

    The heating demand of a single measurement is the amount by which its
    ``temp_C`` lies below the base temperature (default 14 °C = 287.15 K);
    measurements above the base temperature contribute 0. The per-measurement
    values are averaged per calendar date (first 10 characters of
    ``datetime``).

    Parameters
    ----------
    collection
        Collection object whose ``aggregate(pipeline)`` result can be consumed
        with ``async for``. Defaults to the notebook-level ``collection``.
    base_temp_c
        Base temperature in °C below which heating demand accrues.

    Returns
    -------
    pd.DataFrame
        One row per date, sorted ascending, indexed by a UTC-localized
        ``DatetimeIndex`` named ``date``. Columns: ``avg_demand`` and
        ``total`` (row-wise sum of the value columns, which equals
        ``avg_demand`` as long as it is the only value column). If the
        aggregation yields no documents, an empty frame with these columns is
        returned instead of raising ``KeyError``.
    """
    pipeline = [
        {
            # Negative deviation from the base temperature, clipped at 0.
            '$addFields': {
                'heatingdemand': {
                    '$cond': {
                        'if': {'$lte': ['$temp_C', base_temp_c]},
                        'then': {'$subtract': [base_temp_c, '$temp_C']},
                        'else': 0,
                    }
                }
            }
        },
        # Derive the grouping key: the date part of the timestamp.
        {'$addFields': {'date': {'$substr': ['$datetime', 0, 10]}}},
        {
            '$group': {
                '_id': '$date',
                'avg_demand': {'$avg': '$heatingdemand'},
            }
        },
    ]

    results = [doc async for doc in collection.aggregate(pipeline)]

    if not results:
        # pd.DataFrame([]) has no "_id" column, so set_index would raise;
        # hand back an empty frame with the documented shape instead.
        empty_index = pd.DatetimeIndex([], tz="UTC", name="date")
        return pd.DataFrame(columns=["avg_demand", "total"], index=empty_index, dtype="float64")

    df = pd.DataFrame(results).set_index("_id")
    # The group keys are date strings; turn them into a tz-aware index.
    df.index = pd.to_datetime(df.index).tz_localize("UTC").rename("date")
    df = df.sort_index()
    df["total"] = df.sum(axis="columns")

    return df

In [17]:
async def extract_windpower(collection=collection, exponent: float = 2) -> pd.DataFrame:
    """Extract the daily average of wind speed (in m/s) raised to ``exponent``.

    ``wind_kmh`` is divided by 3.6 (km/h -> m/s), raised to ``exponent`` and
    averaged per calendar date (first 10 characters of ``datetime``).

    NOTE: the power carried by wind scales with the *cube* of wind speed
    (P = 1/2 * rho * A * v**3), not the square. The default ``exponent=2`` is
    kept so that existing results stay unchanged; pass ``exponent=3`` for a
    quantity proportional to physical wind power.

    Parameters
    ----------
    collection
        Collection object whose ``aggregate(pipeline)`` result can be consumed
        with ``async for``. Defaults to the notebook-level ``collection``.
    exponent
        Power to which the wind speed in m/s is raised before averaging.

    Returns
    -------
    pd.DataFrame
        One row per date, sorted ascending, indexed by a UTC-localized
        ``DatetimeIndex`` named ``date``. Columns: ``windpower`` and ``total``
        (row-wise sum of the value columns, which equals ``windpower`` as long
        as it is the only value column). If the aggregation yields no
        documents, an empty frame with these columns is returned instead of
        raising ``KeyError``.
    """
    # Wind speed converted from km/h to m/s inside the aggregation.
    speed_ms = {'$divide': ["$wind_kmh", 3.6]}
    pipeline = [
        # Derive the grouping key: the date part of the timestamp.
        {'$addFields': {'date': {'$substr': ['$datetime', 0, 10]}}},
        {
            '$group': {
                '_id': '$date',
                'windpower': {'$avg': {'$pow': [speed_ms, exponent]}},
            }
        },
    ]

    results = [doc async for doc in collection.aggregate(pipeline)]

    if not results:
        # pd.DataFrame([]) has no "_id" column, so set_index would raise;
        # hand back an empty frame with the documented shape instead.
        empty_index = pd.DatetimeIndex([], tz="UTC", name="date")
        return pd.DataFrame(columns=["windpower", "total"], index=empty_index, dtype="float64")

    df = pd.DataFrame(results).set_index("_id")
    # The group keys are date strings; turn them into a tz-aware index.
    df.index = pd.to_datetime(df.index).tz_localize("UTC").rename("date")
    df = df.sort_index()
    df["total"] = df.sum(axis="columns")

    return df

In [22]:
# Fetch the three daily aggregates from the same collection, one query after another.
# Daily weather statistics (temperature, rain, wind speed, clouds).
df_1 = await extract_data_daily(collection)
# Daily average heating demand.
df_2 = await extract_heatingdemand(collection)
# Daily average of squared wind speed.
df_3 = await extract_windpower(collection)

In [25]:
# Display the frame returned by extract_windpower. `total` is the row-wise sum of the
# value columns, so with the single `windpower` column it repeats that value.
df_3

Unnamed: 0_level_0,windpower,total
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-01-01 00:00:00+00:00,60.260249,60.260249
2021-01-02 00:00:00+00:00,77.537044,77.537044
2021-01-03 00:00:00+00:00,30.938734,30.938734
2021-01-04 00:00:00+00:00,38.524646,38.524646
2021-01-05 00:00:00+00:00,38.531454,38.531454
...,...,...
2023-04-20 00:00:00+00:00,38.433898,38.433898
2023-04-21 00:00:00+00:00,53.812666,53.812666
2023-04-22 00:00:00+00:00,72.302402,72.302402
2023-04-23 00:00:00+00:00,53.583345,53.583345
