In [2]:
import pandas as pd

from sp_project.data_preparation.db_client import get_global_db_client

%autoawait asyncio



In [3]:
async def extract_data_daily() -> pd.DataFrame:
    """Extract per-day aggregates of the readings stored in the ``wetter2`` collection.

    Readings are grouped by the first 10 characters of their ``datetime`` field
    (presumably the ``YYYY-MM-DD`` prefix of an ISO-formatted timestamp --
    confirm against the stored documents).

    Returns:
        A DataFrame sorted by a ``DatetimeIndex`` named ``date``. The dates are
        labelled as UTC via ``tz_localize`` (no conversion takes place).
        Columns:

        * ``avg_temp``, ``min_temp``, ``max_temp`` -- mean / minimum / maximum of ``temp_C``
        * ``rain`` -- mean of ``rain_mm``
        * ``wind_speed`` -- mean of ``wind_kmh`` divided by 3.6
          (km/h -> m/s, going by the field name)
        * ``clouds`` -- mean of ``cloud_percent``

        If the aggregation yields no documents, an empty frame with the same
        columns and index type is returned.
    """
    collection = get_global_db_client().wetter2

    # Stage 1 derives the grouping key, stage 2 reduces each day to one document.
    pipeline = [
        {
            '$addFields': {
                'date': {'$substr': ['$datetime', 0, 10]}
            }
        },
        {
            '$group': {
                '_id': '$date',
                'avg_temp': {'$avg': '$temp_C'},
                'min_temp': {'$min': '$temp_C'},
                'max_temp': {'$max': '$temp_C'},
                'rain': {'$avg': '$rain_mm'},
                'wind_speed': {'$avg': '$wind_kmh'},
                'clouds': {'$avg': '$cloud_percent'},
            }
        },
    ]

    records = [doc async for doc in collection.aggregate(pipeline)]

    if not records:
        # pd.DataFrame([]) has no "_id" column, so set_index("_id") below would
        # raise KeyError; hand back an empty frame of the expected shape instead.
        return pd.DataFrame(
            columns=["avg_temp", "min_temp", "max_temp", "rain", "wind_speed", "clouds"],
            index=pd.DatetimeIndex([], tz="UTC", name="date"),
            dtype=float,
        )

    df = pd.DataFrame(records).set_index("_id")
    df.index = pd.to_datetime(df.index).tz_localize("UTC").rename("date")
    df = df.sort_index()
    df["wind_speed"] /= 3.6  # km/h -> m/s

    return df

In [4]:
async def extract_heatingdemand(base_temp_C: float = 14) -> pd.DataFrame:
    """Extract the daily mean heating demand from the ``wetter2`` collection.

    For every reading the demand is ``base_temp_C - temp_C`` when ``temp_C`` is
    at or below ``base_temp_C`` and ``0`` otherwise; these per-reading values
    are then averaged per day. The default base temperature is 14 °C
    (= 287.15 K).

    Readings are grouped by the first 10 characters of their ``datetime`` field
    (presumably the ``YYYY-MM-DD`` prefix of an ISO-formatted timestamp --
    confirm against the stored documents).

    Args:
        base_temp_C: Temperature threshold below which heating demand accrues.
            Defaults to 14, the value that was previously hard-coded.

    Returns:
        A DataFrame with a single ``heating_demand`` column, sorted by a
        ``DatetimeIndex`` named ``date`` whose dates are labelled as UTC.
        If the aggregation yields no documents, an empty frame with the same
        column and index type is returned.
    """
    collection = get_global_db_client().wetter2

    pipeline = [
        {
            # Per-reading shortfall below the base temperature, clipped at zero.
            '$addFields': {
                'heatingdemand': {
                    '$cond': {
                        'if': {'$lte': ['$temp_C', base_temp_C]},
                        'then': {'$subtract': [base_temp_C, '$temp_C']},
                        'else': 0,
                    }
                }
            }
        },
        {
            # Grouping key: leading 10 characters of the timestamp.
            '$addFields': {
                'date': {'$substr': ['$datetime', 0, 10]}
            }
        },
        {
            '$group': {
                '_id': '$date',
                'heating_demand': {'$avg': '$heatingdemand'},
            }
        },
    ]

    records = [doc async for doc in collection.aggregate(pipeline)]

    if not records:
        # pd.DataFrame([]) has no "_id" column, so set_index("_id") below would
        # raise KeyError; hand back an empty frame of the expected shape instead.
        return pd.DataFrame(
            columns=["heating_demand"],
            index=pd.DatetimeIndex([], tz="UTC", name="date"),
            dtype=float,
        )

    df = pd.DataFrame(records).set_index("_id")
    df.index = pd.to_datetime(df.index).tz_localize("UTC").rename("date")

    return df.sort_index()

In [5]:
async def extract_windpower() -> pd.DataFrame:
    """Extract the daily mean of the squared wind speed from the ``wetter2`` collection.

    Each reading's ``wind_kmh`` is divided by 3.6 (km/h -> m/s, going by the
    field name) and squared; the squares are then averaged per day and exposed
    as the ``windpower`` column.

    NOTE(review): the quantity is used here as a stand-in for wind power, but
    the power carried by wind physically scales with the cube of the speed --
    confirm that the exponent 2 is intended.

    Readings are grouped by the first 10 characters of their ``datetime`` field
    (presumably the ``YYYY-MM-DD`` prefix of an ISO-formatted timestamp --
    confirm against the stored documents).

    Returns:
        A DataFrame with a single ``windpower`` column, sorted by a
        ``DatetimeIndex`` named ``date`` whose dates are labelled as UTC.
        If the aggregation yields no documents, an empty frame with the same
        column and index type is returned.
    """
    collection = get_global_db_client().wetter2

    pipeline = [
        {
            # Grouping key: leading 10 characters of the timestamp.
            '$addFields': {
                'date': {'$substr': ['$datetime', 0, 10]}
            }
        },
        {
            '$group': {
                '_id': '$date',
                'windpower': {
                    # Square first, average second: mean(v**2), not mean(v)**2.
                    '$avg': {
                        '$pow': [{'$divide': ["$wind_kmh", 3.6]}, 2]
                    }
                },
            }
        },
    ]

    records = [doc async for doc in collection.aggregate(pipeline)]

    if not records:
        # pd.DataFrame([]) has no "_id" column, so set_index("_id") below would
        # raise KeyError; hand back an empty frame of the expected shape instead.
        return pd.DataFrame(
            columns=["windpower"],
            index=pd.DatetimeIndex([], tz="UTC", name="date"),
            dtype=float,
        )

    df = pd.DataFrame(records).set_index("_id")
    df.index = pd.to_datetime(df.index).tz_localize("UTC").rename("date")

    return df.sort_index()

In [6]:
# Run the three extractions one after another; each call awaits its own
# aggregation on the `wetter2` collection before the next one starts.
# Top-level `await` works here because of the `%autoawait asyncio` magic
# set in the import cell.
df_1 = await extract_data_daily()
df_2 = await extract_heatingdemand()
df_3 = await extract_windpower()

In [7]:
# Display the daily wind-power frame.
# NOTE(review): the saved output of this cell shows a `total` column, but
# extract_windpower as defined in this notebook only produces `windpower` --
# the output looks stale; restart the kernel and re-run to confirm.
df_3

Unnamed: 0_level_0,windpower,total
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-01-01 00:00:00+00:00,0.723493,0.723493
2019-01-02 00:00:00+00:00,3.726806,3.726806
2019-01-03 00:00:00+00:00,2.259182,2.259182
2019-01-04 00:00:00+00:00,2.404820,2.404820
2019-01-05 00:00:00+00:00,1.214024,1.214024
...,...,...
2023-05-02 00:00:00+00:00,6.620124,6.620124
2023-05-03 00:00:00+00:00,4.695109,4.695109
2023-05-04 00:00:00+00:00,1.878311,1.878311
2023-05-05 00:00:00+00:00,4.525206,4.525206
