In [1]:
import pandas as pd
import motor.motor_asyncio
from motor.motor_asyncio import AsyncIOMotorClient
from pymongo.server_api import ServerApi

%autoawait asyncio



In [2]:
uri = "mongodb+srv://scientificprogramming:***REMOVED***@scientificprogramming.nzfrli0.mongodb.net/test"
DBclient = AsyncIOMotorClient(uri, server_api=ServerApi('1'))
db = DBclient.data
collection = db.weatherprediction

In [3]:
async def extract_data_daily(collection=collection) -> pd.DataFrame:
    """Extract the daily averages of all the interesting datapoints including hours of daylight and probability
    of the prediction"""
    pipeline = [
        {
            '$addFields': {
                'date': {
                    '$substr': [
                        '$dt', 0, 10
                    ]
                }, 
                'sunminutes': {
                    '$dateDiff': {
                        'startDate': '$sunrise', 
                        'endDate': '$sunset', 
                        'unit': 'minute'
                    }
                }, 
                'avg_temp': {
                    '$divide': [
                        {
                            '$add': [
                                '$temp.day', '$temp.eve', '$temp.morn', '$temp.night'
                            ]
                        }, 4
                    ]
                }
            }
        }, {
            '$group': {
                '_id': '$date', 
                'sunminutes': {
                    '$avg': '$sunminutes'
                }, 
                'avg_temp': {
                    '$avg': {
                        '$subtract': [
                            '$temp.day', 273
                        ]
                    }
                }, 
                'uvi': {
                    '$avg': '$uvi'
                }, 
                'wind_speed': {
                    '$avg': '$wind_speed'
                }, 
                'clouds': {
                    '$avg': '$clouds'
                }, 
                'rain': {
                    '$sum': '$rain'
                }, 
                'snow': {
                    '$sum': '$snow'
                }, 
                'probability': {
                    '$avg': '$pop'
                }
            }
        }
    ]



    results=[]
    async for x in collection.aggregate(pipeline):
        results.append(x)


    df = pd.DataFrame(results)
    df = df.set_index("_id")
    df = df.set_index(pd.to_datetime(df.index).tz_localize("UTC").rename("date"))
    df = df.sort_index()
    
    df["snow"] /= 24
    df["rain"] /= 24

    return df

In [4]:
async def extract_heatingdemand(collection=collection) -> pd.DataFrame:
    """Extract the daily average of the negative deviation of 14°C = 288°K"""
    pipeline = [
        {
            '$addFields': {
                'avg_temp': {
                    '$divide': [
                        {
                            '$add': [
                                '$temp.day', '$temp.eve', '$temp.morn', '$temp.night'
                            ]
                        }, 4
                    ]
                }
            }
        }, {
            '$addFields': {
                'heatingdemand': {
                    '$cond': {
                        'if': {
                            '$lte': [
                                '$avg_temp', 288
                            ]
                        }, 
                        'then': {
                            '$subtract': [
                                288, '$avg_temp'
                            ]
                        }, 
                        'else': 0
                    }
                }
            }
        }, {
            '$addFields': {
                'date': {
                    '$substr': [
                        '$dt', 0, 10
                    ]
                }
            }
        }, {
            '$group': {
                '_id': '$date', 
                'avg_demand': {
                    '$avg': '$heatingdemand'
                }
            }
        }
    ]

    results=[]
    async for x in collection.aggregate(pipeline):
        results.append(x)


    df = pd.DataFrame(results)
    df = df.set_index("_id")
    df = df.set_index(pd.to_datetime(df.index).tz_localize("UTC").rename("date"))
    df = df.sort_index()
    
    return df

In [5]:
async def extract_windpower(collection=collection) -> pd.DataFrame:
    """Extract the daily average of wind-speed**2, which is the equivalent of wind-power"""
    pipeline = [
        {
            '$addFields': {
                'date': {
                    '$substr': [
                        '$dt', 0, 10
                    ]
                }
            }
        }, {
            '$group': {
                '_id': '$date', 
                'windpower': {
                    '$avg': {
                        '$pow': [
                            '$wind_speed', 2
                        ]
                    }
                }
            }
        }
    ]

    results=[]
    async for x in collection.aggregate(pipeline):
        results.append(x)
    
    df = pd.DataFrame(results)
    df = df.set_index("_id")
    df = df.set_index(pd.to_datetime(df.index).tz_localize("UTC").rename("date"))
    df = df.sort_index()
    
    return df

In [6]:
df_daily = await(extract_data_daily(collection))
df_heatingdemand = await(extract_heatingdemand(collection))
df_windpower = await(extract_windpower(collection))

In [7]:
df_daily

Unnamed: 0_level_0,sunminutes,avg_temp,uvi,wind_speed,clouds,rain,snow,probability
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2023-05-11 00:00:00+00:00,887.0,0.185,5.0,2.27,100.0,0.498333,0.909583,1.0
