In [1]:
import pandas as pd

from sp_project.data_preparation.db_client import get_global_db_client

%autoawait asyncio



In [3]:
# Obtain the project's shared database handle. The bare `type(client)` is the
# cell's last expression, so the notebook displays the driver class in use.
client = get_global_db_client()
type(client)

motor.motor_asyncio.AsyncIOMotorDatabase

In [10]:
# Handle to the "entsoe" collection; the extraction functions in this notebook
# bind it as the default value of their `collection` parameter.
collection = get_global_db_client()["entsoe"]

In [11]:
# Short output column name -> "$"-prefixed MongoDB field path it is read from.
# Used both as `$avg` operands in the daily aggregation and as the projection
# of the raw query, so the two extractions expose the same column names.
db_field_projection = {
    "wind": "$Wind Onshore Generation",
    "solar": "$Solar Generation",
    "nuclear": "$Nuclear Generation",
    "water_reservoir": "$Hydro Water Reservoir Generation",
    "water_river": "$Hydro Run-of-river and poundage Generation",
    "water_pump": "$Hydro Pumped Storage Generation",
}

In [12]:
async def extract_energy_data_daily(collection=collection) -> pd.DataFrame:
    """Return per-day averages of every generation series in ``collection``.

    Documents are grouped server-side by the first 10 characters of the string
    form of their ``datetime`` field, and every field path listed in
    ``db_field_projection`` is averaged with ``$avg`` inside each group.

    Args:
        collection: Async collection handle to aggregate over. Defaults to the
            notebook-level ``collection`` as it was bound when this function
            was defined.

    Returns:
        A DataFrame indexed by a UTC-localized ``DatetimeIndex`` named
        ``"date"``, sorted ascending, with one column per key of
        ``db_field_projection`` plus a ``"total"`` column holding the row-wise
        sum of those columns (the sum skips missing values). When the
        aggregation yields no documents the frame is empty but keeps the same
        columns.
    """
    # NOTE(review): assumes `datetime` renders as "YYYY-MM-DD..." so that its
    # 10-character prefix is the calendar date -- confirm against the schema.
    day_key_stage = {
        "$addFields": {
            "date": {"$substr": ["$datetime", 0, 10]},
        }
    }
    daily_average_stage = {
        "$group": {
            "_id": "$date",
            **{name: {"$avg": path} for name, path in db_field_projection.items()},
        }
    }
    pipeline = [day_key_stage, daily_average_stage]

    results = [doc async for doc in collection.aggregate(pipeline)]

    # Naming the columns up front keeps the schema stable and lets an empty
    # result pass through `set_index` instead of raising KeyError on "_id".
    df = pd.DataFrame(results, columns=["_id", *db_field_projection])
    df = df.set_index("_id")
    df = df.set_index(pd.to_datetime(df.index).tz_localize("UTC").rename("date"))
    df = df.sort_index()
    df["total"] = df.sum(axis="columns")

    return df

In [13]:
async def extract_energy_data_raw(collection=collection) -> pd.DataFrame:
    """Return the unaggregated generation values, one row per document.

    Every document of ``collection`` is fetched with a projection that drops
    ``_id``, keeps ``datetime`` and exposes each field path of
    ``db_field_projection`` under its short column name.

    Args:
        collection: Async collection handle to query. Defaults to the
            notebook-level ``collection`` as it was bound when this function
            was defined.

    Returns:
        A DataFrame indexed by a UTC-localized ``datetime`` index, sorted
        ascending, with one column per key of ``db_field_projection`` (in that
        order) plus a ``"total"`` column holding the row-wise sum of those
        columns (the sum skips missing values). A series absent from every
        returned document still appears as an all-missing column, and an
        empty query result yields an empty frame with the same columns.
    """
    projection = {
        "_id": False,
        "datetime": "$datetime",
        **db_field_projection,
    }

    # `to_list(None)` loads the complete result set into memory in one go.
    results = await collection.find(projection=projection).to_list(None)

    # Naming the columns up front fixes the schema regardless of which fields
    # the returned documents carry, and lets an empty result pass through
    # `set_index` instead of raising KeyError on "datetime".
    df = pd.DataFrame(results, columns=["datetime", *db_field_projection])
    df = df.set_index("datetime")
    # NOTE(review): `tz_localize` expects timezone-naive values; presumably the
    # driver returns naive UTC datetimes -- confirm against the client config.
    df = df.set_index(pd.to_datetime(df.index).tz_localize("UTC"))
    df = df.sort_index()
    df["total"] = df.sum(axis="columns")

    return df

In [14]:
# Run both extractions against the same collection:
# df_1 holds the per-day averages, df_2 the unaggregated rows.
df_1 = await extract_energy_data_daily(collection)
df_2 = await extract_energy_data_raw(collection)

In [15]:
# Show the raw extraction; the rendered table is truncated by pandas.
df_2

Unnamed: 0_level_0,wind,solar,nuclear,water_reservoir,water_river,water_pump,total
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2021-01-01 00:00:00+00:00,15.0,0.0,3006.0,253.0,116.0,238.0,3628.0
2021-01-01 01:00:00+00:00,11.0,0.0,3006.0,224.0,115.0,158.0,3514.0
2021-01-01 02:00:00+00:00,9.0,0.0,3005.0,197.0,116.0,98.0,3425.0
2021-01-01 03:00:00+00:00,4.0,0.0,3005.0,169.0,113.0,73.0,3364.0
2021-01-01 04:00:00+00:00,4.0,0.0,3003.0,181.0,121.0,88.0,3397.0
...,...,...,...,...,...,...,...
2023-05-05 22:00:00+00:00,,,1714.0,849.0,213.0,846.0,3622.0
2023-05-05 23:00:00+00:00,,,1714.0,556.0,190.0,355.0,2815.0
2023-05-06 00:00:00+00:00,,,1715.0,433.0,186.0,199.0,2533.0
2023-05-06 01:00:00+00:00,,,1716.0,325.0,197.0,248.0,2486.0
