In [6]:
import os
import dotenv

import pymongo
import pandas as pd

In [7]:
def connect_to_db():
    """Open a MongoDB connection and return the ``Energie`` collection.

    The connection string is taken from the ``MONGODB_URI`` environment
    variable, after ``dotenv.load_dotenv()`` has had the chance to populate
    the environment from a ``.env`` file.

    Returns:
        The ``Energie`` collection of the ``MDM-Python-MeinProjekt`` database.

    Note:
        This function only looks the collection up; it does not create any
        index on it. A new ``MongoClient`` is constructed on every call.
    """
    # Make variables from a local .env file visible to os.getenv below.
    dotenv.load_dotenv()

    client = pymongo.MongoClient(os.getenv("MONGODB_URI"))
    return client["MDM-Python-MeinProjekt"]["Energie"]

In [8]:
def extract_daily_energy():
    """Return per-day average generation for each energy source.

    The aggregation runs inside MongoDB: every document gets a ``date`` field
    holding the first 10 characters of its ``datetime`` value (presumably the
    ``YYYY-MM-DD`` part -- confirm against the stored format), documents are
    grouped by that value, and each generation field is averaged per group.

    Returns:
        pandas.DataFrame: one row per day, sorted ascending, indexed by a
        UTC-localized ``DatetimeIndex`` named ``date``. Columns are ``wind``,
        ``solar``, ``nuclear``, ``water_reservoir``, ``water_river``,
        ``water_pump`` and ``total`` (row-wise sum of the other columns).
        If the aggregation yields no rows, an empty frame with the same
        columns and index type is returned instead of raising ``KeyError``.
    """
    # Output column name -> document field it is averaged from.
    source_fields = {
        'wind': 'Wind Onshore Generation',
        'solar': 'Solar Generation',
        'nuclear': 'Nuclear Generation',
        'water_reservoir': 'Hydro Water Reservoir Generation',
        'water_river': 'Hydro Run-of-river and poundage Generation',
        'water_pump': 'Hydro Pumped Storage Generation',
    }

    group_stage = {'_id': '$date'}
    for column, field in source_fields.items():
        group_stage[column] = {'$avg': '$' + field}

    pipeline = [
        {'$addFields': {'date': {'$substr': ['$datetime', 0, 10]}}},
        {'$group': group_stage},
    ]

    records = list(connect_to_db().aggregate(pipeline))

    if not records:
        # pd.DataFrame([]) has no "_id" column, so set_index would raise.
        # Hand back an empty frame shaped like the regular result instead.
        return pd.DataFrame(
            columns=[*source_fields, "total"],
            index=pd.DatetimeIndex([], tz="UTC", name="date"),
            dtype="float64",
        )

    df = pd.DataFrame(records).set_index("_id")
    # The group key is a date string; turn it into a proper UTC DatetimeIndex.
    df.index = pd.to_datetime(df.index).rename("date").tz_localize("UTC")
    # $group does not guarantee any output order, so sort chronologically here.
    df = df.sort_index()
    df["total"] = df.sum(axis="columns")

    return df

In [9]:
def extract_hourly_energy():
    """Return every stored generation record as a time-indexed DataFrame.

    All documents of the collection are fetched with a projection that drops
    ``_id``, keeps ``datetime`` and exposes each generation field under a
    short column name.

    Returns:
        pandas.DataFrame: one row per document, sorted ascending by a
        ``DatetimeIndex`` named ``datetime``. Columns are ``wind``, ``solar``,
        ``nuclear``, ``water_reservoir``, ``water_river``, ``water_pump`` and
        ``total`` (row-wise sum of the other columns). If the collection
        holds no documents, an empty frame with the same columns is returned
        instead of raising ``KeyError``.

    Note:
        The index is the result of ``pd.to_datetime`` on the stored values and
        is not localized here, unlike the daily frame which is localized to
        UTC. NOTE(review): confirm whether callers expect both to agree.
    """
    # Output column name -> document field it is read from.
    source_fields = {
        'wind': 'Wind Onshore Generation',
        'solar': 'Solar Generation',
        'nuclear': 'Nuclear Generation',
        'water_reservoir': 'Hydro Water Reservoir Generation',
        'water_river': 'Hydro Run-of-river and poundage Generation',
        'water_pump': 'Hydro Pumped Storage Generation',
    }

    projection = {'_id': False, 'datetime': "$datetime"}
    for column, field in source_fields.items():
        projection[column] = '$' + field

    records = list(connect_to_db().find(projection=projection))

    if not records:
        # pd.DataFrame([]) has no "datetime" column, so set_index would raise.
        # Hand back an empty frame shaped like the regular result instead.
        return pd.DataFrame(
            columns=[*source_fields, "total"],
            index=pd.DatetimeIndex([], name="datetime"),
            dtype="float64",
        )

    df = pd.DataFrame(records).set_index("datetime")
    df.index = pd.to_datetime(df.index)
    df = df.sort_index()
    df["total"] = df.sum(axis="columns")

    return df

In [12]:
# Build the per-day averages frame; the bare name on the last line makes
# the notebook render it as this cell's output.
df_daily = extract_daily_energy()
df_daily

Unnamed: 0_level_0,wind,solar,nuclear,water_reservoir,water_river,water_pump,total
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2021-10-14 00:00:00+00:00,10.458333,318.000000,1726.500000,1102.791667,203.291667,695.083333,4056.125000
2021-10-15 00:00:00+00:00,5.958333,218.125000,1726.250000,1298.125000,192.208333,878.250000,4318.916667
2021-10-16 00:00:00+00:00,4.708333,293.416667,1724.958333,722.958333,154.500000,399.708333,3300.250000
2021-10-17 00:00:00+00:00,7.708333,143.458333,1725.708333,482.125000,141.833333,117.000000,2617.833333
2021-10-18 00:00:00+00:00,2.291667,253.333333,1722.583333,990.083333,170.166667,558.000000,3696.458333
...,...,...,...,...,...,...,...
2024-03-08 00:00:00+00:00,0.000000,0.000000,3019.875000,2062.875000,208.541667,799.416667,6090.708333
2024-03-09 00:00:00+00:00,0.000000,0.000000,2926.708333,1399.916667,183.458333,481.166667,4991.250000
2024-03-10 00:00:00+00:00,0.000000,0.000000,3016.541667,1158.583333,172.166667,479.291667,4826.583333
2024-03-11 00:00:00+00:00,0.000000,0.000000,3014.708333,1877.625000,189.666667,989.666667,6071.666667


In [13]:
# Build the per-record frame; the bare name on the last line makes
# the notebook render it as this cell's output.
df_hourly = extract_hourly_energy()
df_hourly

Unnamed: 0_level_0,wind,solar,nuclear,water_reservoir,water_river,water_pump,total
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2021-10-14 00:00:00,15.0,0.0,1729.0,402.0,156.0,19.0,2321.0
2021-10-14 01:00:00,16.0,0.0,1729.0,453.0,166.0,31.0,2395.0
2021-10-14 02:00:00,15.0,0.0,1729.0,371.0,168.0,48.0,2331.0
2021-10-14 03:00:00,22.0,0.0,1729.0,389.0,164.0,82.0,2386.0
2021-10-14 04:00:00,21.0,0.0,1729.0,924.0,191.0,515.0,3380.0
...,...,...,...,...,...,...,...
2024-03-12 19:00:00,0.0,0.0,3005.0,2437.0,256.0,1872.0,7570.0
2024-03-12 20:00:00,0.0,0.0,3003.0,2228.0,182.0,1297.0,6710.0
2024-03-12 21:00:00,0.0,0.0,3001.0,1789.0,157.0,600.0,5547.0
2024-03-12 22:00:00,0.0,0.0,3004.0,1216.0,156.0,408.0,4784.0
