In [13]:
import os
import dotenv

import pymongo
import pandas as pd

In [14]:
def connect_to_db():
    """Return the "Energie" collection of the "MDM-Python-MeinProjekt" database.

    The connection string is read from the ``MONGODB_URI`` environment
    variable after loading a local ``.env`` file. The ``MongoClient`` is
    created on the first call only and cached on the function object, so
    repeated calls reuse one client instead of opening (and never closing)
    a new one every time.

    This function does not create any collection or index.

    Returns:
        The ``Energie`` collection object of the ``MDM-Python-MeinProjekt``
        database.
    """
    client = getattr(connect_to_db, "_client", None)

    if client is None:
        # Load environment variables from the .env file
        dotenv.load_dotenv()

        # NOTE(review): os.getenv returns None when MONGODB_URI is unset;
        # presumably MongoClient then falls back to its default host --
        # confirm that this silent fallback is intended.
        mongodb_uri = os.getenv("MONGODB_URI")
        client = pymongo.MongoClient(mongodb_uri)
        connect_to_db._client = client

    return client["MDM-Python-MeinProjekt"]["Energie"]

In [40]:
def extract_daily_average_energy():
    """Return the per-day average generation of every energy source.

    The aggregation runs inside MongoDB: each document gets a ``date`` key
    made of the first 10 characters of its ``datetime`` field
    (e.g. ``2023-03-10``), documents are grouped by that key, and every
    generation field is averaged per group.

    Returns:
        pandas.DataFrame: one row per date, indexed by ``date`` in ascending
        order, with the columns ``wind``, ``solar``, ``nuclear``,
        ``water_reservoir``, ``water_river``, ``water_pump`` and ``total``
        (row-wise sum of the other columns). If the aggregation yields no
        documents, an empty frame with the same columns is returned.
    """
    collection = connect_to_db()

    # Output column name -> field name in the stored documents.
    source_fields = {
        'wind': 'Wind Onshore Generation',
        'solar': 'Solar Generation',
        'nuclear': 'Nuclear Generation',
        'water_reservoir': 'Hydro Water Reservoir Generation',
        'water_river': 'Hydro Run-of-river and poundage Generation',
        'water_pump': 'Hydro Pumped Storage Generation',
    }

    pipeline = [
        # Truncate the timestamp to its first 10 characters to get the day key.
        {'$addFields': {'date': {'$substr': ['$datetime', 0, 10]}}},
        {
            '$group': {
                '_id': '$date',
                **{
                    column: {'$avg': f'${field}'}
                    for column, field in source_fields.items()
                },
            }
        },
    ]

    records = list(collection.aggregate(pipeline))

    # Naming the columns explicitly keeps the frame well-formed for an empty
    # result; without it, set_index("_id") raises KeyError.
    df = pd.DataFrame(records, columns=['_id', *source_fields])
    df = df.set_index('_id').sort_index()
    df.index = df.index.rename('date')
    df['total'] = df.sum(axis='columns')

    return df

In [41]:
def extract_daily_raw_energy():
    """Return every stored reading, one row per document, without aggregation.

    The generation fields are renamed to short column names through the
    query projection and the ``datetime`` field becomes a datetime index.

    Returns:
        pandas.DataFrame: indexed by ``datetime`` (parsed with
        ``pandas.to_datetime``, ascending) with the columns ``wind``,
        ``solar``, ``nuclear``, ``water_reservoir``, ``water_river``,
        ``water_pump`` and ``total`` (row-wise sum of the other columns).
        If the query yields no documents, an empty frame with the same
        columns is returned.
    """
    collection = connect_to_db()

    # Output column name -> field name in the stored documents.
    source_fields = {
        'wind': 'Wind Onshore Generation',
        'solar': 'Solar Generation',
        'nuclear': 'Nuclear Generation',
        'water_reservoir': 'Hydro Water Reservoir Generation',
        'water_river': 'Hydro Run-of-river and poundage Generation',
        'water_pump': 'Hydro Pumped Storage Generation',
    }

    # NOTE(review): "$field" values in a find() projection rely on server
    # support for aggregation expressions in projections -- confirm the
    # deployed MongoDB version provides it.
    projection = {
        '_id': False,
        'datetime': "$datetime",
        **{column: f'${field}' for column, field in source_fields.items()},
    }

    records = list(collection.find(projection=projection))

    # Naming the columns explicitly keeps the frame well-formed for an empty
    # result; without it, set_index("datetime") raises KeyError.
    df = pd.DataFrame(records, columns=['datetime', *source_fields])
    df = df.set_index('datetime')
    df.index = pd.to_datetime(df.index)
    df = df.sort_index()
    df['total'] = df.sum(axis='columns')

    return df

In [42]:
# Fetch the per-day averages from MongoDB and display the resulting frame.
df = extract_daily_average_energy()
df

Unnamed: 0_level_0,wind,solar,nuclear,water_reservoir,water_river,water_pump,total
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2023-03-10,46.166667,155.333333,3004.916667,1189.750000,157.416667,672.875000,5226.458333
2023-03-11,10.333333,173.083333,3017.791667,848.333333,141.833333,498.500000,4689.875000
2023-03-12,34.416667,296.166667,3008.458333,378.875000,135.666667,244.708333,4098.291667
2023-03-13,49.875000,441.250000,2992.833333,439.958333,160.166667,273.458333,4357.541667
2023-03-14,47.208333,210.666667,3009.125000,811.916667,212.625000,675.083333,4966.625000
...,...,...,...,...,...,...,...
2024-03-05,0.000000,0.000000,3015.666667,2185.625000,119.166667,1374.458333,6694.916667
2024-03-06,0.000000,272.041667,3018.625000,2351.375000,127.041667,1372.166667,7141.250000
2024-03-07,0.000000,222.833333,3019.666667,2151.833333,170.291667,1097.750000,6662.375000
2024-03-08,0.000000,0.000000,3019.875000,2062.875000,208.541667,799.416667,6090.708333
