In [10]:
import pandas as pd
import warnings
import pymongo
from datetime import datetime

# Notebook-wide settings: disable pandas' chained-assignment check and hide RuntimeWarnings.
pd.options.mode.chained_assignment = None
warnings.filterwarnings('ignore', category=RuntimeWarning)

# Setup MongoDB connection (local)
# NOTE(review): credentials are hard-coded; acceptable only for a local sandbox.
# Move them to environment variables / a secrets store before sharing this notebook.
mongo_host = "localhost"
mongo_port = 27017
mongo_user = "admin"
mongo_password = "password"
auth_db = "admin"
client_mongo = pymongo.MongoClient(
    host=mongo_host,
    port=mongo_port,
    username=mongo_user,
    password=mongo_password,
    authSource=auth_db
)

# Target database: the denormalized collection is rebuilt from scratch on every run.
db_mongo_denorm = client_mongo.get_database("denormalization")
db_mongo_denorm.drop_collection("gas_station_gas_ttc")

# Source database holding the raw collections.
db_mongo_datalake = client_mongo.get_database("datalake")

# Collection handles and the index do not depend on the year, so set them up once
# instead of on every loop iteration.
gas_station_logs = db_mongo_datalake.get_collection("french_gas_station_price_logs_eur")
brent_quotes = db_mongo_datalake.get_collection("stockmarket_brent_eur")
collection_mongo = db_mongo_denorm.get_collection("gas_station_gas_ttc")
collection_mongo.create_index([("Date", pymongo.ASCENDING)])

# One barrel is 158.987 liters; used to convert EUR/barrel into EUR/liter.
LITERS_PER_BARREL = 158.987

# Output columns that must always exist, even when a fuel has no price for a given year.
all_fuel_name_existing = [
    "ttc_gas_station_gas_E10_eur_liter",
    "ttc_gas_station_gas_E85_eur_liter",
    "ttc_gas_station_gas_GPLC_eur_liter",
    "ttc_gas_station_gas_GAZOLE_eur_liter",
    "ttc_gas_station_gas_SP95_eur_liter",
    "ttc_gas_station_gas_SP98_eur_liter"
]

# ALL YEARS
years = ['2007', '2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017', '2018', '2019', '2020', '2021', '2022', '2023', '2024', '2025']
#years = ['2007', '2008', '2009', '2010', '2011', '2012']
# ONE YEAR
# years = ['2008']

# Process one calendar year at a time: [Jan 1 of year, Jan 1 of year + 1).
for year in years:
    year = int(year)
    start_date = datetime(year, 1, 1)
    end_date = datetime(year + 1, 1, 1)
    print("YEAR: ", year, " // start_date:", start_date, "end_date:", end_date)
    date_filter = {"Date": {"$gte": start_date, "$lt": end_date}}

    # --- collection french_gas_station ----
    cursor = gas_station_logs.find(
        date_filter,
        {
            "_id": 0, "Id_station_essence": 1, "Date": 1, "Nom": 1, "Valeur": 1
        }
    )
    df_french_gas_station = pd.DataFrame(list(cursor))

    # An empty result has no 'Date' column, so the steps below would raise KeyError
    # and abort the whole run after the target collection was already dropped.
    if df_french_gas_station.empty:
        print("no gas station prices found for", year, "- skipping")
        continue

    df_french_gas_station['Date'] = pd.to_datetime(df_french_gas_station['Date'])
    df_french_gas_station = df_french_gas_station.rename(columns={
        "Id_station_essence": "Gas_station_id", "Nom": "Gas_name", "Valeur": "Gas_eur_liter"
    })
    # Combine the gas station prices when same date and same gas name (by mean)
    df_gas_station_gas_ttc = df_french_gas_station.groupby(['Date', 'Gas_name'], as_index=False)['Gas_eur_liter'].mean().round(5)

    # Create col for each gas_name (pivot rotate df)
    df_gas_station_gas_ttc = df_gas_station_gas_ttc.pivot(index='Date', columns='Gas_name', values='Gas_eur_liter')
    df_gas_station_gas_ttc = df_gas_station_gas_ttc.rename(columns=lambda x: f"ttc_gas_station_gas_{x.upper()}_eur_liter").reset_index()

    # always have all existing columns (filled with NaN when the fuel is absent this year)
    for col in all_fuel_name_existing:
        if col not in df_gas_station_gas_ttc.columns:
            df_gas_station_gas_ttc[col] = float('nan')

    print("df_gas_station_gas_ttc")
    print(df_gas_station_gas_ttc.head())


    # --- collection stockmarket_brent_eur ----
    cursor = brent_quotes.find(
        date_filter,
        {
            "_id": 0, "Date": 1, "Close": 1
        }
    )
    stockmarket_brent_eur = pd.DataFrame(list(cursor))

    # No Brent quotes for this year: keep the gas prices and let the left join
    # below fill BRENT_eur_liter with NaN instead of failing on a missing column.
    if stockmarket_brent_eur.empty:
        stockmarket_brent_eur = pd.DataFrame({
            "Date": pd.Series(dtype=df_gas_station_gas_ttc['Date'].dtype),
            "Close": pd.Series(dtype="float64"),
        })

    stockmarket_brent_eur['Date'] = pd.to_datetime(stockmarket_brent_eur['Date'])
    stockmarket_brent_eur = stockmarket_brent_eur.rename(columns={"Close": "Brent_eur_barrel"})
    stockmarket_brent_eur['BRENT_eur_liter'] = (stockmarket_brent_eur['Brent_eur_barrel'] / LITERS_PER_BARREL).round(5)
    print("stockmarket_brent_eur")
    print(stockmarket_brent_eur.head())

    # Joined: keep every gas price date; Brent is NaN on dates without a quote.
    df_joined = pd.merge(
        df_gas_station_gas_ttc,
        stockmarket_brent_eur,
        on='Date',
        how='left'
    )
    df_joined = df_joined.drop(columns=['Brent_eur_barrel'])
    print("df_joined\n", df_joined.head())

    # ----- Push to MongoDB -----
    records = df_joined.to_dict(orient="records")
    # insert_many rejects an empty document list, so only call it when there is data.
    if records:
        collection_mongo.insert_many(records)
        print("correctly loaded", year," gas_station_gas_ttc to denormalized collection MongoDB")
    else:
        print("nothing to load for", year)

YEAR:  2007  // start_date: 2007-01-01 00:00:00 end_date: 2008-01-01 00:00:00
df_gas_station_gas_ttc
Gas_name       Date  ttc_gas_station_gas_E85_eur_liter  \
0        2007-01-01                                NaN   
1        2007-01-02                                NaN   
2        2007-01-03                                NaN   
3        2007-01-04                                NaN   
4        2007-01-05                                NaN   

Gas_name  ttc_gas_station_gas_GPLC_eur_liter  \
0                                        NaN   
1                                        NaN   
2                                        NaN   
3                                        NaN   
4                                        NaN   

Gas_name  ttc_gas_station_gas_GAZOLE_eur_liter  \
0                                      1.10647   
1                                      1.00571   
2                                      1.00645   
3                                      1.02514   
4          

df_gas_station_gas_ttc
Gas_name       Date  ttc_gas_station_gas_E10_eur_liter  \
0        2010-01-01                            1.27211   
1        2010-01-02                            1.24007   
2        2010-01-03                            1.28735   
3        2010-01-04                            1.26830   
4        2010-01-05                            1.28669   

Gas_name  ttc_gas_station_gas_E85_eur_liter  \
0                                   0.89467   
1                                   0.84040   
2                                   0.84445   
3                                   0.84833   
4                                   0.86800   

Gas_name  ttc_gas_station_gas_GPLC_eur_liter  \
0                                    0.69638   
1                                    0.65644   
2                                    0.70745   
3                                    0.68086   
4                                    0.69397   

Gas_name  ttc_gas_station_gas_GAZOLE_eur_liter  \
0     

df_gas_station_gas_ttc
Gas_name       Date  ttc_gas_station_gas_E10_eur_liter  \
0        2013-01-01                            1.46829   
1        2013-01-02                            1.47276   
2        2013-01-03                            1.50348   
3        2013-01-04                            1.50624   
4        2013-01-05                            1.52166   

Gas_name  ttc_gas_station_gas_E85_eur_liter  \
0                                   0.94929   
1                                   0.92526   
2                                   0.93361   
3                                   0.93130   
4                                   0.92733   

Gas_name  ttc_gas_station_gas_GPLC_eur_liter  \
0                                    0.93813   
1                                    0.86400   
2                                    0.87958   
3                                    0.88060   
4                                    0.87512   

Gas_name  ttc_gas_station_gas_GAZOLE_eur_liter  \
0     

df_gas_station_gas_ttc
Gas_name       Date  ttc_gas_station_gas_E10_eur_liter  \
0        2016-01-01                            1.29030   
1        2016-01-02                            1.23477   
2        2016-01-03                            1.25684   
3        2016-01-04                            1.24156   
4        2016-01-05                            1.24829   

Gas_name  ttc_gas_station_gas_E85_eur_liter  \
0                                   0.71453   
1                                   0.71047   
2                                   0.70943   
3                                   0.71720   
4                                   0.71444   

Gas_name  ttc_gas_station_gas_GPLC_eur_liter  \
0                                    0.74580   
1                                    0.72849   
2                                    0.74048   
3                                    0.72928   
4                                    0.73290   

Gas_name  ttc_gas_station_gas_GAZOLE_eur_liter  \
0     

df_gas_station_gas_ttc
Gas_name       Date  ttc_gas_station_gas_E10_eur_liter  \
0        2019-01-01                            1.39991   
1        2019-01-02                            1.37236   
2        2019-01-03                            1.37276   
3        2019-01-04                            1.36540   
4        2019-01-05                            1.38231   

Gas_name  ttc_gas_station_gas_E85_eur_liter  \
0                                   0.69116   
1                                   0.68570   
2                                   0.68941   
3                                   0.68603   
4                                   0.68511   

Gas_name  ttc_gas_station_gas_GPLC_eur_liter  \
0                                    0.87050   
1                                    0.84696   
2                                    0.85714   
3                                    0.86185   
4                                    0.85945   

Gas_name  ttc_gas_station_gas_GAZOLE_eur_liter  \
0     

df_gas_station_gas_ttc
Gas_name       Date  ttc_gas_station_gas_E10_eur_liter  \
0        2022-01-01                            1.66666   
1        2022-01-02                            1.66048   
2        2022-01-03                            1.63338   
3        2022-01-04                            1.64766   
4        2022-01-05                            1.64934   

Gas_name  ttc_gas_station_gas_E85_eur_liter  \
0                                   0.73742   
1                                   0.73834   
2                                   0.73136   
3                                   0.73666   
4                                   0.73861   

Gas_name  ttc_gas_station_gas_GPLC_eur_liter  \
0                                    0.90897   
1                                    0.90936   
2                                    0.89528   
3                                    0.90954   
4                                    0.90140   

Gas_name  ttc_gas_station_gas_GAZOLE_eur_liter  \
0     

df_gas_station_gas_ttc
Gas_name       Date  ttc_gas_station_gas_E10_eur_liter  \
0        2025-01-01                            1.75098   
1        2025-01-02                            1.73692   
2        2025-01-03                            1.74349   
3        2025-01-04                            1.75123   
4        2025-01-05                            1.76463   

Gas_name  ttc_gas_station_gas_E85_eur_liter  \
0                                   0.79343   
1                                   0.78582   
2                                   0.79075   
3                                   0.79112   
4                                   0.79195   

Gas_name  ttc_gas_station_gas_GPLC_eur_liter  \
0                                    1.00802   
1                                    1.00151   
2                                    1.01011   
3                                    1.00917   
4                                    1.01206   

Gas_name  ttc_gas_station_gas_GAZOLE_eur_liter  \
0     