In [5]:
import pandas as pd
import warnings
import pymongo
from datetime import datetime

# Quiet the notebook: ignore RuntimeWarning and disable pandas'
# chained-assignment check.
warnings.filterwarnings('ignore', category=RuntimeWarning)
pd.options.mode.chained_assignment = None

# Connection settings for the local MongoDB instance.
mongo_host, mongo_port = "localhost", 27017
mongo_user, mongo_password = "admin", "password"
auth_db = "admin"

client_mongo = pymongo.MongoClient(
    host=mongo_host, port=mongo_port,
    username=mongo_user, password=mongo_password,
    authSource=auth_db,
)

# Target database: the denormalized collection is dropped up front so that
# it is rebuilt from scratch.
db_mongo_denorm = client_mongo["denormalization"]
db_mongo_denorm.drop_collection("station_ttc_gas_eur_liter")

# Database the price logs are read from.
db_mongo_datalake = client_mongo["datalake"]

# ALL YEARS (2007..2025 inclusive), kept as strings like the original list.
years = [str(y) for y in range(2007, 2026)]
#years = ['2007', '2008', '2009', '2010', '2011', '2012']
# ONE YEAR
# years = ['2008']

# Every fuel column a denormalized document must expose; columns missing
# from a given year's data are added and filled with NaN.
all_fuel_name_existing = [
    "station_ttc_E10_eur_liter",
    "station_ttc_E85_eur_liter",
    "station_ttc_GPLC_eur_liter",
    "station_ttc_GAZOLE_eur_liter",
    "station_ttc_SP95_eur_liter",
    "station_ttc_SP98_eur_liter",
]

# The source and target collections are the same for every year, so they are
# resolved (and the Date index is ensured) once instead of on each iteration.
collection_price_logs = db_mongo_datalake.get_collection("french_gas_station_price_logs_eur")
collection_mongo = db_mongo_denorm.get_collection("station_ttc_gas_eur_liter")
collection_mongo.create_index([("Date", pymongo.ASCENDING)])

for year in map(int, years):
    # Half-open window [Jan 1st of year, Jan 1st of year + 1).
    start_date, end_date = datetime(year, 1, 1), datetime(year + 1, 1, 1)
    print("YEAR: ", year, " // start_date:", start_date, "end_date:", end_date)

    # --- collection french_gas_station ----
    cursor = collection_price_logs.find(
        {"Date": {"$gte": start_date, "$lt": end_date}},
        {"_id": 0, "Id_station_essence": 1, "Date": 1, "Nom": 1, "Valeur": 1},
    )
    df_french_gas_station = pd.DataFrame(list(cursor))

    # A year without any price log yields a column-less frame; indexing
    # 'Date' on it would raise KeyError, so skip the year instead.
    if df_french_gas_station.empty:
        print("no price logs found for", year, "- year skipped")
        continue

    df_french_gas_station['Date'] = pd.to_datetime(df_french_gas_station['Date'])
    df_french_gas_station = df_french_gas_station.rename(columns={
        "Id_station_essence": "Gas_station_id", "Nom": "Gas_name", "Valeur": "Gas_eur_liter"
    })

    # Combine the gas station prices when same date and same gas name (by mean)
    df_station_ttc_gas_eur_liter = (
        df_french_gas_station
        .groupby(['Date', 'Gas_name'], as_index=False)['Gas_eur_liter']
        .mean()
        .round(5)
    )

    # Create one column per gas name (pivot), named station_ttc_<NAME>_eur_liter
    df_station_ttc_gas_eur_liter = df_station_ttc_gas_eur_liter.pivot(
        index='Date', columns='Gas_name', values='Gas_eur_liter')
    df_station_ttc_gas_eur_liter = df_station_ttc_gas_eur_liter.rename(
        columns=lambda x: f"station_ttc_{x.upper()}_eur_liter").reset_index()

    # Always have all existing columns
    for col in all_fuel_name_existing:
        if col not in df_station_ttc_gas_eur_liter.columns:
            df_station_ttc_gas_eur_liter[col] = float('nan')

    print("df_station_ttc_gas_eur_liter\n", df_station_ttc_gas_eur_liter.head())

    # ----- Push to MongoDB -----
    records = df_station_ttc_gas_eur_liter.to_dict(orient="records")
    # insert_many rejects an empty document list, so only insert when the
    # aggregation actually produced rows.
    if not records:
        print("nothing to load for", year, "- year skipped")
        continue
    collection_mongo.insert_many(records)
    print("correctly loaded", year," df_station_ttc_gas_eur_liter to denormalized collection MongoDB")

YEAR:  2007  // start_date: 2007-01-01 00:00:00 end_date: 2008-01-01 00:00:00
df_station_ttc_gas_eur_liter
 Gas_name       Date  station_ttc_E85_eur_liter  station_ttc_GPLC_eur_liter  \
0        2007-01-01                        NaN                         NaN   
1        2007-01-02                        NaN                         NaN   
2        2007-01-03                        NaN                         NaN   
3        2007-01-04                        NaN                         NaN   
4        2007-01-05                        NaN                         NaN   

Gas_name  station_ttc_GAZOLE_eur_liter  station_ttc_SP95_eur_liter  \
0                              1.10647                     1.24179   
1                              1.00571                     1.17889   
2                              1.00645                     1.18080   
3                              1.02514                     1.18936   
4                              1.00072                     1.17936   

Ga

df_station_ttc_gas_eur_liter
 Gas_name       Date  station_ttc_E10_eur_liter  station_ttc_E85_eur_liter  \
0        2013-01-01                    1.46829                    0.94929   
1        2013-01-02                    1.47276                    0.92526   
2        2013-01-03                    1.50348                    0.93361   
3        2013-01-04                    1.50624                    0.93130   
4        2013-01-05                    1.52166                    0.92733   

Gas_name  station_ttc_GPLC_eur_liter  station_ttc_GAZOLE_eur_liter  \
0                            0.93813                       1.34179   
1                            0.86400                       1.34186   
2                            0.87958                       1.34925   
3                            0.88060                       1.35424   
4                            0.87512                       1.35351   

Gas_name  station_ttc_SP95_eur_liter  station_ttc_SP98_eur_liter  
0                  

df_station_ttc_gas_eur_liter
 Gas_name       Date  station_ttc_E10_eur_liter  station_ttc_E85_eur_liter  \
0        2019-01-01                    1.39991                    0.69116   
1        2019-01-02                    1.37236                    0.68570   
2        2019-01-03                    1.37276                    0.68941   
3        2019-01-04                    1.36540                    0.68603   
4        2019-01-05                    1.38231                    0.68511   

Gas_name  station_ttc_GPLC_eur_liter  station_ttc_GAZOLE_eur_liter  \
0                            0.87050                       1.41563   
1                            0.84696                       1.37816   
2                            0.85714                       1.37765   
3                            0.86185                       1.36659   
4                            0.85945                       1.37274   

Gas_name  station_ttc_SP95_eur_liter  station_ttc_SP98_eur_liter  
0                  

df_station_ttc_gas_eur_liter
 Gas_name       Date  station_ttc_E10_eur_liter  station_ttc_E85_eur_liter  \
0        2025-01-01                    1.75098                    0.79343   
1        2025-01-02                    1.73692                    0.78582   
2        2025-01-03                    1.74349                    0.79075   
3        2025-01-04                    1.75123                    0.79112   
4        2025-01-05                    1.76463                    0.79195   

Gas_name  station_ttc_GPLC_eur_liter  station_ttc_GAZOLE_eur_liter  \
0                            1.00802                       1.68511   
1                            1.00151                       1.66950   
2                            1.01011                       1.67498   
3                            1.00917                       1.68353   
4                            1.01206                       1.69999   

Gas_name  station_ttc_SP95_eur_liter  station_ttc_SP98_eur_liter  
0                  