In [2]:
import os
import warnings
from datetime import datetime

import pandas as pd
import pymongo

# Disable pandas' chained-assignment warning and ignore RuntimeWarning so the
# notebook output stays readable.
pd.options.mode.chained_assignment = None
warnings.filterwarnings('ignore', category=RuntimeWarning)

# Setup MongoDB connection. Every setting can be overridden through an
# environment variable so real credentials never have to be written in the
# notebook; the fallbacks are the local development values.
mongo_host = os.environ.get("MONGO_HOST", "localhost")
mongo_port = int(os.environ.get("MONGO_PORT", "27017"))
mongo_user = os.environ.get("MONGO_USER", "admin")
mongo_password = os.environ.get("MONGO_PASSWORD", "password")
auth_db = os.environ.get("MONGO_AUTH_DB", "admin")
client_mongo = pymongo.MongoClient(
    host=mongo_host,
    port=mongo_port,
    username=mongo_user,
    password=mongo_password,
    authSource=auth_db,
)

# Target database: the denormalized collection is dropped here so that each run
# rebuilds it from scratch.
db_mongo_denorm = client_mongo.get_database("denormalization")
db_mongo_denorm.drop_collection("compare_french_gas_to_stockmarket")

# Source database holding the raw collections.
db_mongo_datalake = client_mongo.get_database("datalake")

# Years to process, as strings ('2007' ... '2025'). Narrow the range to process
# a subset, e.g. range(2007, 2008) for a single year.
years = [str(y) for y in range(2007, 2026)]


# One oil barrel holds 158.987 liters; used to turn a per-barrel price into a
# per-liter price.
LITERS_PER_BARREL = 158.987

# Fuel columns that must always exist in the output, even for years in which a
# given fuel has no price records.
ALL_FUEL_COLUMNS = [
    "Gas_E10_eur_liter",
    "Gas_E85_eur_liter",
    "Gas_GPLC_eur_liter",
    "Gas_GAZOLE_eur_liter",
    "Gas_SP95_eur_liter",
    "Gas_SP98_eur_liter",
]


def _load_year(collection, fields, year):
    """Return the documents of *collection* dated within *year* as a DataFrame.

    Only *fields* are fetched (``_id`` is excluded); *fields* must include
    ``"Date"``. The returned frame always has exactly these columns, even when
    the query matches nothing, and its ``Date`` column is converted with
    ``pd.to_datetime``.
    """
    projection = {"_id": 0}
    projection.update((field, 1) for field in fields)
    cursor = collection.find(
        {"Date": {"$gte": datetime(year, 1, 1), "$lt": datetime(year + 1, 1, 1)}},
        projection,
    )
    # Explicit columns keep the expected schema when the cursor is empty, so the
    # 'Date' lookup below (and the later merges) cannot fail with a KeyError.
    frame = pd.DataFrame(list(cursor), columns=fields)
    frame['Date'] = pd.to_datetime(frame['Date'])
    return frame


# Collection handles and the index do not depend on the year: resolve them once.
collection_gas = db_mongo_datalake.get_collection("french_gas_station_price_logs_eur")
collection_brent = db_mongo_datalake.get_collection("stockmarket_brent_usd")
collection_eur_usd = db_mongo_datalake.get_collection("stockmarket_eur_usd")
collection_out = db_mongo_denorm.get_collection("compare_french_gas_to_stockmarket")
collection_out.create_index([("Date", pymongo.ASCENDING)])

for year in years:
    year = int(year)
    print("YEAR: ", year, " // start_date:", datetime(year, 1, 1), "end_date:", datetime(year+1, 1, 1))

    # --- collection french_gas_station ----
    df_french_gas_station = _load_year(
        collection_gas, ["Id_station_essence", "Date", "Nom", "Valeur"], year
    )
    if df_french_gas_station.empty:
        # The gas prices drive the left joins below: without them there is
        # nothing to denormalize for this year.
        print("no gas station prices found for", year, "- nothing to load")
        continue

    df_french_gas_station = df_french_gas_station.rename(columns={
        "Id_station_essence": "Gas_station_id", "Nom": "Gas_name", "Valeur": "Gas_eur_liter"
    })
    # Combine the gas station prices when same date and same gas name (by mean)
    df_french_gas_station = (
        df_french_gas_station
        .groupby(['Date', 'Gas_name'], as_index=False)['Gas_eur_liter']
        .mean()
        .round(5)
    )

    # Create one column per gas name (pivot the frame to wide format)
    df_french_gas_station = df_french_gas_station.pivot(
        index='Date', columns='Gas_name', values='Gas_eur_liter'
    )
    df_french_gas_station = df_french_gas_station.rename(
        columns=lambda x: f"Gas_{x.upper()}_eur_liter"
    ).reset_index()

    # Always expose every known fuel column, filled with NaN when absent
    for col in ALL_FUEL_COLUMNS:
        if col not in df_french_gas_station.columns:
            df_french_gas_station[col] = float('nan')

    print("df_french_gas_station")
    print(df_french_gas_station.head())

    # --- collection stockmarket_brent_usd ----
    stockmarket_brent_usd = _load_year(collection_brent, ["Date", "Close"], year)
    stockmarket_brent_usd = stockmarket_brent_usd.rename(columns={"Close": "Brent_usd_barrel"})
    print("stockmarket_brent_usd")
    print(stockmarket_brent_usd.head())

    # --- collection stockmarket_eur_usd ----
    stockmarket_eur_usd = _load_year(collection_eur_usd, ["Date", "Close"], year)
    stockmarket_eur_usd = stockmarket_eur_usd.rename(columns={"Close": "Eur_usd"})
    print("stockmarket_eur_usd")
    print(stockmarket_eur_usd.head())

    # --- join the 3 collections ----
    # Join 1: Brent price; join 2: EUR/USD exchange rate. Left joins keep every
    # gas-price date, leaving NaN where no quote exists for that date.
    df_result = pd.merge(df_french_gas_station, stockmarket_brent_usd, on='Date', how='left')
    df_result = pd.merge(df_result, stockmarket_eur_usd, on='Date', how='left')
    print("df_result after all the joined")
    print(df_result.head())

    # ---- Create column Brent_eur_liter -----
    # to_numeric guarantees a float dtype even when a stock collection returned
    # no rows for the year (the merged column would otherwise be object-typed).
    brent_usd_barrel = pd.to_numeric(df_result['Brent_usd_barrel'])
    eur_usd_rate = pd.to_numeric(df_result['Eur_usd'])
    df_result['Brent_eur_barrel'] = brent_usd_barrel / eur_usd_rate
    df_result['BRENT_eur_liter'] = (df_result['Brent_eur_barrel'] / LITERS_PER_BARREL).round(5)
    print("df_result after create BRENT_eur_liter")
    print(df_result.head())

    # Keep only the per-liter prices in the stored documents
    df_clean = df_result.drop(columns=['Brent_usd_barrel', 'Eur_usd', 'Brent_eur_barrel'])
    print("df_clean len=", len(df_clean))
    print(df_clean.head(10))

    # ----- Push to MongoDB -----
    records = df_clean.to_dict(orient="records")
    # insert_many rejects an empty document list, so only call it with data
    if records:
        collection_out.insert_many(records)
    print("correctly loaded", year,"denormalized datas to MongoDB")

YEAR:  2007  // start_date: 2007-01-01 00:00:00 end_date: 2008-01-01 00:00:00
df_french_gas_station
Gas_name       Date  Gas_E85_eur_liter  Gas_GPLC_eur_liter  \
0        2007-01-01                NaN                 NaN   
1        2007-01-02                NaN                 NaN   
2        2007-01-03                NaN                 NaN   
3        2007-01-04                NaN                 NaN   
4        2007-01-05                NaN                 NaN   

Gas_name  Gas_GAZOLE_eur_liter  Gas_SP95_eur_liter  Gas_E10_eur_liter  \
0                      1.10647             1.24179                NaN   
1                      1.00571             1.17889                NaN   
2                      1.00645             1.18080                NaN   
3                      1.02514             1.18936                NaN   
4                      1.00072             1.17936                NaN   

Gas_name  Gas_SP98_eur_liter  
0                        NaN  
1                        N

df_french_gas_station
Gas_name       Date  Gas_E10_eur_liter  Gas_E85_eur_liter  Gas_GPLC_eur_liter  \
0        2009-01-01                NaN                NaN             0.69737   
1        2009-01-02                NaN            0.83753             0.70053   
2        2009-01-03                NaN            0.83622             0.69460   
3        2009-01-04                NaN            0.84833             0.71425   
4        2009-01-05                NaN            0.83705             0.69463   

Gas_name  Gas_GAZOLE_eur_liter  Gas_SP95_eur_liter  Gas_SP98_eur_liter  
0                      0.99215             1.07377                 NaN  
1                      0.92289             1.03605                 NaN  
2                      0.92131             1.03783                 NaN  
3                      0.93677             1.05622                 NaN  
4                      0.93160             1.04988                 NaN  
stockmarket_brent_usd
        Date  Brent_usd_barrel


df_french_gas_station
Gas_name       Date  Gas_E10_eur_liter  Gas_E85_eur_liter  Gas_GPLC_eur_liter  \
0        2011-01-01            1.43855            0.85000             0.82688   
1        2011-01-02            1.49219            0.85000             0.81986   
2        2011-01-03            1.41594            0.83884             0.78407   
3        2011-01-04            1.44049            0.84095             0.82073   
4        2011-01-05            1.45294            0.84484             0.82335   

Gas_name  Gas_GAZOLE_eur_liter  Gas_SP95_eur_liter  Gas_SP98_eur_liter  
0                      1.26719             1.45732                 NaN  
1                      1.24115             1.44331                 NaN  
2                      1.24210             1.43745                 NaN  
3                      1.25581             1.44560                 NaN  
4                      1.25613             1.44767                 NaN  
stockmarket_brent_usd
        Date  Brent_usd_barrel


df_french_gas_station
Gas_name       Date  Gas_E10_eur_liter  Gas_E85_eur_liter  Gas_GPLC_eur_liter  \
0        2013-01-01            1.46829            0.94929             0.93813   
1        2013-01-02            1.47276            0.92526             0.86400   
2        2013-01-03            1.50348            0.93361             0.87958   
3        2013-01-04            1.50624            0.93130             0.88060   
4        2013-01-05            1.52166            0.92733             0.87512   

Gas_name  Gas_GAZOLE_eur_liter  Gas_SP95_eur_liter  Gas_SP98_eur_liter  
0                      1.34179             1.58390                 NaN  
1                      1.34186             1.51233                 NaN  
2                      1.34925             1.51556                 NaN  
3                      1.35424             1.52499                 NaN  
4                      1.35351             1.52556                 NaN  
stockmarket_brent_usd
        Date  Brent_usd_barrel


df_french_gas_station
Gas_name       Date  Gas_E10_eur_liter  Gas_E85_eur_liter  Gas_GPLC_eur_liter  \
0        2015-01-01            1.36229            0.94394             0.92184   
1        2015-01-02            1.26752            0.89315             0.79669   
2        2015-01-03            1.28381            0.86694             0.81314   
3        2015-01-04            1.28918            0.94410             0.87938   
4        2015-01-05            1.27091            0.89919             0.79622   

Gas_name  Gas_GAZOLE_eur_liter  Gas_SP95_eur_liter  Gas_SP98_eur_liter  
0                      1.20988             1.36034             1.40672  
1                      1.12315             1.28761             1.33447  
2                      1.12272             1.28723             1.33085  
3                      1.13079             1.29399             1.34595  
4                      1.12548             1.28719             1.33356  
stockmarket_brent_usd
        Date  Brent_usd_barrel


df_french_gas_station
Gas_name       Date  Gas_E10_eur_liter  Gas_E85_eur_liter  Gas_GPLC_eur_liter  \
0        2017-01-01            1.37712            0.68750             0.72194   
1        2017-01-02            1.37046            0.70997             0.70951   
2        2017-01-03            1.37175            0.70720             0.71387   
3        2017-01-04            1.36878            0.70734             0.71450   
4        2017-01-05            1.37816            0.70333             0.71860   

Gas_name  Gas_GAZOLE_eur_liter  Gas_SP95_eur_liter  Gas_SP98_eur_liter  
0                      1.26727             1.48197             1.46940  
1                      1.25477             1.40742             1.44889  
2                      1.25816             1.40860             1.45104  
3                      1.25794             1.41306             1.45145  
4                      1.26513             1.41293             1.46389  
stockmarket_brent_usd
        Date  Brent_usd_barrel


df_french_gas_station
Gas_name       Date  Gas_E10_eur_liter  Gas_E85_eur_liter  Gas_GPLC_eur_liter  \
0        2019-01-01            1.39991            0.69116             0.87050   
1        2019-01-02            1.37236            0.68570             0.84696   
2        2019-01-03            1.37276            0.68941             0.85714   
3        2019-01-04            1.36540            0.68603             0.86185   
4        2019-01-05            1.38231            0.68511             0.85945   

Gas_name  Gas_GAZOLE_eur_liter  Gas_SP95_eur_liter  Gas_SP98_eur_liter  
0                      1.41563             1.49858             1.49392  
1                      1.37816             1.39906             1.45679  
2                      1.37765             1.40047             1.46019  
3                      1.36659             1.40436             1.45027  
4                      1.37274             1.40247             1.46473  
stockmarket_brent_usd
        Date  Brent_usd_barrel


df_french_gas_station
Gas_name       Date  Gas_E10_eur_liter  Gas_E85_eur_liter  Gas_GPLC_eur_liter  \
0        2021-01-01            1.39303            0.69628             0.85642   
1        2021-01-02            1.34989            0.68052             0.84112   
2        2021-01-03            1.37511            0.69601             0.85525   
3        2021-01-04            1.35605            0.68287             0.83911   
4        2021-01-05            1.36755            0.68892             0.84814   

Gas_name  Gas_GAZOLE_eur_liter  Gas_SP95_eur_liter  Gas_SP98_eur_liter  
0                      1.32152             1.40044             1.49395  
1                      1.26886             1.36858             1.42101  
2                      1.29172             1.36626             1.45464  
3                      1.27815             1.37478             1.42902  
4                      1.28583             1.38040             1.44192  
stockmarket_brent_usd
        Date  Brent_usd_barrel


df_french_gas_station
Gas_name       Date  Gas_E10_eur_liter  Gas_E85_eur_liter  Gas_GPLC_eur_liter  \
0        2023-01-01            1.81163            1.11720             0.99690   
1        2023-01-02            1.79120            1.05788             0.97955   
2        2023-01-03            1.82532            1.10812             0.98791   
3        2023-01-04            1.83633            1.11374             0.99044   
4        2023-01-05            1.84161            1.12419             0.98646   

Gas_name  Gas_GAZOLE_eur_liter  Gas_SP95_eur_liter  Gas_SP98_eur_liter  
0                      1.91737             1.82890             1.91196  
1                      1.89080             1.82390             1.87265  
2                      1.90824             1.84849             1.91223  
3                      1.91533             1.85488             1.92080  
4                      1.91518             1.86191             1.92452  
stockmarket_brent_usd
        Date  Brent_usd_barrel


df_french_gas_station
Gas_name       Date  Gas_E10_eur_liter  Gas_E85_eur_liter  Gas_GPLC_eur_liter  \
0        2025-01-01            1.75098            0.79343             1.00802   
1        2025-01-02            1.73692            0.78582             1.00151   
2        2025-01-03            1.74349            0.79075             1.01011   
3        2025-01-04            1.75123            0.79112             1.00917   
4        2025-01-05            1.76463            0.79195             1.01206   

Gas_name  Gas_GAZOLE_eur_liter  Gas_SP95_eur_liter  Gas_SP98_eur_liter  
0                      1.68511             1.77197             1.86130  
1                      1.66950             1.77162             1.84131  
2                      1.67498             1.77232             1.84697  
3                      1.68353             1.76892             1.85840  
4                      1.69999             1.77121             1.87177  
stockmarket_brent_usd
        Date  Brent_usd_barrel
