In [9]:
import os
import warnings
from datetime import datetime

import pandas as pd
import pymongo

# Turn off pandas' chained-assignment check and hide RuntimeWarning messages
# for the remainder of the session.
pd.set_option("mode.chained_assignment", None)
warnings.filterwarnings(action="ignore", category=RuntimeWarning)

# Setup MongoDB connection (local by default).
# Every connection setting can be overridden through an environment variable so
# that real credentials never need to be written into this script; when a
# variable is unset, the original local-development value is used.
mongo_host = os.environ.get("MONGO_HOST", "localhost")
mongo_port = int(os.environ.get("MONGO_PORT", "27017"))
mongo_user = os.environ.get("MONGO_USER", "admin")
mongo_password = os.environ.get("MONGO_PASSWORD", "password")
auth_db = os.environ.get("MONGO_AUTH_DB", "admin")
client_mongo = pymongo.MongoClient(
    host=mongo_host,
    port=mongo_port,
    username=mongo_user,
    password=mongo_password,
    authSource=auth_db,
)
db_mongo_denorm = client_mongo.get_database("denormalization")
# The output collection is dropped here and refilled by the per-year loop below,
# so re-running the script does not accumulate duplicate documents.
db_mongo_denorm.drop_collection("spreads_ttc_gas_station_gas_brent")

# Years to process, kept as strings ('2007' through '2025'); the processing
# loop converts each one to int.
# To run a subset, narrow the range bounds, e.g. range(2007, 2013) for
# 2007-2012 or range(2007, 2008) for 2007 only.
years = [str(y) for y in range(2007, 2026)]


# For every year: read the fuel and Brent prices from "gas_station_gas_ttc",
# compute one spread column per fuel grade (fuel price minus Brent price,
# rounded to 5 decimals) and insert the rows into
# "spreads_ttc_gas_station_gas_brent".
fuel_grades = ("GAZOLE", "SP95", "SP98", "E85", "E10", "GPLC")
brent_column = "BRENT_eur_liter"
price_columns = {
    grade: f"ttc_gas_station_gas_{grade}_eur_liter" for grade in fuel_grades
}

# Fields requested from MongoDB, and the columns the DataFrame must expose.
projection = {"_id": 0, "Date": 1, brent_column: 1}
projection.update(dict.fromkeys(price_columns.values(), 1))
expected_columns = ["Date", *price_columns.values(), brent_column]

# Collection handles and the index do not depend on the year, so they are
# prepared once instead of on every iteration.
collection_source = db_mongo_denorm.get_collection("gas_station_gas_ttc")
collection_mongo = db_mongo_denorm.get_collection("spreads_ttc_gas_station_gas_brent")
collection_mongo.create_index([("Date", pymongo.ASCENDING)])

for year in map(int, years):
    start_date = datetime(year, 1, 1)
    end_date = datetime(year + 1, 1, 1)
    print("YEAR: ", year, " // start_date:", start_date, "end_date:", end_date)

    # --- collection french_gas_station ----
    cursor = collection_source.find(
        {"Date": {"$gte": start_date, "$lt": end_date}},
        projection,
    )
    # reindex guarantees every expected column exists (filled with NaN) even
    # when the query returns no documents or a field is absent from all of
    # them, so the column lookups below cannot raise KeyError.
    df_gas_prices = pd.DataFrame(list(cursor)).reindex(columns=expected_columns)
    # A spread is only meaningful where a Brent price is available.
    df_gas_prices = df_gas_prices[df_gas_prices[brent_column].notna()].reset_index(drop=True)

    if df_gas_prices.empty:
        # insert_many rejects an empty document list, so skip years without data.
        print("no", brent_column, "data for", year, "- nothing loaded")
        continue

    df_spreads_gas_brent = pd.DataFrame({"Date": pd.to_datetime(df_gas_prices["Date"])})
    for grade, price_column in price_columns.items():
        df_spreads_gas_brent[f"Spread_ttc_{grade}_BRENT"] = (
            df_gas_prices[price_column] - df_gas_prices[brent_column]
        ).round(5)

    print("df_spreads_gas_brent\n", df_spreads_gas_brent)

    # ----- Push to MongoDB -----
    records = df_spreads_gas_brent.to_dict(orient="records")
    collection_mongo.insert_many(records)
    print("correctly loaded", year,"spreads_gas_brent denormalized datas to MongoDB")
    

YEAR:  2007  // start_date: 2007-01-01 00:00:00 end_date: 2008-01-01 00:00:00
df_spreads_gas_brent
           Date  Spread_ttc_GAZOLE_BRENT  Spread_ttc_SP95_BRENT  \
0   2007-07-30                  0.71976                0.91788   
1   2007-07-31                  0.71446                0.91460   
2   2007-08-01                  0.72089                0.92039   
3   2007-08-02                  0.71234                0.90178   
4   2007-08-03                  0.72238                0.91214   
..         ...                      ...                    ...   
99  2007-12-21                  0.77882                0.92244   
100 2007-12-24                  0.77775                0.91790   
101 2007-12-26                  0.77672                0.91827   
102 2007-12-27                  0.77698                0.91937   
103 2007-12-28                  0.78349                0.92586   

     Spread_ttc_SP98_BRENT  Spread_ttc_E85_BRENT  Spread_ttc_E10_BRENT  \
0                      NaN       

correctly loaded 2018 spreads_gas_brent denormalized datas to MongoDB
YEAR:  2019  // start_date: 2019-01-01 00:00:00 end_date: 2020-01-01 00:00:00
df_spreads_gas_brent
           Date  Spread_ttc_GAZOLE_BRENT  Spread_ttc_SP95_BRENT  \
0   2019-01-02                  1.07684                1.09774   
1   2019-01-03                  1.06671                1.08953   
2   2019-01-04                  1.05152                1.08929   
3   2019-01-07                  1.05117                1.08881   
4   2019-01-08                  1.04944                1.08767   
..         ...                      ...                    ...   
245 2019-12-23                  1.09087                1.15145   
246 2019-12-24                  1.08980                1.14777   
247 2019-12-26                  1.08280                1.14236   
248 2019-12-27                  1.07943                1.14163   
249 2019-12-30                  1.09102                1.14651   

     Spread_ttc_SP98_BRENT  Spread_tt