In [6]:
import pandas as pd
import warnings
import pymongo
from datetime import datetime

# Silence pandas' chained-assignment warning and all RuntimeWarnings for this
# notebook cell.
pd.set_option("mode.chained_assignment", None)
warnings.filterwarnings("ignore", category=RuntimeWarning)

# --- MongoDB connection (local instance) ---
# NOTE(review): credentials are hard-coded here; consider moving them out of
# the notebook before sharing it.
mongo_host, mongo_port = "localhost", 27017
mongo_user, mongo_password = "admin", "password"
auth_db = "admin"

client_mongo = pymongo.MongoClient(
    host=mongo_host,
    port=mongo_port,
    username=mongo_user,
    password=mongo_password,
    authSource=auth_db,
)
db_mongo_denorm = client_mongo["denormalization"]

# Start from a clean target collection: every run rebuilds it from scratch.
db_mongo_denorm.drop_collection("spreads_gas_brent")

# Years to process, as strings: '2007' through '2025' inclusive.
# To process a subset, narrow the range, e.g. range(2007, 2013) for
# 2007-2012 or range(2007, 2008) for 2007 only.
years = [str(y) for y in range(2007, 2026)]


# Fuel identifiers as they appear in the source column names
# ("Gas_<FUEL>_eur_liter") and in the output names ("Spread_<FUEL>_BRENT").
_FUELS = ("GAZOLE", "SP95", "SP98", "E85", "E10", "GPLC")
_BRENT_COLUMN = "BRENT_eur_liter"


def _compute_spreads(df_gas_prices):
    """Return the per-date spread between each fuel price and the Brent price.

    Args:
        df_gas_prices: non-empty DataFrame holding a ``Date`` column plus any
            of the ``Gas_<FUEL>_eur_liter`` columns and ``BRENT_eur_liter``.

    Returns:
        DataFrame with ``Date`` and one ``Spread_<FUEL>_BRENT`` column per
        fuel (fuel price minus Brent price, rounded to 5 decimals). A price
        column that is absent from the input yields an all-NaN spread.
    """
    fuel_columns = [f"Gas_{fuel}_eur_liter" for fuel in _FUELS]
    # reindex() adds any column missing from the query result as NaN, so a
    # year in which a fuel was never recorded does not raise KeyError.
    prices = df_gas_prices.reindex(columns=fuel_columns + [_BRENT_COLUMN])

    spreads = pd.DataFrame()
    spreads["Date"] = pd.to_datetime(df_gas_prices["Date"])
    for fuel, fuel_column in zip(_FUELS, fuel_columns):
        spreads[f"Spread_{fuel}_BRENT"] = (
            prices[fuel_column] - prices[_BRENT_COLUMN]
        ).round(5)
    return spreads


# Collection handles and the index do not depend on the year: set up once.
collection_gas_prices = db_mongo_denorm.get_collection("compare_french_gas_to_stockmarket")
collection_mongo = db_mongo_denorm.get_collection("spreads_gas_brent")
collection_mongo.create_index([("Date", pymongo.ASCENDING)])

# Fetch only the fields needed to compute the spreads.
_projection = {"_id": 0, "Date": 1, _BRENT_COLUMN: 1}
_projection.update({f"Gas_{fuel}_eur_liter": 1 for fuel in _FUELS})

for year in years:
    year = int(year)
    start_date = datetime(year, 1, 1)
    end_date = datetime(year + 1, 1, 1)
    print("YEAR: ", year, " // start_date:", start_date, "end_date:", end_date)

    # --- read one calendar year of prices (start inclusive, end exclusive) ---
    cursor = collection_gas_prices.find(
        {"Date": {"$gte": start_date, "$lt": end_date}},
        _projection,
    )
    df_gas_prices = pd.DataFrame(list(cursor))

    # A year without any source document gives an empty frame: there is no
    # 'Date' column to read (KeyError) and insert_many() rejects an empty
    # list, so skip such years instead of aborting the whole run.
    if df_gas_prices.empty:
        print("no data found for", year, "- nothing loaded")
        continue

    df_spreads_gas_brent = _compute_spreads(df_gas_prices)
    print("df_spreads_gas_brent\n", df_spreads_gas_brent)

    # ----- Push to MongoDB -----
    records = df_spreads_gas_brent.to_dict(orient="records")
    collection_mongo.insert_many(records)
    print("correctly loaded", year,"spreads_gas_brent denormalized datas to MongoDB")
    

YEAR:  2007  // start_date: 2007-01-01 00:00:00 end_date: 2008-01-01 00:00:00
df_spreads_gas_brent
           Date  Spread_GAZOLE_BRENT  Spread_SP95_BRENT  Spread_SP98_BRENT  \
0   2007-01-01                  NaN                NaN                NaN   
1   2007-01-02                  NaN                NaN                NaN   
2   2007-01-03                  NaN                NaN                NaN   
3   2007-01-04                  NaN                NaN                NaN   
4   2007-01-05                  NaN                NaN                NaN   
..         ...                  ...                ...                ...   
360 2007-12-27              0.77698            0.91937                NaN   
361 2007-12-28              0.78349            0.92586                NaN   
362 2007-12-29                  NaN                NaN                NaN   
363 2007-12-30                  NaN                NaN                NaN   
364 2007-12-31                  NaN                Na

correctly loaded 2017 spreads_gas_brent denormalized datas to MongoDB
YEAR:  2018  // start_date: 2018-01-01 00:00:00 end_date: 2019-01-01 00:00:00
df_spreads_gas_brent
           Date  Spread_GAZOLE_BRENT  Spread_SP95_BRENT  Spread_SP98_BRENT  \
0   2018-01-01                  NaN                NaN                NaN   
1   2018-01-02              1.02681            1.11480            1.16246   
2   2018-01-03              1.02755            1.11480            1.16813   
3   2018-01-04              1.02399            1.10921            1.16258   
4   2018-01-05              1.02828            1.11198            1.16419   
..         ...                  ...                ...                ...   
360 2018-12-27              1.10181            1.11682            1.18216   
361 2018-12-28              1.08708            1.09981            1.16373   
362 2018-12-29                  NaN                NaN                NaN   
363 2018-12-30                  NaN                NaN      