In [1]:
import pandas as pd
import os
import warnings
import pymongo
from pymongo import ReplaceOne
import json
import datetime

# Notebook-wide noise reduction:
#  - turn off pandas' chained-assignment check (None = take no action),
#  - hide every RuntimeWarning.
pd.set_option("mode.chained_assignment", None)
warnings.simplefilter("ignore", category=RuntimeWarning)

# Setup MongoDB connection (local by default).
# Every setting can be overridden with an environment variable so that real
# credentials never have to be written into the notebook. When a variable is
# not set, the original local-development value is used, so the default
# behaviour is unchanged.
mongo_host = os.environ.get("MONGO_HOST", "localhost")
mongo_port = int(os.environ.get("MONGO_PORT", "27017"))
mongo_user = os.environ.get("MONGO_USER", "admin")
mongo_password = os.environ.get("MONGO_PASSWORD", "password")
auth_db = os.environ.get("MONGO_AUTH_DB", "admin")
client_mongo = pymongo.MongoClient(
    host=mongo_host,
    port=mongo_port,
    username=mongo_user,
    password=mongo_password,
    authSource=auth_db,
)
# Database that holds every collection written by this notebook.
db_mongo = client_mongo.get_database("datalake")

# Drop both collections if they already exist, so that every run of this
# notebook rebuilds them from scratch.
db_mongo.drop_collection("french_gas_station")
db_mongo.drop_collection("french_gas_station_price_logs_eur")

# Station collection: compound index on station id, then postal code.
collection_mongo_gas_station = db_mongo["french_gas_station"]
collection_mongo_gas_station.create_index(
    [
        ("Id_station_essence", pymongo.ASCENDING),
        ("Cp", pymongo.ASCENDING),
    ]
)

# Price-log collection: compound index on date, then fuel name.
collection_mongo_price_logs = db_mongo["french_gas_station_price_logs_eur"]
collection_mongo_price_logs.create_index(
    [
        ("Date", pymongo.ASCENDING),
        ("Nom", pymongo.ASCENDING),
    ]
)

# Years to load, kept as strings. Full range: 2007 through 2025 inclusive.
years = [str(y) for y in range(2007, 2026)]
# To load only a subset, override the list, for example:
#years = ['2007', '2008', '2009', '2010', '2011', '2012']
#years = ['2008']

# Number of price-log rows converted to dicts and sent to MongoDB per
# insert_many call. The yearly files hold millions of rows, so materialising
# a whole year as one list of dicts is needlessly memory-hungry.
price_log_batch_size = 100_000

# Load each yearly CSV into MongoDB:
#   * one document per station (latest known row), upserted by station id;
#   * one document per price record, appended to the price-log collection.
for year in years:
    print("YEAR: ", year)

    file_path = f"results/inputs_csv_zscore_last/PrixCarburants_annuel_filtered_{year}.csv"

    # Guard clause: a year without an input file is reported and skipped.
    if not os.path.exists(file_path):
        print(f"Le fichier {file_path} n'existe pas.")
        continue

    df = pd.read_csv(file_path)
    print("len = ", len(df))

    # pymongo rejects empty operation/document lists, so a file that has a
    # header but no rows must be skipped instead of crashing the whole run.
    if df.empty:
        print(f"{file_path} contains no rows, skipping.")
        continue

    # Dates are written as e.g. "2007_01_04" in the CSV.
    df['date'] = pd.to_datetime(df['date'], format='%Y_%m_%d')
    # Scale the raw value by 1/1000.
    # NOTE(review): presumably converts thousandths of a euro to euros
    # (the target collection is named "..._price_logs_eur") - confirm.
    df['valeur'] = df['valeur'] * 0.001

    # 'id_station_essence' -> 'Id_station_essence', 'cp' -> 'Cp', ...
    df.columns = [col.capitalize() for col in df.columns]

    # --- Stations -------------------------------------------------------
    # NOTE(review): 'Cp' is printed as an integer (e.g. 1000) in the cell
    # output, so a leading zero in a postal code would be lost - confirm
    # whether that is intended.
    df_gas_station = df[['Id_station_essence', 'Adresse', 'Ville', 'Cp', 'Latitude', 'Longitude', 'Date']]
    # Sort by station then date ascending and keep the last row of each
    # station, i.e. its most recent record for this year.
    df_gas_station = df_gas_station.sort_values(['Id_station_essence', 'Date'], ascending=[True, True])
    df_gas_station = df_gas_station.drop_duplicates(subset=['Id_station_essence'], keep='last').reset_index(drop=True)
    print('gas_station \n', df_gas_station.head(5))

    # Upsert: replace the stored station document when the id already
    # exists, insert it otherwise. Years are processed in list order, so a
    # later year overwrites the station data of an earlier one.
    operations = [
        ReplaceOne(
            {"Id_station_essence": record["Id_station_essence"]},
            record,
            upsert=True,
        )
        for record in df_gas_station.to_dict(orient="records")
    ]
    collection_mongo_gas_station.bulk_write(operations)
    print("correctly loaded", year, "gas-station datas to MongoDB")

    # --- Price logs -----------------------------------------------------
    df_gas_station_prices_logs = df[['Date', 'Id_station_essence', 'Nom', 'Valeur', 'Heuremin']]
    print('gas_station_prices_logs \n', df_gas_station_prices_logs.head(5))

    # Insert in bounded batches: same documents as a single insert_many,
    # but only one batch of dicts is held in memory at a time.
    for start in range(0, len(df_gas_station_prices_logs), price_log_batch_size):
        batch = df_gas_station_prices_logs.iloc[start:start + price_log_batch_size]
        collection_mongo_price_logs.insert_many(batch.to_dict(orient="records"))
    print("correctly loaded", year, "gas_station_prices_logs csv to MongoDB")

YEAR:  2007
len =  651116
gas_station 
    Id_station_essence                     Adresse                  Ville  \
0             1000001             ROUTE NATIONALE  SAINT-DENIS-LèS-BOURG   
1             1000002         16 Avenue de Marboz        BOURG-EN-BRESSE   
2             1000004  20 Avenue du Maréchal Juin        Bourg-en-Bresse   
3             1000006    1 Boulevard John Kennedy        BOURG-EN-BRESSE   
4             1000007       Avenue Amédée Mercier        Bourg-en-Bresse   

     Cp   Latitude  Longitude       Date  
0  1000  4620114.0   519791.0 2007-12-22  
1  1000  4621842.0   522767.0 2007-12-22  
2  1000  4618836.0   524447.0 2007-10-30  
3  1000  4620754.0   523758.0 2007-12-28  
4  1000  4620105.0   524891.0 2007-12-21  
correctly loaded 2007 gas-station datas to MongoDB
gas_station_prices_logs 
         Date  Id_station_essence     Nom  Valeur Heuremin
0 2007-01-04             1000001  Gazole   0.999    18:43
1 2007-01-04             1000001    SP95   1.170    

correctly loaded 2013 gas-station datas to MongoDB
gas_station_prices_logs 
         Date  Id_station_essence     Nom  Valeur Heuremin
0 2013-01-07             1000001    SP95   1.499    09:30
1 2013-01-18             1000001    SP95   1.529    11:31
2 2013-01-20             1000001    SP95   1.529    10:19
3 2013-01-21             1000001  Gazole   1.339    09:23
4 2013-01-21             1000001    SP95   1.529    09:23
correctly loaded 2013 gas_station_prices_logs csv to MongoDB
YEAR:  2014
len =  1596921
gas_station 
    Id_station_essence                     Adresse                  Ville  \
0             1000001             ROUTE NATIONALE  SAINT-DENIS-LèS-BOURG   
1             1000002         16 Avenue de Marboz        BOURG-EN-BRESSE   
2             1000004  20 Avenue du Maréchal Juin        Bourg-en-Bresse   
3             1000006    1 Boulevard John Kennedy        BOURG-EN-BRESSE   
4             1000007       Avenue Amédée Mercier        Bourg-en-Bresse   

     Cp   Latitu

correctly loaded 2020 gas-station datas to MongoDB
gas_station_prices_logs 
         Date  Id_station_essence     Nom  Valeur Heuremin
0 2020-01-03             1000001  Gazole   1.442    10:36
1 2020-01-06             1000001  Gazole   1.443    10:36
2 2020-01-07             1000001  Gazole   1.458    10:48
3 2020-01-07             1000001    SP95   1.525    10:48
4 2020-01-11             1000001    SP95   1.530    10:15
correctly loaded 2020 gas_station_prices_logs csv to MongoDB
YEAR:  2021
len =  2978090
gas_station 
    Id_station_essence                     Adresse                  Ville  \
0             1000001       596 AVENUE DE TREVOUX  SAINT-DENIS-LèS-BOURG   
1             1000002         16 Avenue de Marboz        BOURG-EN-BRESSE   
2             1000004  20 Avenue du Maréchal Juin        Bourg-en-Bresse   
3             1000007     AVENUE FRANCOIS PIGNIER        Bourg-en-Bresse   
4             1000008        Bd Charles de Gaulle        BOURG-EN-BRESSE   

     Cp   Latitu