In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import os

# --- Selenium setup ---
# Scrape the yearly transport-distribution cost table from the ministry page
# and build `df_costs` with one row per year (value converted to EUR/liter).
driver = webdriver.Chrome()  # or webdriver.Firefox()
try:
    driver.get("https://www.ecologie.gouv.fr/politiques-publiques/prix-produits-petroliers")

    # --- Wait (up to 10 s) for the table heading to be present in the DOM ---
    wait = WebDriverWait(driver, 10)

    # Locate the h3 heading that introduces the wanted table
    h3_element = wait.until(EC.presence_of_element_located(
        (By.XPATH, '//h3[contains(text(), "Évolution des coûts moyens de transport-distribution en France en c€/l depuis 2015")]')
    ))

    # The table lives in a sibling div located after the h3
    table_element = h3_element.find_element(By.XPATH, 'following-sibling::div[@class="fr-table fr-table--blue-ecume"]//table')

    # Read the table rows
    rows = table_element.find_elements(By.TAG_NAME, "tr")

    data = []
    for row in rows[1:]:  # skip header
        cols = row.find_elements(By.TAG_NAME, "td")
        if len(cols) < 2:
            # Rows without two data cells (e.g. extra header rows made of
            # <th>) carry no year/value pair; skip them instead of crashing.
            continue
        year = int(cols[0].text.strip())
        # Values use a decimal comma; convert to a dot before parsing.
        gazole = float(cols[1].text.strip().replace(",", "."))
        data.append([year, gazole])
finally:
    # Always close the browser, even when the wait times out or parsing
    # fails, so no orphaned browser/driver process is left behind.
    driver.quit()

# Build the DataFrame and convert euro cents per liter to euros per liter
df_costs = pd.DataFrame(data, columns=["Year", "Gazole_transportation_fees_cts_eur_liter"])
df_costs["Gazole_transportation_fees_eur_liter"] = df_costs["Gazole_transportation_fees_cts_eur_liter"] / 100
df_costs = df_costs.drop(columns=['Gazole_transportation_fees_cts_eur_liter'])


# Expand the yearly costs in `df_costs` to one row per month, from January of
# the first available year up to the current month, then export to CSV.
if df_costs.empty:
    # Without any scraped year there is no start date to build the range from.
    raise ValueError("df_costs is empty: no transport cost rows were scraped")

fee_column = "Gazole_transportation_fees_eur_liter"

# First day of the current month at midnight (upper bound of the range)
current_month_start = pd.Timestamp.today().normalize().replace(day=1)
df_monthly = pd.DataFrame({"Date_monthly": pd.date_range(start=f"{df_costs['Year'].min()}-01-01",
                                                         end=current_month_start,
                                                         freq='MS')})
df_monthly['Year'] = df_monthly['Date_monthly'].dt.year

# Attach the yearly fee to every month of that year
df_monthly_costs = df_monthly.merge(df_costs, on='Year', how='left')
df_monthly_costs = df_monthly_costs.drop(columns=['Year'])

# Months with no yearly value (e.g. beyond the last published year) are
# filled with the value of the most recent year. Select it by year rather
# than by row position so the result does not depend on the table's order.
last_value = df_costs.loc[df_costs['Year'].idxmax(), fee_column]
df_monthly_costs[fee_column] = df_monthly_costs[fee_column].fillna(last_value)

# Export CSV
os.makedirs("csv", exist_ok=True)
df_monthly_costs.to_csv("csv/transport_fees_eur_liter.csv", index=False)

print(df_monthly_costs)

    Date_monthly  Gazole_transportation_fees_eur_liter
0     2015-01-01                                 0.086
1     2015-02-01                                 0.086
2     2015-03-01                                 0.086
3     2015-04-01                                 0.086
4     2015-05-01                                 0.086
..           ...                                   ...
129   2025-10-01                                 0.219
130   2025-11-01                                 0.219
131   2025-12-01                                 0.219
132   2026-01-01                                 0.219
133   2026-02-01                                 0.219

[134 rows x 2 columns]
