In [1]:
import pandas as pd
from tqdm.asyncio import tqdm
import requests
from pydantic import BaseModel
from time import time
from datetime import datetime
import asyncio
import httpx
import os
from typing import List, Optional, Dict
from pydantic import BaseModel
from datetime import datetime
import numpy as np
import logging

# Root logger: INFO and above, timestamped "<time> [<LEVEL>] <message>" lines.
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(message)s",
    level=logging.INFO,
)
# Raise the threshold of the "httpx" logger so only warnings and errors show.
logging.getLogger(name="httpx").setLevel(logging.WARNING)

In [2]:
class LoginInfos(BaseModel):
    """Token bundle built from the JSON returned by ``/ws/Login`` and ``/ws/RefreshToken``.

    ``IniesClient.login`` and ``IniesClient.refresh_token`` both construct this
    model directly from the response body, so all three keys must be present.
    """
    # Sent as the ``Bearer`` value of the ``authorization`` header.
    access_token: str
    # Posted back (with the access token) to ``/ws/RefreshToken``.
    refresh_token: str
    # Parsed from the response but not read anywhere in this notebook.
    token_type: str


class FuConstituantProduct(BaseModel):
    """One entry of ``Epd.fuConstituantProducts``.

    ``name``, ``quantity`` and ``unit`` are later joined into a display string
    when the export DataFrame is built.
    """
    name: str
    quantity: float
    unit: str
    unitId: int
    # Explicit default: under pydantic v2 an ``Optional[...]`` annotation
    # without a default is still a *required* key.
    constituantType: Optional[int] = None


class HealthData(BaseModel):
    """Health-related block nested in ``EpdFull.healthData``.

    Every field is nullable and defaults to ``None`` so that a payload which
    omits a key still validates (pydantic v2 treats ``Optional[...]`` without
    a default as required).
    """
    airRating: Optional[str] = None
    eCovFormaldehyde: Optional[str] = None
    eRadioactive: Optional[str] = None
    otherHealthInfo: Optional[str] = None
    isContactDrinkingWater: Optional[bool] = None
    isContactNotDrinkingWater: Optional[bool] = None
    healthNumber: Optional[str] = None
    infosDrinkingWater: Optional[str] = None
    infosNotDrinkingWater: Optional[str] = None


class ComfortData(BaseModel):
    """Comfort-related block nested in ``EpdFull.comfortData``.

    Every field is nullable and defaults to ``None`` so that a payload which
    omits a key still validates (pydantic v2 treats ``Optional[...]`` without
    a default as required).
    """
    comfortHygrothermal: Optional[str] = None
    comfortAcoustic: Optional[str] = None
    comfortVisual: Optional[str] = None
    comfortOlfactory: Optional[str] = None
    otherComfortInfo: Optional[str] = None


class ResponsibleOrganism(BaseModel):
    """Organisation block nested in ``Epd.responsibleOrganism``.

    Only ``name`` is used downstream (it replaces the nested dict in the
    export DataFrame). All fields default to ``None`` so a payload that omits
    a key still validates under pydantic v2.
    """
    name: Optional[str] = None
    acronym: Optional[str] = None
    country: Optional[str] = None
    address: Optional[str] = None
    website: Optional[str] = None


class ResponsibleContact(BaseModel):
    """Contact-person block nested in ``EpdFull.responsibleContact``.

    All fields default to ``None`` so a payload that omits a key still
    validates (pydantic v2 treats ``Optional[...]`` without a default as
    required).
    """
    lastName: Optional[str] = None
    firstName: Optional[str] = None
    phone: Optional[str] = None
    fax: Optional[str] = None
    email: Optional[str] = None


class IndicatorQuantity(BaseModel):
    """A single quantity identified by an indicator id and a phase id.

    ``indicatorName``, ``indicatorUnit`` and ``phaseName`` start as ``None``
    and are filled in afterwards by the ``populate_*`` methods from reference
    lists of dicts.
    """
    indicatorId: int
    indicatorName: Optional[str] = None
    indicatorUnit: Optional[str] = None
    phaseId: int
    phaseName: Optional[str] = None
    quantity: float

    def populate_indicator_fields(self, indicators: List[dict]):
        """Copy ``nameFr``/``unitName`` from the first indicator whose ``id`` matches.

        Leaves the fields untouched when no entry of ``indicators`` matches
        ``self.indicatorId``.
        """
        found = next(
            (entry for entry in indicators if entry["id"] == self.indicatorId),
            None,
        )
        if found is not None:
            self.indicatorName = found["nameFr"]
            self.indicatorUnit = found["unitName"]

    def populate_phase_name(self, phases: List[dict]):
        """Copy ``nameFr`` from the first phase whose ``id`` matches ``self.phaseId``.

        Leaves ``phaseName`` untouched when no entry of ``phases`` matches.
        """
        found = next(
            (entry for entry in phases if entry["id"] == self.phaseId),
            None,
        )
        if found is not None:
            self.phaseName = found["nameFr"]


class IndicatorSet(BaseModel):
    """Group of indicator quantities identified by ``id``.

    ``name`` is resolved after validation through :meth:`populate_name`.
    """
    id: int
    name: Optional[str] = None
    indicatorQuantities: List[IndicatorQuantity]

    def populate_name(self, mapping: dict):
        """Set ``name`` to ``mapping[self.id]``, or ``"Autre norme"`` when the id is absent."""
        if self.id in mapping:
            self.name = mapping[self.id]
        else:
            self.name = "Autre norme"

    def populate_indicators(self, indicators: List[dict], phases: List[dict]):
        """Resolve indicator and phase labels on every contained quantity, in order."""
        for quantity in self.indicatorQuantities:
            quantity.populate_indicator_fields(indicators)
            quantity.populate_phase_name(phases)


class EpdShort(BaseModel):
    """Summary record for one item of the ``/ws/Epd`` list response.

    Only ``id`` is used downstream, to fetch the detailed record. Nullable
    fields default to ``None`` so an item that omits one of them still
    validates (pydantic v2 treats ``Optional[...]`` without a default as
    required, which would otherwise fail the whole listing).
    """
    id: int
    serialIdentifier: str
    name: Optional[str] = None
    classificationIds: List[int]
    lastUpdate: Optional[datetime] = None
    isArchived: Optional[bool] = None


class Epd(BaseModel):
    """Detailed record parsed from the ``/ws/Epd/{id}`` response.

    Nullable fields default to ``None``: under pydantic v2 an
    ``Optional[...]`` annotation without a default is still a required key, so
    a payload omitting any of them would fail validation and the record would
    be dropped by the caller's error handling. Non-optional fields stay
    required.
    """
    id: int
    serialIdentifier: str
    name: str
    version: Optional[str] = None
    issueDate: Optional[datetime] = None
    declarationType: Optional[int] = None
    declarationTypeName: Optional[str] = None
    responsibleOrganism: ResponsibleOrganism
    commercialReferences: Optional[str] = None
    dvt: Optional[int] = None
    ufQuantity: Optional[float] = None
    ufUnit: Optional[str] = None
    ufDescription: Optional[str] = None
    carbonBiogenicStorage: Optional[float] = None
    packagingCarbonBiogenicStorage: Optional[float] = None
    distanceTransportA4Km: Optional[float] = None
    productionPlace: Optional[str] = None
    productionRegionFr: List[str]
    fuConstituantProducts: List[FuConstituantProduct]
    indicatorSet: IndicatorSet


class EpdFull(Epd):
    """``Epd`` extended with additional administrative, health and comfort fields.

    Not instantiated by the code visible in this notebook. Nullable fields
    default to ``None`` so that a payload omitting a key still validates
    (pydantic v2 treats ``Optional[...]`` without a default as required).
    """
    statut: Optional[int] = None
    statutName: Optional[str] = None
    onlineDate: Optional[datetime] = None
    lastUpdateDate: Optional[datetime] = None
    expirationDate: Optional[datetime] = None
    isPep: Optional[bool] = None
    classificationId: Optional[int] = None
    classificationId2: Optional[int] = None
    classificationId3: Optional[int] = None
    isVerified: Optional[bool] = None
    verificationDate: Optional[datetime] = None
    commercialBrands: Optional[str] = None
    commercialReferencesNumber: Optional[int] = None
    usageAbility: Optional[str] = None
    ufUnitId: Optional[int] = None
    implementationFallRate: Optional[float] = None
    maintenanceFrequency: Optional[float] = None
    contentDeclaration: Optional[str] = None
    characteristicsNotInUf: Optional[str] = None
    healthData: HealthData
    comfortData: ComfortData
    responsibleContact: ResponsibleContact
    isBtoB: Optional[bool] = None
    performanceUf: Optional[str] = None
    performanceUfQuantity: Optional[float] = None
    performanceUfUnit: Optional[str] = None
    performanceUfUnitId: Optional[int] = None
    distanceTransportC2DechetsRecyclesKm: Optional[float] = None
    distanceTransportC2DechetsValorisesKm: Optional[float] = None
    distanceTransportC2DechetsEliminesKm: Optional[float] = None
    registrationDate: Optional[datetime] = None

In [8]:
class IniesClient:
    """Client for the INIES web service at ``https://base-inies.fr/ws``.

    Construction is synchronous and performs blocking HTTP calls through
    ``requests``: it logs in (unless ``login_infos`` is supplied) and preloads
    the normes together with their indicators and phases. EPD retrieval is
    asynchronous and shares one ``httpx.AsyncClient``; call :meth:`close`
    when done.
    """

    # Timeout (seconds) applied to every HTTP call, sync and async.
    REQUEST_TIMEOUT = 60.0
    # Age (seconds) after which the access token is refreshed before use.
    TOKEN_MAX_AGE_SECONDS = 20 * 60
    # HTTP status codes treated as transient by ``async_func_with_retries``.
    RETRYABLE_STATUS_CODES = (502, 503, 504)

    def __init__(self, login_infos: LoginInfos = None, max_concurrent_tasks: int = 20):
        """Log in if needed and preload reference data.

        Args:
            login_infos: Existing tokens to reuse; when falsy, :meth:`login`
                is called.
            max_concurrent_tasks: Maximum number of calls that
                :meth:`async_func_with_retries` runs at the same time.
        """
        self.login_infos = login_infos if login_infos else self.login()
        self.login_infos_last_update = time()
        self.normes = self.get_normes()
        self.indicators, self.phases = self.get_all_indicators_and_phases()
        self.semaphore = asyncio.Semaphore(max_concurrent_tasks)
        # Serialises token refreshes so concurrent tasks do not all refresh at once.
        self._refresh_lock = asyncio.Lock()
        self.client = httpx.AsyncClient(timeout=self.REQUEST_TIMEOUT)

    def login(self) -> LoginInfos:
        """Authenticate with the ``API_LOGIN`` / ``API_KEY`` environment variables.

        Returns:
            The tokens parsed from the login response.

        Raises:
            requests.HTTPError: If the service answers with an error status.
            requests.Timeout: If no answer arrives within ``REQUEST_TIMEOUT``.
        """
        url = "https://base-inies.fr/ws/Login"
        payload = {"email": os.getenv("API_LOGIN"), "apiKey": os.getenv("API_KEY")}
        headers = {"content-type": "application/json"}
        # The payload holds credentials and is deliberately not logged.
        logging.info(f"Logging in to {url}.")

        response = requests.post(
            url, json=payload, headers=headers, timeout=self.REQUEST_TIMEOUT
        )
        response.raise_for_status()

        data = response.json()
        logging.info("Login successful.")
        return LoginInfos(**data)

    def get_normes(self) -> dict:
        """Fetch ``/ws/Norme`` and return a ``{norme id: norme name}`` mapping."""
        url = "https://base-inies.fr/ws/Norme"
        headers = {"authorization": f"Bearer {self.login_infos.access_token}"}
        logging.info("Fetching normes...")

        response = requests.get(url, headers=headers, timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()

        normes = {item["id"]: item["name"] for item in response.json()}
        logging.info("...Fetched normes successfully")

        return normes

    def get_all_indicators_and_phases(self):
        """Fetch indicators and phases for every known norme.

        Returns:
            A pair ``(indicators, phases)`` of dicts keyed by norme id, each
            value being the list returned for that norme.
        """
        indicators: Dict[int, list] = {}
        phases: Dict[int, list] = {}
        if not self.normes:
            self.normes = self.get_normes()
        logging.info("Fetching indicators and phases...")
        for norme_id in self.normes:
            indicators[norme_id], phases[norme_id] = (
                self.get_indicators_and_phases_for_norme(norme_id)
            )
        logging.info("...Fetched all indicators and phases.")
        return indicators, phases

    def get_indicators_and_phases_for_norme(self, norme_id: int):
        """Fetch ``/ws/Norme/{norme_id}`` and return its ``indicators`` and ``phases`` entries."""
        url = f"https://base-inies.fr/ws/Norme/{norme_id}"
        headers = {"authorization": f"Bearer {self.login_infos.access_token}"}

        response = requests.get(url, headers=headers, timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()
        body = response.json()

        return body["indicators"], body["phases"]

    async def refresh_token(self):
        """Exchange the current tokens for new ones and reset the token age.

        Raises:
            httpx.HTTPStatusError: If the service answers with an error status.
        """
        url = "https://base-inies.fr/ws/RefreshToken"
        headers = {"content-type": "application/json"}
        payload = {
            "accessToken": self.login_infos.access_token,
            "refreshToken": self.login_infos.refresh_token,
        }
        logging.info("...Refreshing token...")

        response = await self.client.post(url, json=payload, headers=headers)

        response.raise_for_status()
        self.login_infos = LoginInfos(**response.json())
        self.login_infos_last_update = time()
        logging.info("...Token refreshed successfully.")

    def _token_is_stale(self) -> bool:
        """Return True when tokens are missing or older than ``TOKEN_MAX_AGE_SECONDS``."""
        return (
            not self.login_infos
            or time() - self.login_infos_last_update > self.TOKEN_MAX_AGE_SECONDS
        )

    async def get_auth_headers(self):
        """Return the ``authorization`` header, refreshing the token first if stale.

        Many tasks call this concurrently. The staleness check is repeated
        under a lock so that only the first task refreshes and the others
        reuse the new token instead of each posting their own refresh.
        """
        if self._token_is_stale():
            async with self._refresh_lock:
                if self._token_is_stale():
                    logging.info("Token expired or missing...")
                    await self.refresh_token()
        return {"authorization": f"Bearer {self.login_infos.access_token}"}

    async def get_all_epds(self, since_date: datetime = None) -> List[Epd]:
        """Fetch the detailed record of every listed EPD.

        Args:
            since_date: Forwarded to :meth:`get_all_epds_short`.

        Returns:
            The EPDs that could be fetched, in completion order. EPDs whose
            fetch ultimately fails are logged and left out.
        """
        all_epds_short = await self.get_all_epds_short(since_date)
        logging.info(f"Number of EPDs to retrieve: {len(all_epds_short)}")

        all_epds = []
        tasks = [
            self.async_func_with_retries(
                async_func=self.get_epd, retries=3, epd_id=epd.id
            )
            for epd in all_epds_short
        ]
        for result in tqdm.as_completed(
            tasks,
            desc="Processing EPDs",
            unit="epd",
            total=len(all_epds_short),
        ):
            try:
                all_epds.append(await result)
            except Exception as e:
                # Best effort: one failing EPD must not abort the whole export.
                logging.error(f"Error fetching EPD: {e}")

        return all_epds

    async def get_all_epds_short(self, since_date: datetime = None) -> List[EpdShort]:
        """List non-archived EPDs from ``/ws/Epd``.

        Args:
            since_date: When given, sent as the ``referenceDateTime`` query
                parameter formatted ``YYYY-MM-DD``.
        """
        url = "https://base-inies.fr/ws/Epd"
        headers = await self.get_auth_headers()
        params = {"includeArchived": "false"}
        if since_date:
            params["referenceDateTime"] = since_date.strftime(r"%Y-%m-%d")

        logging.info("Fetching all EPDs (short).")

        response = await self.client.get(url, headers=headers, params=params)
        response.raise_for_status()

        return [EpdShort(**epd) for epd in response.json()]

    async def get_epd(self, epd_id: int) -> Epd:
        """Fetch one EPD and resolve its norme, indicator and phase labels.

        Raises:
            httpx.HTTPStatusError: On an error status (logged, then re-raised).
            httpx.RequestError: On a transport failure (logged, then re-raised).
        """
        url = f"https://base-inies.fr/ws/Epd/{epd_id}"
        headers = await self.get_auth_headers()

        try:
            # The request itself sits inside the ``try``: transport errors are
            # raised by ``get``, not by ``raise_for_status``.
            response = await self.client.get(url, headers=headers)
            response.raise_for_status()
        except httpx.HTTPStatusError as e:
            logging.error(f"HTTP error: {e.response.status_code} - {e.response.text}")
            raise
        except httpx.RequestError as e:
            logging.error(f"Request error: {url} - {str(e)}")
            raise

        epd = Epd(**response.json())
        # Populate indicators, phases. An unknown norme id falls back to empty
        # reference lists, mirroring the "Autre norme" fallback of populate_name.
        norme_id = epd.indicatorSet.id
        epd.indicatorSet.populate_name(self.normes)
        epd.indicatorSet.populate_indicators(
            self.indicators.get(norme_id, []),
            self.phases.get(norme_id, []),
        )
        return epd

    async def async_func_with_retries(self, async_func, retries: int, **kwargs):
        """Call ``async_func(**kwargs)`` under the semaphore, retrying with back-off.

        HTTP errors are retried only for ``RETRYABLE_STATUS_CODES``; any other
        exception is retried until attempts run out. The delay before the next
        attempt is ``2 ** attempt`` seconds and is slept *after* the semaphore
        slot is released, so a backing-off task does not block other requests.

        Args:
            async_func: Coroutine function to call.
            retries: Maximum number of attempts.
            **kwargs: Forwarded to ``async_func``.

        Returns:
            Whatever ``async_func`` returns.

        Raises:
            The last exception raised by ``async_func`` when it cannot be retried.
        """
        for attempt in range(retries):
            is_last_attempt = attempt >= retries - 1
            async with self.semaphore:
                try:
                    return await async_func(**kwargs)
                except httpx.HTTPStatusError as e:
                    status = e.response.status_code
                    # If it's a 502 or similar transient error, we retry
                    if is_last_attempt or status not in self.RETRYABLE_STATUS_CODES:
                        logging.error(f"HTTP error cannot be resolved or no more retries: {e}. Args: {kwargs}")
                        raise
                    delay = 2 ** attempt
                    logging.warning(f"HTTP {status} error. Retrying attempt {attempt+2}/{retries} after {delay}s. Args: {kwargs}")
                except Exception as e:
                    if is_last_attempt:
                        logging.error(f"Final attempt failed. Args: {kwargs}, Error: {e}")
                        raise
                    delay = 2 ** attempt
                    logging.warning(f"Error {e}. Retrying attempt {attempt+2}/{retries} after {delay}s. Args: {kwargs}")
            # Reached only after a retryable failure; the semaphore is released here.
            await asyncio.sleep(delay)

    async def close(self):
        """Close the shared ``httpx.AsyncClient``."""
        await self.client.aclose()

In [9]:
# Constructing the client logs in and preloads normes, indicators and phases
# through blocking HTTP calls.
client = IniesClient()
try:
    # Top-level ``await``: relies on the event loop provided by the notebook kernel.
    all_epds = await client.get_all_epds()
finally:
    # Always release the shared httpx.AsyncClient, even if the fetch fails.
    await client.close()

2024-12-19 18:31:35,381 [INFO] Logging in to https://base-inies.fr/ws/Login.
2024-12-19 18:31:35,511 [INFO] Login successful.
2024-12-19 18:31:35,511 [INFO] Fetching normes...
2024-12-19 18:31:35,618 [INFO] ...Fetched normes successfully
2024-12-19 18:31:35,619 [INFO] Fetching indicators and phases...
2024-12-19 18:31:36,448 [INFO] ...Fetched all indicators and phases.
2024-12-19 18:31:36,466 [INFO] Fetching all EPDs (short).
2024-12-19 18:31:37,277 [INFO] Number of EPDs to retrieve: 7666
Processing EPDs: 100%|██████████| 7666/7666 [19:11<00:00,  6.66epd/s]


In [10]:
# Build one row per EPD from the validated models.
df = pd.DataFrame(epd.model_dump() for epd in all_epds)
df["responsibleOrganism"] = df["responsibleOrganism"].apply(lambda x: x["name"])
df["norme"] = df["indicatorSet"].apply(lambda x: x["name"])

# Flatten indicatorSet.indicatorQuantities into one row per (EPD, quantity).
normalized_indicator_set = pd.DataFrame(
    pd.json_normalize(df.indicatorSet)["indicatorQuantities"]
)
normalized_indicator_set["id"] = df["id"]
exploded = (
    normalized_indicator_set.explode("indicatorQuantities")
    # An empty quantity list explodes to NaN, which json_normalize cannot
    # parse; such EPDs simply get no phase values from the left merge below.
    .dropna(subset=["indicatorQuantities"])
    .reset_index(drop=True)
)
normalized_quantities = pd.json_normalize(exploded["indicatorQuantities"])
normalized_quantities["id"] = exploded["id"]

# Indicator ids kept for the per-phase pivot.
# NOTE(review): presumably the same indicator under each norme - confirm
# against the /ws/Norme/{id} responses.
indicator_ids = [2, 19, 45, 57]
# Filter and de-duplicate in one chain: no in-place mutation of a slice.
normalized_quantities = normalized_quantities[
    normalized_quantities.indicatorId.isin(indicator_ids)
].drop_duplicates(subset=["id", "phaseName"])

# One column per phase name, one row per EPD id.
pivoted_quantities = normalized_quantities.pivot(
    index="id", columns="phaseName", values="quantity"
).reset_index()

df = df.merge(pivoted_quantities, on="id", how="left")
# Single quotes inside the f-string keep this valid before Python 3.12.
df["fuConstituantProducts"] = df["fuConstituantProducts"].apply(
    lambda constituants: " ; ".join(
        f"{constituant['name']} {constituant['quantity']} {constituant['unit']}"
        for constituant in constituants
    )
)
df = df.drop(columns="indicatorSet")
df["productionRegionFr"] = df["productionRegionFr"].apply(
    lambda regions: None if regions == [] else " ; ".join(regions)
)
# Collapse every line-break flavour into " ; " (literal, non-regex replacement).
df["commercialReferences"] = (
    df["commercialReferences"]
    .str.replace("\r\n", " ; ", regex=False)
    .str.replace("\r", " ; ", regex=False)
    .str.replace("\n", " ; ", regex=False)
)

# Phase columns read below. Create any that the pivot did not produce (no
# fetched EPD reported that phase) so the later lookups cannot raise KeyError.
required_phase_columns = [
    "Étape de production",
    "A4-Transport",
    "Étape d’utilisation",
    "Étape de fin de vie",
    "Étape du processus de construction",
    "A5-Processus de construction – installation",
]
for column in required_phase_columns:
    if column not in df.columns:
        df[column] = np.nan

# Target phase column -> alternative column used where the target is missing.
phase_fallbacks = {
    "Étape de production": "Production",
    "A4-Transport": "Transport",
    "Étape d’utilisation": "Vie en oeuvre",
    "Étape de fin de vie": "Fin de vie",
}
for target, fallback in phase_fallbacks.items():
    if fallback in df.columns:
        # Vectorised equivalent of "target if notna(target) else fallback".
        df[target] = df[target].fillna(df[fallback])

# "A" is the larger of (production + construction-process stage) and
# (production + A4 + A5); missing values count as 0.
df["A"] = np.maximum(
    df["Étape de production"].fillna(0)
    + df["Étape du processus de construction"].fillna(0),
    df["Étape de production"].fillna(0)
    + df["A4-Transport"].fillna(0)
    + df["A5-Processus de construction – installation"].fillna(0),
)

# Render issue dates as "YYYY/MM/DD" strings; missing dates are left as-is.
df["issueDate"] = df["issueDate"].apply(
    lambda x: x.strftime("%Y/%m/%d") if pd.notna(x) else x
)

In [11]:
# Maps DataFrame column names (model fields, pivoted phase names and the
# derived "norme" / "A" columns) to the headers used in the export.
# Insertion order matters: the selection below uses the dict's values, so the
# exported columns appear in exactly this order.
columns_mapping = {
    # Identification and declaration metadata
    "name": "Name_FDES",
    "serialIdentifier": "ID_FDES",
    "id": "Unique_ID_FDES_version",
    "version": "Version",
    "issueDate": "Issue_Date",
    "declarationType": "Declaration_Type",
    "declarationTypeName": "Declaration_Type_Name",
    "norme": "Norme",
    "responsibleOrganism": "Organisme_Name",
    "dvt": "DVT",
    "ufQuantity": "UF_Quantity",
    "ufUnit": "UF_Unit",
    "ufDescription": "UF_Description",
    "commercialReferences": "Commercial_References",
    # Aggregated phase values
    "Total cycle de vie": "Total",
    "A": "A",
    "Étape d’utilisation": "B",
    "Étape de fin de vie": "C",
    "D-Bénéfices et charges au-delà des frontières du système": "D",
    # Detailed phase values
    "A1-Approvisionnement en matières premières": "A1",
    "A2-Transport": "A2",
    "A3-Fabrication": "A3",
    "Étape de production": "A1-A3",
    "A4-Transport": "A4",
    "A5-Processus de construction – installation": "A5",
    "Étape du processus de construction": "A4-A5",
    "B1-Utilisation": "B1",
    "B2-Maintenance": "B2",
    "B3-Réparation": "B3",
    "B4-Remplacement": "B4",
    "B5-Réhabilitation": "B5",
    "B6-Utilisation de l’énergie durant l’étape d’utilisation": "B6",
    "B7-Utilisation de l’eau durant l’étape d’utilisation": "B7",
    "C1-Déconstruction / démolition": "C1",
    "C2-Transport": "C2",
    "C3-Traitement des déchets": "C3",
    "C4-Élimination": "C4",
    # Remaining model fields
    "carbonBiogenicStorage": "CarbonBiogenicStorage",
    "packagingCarbonBiogenicStorage": "Packaging_Carbone_Biogenic_Storage",
    "distanceTransportA4Km": "Distance_Transport_A4_Km",
    "productionPlace": "Production_Place",
    "productionRegionFr": "Production_Region_FR",
    "fuConstituantProducts": "Constituant_Products",
}

# Rename, then keep only the mapped columns in mapping order; a mapped column
# that is absent from the frame makes the selection raise KeyError.
df = df.rename(columns=columns_mapping)[columns_mapping.values()]

In [12]:
# Write the export frame to an Excel workbook in the working directory,
# without the index column.
df.to_excel(excel_writer="./export_inies.xlsx", index=False)