In [92]:
import pandas as pd
from tqdm.asyncio import tqdm
import requests
from pydantic import BaseModel
from time import time
from datetime import datetime
import asyncio
import httpx
import os
from typing import List, Optional
from pydantic import BaseModel
from datetime import datetime

In [93]:
class LoginInfos(BaseModel):
    """Authentication tokens used by ``IniesClient``.

    Instances are built from the JSON body returned by the ``/ws/Login`` and
    ``/ws/RefreshToken`` endpoints.
    """

    # Sent as ``Bearer <access_token>`` in the ``authorization`` header.
    access_token: str
    # Sent together with the access token when requesting a new token pair.
    refresh_token: str
    token_type: str


class EpdShort(BaseModel):
    """Summary entry for one EPD, as listed by ``GET /ws/Epd``.

    ``IniesClient.get_all_epds_short`` builds one instance per element of the
    JSON array returned by that endpoint.
    """

    # Identifier used to request the full record (``/ws/Epd/{id}``).
    id: int
    serialIdentifier: str
    name: str
    classificationIds: List[int]
    lastUpdate: datetime
    isArchived: bool


class FuConstituantProduct(BaseModel):
    """One element of ``EpdModel.fuConstituantProducts``."""

    name: str
    quantity: float
    # Unit given both as text (``unit``) and as a numeric id (``unitId``).
    unit: str
    unitId: int
    # Numeric code; the meaning of each value is not defined in this file.
    constituantType: int


class HealthData(BaseModel):
    """Health-related section of a full EPD record (``EpdModel.healthData``).

    ``Optional[...]`` fields accept ``None``.
    """

    # NOTE(review): under pydantic v2 an ``Optional`` field without a default
    # must still be present in the payload -- confirm against the installed
    # pydantic version and the actual API responses.
    airRating: str
    eCovFormaldehyde: Optional[str]
    eRadioactive: Optional[str]
    otherHealthInfo: Optional[str]
    isContactDrinkingWater: bool
    isContactNotDrinkingWater: bool
    healthNumber: Optional[str]
    infosDrinkingWater: Optional[str]
    infosNotDrinkingWater: Optional[str]


class ComfortData(BaseModel):
    """Comfort-related section of a full EPD record (``EpdModel.comfortData``).

    Every field is free text and accepts ``None``.
    """

    # NOTE(review): under pydantic v2 these keys must still be present in the
    # payload even though they may be null -- confirm the pydantic version.
    comfortHygrothermal: Optional[str]
    comfortAcoustic: Optional[str]
    comfortVisual: Optional[str]
    comfortOlfactory: Optional[str]
    otherComfortInfo: Optional[str]


class ResponsibleOrganism(BaseModel):
    """Organisation attached to a full EPD record.

    Used as the type of ``EpdModel.responsibleOrganism``.
    """

    name: str
    acronym: str
    country: str
    address: str
    # The only field of this model that accepts ``None``.
    website: Optional[str]


class ResponsibleContact(BaseModel):
    """Contact person attached to a full EPD record.

    Used as the type of ``EpdModel.responsibleContact``; every field accepts
    ``None``.
    """

    # NOTE(review): under pydantic v2 these keys must still be present in the
    # payload even though they may be null -- confirm the pydantic version.
    lastName: Optional[str]
    firstName: Optional[str]
    phone: Optional[str]
    fax: Optional[str]
    email: Optional[str]


class IndicatorQuantity(BaseModel):
    """One value of ``IndicatorSet.indicatorQuantities``.

    Carries a quantity together with the ids of the indicator and phase it
    belongs to; the id-to-label mappings are not defined in this file.
    """

    indicatorId: int
    phaseId: int
    quantity: float


class IndicatorSet(BaseModel):
    """Collection of indicator values of an EPD (``EpdModel.indicatorSet``)."""

    id: int
    # One entry per (indicatorId, phaseId) value reported in the payload.
    indicatorQuantities: List[IndicatorQuantity]


class EpdModel(BaseModel):
    """Full EPD record, as returned by ``GET /ws/Epd/{id}``.

    ``IniesClient.get_epd_full`` builds an instance directly from the JSON
    body of that endpoint, so field names mirror the payload keys.
    ``Optional[...]`` fields accept ``None``.
    """

    # NOTE(review): under pydantic v2 an ``Optional`` field without a default
    # must still be present in the payload -- confirm against the installed
    # pydantic version and the actual API responses.

    # --- Identification and status ---
    id: int
    serialIdentifier: str
    statut: int
    statutName: str
    version: str
    onlineDate: datetime
    lastUpdateDate: datetime
    expirationDate: Optional[datetime]
    name: str
    isPep: bool
    declarationType: int
    declarationTypeName: str
    # --- Classification ids (second and third may be null) ---
    classificationId: int
    classificationId2: Optional[int]
    classificationId3: Optional[int]
    issueDate: Optional[datetime]
    isVerified: bool
    verificationDate: Optional[datetime]
    commercialReferences: Optional[str]
    commercialBrands: Optional[str]
    commercialReferencesNumber: int
    usageAbility: Optional[str]
    dvt: int
    # --- ``uf*`` fields: a quantity with its unit as text and as id ---
    ufQuantity: float
    ufUnit: str
    ufUnitId: int
    implementationFallRate: float
    maintenanceFrequency: float
    contentDeclaration: Optional[str]
    ufDescription: Optional[str]
    characteristicsNotInUf: Optional[str]
    # --- Nested sub-models ---
    fuConstituantProducts: List[FuConstituantProduct]
    healthData: HealthData
    comfortData: ComfortData
    responsibleOrganism: ResponsibleOrganism
    responsibleContact: ResponsibleContact
    indicatorSet: IndicatorSet
    productionPlace: str
    productionRegionFr: List[str]
    isBtoB: bool
    # --- ``performanceUf*`` fields: all nullable ---
    performanceUf: Optional[str]
    performanceUfQuantity: Optional[float]
    performanceUfUnit: Optional[str]
    performanceUfUnitId: Optional[int]
    # --- Transport distances (field names end in ``Km``), all nullable ---
    distanceTransportA4Km: Optional[float]
    distanceTransportC2DechetsRecyclesKm: Optional[float]
    distanceTransportC2DechetsValorisesKm: Optional[float]
    distanceTransportC2DechetsEliminesKm: Optional[float]
    registrationDate: Optional[datetime]
    carbonBiogenicStorage: float
    packagingCarbonBiogenicStorage: Optional[float]

In [96]:
class IniesClient:
    """Small client for the INIES web service (``https://base-inies.fr/ws``).

    The constructor logs in (unless tokens are supplied). The coroutines then
    renew the access token when it is older than ``TOKEN_MAX_AGE_SECONDS`` and
    download EPD summaries and full records.
    """

    # Age, in seconds, after which the access token is renewed before use.
    TOKEN_MAX_AGE_SECONDS = 20 * 60
    # Timeout, in seconds, for the blocking login request.
    LOGIN_TIMEOUT_SECONDS = 10

    def __init__(self, login_infos: Optional["LoginInfos"] = None):
        """Store ``login_infos`` or, when none is given, log in right away.

        Args:
            login_infos: Previously obtained tokens. They are assumed to be
                fresh: the token age is counted from construction time.
        """
        if not login_infos:
            self.login_infos = self.login()
        else:
            self.login_infos = login_infos
        self.login_infos_last_update = time()

    def login(self) -> "LoginInfos":
        """Request a new token pair (blocking call).

        Credentials are read from the ``API_LOGIN`` and ``API_KEY``
        environment variables.

        Raises:
            requests.HTTPError: If the service answers with an error status.
            requests.Timeout: If the service does not answer in time.
        """
        url = "https://base-inies.fr/ws/Login"
        payload = {"email": os.getenv("API_LOGIN"), "apiKey": os.getenv("API_KEY")}
        headers = {"content-type": "application/json"}

        # Without an explicit timeout ``requests`` waits indefinitely.
        response = requests.post(
            url, json=payload, headers=headers, timeout=self.LOGIN_TIMEOUT_SECONDS
        )
        response.raise_for_status()

        return LoginInfos(**response.json())

    async def refresh_token(self):
        """Exchange the current token pair for a new one and store it.

        Raises:
            httpx.HTTPStatusError: If the service answers with an error status.
        """
        url = "https://base-inies.fr/ws/RefreshToken"
        headers = {"content-type": "application/json"}
        payload = {
            "accessToken": self.login_infos.access_token,
            "refreshToken": self.login_infos.refresh_token,
        }

        async with httpx.AsyncClient() as client:
            response = await client.post(url, json=payload, headers=headers)

        response.raise_for_status()
        self.login_infos = LoginInfos(**response.json())
        self.login_infos_last_update = time()

    async def get_auth_headers(self):
        """Return the ``authorization`` header, renewing the token if needed.

        When no tokens are stored a full login is performed (refreshing would
        need the missing tokens); when the stored tokens are older than
        ``TOKEN_MAX_AGE_SECONDS`` they are refreshed.
        """
        # NOTE(review): concurrent callers that all see a stale token will each
        # trigger their own refresh; guard with a lock if the service rejects
        # reused refresh tokens.
        if not self.login_infos:
            # ``login`` is blocking, so run it off the event loop.
            loop = asyncio.get_running_loop()
            self.login_infos = await loop.run_in_executor(None, self.login)
            self.login_infos_last_update = time()
        elif time() - self.login_infos_last_update > self.TOKEN_MAX_AGE_SECONDS:
            await self.refresh_token()
        return {"authorization": f"Bearer {self.login_infos.access_token}"}

    @staticmethod
    def _count_batches(total: int, batch_size: int) -> int:
        """Return how many batches of ``batch_size`` cover ``total`` items."""
        # Ceiling division: a trailing partial batch still counts as a batch.
        return (total + batch_size - 1) // batch_size

    async def get_all_epds_full(
        self, since_date: Optional[datetime] = None
    ) -> List["EpdModel"]:
        """Download the full record of every EPD listed by the service.

        Records are fetched in batches of 50 concurrent requests, each request
        being attempted up to 3 times. An EPD that still fails is reported on
        stdout and skipped, so the result may be shorter than the listing.

        Args:
            since_date: Forwarded to ``get_all_epds_short``; ``None`` means no
                date filter.

        Returns:
            The downloaded records, in completion order within each batch.
        """
        all_epds_short = await self.get_all_epds_short(since_date)

        all_epds_full = []
        batch_size = 50
        n_batches = self._count_batches(len(all_epds_short), batch_size)

        for i in range(0, len(all_epds_short), batch_size):
            batch = all_epds_short[i : i + batch_size]
            tasks = [
                self.async_func_with_retries(
                    async_func=self.get_epd_full, retries=3, epd_id=epd.id
                )
                for epd in batch
            ]
            progress = tqdm.as_completed(
                tasks,
                desc=f"Processing batch {i // batch_size + 1} / {n_batches}",
                total=len(tasks),
            )
            for result in progress:
                try:
                    epd = await result
                    all_epds_full.append(epd)
                except Exception as e:
                    # Best effort: one failing EPD must not abort the download.
                    print(f"Error fetching EPD: {e}")

        return all_epds_full

    async def get_all_epds_short(
        self, since_date: Optional[datetime] = None
    ) -> List["EpdShort"]:
        """List the non-archived EPDs known to the service.

        Args:
            since_date: When given, sent as the ``referenceDateTime`` query
                parameter, formatted ``YYYY-MM-DD``.

        Raises:
            httpx.HTTPStatusError: If the service answers with an error status.
        """
        url = "https://base-inies.fr/ws/Epd"
        headers = await self.get_auth_headers()

        params = {"includeArchived": "false"}
        if since_date:
            params["referenceDateTime"] = since_date.strftime(r"%Y-%m-%d")

        async with httpx.AsyncClient() as client:
            response = await client.get(url, headers=headers, params=params)
        response.raise_for_status()

        return [EpdShort(**epd) for epd in response.json()]

    async def get_epd_full(self, epd_id: int) -> "EpdModel":
        """Download and parse the full record of the EPD ``epd_id``.

        HTTP and transport errors are printed, then re-raised to the caller.

        Raises:
            httpx.HTTPStatusError: If the service answers with an error status.
            httpx.RequestError: On connection problems or timeout (10 s).
        """
        url = f"https://base-inies.fr/ws/Epd/{epd_id}"
        headers = await self.get_auth_headers()

        async with httpx.AsyncClient() as client:
            try:
                response = await client.get(url, headers=headers, timeout=10)
                response.raise_for_status()
                return EpdModel(**response.json())
            except httpx.HTTPStatusError as e:
                print(f"HTTP error: {e.response.status_code} - {e.response.text}")
                raise
            except httpx.RequestError as e:
                print(f"Request error: {e.request.url} - {str(e)}")
                raise

    async def async_func_with_retries(
        self, async_func, retries: int, **kwargs
    ) -> "EpdModel":
        """Await ``async_func(**kwargs)``, retrying after a failure.

        Args:
            async_func: Coroutine function to call.
            retries: Maximum number of attempts; must be at least 1.
            **kwargs: Keyword arguments forwarded to ``async_func``.

        Returns:
            Whatever ``async_func`` returns on its first successful attempt.

        Raises:
            ValueError: If ``retries`` is lower than 1.
            Exception: The error of the last attempt when all attempts fail.
        """
        if retries < 1:
            # Otherwise the loop never runs and ``None`` is returned silently.
            raise ValueError(f"retries must be >= 1, got {retries}")

        for attempt in range(retries):
            try:
                return await async_func(**kwargs)
            except Exception as e:
                print(f"Attempt {attempt + 1} failed with args {kwargs}: {e}")
                if attempt == retries - 1:
                    raise
                # Short pause before the next attempt.
                await asyncio.sleep(1)

In [97]:
# No tokens are passed, so the constructor logs in immediately using the
# API_LOGIN / API_KEY environment variables (blocking HTTP call).
client = IniesClient()

In [None]:
# Top-level ``await`` only works in a notebook / async REPL. The date is sent
# to the listing endpoint as ``referenceDateTime`` (2024-12-05).
all_epds = await client.get_all_epds_full(datetime(2024, 12, 5))
# Number of records actually downloaded; EPDs that failed are skipped.
len(all_epds)