Paramètrage notebook

In [161]:
#importation package
import pandas as pd
import duckdb

#config affichage
pd.set_option("display.max_columns", None)  # Affiche toutes les colonnes sans limitation
pd.set_option("display.max_colwidth", None)  # Affiche la largeur complète des colonnes sans couper le texte
pd.set_option("display.expand_frame_repr", False)  # Empêche le repliement des colonnes lors de l'affichage du DataFrame

# Connexion
from pipelines.tasks.config.common import DUCKDB_FILE
con = duckdb.connect(database=DUCKDB_FILE, read_only=True)

In [162]:
#affichage tables & vues
con.sql('show tables')

┌──────────────────────────────────┐
│               name               │
│             varchar              │
├──────────────────────────────────┤
│ ana__resultats_communes          │
│ cog_communes                     │
│ edc_communes                     │
│ edc_prelevements                 │
│ edc_resultats                    │
│ int__lien_cdreseau_refreneceprel │
│ int__lien_commune_cdreseau       │
│ int__mapping_category_simple     │
│ int__prelevements_uniques        │
│ int__resultats_udi_communes      │
│ laposte_communes                 │
│ mapping_categories               │
│ stg_communes__cog                │
│ stg_communes__laposte            │
│ stg_edc__communes                │
│ stg_edc__prevelevements          │
│ stg_edc__resultats               │
├──────────────────────────────────┤
│             17 rows              │
└──────────────────────────────────┘

Objectif : Création d'un modèle dbt pour le résultat des nitrites  
Tâches : création d'un fichier qui remprend le resultat du dernier prélèvement contenant les colonnes 
- cdreseau
- période
- catégorie
- résultat
- date

Catégorie Nitrates
3 paramètres à regarder:  
nitrates (en no3)  
nitrites (en no2)  
nitrates/50 + nitrites/3  

2 situations:  
Nitrates < 50 mg/L et nitrites < 0,5 mg/L et nitrate/50 + nitrites/3 < 1 mg/L (eau conforme)  
Nitrates > 50 mg/L et/ou nitrites > 0,5 mg/L et/ou nitrate/50 + nitrites/3 > 1 mg/L (eau non conforme) (fait passer l’affichage total polluant en rouge)


Filtre de la table sur les catégories nitrites

In [163]:
#paramètres à regarder NO2, NO3
query_nitrates = """ 
SELECT *, ROW_NUMBER() OVER(PARTITION BY cdreseau, cdparametresiseeaux  ORDER BY datetimeprel DESC) AS row_number
FROM int__resultats_udi_communes
WHERE cdparametresiseeaux IN ('NO2','NO3')
AND CURRENT_DATE - datetimeprel < INTERVAL 1 YEAR
ORDER BY cdreseau, cdparametresiseeaux
  """

nitrates = con.sql(query_nitrates).df()
nitrates

Unnamed: 0,referenceprel,cdparametresiseeaux,valtraduite,limitequal,de_partition,limitequal_float,unite,categorie,cdreseau,inseecommune,datetimeprel,row_number
0,00100143925,NO2,0.00,"<=0,5 mg/L",2025,0.5,mg/L,nitrite,001000003,01007,2025-01-21 12:35:00,1
1,00100143925,NO3,14.00,<=50 mg/L,2025,50.0,mg/L,nitrite,001000003,01007,2025-01-21 12:35:00,1
2,00100143918,NO2,0.00,"<=0,5 mg/L",2025,0.5,mg/L,nitrite,001000241,01437,2025-01-17 11:10:00,1
3,00100143918,NO2,0.00,"<=0,5 mg/L",2025,0.5,mg/L,nitrite,001000241,01094,2025-01-17 11:10:00,2
4,00100143918,NO2,0.00,"<=0,5 mg/L",2025,0.5,mg/L,nitrite,001000241,01231,2025-01-17 11:10:00,3
...,...,...,...,...,...,...,...,...,...,...,...,...
73513,97400140947,NO3,0.71,<=50 mg/L,2025,50.0,mg/L,nitrite,974004294,97411,2025-01-09 08:35:00,1
73514,97400140966,NO2,0.00,"<=0,1 mg/L",2025,0.1,mg/L,nitrite,974004295,97418,2025-01-20 07:47:00,1
73515,97400140966,NO3,3.40,<=50 mg/L,2025,50.0,mg/L,nitrite,974004295,97418,2025-01-20 07:47:00,1
73516,97400140948,NO2,0.00,"<=0,1 mg/L",2025,0.1,mg/L,nitrite,974004298,97420,2025-01-13 09:00:00,1


Récupération des derniers résultats

In [164]:
#dernier prélèvement
query_dernier_prel = """ 
SELECT *
FROM nitrates
WHERE row_number = 1
 """
dernier_prel = con.sql(query_dernier_prel).df()
dernier_prel


Unnamed: 0,referenceprel,cdparametresiseeaux,valtraduite,limitequal,de_partition,limitequal_float,unite,categorie,cdreseau,inseecommune,datetimeprel,row_number
0,00100143925,NO2,0.00,"<=0,5 mg/L",2025,0.5,mg/L,nitrite,001000003,01007,2025-01-21 12:35:00,1
1,00100143925,NO3,14.00,<=50 mg/L,2025,50.0,mg/L,nitrite,001000003,01007,2025-01-21 12:35:00,1
2,00100143918,NO2,0.00,"<=0,5 mg/L",2025,0.5,mg/L,nitrite,001000241,01437,2025-01-17 11:10:00,1
3,00100143918,NO3,3.30,<=50 mg/L,2025,50.0,mg/L,nitrite,001000241,01102,2025-01-17 11:10:00,1
4,00100143923,NO2,0.00,"<=0,5 mg/L",2025,0.5,mg/L,nitrite,001000248,01004,2025-01-21 12:10:00,1
...,...,...,...,...,...,...,...,...,...,...,...,...
11823,97400140947,NO3,0.71,<=50 mg/L,2025,50.0,mg/L,nitrite,974004294,97411,2025-01-09 08:35:00,1
11824,97400140966,NO2,0.00,"<=0,1 mg/L",2025,0.1,mg/L,nitrite,974004295,97418,2025-01-20 07:47:00,1
11825,97400140966,NO3,3.40,<=50 mg/L,2025,50.0,mg/L,nitrite,974004295,97418,2025-01-20 07:47:00,1
11826,97400140948,NO2,0.00,"<=0,1 mg/L",2025,0.1,mg/L,nitrite,974004298,97420,2025-01-13 09:00:00,1


Aggrégation des résultats  
2 situations:  
Nitrates < 50 mg/L et nitrites < 0,5 mg/L et nitrate/50 + nitrites/3 < 1 mg/L (eau conforme)  
Nitrates > 50 mg/L et/ou nitrites > 0,5 mg/L et/ou nitrate/50 + nitrites/3 > 1 mg/L (eau non conforme) (fait passer l’affichage total polluant en rouge)

In [165]:
#définition des résultats
query_resultats_nitrites = """ 

SELECT referenceprel, cdreseau, MAX(datetimeprel) AS datetimeprel,
SUM(CASE
    WHEN cdparametresiseeaux = 'NO2' THEN valtraduite
    ELSE 0
END) AS valtraduite_2,

SUM(CASE
    WHEN cdparametresiseeaux = 'NO3' THEN valtraduite
    ELSE 0
END) AS valtraduite_3,

ROUND((valtraduite_2/50 + valtraduite_3/3),2) AS valtraduite_NO2_NO3

FROM dernier_prel
GROUP BY referenceprel, cdreseau
 """
resultats_nitrites = con.sql(query_resultats_nitrites).df()
resultats_nitrites


Unnamed: 0,referenceprel,cdreseau,datetimeprel,valtraduite_2,valtraduite_3,valtraduite_NO2_NO3
0,00100143932,001000290,2025-01-10 10:38:00,0.0,5.90,1.97
1,00100143879,001000305,2025-01-17 09:58:00,0.0,0.00,0.00
2,00100143995,001000358,2025-01-13 10:42:00,0.0,2.40,0.80
3,00100143927,001000363,2025-01-22 12:56:00,0.0,4.00,1.33
4,00100143986,001000411,2025-01-16 09:17:00,0.0,4.70,1.57
...,...,...,...,...,...,...
7585,97400141089,974003621,2025-01-07 08:00:00,0.0,0.81,0.27
7586,97400140890,974003649,2025-01-06 07:45:00,0.0,6.50,2.17
7587,97400141305,974003651,2025-01-30 09:25:00,0.0,9.50,3.17
7588,97400141231,974003834,2025-01-23 09:24:00,0.0,0.50,0.17


Création de la table avec le résultat final 

2 situations:  
Nitrates < 50 mg/L et nitrites < 0,5 mg/L et nitrate/50 + nitrites/3 < 1 mg/L (eau conforme)  
Nitrates > 50 mg/L et/ou nitrites > 0,5 mg/L et/ou nitrate/50 + nitrites/3 > 1 mg/L (eau non conforme) (fait passer l’affichage total polluant en rouge)  

La table d'origine présente des 'valtraduite' Null => traduit par 'aucun résultat' dans la table finale

In [166]:

query_resultat_nitrite_dernier = """

SELECT cdreseau, referenceprel, 'dernier relevé' AS periode, 'nitrites' AS categorie, 
CASE 
    WHEN valtraduite_2 < 50 AND valtraduite_3 < 0.5 AND valtraduite_NO2_NO3 < 1
    THEN 'eau conforme'
    WHEN valtraduite_2 >= 50 OR valtraduite_3 >= 0.5 OR valtraduite_NO2_NO3 >= 1
    THEN 'eau non conforme'
    ELSE 'aucun résultat'
END AS resultat ,
datetimeprel
FROM resultats_nitrites
ORDER BY datetimeprel
 """
resultat_nitrite_dernier = con.sql(query_resultat_nitrite_dernier).df()
resultat_nitrite_dernier


Unnamed: 0,cdreseau,referenceprel,periode,categorie,resultat,datetimeprel
0,086000622,08600133560,dernier relevé,nitrites,eau non conforme,2025-01-02 08:47:00
1,026000675,02600171794,dernier relevé,nitrites,eau non conforme,2025-01-02 08:51:00
2,028001276,02800125276,dernier relevé,nitrites,eau non conforme,2025-01-02 09:00:00
3,086000333,08600133561,dernier relevé,nitrites,eau non conforme,2025-01-02 09:03:00
4,083001260,08300290363,dernier relevé,nitrites,eau non conforme,2025-01-02 09:04:00
...,...,...,...,...,...,...
7585,034001311,03400327052,dernier relevé,nitrites,aucun résultat,2025-01-31 14:36:00
7586,062000735,06200288259,dernier relevé,nitrites,eau non conforme,2025-01-31 15:34:00
7587,062004264,06200288259,dernier relevé,nitrites,eau non conforme,2025-01-31 15:34:00
7588,072000572,07200139826,dernier relevé,nitrites,eau non conforme,2025-01-31 15:37:00


In [167]:
#liste des valtraduite NULL dans la table d'origine
con.sql('SELECT * FROM dernier_prel WHERE valtraduite IS NULL' ).df()

Unnamed: 0,referenceprel,cdparametresiseeaux,valtraduite,limitequal,de_partition,limitequal_float,unite,categorie,cdreseau,inseecommune,datetimeprel,row_number
0,03400327115,NO3,,<=50 mg/L,2025,50.0,mg/L,nitrite,034000005,34041,2025-01-10 11:32:00,1
1,03400327103,NO3,,<=50 mg/L,2025,50.0,mg/L,nitrite,034000006,34056,2025-01-07 10:44:00,1
2,03400327108,NO3,,<=50 mg/L,2025,50.0,mg/L,nitrite,034000009,34182,2025-01-10 09:58:00,1
3,03400327090,NO3,,<=50 mg/L,2025,50.0,mg/L,nitrite,034000012,34042,2025-01-27 11:19:00,1
4,03400327116,NO3,,<=50 mg/L,2025,50.0,mg/L,nitrite,034000021,34051,2025-01-28 10:01:00,1
...,...,...,...,...,...,...,...,...,...,...,...,...
173,03400327135,NO3,,<=50 mg/L,2025,50.0,mg/L,nitrite,034007938,34148,2025-01-07 10:55:00,1
174,03400327259,NO3,,<=50 mg/L,2025,50.0,mg/L,nitrite,034008030,34256,2025-01-28 12:36:00,1
175,03400327145,NO3,,<=50 mg/L,2025,50.0,mg/L,nitrite,034008215,34191,2025-01-07 13:24:00,1
176,03400327269,NO3,,<=50 mg/L,2025,50.0,mg/L,nitrite,034008225,34179,2025-01-28 13:50:00,1
