# Calcul du nombre de prélèvements CVM non conforme par commune et par année

L'objectif de ce notebook est de partir de la liste de communes cog_communes, et pour chaque commune et chaque année, calculer le nombre de prélèvements non conformes pour le CVM.

Il y aura plusieurs aggrégations à faire :

- commune (inseecommune) peut avoir plusieurs UDIs (cdreseau) **ET** un UID peut avoir plusieurs communes (inseecommune) 
- un prélèvement (referenceprel) peut être rattaché à plusieurs UDIs (cdreseau)
- un prélèvement (referenceprel) peut être composé de plusieurs paramètres (cdparametresiseeaux) ; mais dans le cas du CVM, il y a un seul paramètre selon la catégorisation de Pauline, donc c'est plus simple




In [1]:
%load_ext sql
%sql duckdb:///../../database/data.duckdb
%config SqlMagic.displaylimit = 10

### Les communes 

#### Name and info

In [2]:
%%sql --save int_edc__commune_udi
WITH
udi AS (
    SELECT
        inseecommune,
        cdreseau,
        de_partition,
        -- Prenons toujours le même nom de commune pour une inseecommune donnée
        MIN(nomcommune) AS nomcommune,
        -- Agréger les différentes valeurs de quartier en une liste sans doublons
        STRING_AGG(DISTINCT quartier, ', ') FILTER (WHERE quartier IS NOT NULL AND quartier != '') AS quartiers,
        -- Agréger les différentes valeurs de nomreseau en une liste sans doublons
        STRING_AGG(DISTINCT nomreseau, ', ') FILTER (WHERE nomreseau IS NOT NULL AND nomreseau != '') AS nomreseaux,
        -- Prendre la première date de début d'alimentation
        MIN(debutalim) AS debutalim
    FROM 
       edc_communes
    GROUP BY
        inseecommune,
        cdreseau,
        de_partition
),

cog AS (
    SELECT 
      DEP AS code_departement,
      REG AS code_region,
      COM AS commune_code_insee,
    FROM 
      cog_communes
)
    SELECT 
      udi.*,
      cog.code_departement,
      cog.code_region
    FROM
      udi
    LEFT JOIN 
      cog
    ON 
      udi.inseecommune = cog.commune_code_insee

inseecommune,cdreseau,de_partition,nomcommune,quartiers,nomreseaux,debutalim,code_departement,code_region
1001,1000556,2024,ABERGEMENT-CLEMENCIAT (L'),-,BDS ST DIDIER/CHALARONNE,2010-09-07,1,84
1002,1000369,2022,ABERGEMENT-DE-VAREY (L'),-,L'ABERGEMENT-DE-VAREY,2010-09-07,1,84
1004,1000248,2022,AMBERIEU-EN-BUGEY,Vareilles,AMBERIEU VAREILLES,2010-09-07,1,84
1005,1000850,2022,AMBERIEUX-EN-DOMBES,"Est, Ouest",BDS CHATANIER,2021-08-01,1,84
1006,1000235,2021,AMBLEON,-,AMBLEON,2010-09-07,1,84
1007,1000003,2020,AMBRONAY,-,AMBRONAY,2010-09-07,1,84
1008,1000254,2022,AMBUTRIX,Ambutrix centre,AMBUTRIX MAIRIE,2010-09-07,1,84
1009,1000338,2024,ANDERT-ET-CONDON,-,ANDERT-ET-CONDON-PUGIEU,2010-09-07,1,84
1010,1000260,2021,ANGLEFORT,le bourg,ANGLEFORT BOURG,2010-09-07,1,84
1011,1000870,2022,APREMONT,-,HBA LAC DE SYLANS,2010-09-07,1,84


In [3]:
%%sql --with int_edc__commune_udi
SELECT
        inseecommune,
        cdreseau,
        de_partition,
        COUNT(*)
FROM
    int_edc__commune_udi
GROUP BY 
        inseecommune,
        cdreseau,
        de_partition
HAVING 
    COUNT(*) >1

inseecommune,cdreseau,de_partition,count_star()
1426,1001171,2021,2
3168,3000372,2023,2
1130,1000550,2021,2
2054,2000349,2021,2
4120,4000422,2022,2
4120,4000423,2020,2
1453,1000455,2022,2
1185,1000460,2023,2
5132,5000735,2022,2
5001,5000717,2022,2


In [4]:
%%sql 
WITH
cog AS (
    SELECT 
      DEP AS code_departement,
      REG AS code_region,
      COM AS commune_code_insee,
    FROM 
      cog_communes
)
    
SELECT
        commune_code_insee,
        COUNT(code_departement) AS nb_code_departement,
        COUNT(code_region) AS nb_code_region,
FROM
    cog
GROUP BY 
       commune_code_insee
ORDER BY 
    2,3 DESC

commune_code_insee,nb_code_departement,nb_code_region
1039,0,0
1119,0,0
1122,0,0
1218,0,0
1221,0,0
2348,0,0
2479,0,0
2646,0,0
2669,0,0
2771,0,0


**Ignorons pour le moment les données COG qui semble apporter des doublons**

In [5]:
%%sql --save int_edc__commune_udi
    SELECT
        inseecommune,
        cdreseau,
        de_partition,
        -- Prenons toujours le même nom de commune pour une inseecommune donnée
        MIN(nomcommune) AS nomcommune,
        -- Agréger les différentes valeurs de quartier en une liste sans doublons
        STRING_AGG(DISTINCT quartier, ', ') FILTER (WHERE quartier IS NOT NULL AND quartier != '') AS quartiers,
        -- Agréger les différentes valeurs de nomreseau en une liste sans doublons
        STRING_AGG(DISTINCT nomreseau, ', ') FILTER (WHERE nomreseau IS NOT NULL AND nomreseau != '') AS nomreseaux,
        -- Prendre la première date de début d'alimentation
        MIN(debutalim) AS debutalim
    FROM 
       edc_communes
    GROUP BY
        inseecommune,
        cdreseau,
        de_partition

inseecommune,cdreseau,de_partition,nomcommune,quartiers,nomreseaux,debutalim
88495,88001572,2024,VAUDEVILLE,VAUDEVILLE,SDE DES BOLOTTES,2010-08-17
88500,88001425,2024,VENTRON,CENTRE,RESEAU PRINCIPAL,2010-08-17
88512,88001430,2024,VIMENIL,VIMENIL,VIMENIL,2010-08-17
88516,88001433,2024,VITTEL,VITTEL,VITTEL,2010-08-17
88522,88002443,2024,VOMECOURT-SUR-MADON,VOMECOURT SUR MADON,RESEAU AMBACOURT,2010-08-17
88523,88001605,2024,VOUXEY,VOUXEY,RESEAU REMOVILLE,2010-08-17
89016,89000435,2024,ARGENTENAY,-,ARGENTENAY,2010-08-03
89017,89000692,2024,ARGENTEUIL-SUR-ARMANCON,-,ARGENTEUIL-PACY,2010-08-03
89022,89000765,2024,ATHIE,-,TPM ST-AGNAN,2010-08-02
89023,89000439,2024,AUGY,totalité,AUGY,2010-08-02


In [6]:
%%sql --with int_edc__commune_udi
SELECT
        inseecommune,
        de_partition,
        COUNT(nomcommune)
FROM
    int_edc__commune_udi
GROUP BY 
        inseecommune,
        de_partition
ORDER BY 
    1 DESC,2 DESC

inseecommune,de_partition,count(nomcommune)
97801,2024,1
97801,2023,1
97801,2022,1
97801,2021,1
97801,2020,1
97701,2024,1
97701,2023,1
97701,2022,1
97701,2021,1
97701,2020,1


In [7]:
%%sql --with int_edc__commune_udi
SELECT
        inseecommune,
        cdreseau,
        de_partition,
        COUNT(*)
FROM
    int_edc__commune_udi
GROUP BY 
        inseecommune,
        cdreseau,
        de_partition
HAVING 
    COUNT(*) >1

inseecommune,cdreseau,de_partition,count_star()


#### LIST_REF_UDI_YEAR Pour chaque catégorie et années, on veut la liste complète des commmunes

In [8]:
%%sql  
WITH
annees AS (
    SELECT unnest(generate_series(2020, 2024)) as annee
    ),

cat AS (
    SELECT categorie FROM int__mapping_category_simple GROUP BY 1 
    )

SELECT
 annee, categorie
FROM 
annees
CROSS JOIN
cat

annee,categorie
2020,phénol
2021,phénol
2022,phénol
2023,phénol
2024,phénol
2020,non classé
2021,non classé
2022,non classé
2023,non classé
2024,non classé


In [9]:
%%sql --save LIST_REF_UDI_YEAR
WITH
annees AS (
    SELECT unnest(generate_series(2020, 2024)) as annee
    ),

cat AS (
    SELECT categorie FROM int__mapping_category_simple GROUP BY 1 
    ),

year_cat AS (   
    SELECT
     annee, categorie
    FROM 
    annees
    CROSS JOIN
    cat 
),

udi AS (
    SELECT
      de_partition AS year,
      inseecommune AS commune_code_insee,
      cdreseau,
    FROM
      edc_communes
    GROUP BY 
    1,2,3
)
    
SELECT DISTINCT
    annee, 
    categorie,
    commune_code_insee ,
 FROM
      udi
 FULL OUTER JOIN
       year_cat
 ON
    udi.year = year_cat.annee

annee,categorie,commune_code_insee
2024,nitrite,89168
2024,nitrite,89342
2024,nitrite,91156
2024,nitrite,95395
2020,nitrite,1033
2020,nitrite,1237
2020,nitrite,1248
2020,nitrite,2412
2020,nitrite,5040
2020,nitrite,6041


In [10]:
%%sql --with LIST_REF_UDI_YEAR
SELECT
    *
FROM
   LIST_REF_UDI_YEAR
WHERE
    commune_code_insee = '07194'
    AND categorie = 'cvm'

annee,categorie,commune_code_insee
2020,cvm,7194
2021,cvm,7194
2023,cvm,7194
2022,cvm,7194
2024,cvm,7194


### Les résultats

#### mesures_cat

In [11]:
%%sql
WITH
resultats AS (
    SELECT
      referenceprel,
      cdparametresiseeaux,
      valtraduite,
      limitequal,
      CAST(regexp_extract(REPLACE(limitequal, ',', '.'), '-?\d+(\.\d+)?') AS FLOAT) AS limitequal_float,
      regexp_extract(limitequal, '[a-zA-Zµg]+/?[a-zA-Z/L]+$') AS unite,
    FROM  
        edc_resultats 
    )
    
    SELECT
        resultats.*,
        int__mapping_category_simple.categorie
    FROM 
        resultats
    LEFT JOIN 
        int__mapping_category_simple 
    ON
        resultats.cdparametresiseeaux = int__mapping_category_simple .cdparametresiseeaux

referenceprel,cdparametresiseeaux,valtraduite,limitequal,limitequal_float,unite,categorie
200179905,TCEYTCL,0.0,<=10 µg/L,10.0,µg/L,hydrocarbure
200179905,TCLC,0.0,,,,non classé
200179905,TCLEY,0.0,<=10 µg/L,10.0,µg/L,hydrocarbure
200179905,TE,0.0,,,,non classé
200179905,TEAU,15.0,,,,paramètre organoleptique
200179905,TED,0.0,,,,non classé
200179905,TEMP_PH,15.0,,,,paramètre organoleptique
200179905,THM4,25.6,<=100 µg/L,100.0,µg/L,sous produit désinfection
200179905,TI,0.0003,,,,non classé
200179905,TID,0.0003,,,,non classé


#### mesures_cat_communes : on associe aux resultats la ref de prelevement et les uid associés

In [12]:
%%sql --save mesures_cat_communes
WITH 
/* mesures_cat*/
udi AS (
    SELECT
      de_partition AS year,
      inseecommune AS commune_code_insee,
      cdreseau,
    FROM
      edc_communes
    GROUP BY 
    1,2,3
),
   
prelevement AS (
	SELECT
	  referenceprel,
	  cdreseau,
      dateprel,
	FROM   
      edc_prelevements
),

resultats AS (
    SELECT
      referenceprel,
      cdparametresiseeaux,
      valtraduite,
      limitequal,
      CAST(regexp_extract(REPLACE(limitequal, ',', '.'), '-?\d+(\.\d+)?') AS FLOAT) AS limitequal_float,
      regexp_extract(limitequal, '[a-zA-Zµg]+/?[a-zA-Z/L]+$') AS unite,
    FROM  
        edc_resultats 
    ),

mesures_cat AS (
    SELECT
        resultats.*,
        int__mapping_category_simple.categorie
    FROM 
        resultats
    LEFT JOIN 
        int__mapping_category_simple 
    ON
        resultats.cdparametresiseeaux = int__mapping_category_simple .cdparametresiseeaux
)
/* END mesures_cat*/    
    
    SELECT
        mesures_cat.*,
        prelevement.dateprel,
        udi.commune_code_insee AS commune_code_insee,
    FROM 
        mesures_cat 
    LEFT JOIN
    	prelevement
    ON
       mesures_cat.referenceprel = prelevement.referenceprel
    LEFT JOIN
        udi
    ON
    	udi.cdreseau = prelevement.cdreseau
        AND udi.year = extract( YEAR FROM prelevement.dateprel)

referenceprel,cdparametresiseeaux,valtraduite,limitequal,limitequal_float,unite,categorie,dateprel,commune_code_insee
100119085,12DCLE,0.0,<=3 µg/L,3.0,µg/L,hydrocarbure,2020-02-14,1333
100119085,ACTIK40,0.034,,,,radioactivité,2020-02-14,1333
100119085,ACTITR,0.0,,,,radioactivité,2020-02-14,1333
100119085,ADET,0.013,"<=0,1 µg/L",0.1000000014901161,µg/L,métabolite de pesticide,2020-02-14,1333
100119085,ADET2,0.0,"<=0,1 µg/L",0.1000000014901161,µg/L,métabolite de pesticide,2020-02-14,1333
100119085,ADETD,0.0,"<=0,1 µg/L",0.1000000014901161,µg/L,métabolite de pesticide,2020-02-14,1333
100119085,ADSP,0.0,"<=0,1 µg/L",0.1000000014901161,µg/L,métabolite de pesticide,2020-02-14,1333
100119085,ALTMICR,0.0,,,,minéral,2020-02-14,1333
100119085,AMTH,0.0,"<=0,1 µg/L",0.1000000014901161,µg/L,pesticides,2020-02-14,1333
100119085,ATRZ,0.0,"<=0,1 µg/L",0.1000000014901161,µg/L,pesticides,2020-02-14,1333


In [13]:
%%sql --with mesures_cat_communes 
 SELECT 
   *
 FROM
   mesures_cat_communes
WHERE
    commune_code_insee = '07194'
    AND categorie = 'cvm'

referenceprel,cdparametresiseeaux,valtraduite,limitequal,limitequal_float,unite,categorie,dateprel,commune_code_insee
700198480,CLVYL,0.26,<=0.5 µg/L,0.5,µg/L,cvm,2024-03-04,7194
700199229,CLVYL,1.2,<=0.5 µg/L,0.5,µg/L,cvm,2024-05-31,7194
700200950,CLVYL,1.2,<=0.5 µg/L,0.5,µg/L,cvm,2024-06-17,7194
700201108,CLVYL,1.3,<=0.5 µg/L,0.5,µg/L,cvm,2024-07-19,7194
700201378,CLVYL,0.031,<=0.5 µg/L,0.5,µg/L,cvm,2024-07-30,7194
700202885,CLVYL,2.8,<=0.5 µg/L,0.5,µg/L,cvm,2024-09-09,7194
700203262,CLVYL,0.0,<=0.5 µg/L,0.5,µg/L,cvm,2024-09-17,7194
700203782,CLVYL,0.038,<=0.5 µg/L,0.5,µg/L,cvm,2024-10-09,7194
700201413,CLVYL,0.0,<=0.5 µg/L,0.5,µg/L,cvm,2024-07-09,7194
700169098,CLVYL,0.0,<=0.5 µg/L,0.5,µg/L,cvm,2020-06-17,7194


#### mesures_cat_communes_year

In [14]:
%%sql --with mesures_cat_communes --save mesures_cat_communes_year

SELECT 
 extract( YEAR FROM mesures_cat_communes.dateprel) as annee,
 categorie,
 commune_code_insee,
 SUM(1) AS nb_analyses,
 SUM(case
        when limitequal_float is not NULL and valtraduite >= limitequal_float then 1
        else 0
    end ) AS nb_analyses_not_ok,
 SUM(case
        when limitequal_float is not NULL and valtraduite < limitequal_float then 1
        else 0
    end ) AS nb_analyses_ok,    
FROM
 mesures_cat_communes
GROUP BY 
    1,2,3

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

annee,categorie,commune_code_insee,nb_analyses,nb_analyses_not_ok,nb_analyses_ok
2020,microbio,1047,66,22,0
2020,sous produit désinfection,1064,54,0,6
2020,paramètre organoleptique,1308,144,0,0
2020,sous produit désinfection,1399,99,0,6
2020,microbio,1283,396,132,0
2020,sous produit désinfection,1054,71,0,12
2020,paramètre organoleptique,1420,303,0,0
2020,sous produit désinfection,1198,58,0,11
2020,minéral,1146,65,0,3
2020,paramètre organoleptique,1331,63,0,0


In [15]:
%%sql --with mesures_cat_communes_year 
 SELECT 
   *
 FROM
   mesures_cat_communes_year
WHERE
    commune_code_insee = '07194'
    AND categorie = 'cvm'
ORDER BY 
    annee

annee,categorie,commune_code_insee,nb_analyses,nb_analyses_not_ok,nb_analyses_ok
2020,cvm,7194,3,0,3
2021,cvm,7194,3,0,3
2022,cvm,7194,11,8,3
2023,cvm,7194,16,11,5
2024,cvm,7194,10,5,5


#### mesures_cat_communes_year_cvm

In [16]:
%%sql --with mesures_cat_communes_year 
SELECT
    annee,
    commune_code_insee,
    coalesce(nb_analyses,0) AS nb_analyses,
    coalesce(nb_analyses_not_ok,0) AS nb_analyses_not_ok ,
    coalesce(nb_analyses_ok,0) AS nb_analyses_ok,
    CASE WHEN coalesce(nb_analyses,0) = 0 THEN 'Pas recherché'
         WHEN coalesce(nb_analyses,0) > 0  AND coalesce(nb_analyses_ok,0) = 0 THEN 'jamais quantifié'
         WHEN coalesce(nb_analyses,0) > 0  AND coalesce(nb_analyses_not_ok,0) > 1 THEN '> 0,5 µg/L'
         WHEN coalesce(nb_analyses,0) > 0  AND coalesce(nb_analyses_ok,0) > 0 THEN '<= 0,5 µg/L'
         ELSE 'check case when'
    END AS resultat
FROM  
 mesures_cat_communes_year
WHERE
 categorie = 'cvm'

annee,commune_code_insee,nb_analyses,nb_analyses_not_ok,nb_analyses_ok,resultat
2020,83119,10,0,10,"<= 0,5 µg/L"
2020,83151,1,0,1,"<= 0,5 µg/L"
2020,83132,3,0,3,"<= 0,5 µg/L"
2020,83027,8,0,8,"<= 0,5 µg/L"
2020,83055,3,0,3,"<= 0,5 µg/L"
2020,83023,12,0,12,"<= 0,5 µg/L"
2020,83122,1,0,1,"<= 0,5 µg/L"
2020,83076,2,0,2,"<= 0,5 µg/L"
2020,84122,4,0,4,"<= 0,5 µg/L"
2020,84006,1,0,1,"<= 0,5 µg/L"


In [17]:
%%sql --with mesures_cat_communes_year 
SELECT
    annee,
    commune_code_insee,
    coalesce(nb_analyses,0) AS nb_analyses,
    coalesce(nb_analyses_not_ok,0) AS nb_analyses_not_ok ,
    coalesce(nb_analyses_ok,0) AS nb_analyses_ok,
    CASE WHEN coalesce(nb_analyses,0) = 0 THEN 'Pas recherché'
         WHEN coalesce(nb_analyses,0) > 0  AND coalesce(nb_analyses_ok,0) = 0 THEN 'jamais quantifié'
         WHEN coalesce(nb_analyses,0) > 0  AND coalesce(nb_analyses_not_ok,0) > 1 THEN '> 0,5 µg/L'
         WHEN coalesce(nb_analyses,0) > 0  AND coalesce(nb_analyses_ok,0) > 0 THEN '<= 0,5 µg/L'
         ELSE 'check case when'
    END AS resultat
FROM  
 mesures_cat_communes_year
WHERE
    commune_code_insee = '07194'
    AND categorie = 'cvm'
ORDER BY 
    annee

annee,commune_code_insee,nb_analyses,nb_analyses_not_ok,nb_analyses_ok,resultat
2020,7194,3,0,3,"<= 0,5 µg/L"
2021,7194,3,0,3,"<= 0,5 µg/L"
2022,7194,11,8,3,"> 0,5 µg/L"
2023,7194,16,11,5,"> 0,5 µg/L"
2024,7194,10,5,5,"> 0,5 µg/L"


**Pour un cas plus générique il faudra ajouter une condition sur categorie dans le CASE WHEN resultat**

#### mesures_cat_communes_year_cvm + Joint list annee_cat_communes list

In [18]:
%%sql --save mesures_cat_communes_year_resultat
WITH 
/* LIST_REF_UDI_YEAR  */
annees AS (
    SELECT unnest(generate_series(2020, 2024)) as annee
    ),

cat AS (
    SELECT categorie FROM int__mapping_category_simple GROUP BY 1 
    ),

year_cat AS (   
    SELECT
     annee, categorie
    FROM 
    annees
    CROSS JOIN
    cat 
),

udi AS (
    SELECT
      de_partition AS year,
      inseecommune AS commune_code_insee,
      cdreseau,
    FROM
      edc_communes
    GROUP BY 
    1,2,3
),
    
LIST_REF_UDI_YEAR AS (    
SELECT DISTINCT
    annee, 
    categorie,
    commune_code_insee ,
 FROM
      udi
 FULL OUTER JOIN
       year_cat
 ON
    udi.year = year_cat.annee
),
/* END  LIST_REF_UDI_YEAR  */



    
prelevement AS (
	SELECT
	  referenceprel,
	  cdreseau,
      dateprel,
	FROM   
      edc_prelevements
),

resultats AS (
    SELECT
      referenceprel,
      cdparametresiseeaux,
      valtraduite,
      limitequal,
      CAST(regexp_extract(REPLACE(limitequal, ',', '.'), '-?\d+(\.\d+)?') AS FLOAT) AS limitequal_float,
      regexp_extract(limitequal, '[a-zA-Zµg]+/?[a-zA-Z/L]+$') AS unite,
    FROM  
        edc_resultats 
    ),

mesures_cat AS (
    SELECT
        resultats.*,
        int__mapping_category_simple.categorie
    FROM 
        resultats
    LEFT JOIN 
        int__mapping_category_simple 
    ON
        resultats.cdparametresiseeaux = int__mapping_category_simple .cdparametresiseeaux
),

 mesures_cat_communes AS (
    SELECT
        mesures_cat.*,
        prelevement.dateprel,
        udi.commune_code_insee AS commune_code_insee,
    FROM 
        mesures_cat 
    LEFT JOIN
    	prelevement
    ON
       mesures_cat.referenceprel = prelevement.referenceprel
    LEFT JOIN
        udi
    ON
    	udi.cdreseau = prelevement.cdreseau
            AND udi.year = extract( YEAR FROM prelevement.dateprel)

    ),

 mesures_cat_communes_year AS(
    SELECT 
     extract( YEAR FROM mesures_cat_communes.dateprel) as annee,
     categorie,
     commune_code_insee,
     SUM(1) AS nb_analyses,
     SUM(case
            when limitequal_float is not NULL and valtraduite >= limitequal_float then 1
            else 0
        end ) AS nb_analyses_not_ok,
     SUM(case
            when limitequal_float is not NULL and valtraduite < limitequal_float then 1
            else 0
        end ) AS nb_analyses_ok,    
    FROM
     mesures_cat_communes
    GROUP BY 
        1,2,3
    )

SELECT
    LIST_REF_UDI_YEAR.annee,
    LIST_REF_UDI_YEAR.commune_code_insee,
    LIST_REF_UDI_YEAR.categorie,
    coalesce(nb_analyses,0) AS nb_analyses,
    coalesce(nb_analyses_not_ok,0) AS nb_analyses_not_ok ,
    coalesce(nb_analyses_ok,0) AS nb_analyses_ok,
    CASE WHEN coalesce(nb_analyses,0) = 0 THEN 'Pas recherché'
         WHEN coalesce(nb_analyses,0) > 0  AND coalesce(nb_analyses_ok,0) = 0 THEN 'jamais quantifié'
         WHEN coalesce(nb_analyses,0) > 0  AND coalesce(nb_analyses_not_ok,0) > 1 THEN '> 0,5 µg/L'
         WHEN coalesce(nb_analyses,0) > 0  AND coalesce(nb_analyses_ok,0) > 0 THEN '<= 0,5 µg/L'
         ELSE 'check case when'
    END AS resultat
FROM  
 LIST_REF_UDI_YEAR
LEFT JOIN 
 mesures_cat_communes_year
ON
    LIST_REF_UDI_YEAR.annee =  mesures_cat_communes_year.annee
    AND LIST_REF_UDI_YEAR.categorie =  mesures_cat_communes_year.categorie
    AND  LIST_REF_UDI_YEAR.commune_code_insee  =  mesures_cat_communes_year.commune_code_insee
WHERE
 LIST_REF_UDI_YEAR.categorie = 'cvm'

annee,commune_code_insee,categorie,nb_analyses,nb_analyses_not_ok,nb_analyses_ok,resultat
2020,29267,cvm,3,0,3,"<= 0,5 µg/L"
2020,29170,cvm,2,0,2,"<= 0,5 µg/L"
2020,29198,cvm,7,0,7,"<= 0,5 µg/L"
2020,29064,cvm,7,0,7,"<= 0,5 µg/L"
2020,29135,cvm,8,0,8,"<= 0,5 µg/L"
2020,29084,cvm,2,0,2,"<= 0,5 µg/L"
2020,29285,cvm,5,0,5,"<= 0,5 µg/L"
2020,29068,cvm,4,0,4,"<= 0,5 µg/L"
2020,2A090,cvm,3,0,3,"<= 0,5 µg/L"
2020,2A041,cvm,7,0,7,"<= 0,5 µg/L"


#### mesures_cat_communes_year_cvm + Joint list annee_cat_communes list + udi /*int_edc__commune_udi*/

In [19]:
%%sql --save mesures_cat_communes_year_cvm_with_cog
WITH 
/* int_edc__commune_udi */
udi AS (
    SELECT
        inseecommune AS commune_code_insee,
        cdreseau,
        de_partition AS year,
        -- Prenons toujours le même nom de commune pour une inseecommune donnée
        MIN(nomcommune) AS nomcommune,
        -- Agréger les différentes valeurs de quartier en une liste sans doublons
        STRING_AGG(DISTINCT quartier, ', ') FILTER (WHERE quartier IS NOT NULL AND quartier != '') AS quartiers,
        -- Agréger les différentes valeurs de nomreseau en une liste sans doublons
        STRING_AGG(DISTINCT nomreseau, ', ') FILTER (WHERE nomreseau IS NOT NULL AND nomreseau != '') AS nomreseaux,
        -- Prendre la première date de début d'alimentation
        MIN(debutalim) AS debutalim
    FROM 
       edc_communes
    GROUP BY
        inseecommune,
        cdreseau,
        de_partition
),
/*
cog AS (
    SELECT
      DEP AS code_departement,
      REG AS code_region,
      COM AS commune_code_insee,
    FROM 
      cog_communes
),

int_edc__commune_udi AS (
    SELECT
      udi.*,
      cog.code_departement,
      cog.code_region
    FROM
      udi
    LEFT JOIN 
      cog
    ON 
      udi.commune_code_insee = cog.commune_code_insee
),*/
/* END int_edc__commune_udi */
    
    
/* LIST_REF_UDI_YEAR  */
annees AS (
    SELECT unnest(generate_series(2020, 2024)) as annee
    ),

cat AS (
    SELECT categorie FROM int__mapping_category_simple GROUP BY 1 
    ),

year_cat AS (   
    SELECT
     annee, categorie
    FROM 
    annees
    CROSS JOIN
    cat 
),

/*udi AS (
    SELECT
      de_partition AS year,
      inseecommune AS commune_code_insee,
      cdreseau,
    FROM
      edc_communes
    GROUP BY 
    1,2,3
), */

LIST_REF_UDI_YEAR AS (    
SELECT DISTINCT
    annee, 
    categorie,
    commune_code_insee ,
 FROM
      udi
 FULL OUTER JOIN
       year_cat
 ON
    udi.year = year_cat.annee
),
/* END  LIST_REF_UDI_YEAR  */

    
   
prelevement AS (
	SELECT
	  referenceprel,
	  cdreseau,
      dateprel,
	FROM   
      edc_prelevements
),

resultats AS (
    SELECT
      referenceprel,
      cdparametresiseeaux,
      valtraduite,
      limitequal,
      CAST(regexp_extract(REPLACE(limitequal, ',', '.'), '-?\d+(\.\d+)?') AS FLOAT) AS limitequal_float,
      regexp_extract(limitequal, '[a-zA-Zµg]+/?[a-zA-Z/L]+$') AS unite,
    FROM  
        edc_resultats 
    ),

mesures_cat AS (
    SELECT
        resultats.*,
        int__mapping_category_simple.categorie
    FROM 
        resultats
    LEFT JOIN 
        int__mapping_category_simple 
    ON
        resultats.cdparametresiseeaux = int__mapping_category_simple .cdparametresiseeaux
),
/* END mesures_cat*/    


 mesures_cat_communes AS (
    SELECT
        mesures_cat.*,
        prelevement.dateprel,
        udi.commune_code_insee AS commune_code_insee,
    FROM 
        mesures_cat 
    LEFT JOIN
    	prelevement
    ON
       mesures_cat.referenceprel = prelevement.referenceprel
    LEFT JOIN
        udi
    ON
    	udi.cdreseau = prelevement.cdreseau
        AND udi.year = extract( YEAR FROM prelevement.dateprel)
    ),


    
 mesures_cat_communes_year AS(
    SELECT 
     extract( YEAR FROM mesures_cat_communes.dateprel) as annee,
     categorie,
     commune_code_insee,
     SUM(1) AS nb_analyses,
     SUM(case
            when limitequal_float is not NULL and valtraduite >= limitequal_float then 1
            else 0
        end ) AS nb_analyses_not_ok,
     SUM(case
            when limitequal_float is not NULL and valtraduite < limitequal_float then 1
            else 0
        end ) AS nb_analyses_ok,    
    FROM
     mesures_cat_communes
    GROUP BY 
        1,2,3
    )

SELECT
    LIST_REF_UDI_YEAR.annee,
    LIST_REF_UDI_YEAR.commune_code_insee,
    LIST_REF_UDI_YEAR.categorie,
    udi.nomcommune,
    coalesce(nb_analyses,0) AS nb_analyses,
    coalesce(nb_analyses_not_ok,0) AS nb_analyses_not_ok ,
    coalesce(nb_analyses_ok,0) AS nb_analyses_ok,
    CASE WHEN coalesce(nb_analyses,0) = 0 THEN 'Pas recherché'
         WHEN coalesce(nb_analyses,0) > 0  AND coalesce(nb_analyses_ok,0) = 0 THEN 'jamais quantifié'
         WHEN coalesce(nb_analyses,0) > 0  AND coalesce(nb_analyses_not_ok,0) > 1 THEN '> 0,5 µg/L'
         WHEN coalesce(nb_analyses,0) > 0  AND coalesce(nb_analyses_ok,0) > 0 THEN '<= 0,5 µg/L'
         ELSE 'check case when'
    END AS resultat
FROM  
 LIST_REF_UDI_YEAR
LEFT JOIN 
 mesures_cat_communes_year
ON
    LIST_REF_UDI_YEAR.annee =  mesures_cat_communes_year.annee
    AND LIST_REF_UDI_YEAR.categorie =  mesures_cat_communes_year.categorie
    AND  LIST_REF_UDI_YEAR.commune_code_insee  =  mesures_cat_communes_year.commune_code_insee
LEFT JOIN 
 udi
ON
    mesures_cat_communes_year.annee =  udi.year
    AND  mesures_cat_communes_year.commune_code_insee  =  udi.commune_code_insee    
   /* AND  mesures_cat_communes_year.cdreseau  =  udi.cdreseau    */
WHERE
 LIST_REF_UDI_YEAR.categorie = 'cvm'

annee,commune_code_insee,categorie,nomcommune,nb_analyses,nb_analyses_not_ok,nb_analyses_ok,resultat
2020,1004,cvm,AMBERIEU-EN-BUGEY,11,0,11,"<= 0,5 µg/L"
2020,1058,cvm,BREGNIER-CORDON,2,0,2,"<= 0,5 µg/L"
2020,1022,cvm,ARTEMARE,2,0,2,"<= 0,5 µg/L"
2020,1431,cvm,VAUX-EN-BUGEY,2,0,2,"<= 0,5 µg/L"
2020,1155,cvm,EVOSGES,3,0,3,"<= 0,5 µg/L"
2020,1175,cvm,GORREVOD,8,0,8,"<= 0,5 µg/L"
2020,1332,cvm,SAINT-ANDRE-DE-BAGE,4,0,4,"<= 0,5 µg/L"
2020,1284,cvm,OZAN,8,0,8,"<= 0,5 µg/L"
2020,1094,cvm,CHAVANNES-SUR-REYSSOUZE,8,0,8,"<= 0,5 µg/L"
2020,1269,cvm,NANTUA,2,0,2,"<= 0,5 µg/L"


# Check

In [20]:
%%sql
SELECT
    de_partition as year,
    COUNT(DISTINCT inseecommune)
FROM 
 edc_communes
GROUP BY 
    1 
ORDER BY 
   1

year,count(DISTINCT inseecommune)
2020,34788
2021,34833
2022,34874
2023,34852
2024,34809


In [21]:
%%sql --with mesures_cat_communes_year_resultat  
 SELECT 
    annee,
    COUNT(DISTINCT commune_code_insee)
 FROM
   mesures_cat_communes_year_resultat
GROUP BY 
    1

annee,count(DISTINCT commune_code_insee)
2021,34833
2023,34852
2020,34788
2022,34874
2024,34809


In [22]:
%%sql --with mesures_cat_communes_year 
 SELECT 
   *
 FROM
   mesures_cat_communes_year
WHERE
    commune_code_insee = '07194'
    AND categorie = 'cvm'
ORDER BY 
    annee

annee,categorie,commune_code_insee,nb_analyses,nb_analyses_not_ok,nb_analyses_ok
2020,cvm,7194,3,0,3
2021,cvm,7194,3,0,3
2022,cvm,7194,11,8,3
2023,cvm,7194,16,11,5
2024,cvm,7194,10,5,5


In [23]:
%%sql --with mesures_cat_communes_year_resultat 
 SELECT 
   *
 FROM
   mesures_cat_communes_year_resultat
WHERE
    commune_code_insee = '07194'
    AND categorie = 'cvm'
ORDER BY 
    annee

annee,commune_code_insee,categorie,nb_analyses,nb_analyses_not_ok,nb_analyses_ok,resultat
2020,7194,cvm,3,0,3,"<= 0,5 µg/L"
2021,7194,cvm,3,0,3,"<= 0,5 µg/L"
2022,7194,cvm,11,8,3,"> 0,5 µg/L"
2023,7194,cvm,16,11,5,"> 0,5 µg/L"
2024,7194,cvm,10,5,5,"> 0,5 µg/L"


In [24]:
%%sql --with mesures_cat_communes_year_cvm_with_cog 
 SELECT 
   *
 FROM
   mesures_cat_communes_year_cvm_with_cog
WHERE
    commune_code_insee = '07194'
    AND categorie = 'cvm'
ORDER BY 
    annee

annee,commune_code_insee,categorie,nomcommune,nb_analyses,nb_analyses_not_ok,nb_analyses_ok,resultat
2020,7194,cvm,ROCHESSAUVE,3,0,3,"<= 0,5 µg/L"
2020,7194,cvm,ROCHESSAUVE,3,0,3,"<= 0,5 µg/L"
2021,7194,cvm,ROCHESSAUVE,3,0,3,"<= 0,5 µg/L"
2021,7194,cvm,ROCHESSAUVE,3,0,3,"<= 0,5 µg/L"
2022,7194,cvm,ROCHESSAUVE,11,8,3,"> 0,5 µg/L"
2022,7194,cvm,ROCHESSAUVE,11,8,3,"> 0,5 µg/L"
2023,7194,cvm,ROCHESSAUVE,16,11,5,"> 0,5 µg/L"
2023,7194,cvm,ROCHESSAUVE,16,11,5,"> 0,5 µg/L"
2024,7194,cvm,ROCHESSAUVE,10,5,5,"> 0,5 µg/L"
2024,7194,cvm,ROCHESSAUVE,10,5,5,"> 0,5 µg/L"


**Il y a encore des doublon avec le dernier JOIN : plusieurs nom de commune par inseecommune car plusieurs cdreaux ??**

In [25]:
%%sql --with int_edc__commune_udi
SELECT
        inseecommune,
        de_partition,
        COUNT(nomcommune)
FROM
    int_edc__commune_udi
GROUP BY 
        inseecommune,
        de_partition
HAVING 
    COUNT(nomcommune) >1

inseecommune,de_partition,count(nomcommune)
89003,2024,3
89055,2024,2
89063,2024,2
89091,2024,2
89130,2024,2
89155,2024,2
89206,2024,2
89344,2024,2
89392,2024,2
89411,2024,2


**Il faudra voir aussi voir comment ajouter les informations COG UTILES**