# Calcul du nombre de prélèvements CVM non conforme par commune et par année

L'objectif de ce notebook est de partir de la liste de communes cog_communes, et pour chaque commune et chaque année, calculer le nombre de prélèvements non conformes pour le CVM.

Il y aura plusieurs aggrégations à faire :

- commune (inseecommune) peut avoir plusieurs UDIs (cdreseau) **ET** un UDI peut avoir plusieurs communes (inseecommune) 
- un prélèvement (referenceprel) peut être rattaché à plusieurs UDIs (cdreseau)
- un prélèvement (referenceprel) peut être composé de plusieurs paramètres (cdparametresiseeaux) ; mais dans le cas du CVM, il y a un seul paramètre selon la catégorisation de Pauline, donc c'est plus simple




In [1]:
%load_ext sql
%sql duckdb:///../../database/data.duckdb
%config SqlMagic.displaylimit = 10

### Les communes 

#### Name and info

In [2]:
%%sql --save int_edc__commune_udi
WITH
udi AS (
    SELECT
        inseecommune,
        cdreseau,
        de_partition,
        -- Prenons toujours le même nom de commune pour une inseecommune donnée
        MIN(nomcommune)  AS nomcommune,
        -- Agréger les différentes valeurs de quartier en une liste sans doublons
        STRING_AGG(DISTINCT quartier, ', ') FILTER (WHERE quartier IS NOT NULL AND quartier != '') AS quartiers,
        -- Agréger les différentes valeurs de nomreseau en une liste sans doublons
        STRING_AGG(DISTINCT nomreseau, ', ') FILTER (WHERE nomreseau IS NOT NULL AND nomreseau != '') AS nomreseaux,
        -- Prendre la première date de début d'alimentation
        MIN(debutalim) AS debutalim
    FROM 
       edc_communes
    GROUP BY
        inseecommune,
        cdreseau,
        de_partition
),

cog AS (
    SELECT 
      DEP AS code_departement,
      REG AS code_region,
      COM AS commune_code_insee,
    FROM 
      cog_communes
)
    SELECT 
      udi.*,
      cog.code_departement,
      cog.code_region
    FROM
      udi
    LEFT JOIN 
      cog
    ON 
      udi.inseecommune = cog.commune_code_insee

inseecommune,cdreseau,de_partition,nomcommune,quartiers,nomreseaux,debutalim,code_departement,code_region
1001,1000556,2024,ABERGEMENT-CLEMENCIAT (L'),-,BDS ST DIDIER/CHALARONNE,2010-09-07,1,84
1002,1000369,2022,ABERGEMENT-DE-VAREY (L'),-,L'ABERGEMENT-DE-VAREY,2010-09-07,1,84
1004,1000249,2024,AMBERIEU-EN-BUGEY,St Germain_Brédevent,AMBERIEU SAINT GERMAIN DOUVRES,2010-09-07,1,84
1005,1000850,2022,AMBERIEUX-EN-DOMBES,"Est, Ouest",BDS CHATANIER,2021-08-01,1,84
1006,1000235,2021,AMBLEON,-,AMBLEON,2010-09-07,1,84
1007,1000003,2020,AMBRONAY,-,AMBRONAY,2010-09-07,1,84
1008,1000254,2022,AMBUTRIX,Ambutrix centre,AMBUTRIX MAIRIE,2010-09-07,1,84
1009,1000338,2024,ANDERT-ET-CONDON,-,ANDERT-ET-CONDON-PUGIEU,2010-09-07,1,84
1010,1000260,2021,ANGLEFORT,le bourg,ANGLEFORT BOURG,2010-09-07,1,84
1011,1000870,2022,APREMONT,-,HBA LAC DE SYLANS,2010-09-07,1,84


In [3]:
%%sql --with int_edc__commune_udi
SELECT
        inseecommune,
        cdreseau,
        de_partition,
        COUNT(*)
FROM
    int_edc__commune_udi
GROUP BY 
        inseecommune,
        cdreseau,
        de_partition
HAVING 
    COUNT(*) >1

inseecommune,cdreseau,de_partition,count_star()
1025,1000483,2024,2
2053,2001482,2022,2
4120,4000472,2020,2
1036,1000466,2020,2
1185,1000464,2023,2
1080,1001051,2022,2
1185,1000457,2023,2
1338,1000363,2021,2
8116,8001183,2021,2
7165,7001527,2020,2


In [4]:
%%sql 
WITH
cog AS (
    SELECT 
      DEP AS code_departement,
      REG AS code_region,
      COM AS commune_code_insee,
    FROM 
      cog_communes
)
    
SELECT
        commune_code_insee,
        COUNT(code_departement) AS nb_code_departement,
        COUNT(code_region) AS nb_code_region,
FROM
    cog
GROUP BY 
       commune_code_insee
ORDER BY 
    2,3 DESC

commune_code_insee,nb_code_departement,nb_code_region
1059,0,0
1120,0,0
1137,0,0
1154,0,0
1182,0,0
1186,0,0
1205,0,0
1300,0,0
1413,0,0
1414,0,0


**Ignorons pour le moment les données COG qui semble apporter des doublons**

In [5]:
%%sql --save int_edc__commune_udi
    SELECT
        inseecommune,
        cdreseau,
        de_partition,
        -- Prenons toujours le même nom de commune pour une inseecommune donnée
        MIN(nomcommune) AS nomcommune,
        -- Agréger les différentes valeurs de quartier en une liste sans doublons
        STRING_AGG(DISTINCT quartier, ', ') FILTER (WHERE quartier IS NOT NULL AND quartier != '') AS quartiers,
        -- Agréger les différentes valeurs de nomreseau en une liste sans doublons
        STRING_AGG(DISTINCT nomreseau, ', ') FILTER (WHERE nomreseau IS NOT NULL AND nomreseau != '') AS nomreseaux,
        -- Prendre la première date de début d'alimentation
        MIN(debutalim) AS debutalim
    FROM 
       edc_communes
    GROUP BY
        inseecommune,
        cdreseau,
        de_partition

inseecommune,cdreseau,de_partition,nomcommune,quartiers,nomreseaux,debutalim
88495,88001572,2024,VAUDEVILLE,VAUDEVILLE,SDE DES BOLOTTES,2010-08-17
88500,88001425,2024,VENTRON,CENTRE,RESEAU PRINCIPAL,2010-08-17
88512,88001430,2024,VIMENIL,VIMENIL,VIMENIL,2010-08-17
88516,88001433,2024,VITTEL,VITTEL,VITTEL,2010-08-17
88522,88002443,2024,VOMECOURT-SUR-MADON,VOMECOURT SUR MADON,RESEAU AMBACOURT,2010-08-17
88523,88001605,2024,VOUXEY,VOUXEY,RESEAU REMOVILLE,2010-08-17
89016,89000435,2024,ARGENTENAY,-,ARGENTENAY,2010-08-03
89017,89000692,2024,ARGENTEUIL-SUR-ARMANCON,-,ARGENTEUIL-PACY,2010-08-03
89022,89000765,2024,ATHIE,-,TPM ST-AGNAN,2010-08-02
89023,89000439,2024,AUGY,totalité,AUGY,2010-08-02


In [6]:
%%sql --with int_edc__commune_udi
SELECT
        inseecommune,
        de_partition,
        COUNT(nomcommune)
FROM
    int_edc__commune_udi
GROUP BY 
        inseecommune,
        de_partition
ORDER BY 
    1 DESC,2 DESC

inseecommune,de_partition,count(nomcommune)
97801,2024,1
97801,2023,1
97801,2022,1
97801,2021,1
97801,2020,1
97701,2024,1
97701,2023,1
97701,2022,1
97701,2021,1
97701,2020,1


In [7]:
%%sql --with int_edc__commune_udi
SELECT
        inseecommune,
        cdreseau,
        de_partition,
        COUNT(*)
FROM
    int_edc__commune_udi
GROUP BY 
        inseecommune,
        cdreseau,
        de_partition
HAVING 
    COUNT(*) >1

inseecommune,cdreseau,de_partition,count_star()


In [8]:
%%sql --with int_edc__commune_udi
SELECT
        inseecommune,
        de_partition,
        COUNT(nomcommune)
FROM
    int_edc__commune_udi
GROUP BY 
        inseecommune,
        de_partition
HAVING 
    COUNT(nomcommune) >1

inseecommune,de_partition,count(nomcommune)
89015,2024,2
89068,2024,3
89095,2024,2
89145,2024,5
89196,2024,2
89246,2024,2
89335,2024,2
89425,2024,2
90041,2024,2
90065,2024,3


**Utilisation de MIN(nomcommune) OVER (PARTITION BY inseecommune) AS nomcommune ??**<br>
**Utilisation de ANY_VALUE(nomcommune) ?**

#### LIST_REF_UDI_YEAR Pour chaque catégorie et années, on veut la liste complète des commmunes

In [9]:
%%sql  
WITH
annees AS (
    SELECT unnest(generate_series(2020, 2024)) as annee
    ),

cat AS (
    SELECT categorie FROM int__mapping_category_simple GROUP BY 1 
    )

SELECT
 annee, categorie
FROM 
annees
CROSS JOIN
cat

annee,categorie
2020,pesticides
2021,pesticides
2022,pesticides
2023,pesticides
2024,pesticides
2020,sous produit désinfection
2021,sous produit désinfection
2022,sous produit désinfection
2023,sous produit désinfection
2024,sous produit désinfection


In [10]:
%%sql --save LIST_REF_UDI_YEAR
WITH
annees AS (
    SELECT unnest(generate_series(2020, 2024)) as annee
    ),

cat AS (
    SELECT categorie FROM int__mapping_category_simple GROUP BY 1 
    ),

year_cat AS (   
    SELECT
     annee, categorie
    FROM 
    annees
    CROSS JOIN
    cat 
),

udi AS (
    SELECT
      de_partition AS year,
      inseecommune AS commune_code_insee,
      cdreseau,
    FROM
      edc_communes
    GROUP BY 
    1,2,3
)
    
SELECT DISTINCT
    annee, 
    categorie,
    commune_code_insee ,
 FROM
      udi
 FULL OUTER JOIN
       year_cat
 ON
    udi.year = year_cat.annee

annee,categorie,commune_code_insee
2024,médicament,97125
2024,médicament,97312
2024,médicament,97416
2022,médicament,48070
2022,médicament,50277
2022,médicament,50528
2022,médicament,51146
2022,médicament,51457
2022,médicament,52253
2022,médicament,55177


In [11]:
%%sql --with LIST_REF_UDI_YEAR
SELECT
    *
FROM
   LIST_REF_UDI_YEAR
WHERE
    commune_code_insee = '07194'
    AND categorie = 'cvm'

annee,categorie,commune_code_insee
2022,cvm,7194
2024,cvm,7194
2020,cvm,7194
2021,cvm,7194
2023,cvm,7194


### Les résultats

#### mesures_cat

In [12]:
%%sql
WITH
resultats AS (
    SELECT
      referenceprel,
      cdparametresiseeaux,
      valtraduite,
      limitequal,
      CAST(regexp_extract(REPLACE(limitequal, ',', '.'), '-?\d+(\.\d+)?') AS FLOAT) AS limitequal_float,
      regexp_extract(limitequal, '[a-zA-Zµg]+/?[a-zA-Z/L]+$') AS unite,
    FROM  
        edc_resultats 
    )
    
    SELECT
        resultats.*,
        int__mapping_category_simple.categorie
    FROM 
        resultats
    LEFT JOIN 
        int__mapping_category_simple 
    ON
        resultats.cdparametresiseeaux = int__mapping_category_simple .cdparametresiseeaux

referenceprel,cdparametresiseeaux,valtraduite,limitequal,limitequal_float,unite,categorie
400124254,PROSULF,0.0,"<=0,1 µg/L",0.1000000014901161,µg/L,pesticides
400124254,PROTHIO,0.0,"<=0,1 µg/L",0.1000000014901161,µg/L,pesticides
400124254,PRP2H,0.0,"<=0,1 µg/L",0.1000000014901161,µg/L,métabolite de pesticide
400124254,PRPA,0.0,"<=0,1 µg/L",0.1000000014901161,µg/L,pesticides
400124254,PRPZ,0.0,"<=0,1 µg/L",0.1000000014901161,µg/L,pesticides
400124254,PRQT,0.0,"<=0,1 µg/L",0.1000000014901161,µg/L,pesticides
400124254,PRT,0.0,"<=0,1 µg/L",0.1000000014901161,µg/L,pesticides
400124254,PRXP,0.0,"<=0,1 µg/L",0.1000000014901161,µg/L,pesticides
400124254,PYCL,0.0,"<=0,1 µg/L",0.1000000014901161,µg/L,pesticides
400124254,PYDAP,0.0,"<=0,1 µg/L",0.1000000014901161,µg/L,pesticides


#### mesures_cat_communes : on associe aux resultats la ref de prelevement et les udi associés

In [13]:
%%sql --save mesures_cat_communes
WITH 
/* mesures_cat*/
udi AS (
    SELECT
      de_partition AS year,
      inseecommune AS commune_code_insee,
      cdreseau,
    FROM
      edc_communes
    GROUP BY 
    1,2,3
),
   
prelevement AS (
	SELECT
	  referenceprel,
	  cdreseau,
      dateprel,
	FROM   
      edc_prelevements
),

resultats AS (
    SELECT
      referenceprel,
      cdparametresiseeaux,
      valtraduite,
      limitequal,
      CAST(regexp_extract(REPLACE(limitequal, ',', '.'), '-?\d+(\.\d+)?') AS FLOAT) AS limitequal_float,
      regexp_extract(limitequal, '[a-zA-Zµg]+/?[a-zA-Z/L]+$') AS unite,
    FROM  
        edc_resultats 
    ),

mesures_cat AS (
    SELECT
        resultats.*,
        int__mapping_category_simple.categorie
    FROM 
        resultats
    LEFT JOIN 
        int__mapping_category_simple 
    ON
        resultats.cdparametresiseeaux = int__mapping_category_simple .cdparametresiseeaux
)
/* END mesures_cat*/    
    
    SELECT
        mesures_cat.*,
        prelevement.dateprel,
        udi.commune_code_insee AS commune_code_insee,
    FROM 
        mesures_cat 
    LEFT JOIN
    	prelevement
    ON
       mesures_cat.referenceprel = prelevement.referenceprel
    LEFT JOIN
        udi
    ON
    	udi.cdreseau = prelevement.cdreseau
        AND udi.year = extract( YEAR FROM prelevement.dateprel)

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

referenceprel,cdparametresiseeaux,valtraduite,limitequal,limitequal_float,unite,categorie,dateprel,commune_code_insee
100119085,12DCLE,0.0,<=3 µg/L,3.0,µg/L,hydrocarbure,2020-02-14,1333
100119085,ACTIK40,0.034,,,,radioactivité,2020-02-14,1333
100119085,ACTITR,0.0,,,,radioactivité,2020-02-14,1333
100119085,ADET,0.013,"<=0,1 µg/L",0.1000000014901161,µg/L,métabolite de pesticide,2020-02-14,1333
100119085,ADET2,0.0,"<=0,1 µg/L",0.1000000014901161,µg/L,métabolite de pesticide,2020-02-14,1333
100119085,ADETD,0.0,"<=0,1 µg/L",0.1000000014901161,µg/L,métabolite de pesticide,2020-02-14,1333
100119085,ADSP,0.0,"<=0,1 µg/L",0.1000000014901161,µg/L,métabolite de pesticide,2020-02-14,1333
100119085,ALTMICR,0.0,,,,minéral,2020-02-14,1333
100119085,AMTH,0.0,"<=0,1 µg/L",0.1000000014901161,µg/L,pesticides,2020-02-14,1333
100119085,ATRZ,0.0,"<=0,1 µg/L",0.1000000014901161,µg/L,pesticides,2020-02-14,1333


In [14]:
%%sql --with mesures_cat_communes 
 SELECT 
   *
 FROM
   mesures_cat_communes
WHERE
    commune_code_insee = '07194'
    AND categorie = 'cvm'

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

referenceprel,cdparametresiseeaux,valtraduite,limitequal,limitequal_float,unite,categorie,dateprel,commune_code_insee
700187059,CLVYL,3.6,<=0.5 µg/L,0.5,µg/L,cvm,2022-08-31,7194
700187150,CLVYL,1.6,<=0.5 µg/L,0.5,µg/L,cvm,2022-09-14,7194
700187169,CLVYL,0.37,<=0.5 µg/L,0.5,µg/L,cvm,2022-09-21,7194
700187170,CLVYL,1.2,<=0.5 µg/L,0.5,µg/L,cvm,2022-09-21,7194
700187171,CLVYL,3.1,<=0.5 µg/L,0.5,µg/L,cvm,2022-09-21,7194
700187172,CLVYL,3.4,<=0.5 µg/L,0.5,µg/L,cvm,2022-09-21,7194
700187629,CLVYL,0.0,<=0.5 µg/L,0.5,µg/L,cvm,2022-10-27,7194
700187753,CLVYL,2.7,<=0.5 µg/L,0.5,µg/L,cvm,2022-10-05,7194
700187813,CLVYL,0.9,<=0.5 µg/L,0.5,µg/L,cvm,2022-10-19,7194
700166838,CLVYL,0.0,<=0.5 µg/L,0.5,µg/L,cvm,2020-02-05,7194


#### mesures_cat_communes_year

In [15]:
%%sql --with mesures_cat_communes --save mesures_cat_communes_year

SELECT 
 extract( YEAR FROM mesures_cat_communes.dateprel) as annee,
 categorie,
 commune_code_insee,
 SUM(1) AS nb_analyses,
 SUM(case
        when limitequal_float is not NULL and valtraduite >= limitequal_float then 1
        else 0
    end ) AS nb_analyses_not_ok,
 SUM(case
        when limitequal_float is not NULL and valtraduite < limitequal_float then 1
        else 0
    end ) AS nb_analyses_ok,    
FROM
 mesures_cat_communes
GROUP BY 
    1,2,3

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

annee,categorie,commune_code_insee,nb_analyses,nb_analyses_not_ok,nb_analyses_ok
2020,radioactivité,4217,6,0,0
2020,nitrite,4217,30,0,30
2020,sous produit désinfection,4178,168,0,64
2020,nitrite,4206,7,0,7
2020,cvm,4086,1,0,1
2020,minéral,4229,20,0,1
2020,hydrocarbure,4192,21,0,20
2020,hydrocarbure,4160,14,0,13
2020,non classé,4054,2,0,0
2020,microbio,4018,198,66,0


In [16]:
%%sql --with mesures_cat_communes_year 
 SELECT 
   *
 FROM
   mesures_cat_communes_year
WHERE
    commune_code_insee = '07194'
    AND categorie = 'cvm'
ORDER BY 
    annee

annee,categorie,commune_code_insee,nb_analyses,nb_analyses_not_ok,nb_analyses_ok
2020,cvm,7194,3,0,3
2021,cvm,7194,3,0,3
2022,cvm,7194,11,8,3
2023,cvm,7194,16,11,5
2024,cvm,7194,10,5,5


#### mesures_cat_communes_year_cvm

In [17]:
%%sql --with mesures_cat_communes_year 
SELECT
    annee,
    commune_code_insee,
    coalesce(nb_analyses,0) AS nb_analyses,
    coalesce(nb_analyses_not_ok,0) AS nb_analyses_not_ok ,
    coalesce(nb_analyses_ok,0) AS nb_analyses_ok,
    CASE WHEN coalesce(nb_analyses,0) = 0 THEN 'Pas recherché'
         WHEN coalesce(nb_analyses,0) > 0  AND coalesce(nb_analyses_ok,0) = 0 THEN 'jamais quantifié'
         WHEN coalesce(nb_analyses,0) > 0  AND coalesce(nb_analyses_not_ok,0) > 1 THEN '> 0,5 µg/L'
         WHEN coalesce(nb_analyses,0) > 0  AND coalesce(nb_analyses_ok,0) > 0 THEN '<= 0,5 µg/L'
         ELSE 'check case when'
    END AS resultat
FROM  
 mesures_cat_communes_year
WHERE
 categorie = 'cvm'

annee,commune_code_insee,nb_analyses,nb_analyses_not_ok,nb_analyses_ok,resultat
2020,83032,2,0,2,"<= 0,5 µg/L"
2020,83003,4,0,4,"<= 0,5 µg/L"
2020,83111,3,0,3,"<= 0,5 µg/L"
2020,83102,2,0,2,"<= 0,5 µg/L"
2020,83084,2,0,2,"<= 0,5 µg/L"
2020,83048,10,0,10,"<= 0,5 µg/L"
2020,83028,3,0,3,"<= 0,5 µg/L"
2020,83011,3,0,3,"<= 0,5 µg/L"
2020,83118,17,0,17,"<= 0,5 µg/L"
2020,84056,3,0,3,"<= 0,5 µg/L"


In [18]:
%%sql --with mesures_cat_communes_year 
SELECT
    annee,
    commune_code_insee,
    coalesce(nb_analyses,0) AS nb_analyses,
    coalesce(nb_analyses_not_ok,0) AS nb_analyses_not_ok ,
    coalesce(nb_analyses_ok,0) AS nb_analyses_ok,
    CASE WHEN coalesce(nb_analyses,0) = 0 THEN 'Pas recherché'
         WHEN coalesce(nb_analyses,0) > 0  AND coalesce(nb_analyses_ok,0) = 0 THEN 'jamais quantifié'
         WHEN coalesce(nb_analyses,0) > 0  AND coalesce(nb_analyses_not_ok,0) > 1 THEN '> 0,5 µg/L'
         WHEN coalesce(nb_analyses,0) > 0  AND coalesce(nb_analyses_ok,0) > 0 THEN '<= 0,5 µg/L'
         ELSE 'check case when'
    END AS resultat
FROM  
 mesures_cat_communes_year
WHERE
    commune_code_insee = '07194'
    AND categorie = 'cvm'
ORDER BY 
    annee

annee,commune_code_insee,nb_analyses,nb_analyses_not_ok,nb_analyses_ok,resultat
2020,7194,3,0,3,"<= 0,5 µg/L"
2021,7194,3,0,3,"<= 0,5 µg/L"
2022,7194,11,8,3,"> 0,5 µg/L"
2023,7194,16,11,5,"> 0,5 µg/L"
2024,7194,10,5,5,"> 0,5 µg/L"


**Pour un cas plus générique il faudra ajouter une condition sur categorie dans le CASE WHEN resultat**

#### mesures_cat_communes_year_cvm + Joint list annee_cat_communes list

In [19]:
%%sql --save mesures_cat_communes_year_resultat
WITH 
/* LIST_REF_UDI_YEAR  */
annees AS (
    SELECT unnest(generate_series(2020, 2024)) as annee
    ),

cat AS (
    SELECT categorie FROM int__mapping_category_simple GROUP BY 1 
    ),

year_cat AS (   
    SELECT
     annee, categorie
    FROM 
    annees
    CROSS JOIN
    cat 
),

udi AS (
    SELECT
      de_partition AS year,
      inseecommune AS commune_code_insee,
      cdreseau,
    FROM
      edc_communes
    GROUP BY 
    1,2,3
),
    
LIST_REF_UDI_YEAR AS (    
SELECT DISTINCT
    annee, 
    categorie,
    commune_code_insee ,
 FROM
      udi
 FULL OUTER JOIN
       year_cat
 ON
    udi.year = year_cat.annee
),
/* END  LIST_REF_UDI_YEAR  */



    
prelevement AS (
	SELECT
	  referenceprel,
	  cdreseau,
      dateprel,
	FROM   
      edc_prelevements
),

resultats AS (
    SELECT
      referenceprel,
      cdparametresiseeaux,
      valtraduite,
      limitequal,
      CAST(regexp_extract(REPLACE(limitequal, ',', '.'), '-?\d+(\.\d+)?') AS FLOAT) AS limitequal_float,
      regexp_extract(limitequal, '[a-zA-Zµg]+/?[a-zA-Z/L]+$') AS unite,
    FROM  
        edc_resultats 
    ),

mesures_cat AS (
    SELECT
        resultats.*,
        int__mapping_category_simple.categorie
    FROM 
        resultats
    LEFT JOIN 
        int__mapping_category_simple 
    ON
        resultats.cdparametresiseeaux = int__mapping_category_simple .cdparametresiseeaux
),

 mesures_cat_communes AS (
    SELECT
        mesures_cat.*,
        prelevement.dateprel,
        udi.commune_code_insee AS commune_code_insee,
    FROM 
        mesures_cat 
    LEFT JOIN
    	prelevement
    ON
       mesures_cat.referenceprel = prelevement.referenceprel
    LEFT JOIN
        udi
    ON
    	udi.cdreseau = prelevement.cdreseau
            AND udi.year = extract( YEAR FROM prelevement.dateprel)

    ),

 mesures_cat_communes_year AS(
    SELECT 
     extract( YEAR FROM mesures_cat_communes.dateprel) as annee,
     categorie,
     commune_code_insee,
     SUM(1) AS nb_analyses,
     SUM(case
            when limitequal_float is not NULL and valtraduite >= limitequal_float then 1
            else 0
        end ) AS nb_analyses_not_ok,
     SUM(case
            when limitequal_float is not NULL and valtraduite < limitequal_float then 1
            else 0
        end ) AS nb_analyses_ok,    
    FROM
     mesures_cat_communes
    GROUP BY 
        1,2,3
    )

SELECT
    LIST_REF_UDI_YEAR.annee,
    LIST_REF_UDI_YEAR.commune_code_insee,
    LIST_REF_UDI_YEAR.categorie,
    coalesce(nb_analyses,0) AS nb_analyses,
    coalesce(nb_analyses_not_ok,0) AS nb_analyses_not_ok ,
    coalesce(nb_analyses_ok,0) AS nb_analyses_ok,
    CASE WHEN coalesce(nb_analyses,0) = 0 THEN 'Pas recherché'
         WHEN coalesce(nb_analyses,0) > 0  AND coalesce(nb_analyses_ok,0) = 0 THEN 'jamais quantifié'
         WHEN coalesce(nb_analyses,0) > 0  AND coalesce(nb_analyses_not_ok,0) > 1 THEN '> 0,5 µg/L'
         WHEN coalesce(nb_analyses,0) > 0  AND coalesce(nb_analyses_ok,0) > 0 THEN '<= 0,5 µg/L'
         ELSE 'check case when'
    END AS resultat
FROM  
 LIST_REF_UDI_YEAR
LEFT JOIN 
 mesures_cat_communes_year
ON
    LIST_REF_UDI_YEAR.annee =  mesures_cat_communes_year.annee
    AND LIST_REF_UDI_YEAR.categorie =  mesures_cat_communes_year.categorie
    AND  LIST_REF_UDI_YEAR.commune_code_insee  =  mesures_cat_communes_year.commune_code_insee
WHERE
 LIST_REF_UDI_YEAR.categorie = 'cvm'

annee,commune_code_insee,categorie,nb_analyses,nb_analyses_not_ok,nb_analyses_ok,resultat
2020,1004,cvm,11,0,11,"<= 0,5 µg/L"
2020,1058,cvm,2,0,2,"<= 0,5 µg/L"
2020,1022,cvm,2,0,2,"<= 0,5 µg/L"
2020,1431,cvm,2,0,2,"<= 0,5 µg/L"
2020,1155,cvm,3,0,3,"<= 0,5 µg/L"
2020,1332,cvm,4,0,4,"<= 0,5 µg/L"
2020,1094,cvm,8,0,8,"<= 0,5 µg/L"
2020,1175,cvm,8,0,8,"<= 0,5 µg/L"
2020,1284,cvm,8,0,8,"<= 0,5 µg/L"
2020,1375,cvm,10,0,10,"<= 0,5 µg/L"


#### mesures_cat_communes_year_cvm + Joint list annee_cat_communes list + udi /*int_edc__commune_udi*/

In [20]:
%%sql --save mesures_cat_communes_year_cvm_with_cog
WITH 
/* int_edc__commune_udi */
udi AS (
    SELECT
        inseecommune AS commune_code_insee,
        cdreseau,
        de_partition AS year,
        -- Prenons toujours le même nom de commune pour une inseecommune donnée
        MIN(nomcommune) AS nomcommune,
        -- Agréger les différentes valeurs de quartier en une liste sans doublons
        STRING_AGG(DISTINCT quartier, ', ') FILTER (WHERE quartier IS NOT NULL AND quartier != '') AS quartiers,
        -- Agréger les différentes valeurs de nomreseau en une liste sans doublons
        STRING_AGG(DISTINCT nomreseau, ', ') FILTER (WHERE nomreseau IS NOT NULL AND nomreseau != '') AS nomreseaux,
        -- Prendre la première date de début d'alimentation
        MIN(debutalim) AS debutalim
    FROM 
       edc_communes
    GROUP BY
        inseecommune,
        cdreseau,
        de_partition
),
/*
cog AS (
    SELECT
      DEP AS code_departement,
      REG AS code_region,
      COM AS commune_code_insee,
    FROM 
      cog_communes
),

int_edc__commune_udi AS (
    SELECT
      udi.*,
      cog.code_departement,
      cog.code_region
    FROM
      udi
    LEFT JOIN 
      cog
    ON 
      udi.commune_code_insee = cog.commune_code_insee
),*/
/* END int_edc__commune_udi */
    
    
/* LIST_REF_UDI_YEAR  */
annees AS (
    SELECT unnest(generate_series(2020, 2024)) as annee
    ),

cat AS (
    SELECT categorie FROM int__mapping_category_simple GROUP BY 1 
    ),

year_cat AS (   
    SELECT
     annee, categorie
    FROM 
    annees
    CROSS JOIN
    cat 
),

/*udi AS (
    SELECT
      de_partition AS year,
      inseecommune AS commune_code_insee,
      cdreseau,
    FROM
      edc_communes
    GROUP BY 
    1,2,3
), */

LIST_REF_UDI_YEAR AS (    
SELECT DISTINCT
    annee, 
    categorie,
    commune_code_insee ,
 FROM
      udi
 FULL OUTER JOIN
       year_cat
 ON
    udi.year = year_cat.annee
),
/* END  LIST_REF_UDI_YEAR  */

    
   
prelevement AS (
	SELECT
	  referenceprel,
	  cdreseau,
      dateprel,
	FROM   
      edc_prelevements
),

resultats AS (
    SELECT
      referenceprel,
      cdparametresiseeaux,
      valtraduite,
      limitequal,
      CAST(regexp_extract(REPLACE(limitequal, ',', '.'), '-?\d+(\.\d+)?') AS FLOAT) AS limitequal_float,
      regexp_extract(limitequal, '[a-zA-Zµg]+/?[a-zA-Z/L]+$') AS unite,
    FROM  
        edc_resultats 
    ),

mesures_cat AS (
    SELECT
        resultats.*,
        int__mapping_category_simple.categorie
    FROM 
        resultats
    LEFT JOIN 
        int__mapping_category_simple 
    ON
        resultats.cdparametresiseeaux = int__mapping_category_simple .cdparametresiseeaux
),
/* END mesures_cat*/    


 mesures_cat_communes AS (
    SELECT
        mesures_cat.*,
        prelevement.dateprel,
        udi.commune_code_insee AS commune_code_insee,
    FROM 
        mesures_cat 
    LEFT JOIN
    	prelevement
    ON
       mesures_cat.referenceprel = prelevement.referenceprel
    LEFT JOIN
        udi
    ON
    	udi.cdreseau = prelevement.cdreseau
        AND udi.year = extract( YEAR FROM prelevement.dateprel)
    ),


    
 mesures_cat_communes_year AS(
    SELECT 
     extract( YEAR FROM mesures_cat_communes.dateprel) as annee,
     categorie,
     commune_code_insee,
     SUM(1) AS nb_analyses,
     SUM(case
            when limitequal_float is not NULL and valtraduite >= limitequal_float then 1
            else 0
        end ) AS nb_analyses_not_ok,
     SUM(case
            when limitequal_float is not NULL and valtraduite < limitequal_float then 1
            else 0
        end ) AS nb_analyses_ok,    
    FROM
     mesures_cat_communes
    GROUP BY 
        1,2,3
    )

SELECT
    LIST_REF_UDI_YEAR.annee,
    LIST_REF_UDI_YEAR.commune_code_insee,
    LIST_REF_UDI_YEAR.categorie,
    udi.nomcommune,
    coalesce(nb_analyses,0) AS nb_analyses,
    coalesce(nb_analyses_not_ok,0) AS nb_analyses_not_ok ,
    coalesce(nb_analyses_ok,0) AS nb_analyses_ok,
    CASE WHEN coalesce(nb_analyses,0) = 0 THEN 'Pas recherché'
         WHEN coalesce(nb_analyses,0) > 0  AND coalesce(nb_analyses_ok,0) = 0 THEN 'jamais quantifié'
         WHEN coalesce(nb_analyses,0) > 0  AND coalesce(nb_analyses_not_ok,0) > 1 THEN '> 0,5 µg/L'
         WHEN coalesce(nb_analyses,0) > 0  AND coalesce(nb_analyses_ok,0) > 0 THEN '<= 0,5 µg/L'
         ELSE 'check case when'
    END AS resultat
FROM  
 LIST_REF_UDI_YEAR
LEFT JOIN 
 mesures_cat_communes_year
ON
    LIST_REF_UDI_YEAR.annee =  mesures_cat_communes_year.annee
    AND LIST_REF_UDI_YEAR.categorie =  mesures_cat_communes_year.categorie
    AND  LIST_REF_UDI_YEAR.commune_code_insee  =  mesures_cat_communes_year.commune_code_insee
LEFT JOIN 
 udi
ON
    mesures_cat_communes_year.annee =  udi.year
    AND  mesures_cat_communes_year.commune_code_insee  =  udi.commune_code_insee    
   /* AND  mesures_cat_communes_year.cdreseau  =  udi.cdreseau    */
WHERE
 LIST_REF_UDI_YEAR.categorie = 'cvm'

annee,commune_code_insee,categorie,nomcommune,nb_analyses,nb_analyses_not_ok,nb_analyses_ok,resultat
2020,1004,cvm,AMBERIEU-EN-BUGEY,11,0,11,"<= 0,5 µg/L"
2020,1431,cvm,VAUX-EN-BUGEY,2,0,2,"<= 0,5 µg/L"
2020,1058,cvm,BREGNIER-CORDON,2,0,2,"<= 0,5 µg/L"
2020,1155,cvm,EVOSGES,3,0,3,"<= 0,5 µg/L"
2020,1022,cvm,ARTEMARE,2,0,2,"<= 0,5 µg/L"
2020,1094,cvm,CHAVANNES-SUR-REYSSOUZE,8,0,8,"<= 0,5 µg/L"
2020,1332,cvm,SAINT-ANDRE-DE-BAGE,4,0,4,"<= 0,5 µg/L"
2020,1284,cvm,OZAN,8,0,8,"<= 0,5 µg/L"
2020,1175,cvm,GORREVOD,8,0,8,"<= 0,5 µg/L"
2020,1430,cvm,VARAMBON,6,0,6,"<= 0,5 µg/L"


**Faisons le JOIn par communes ET cdreseau**

In [22]:
%%sql --save mesures_cat_communes_cdreseau_year_cvm_with_cog
WITH 
/* int_edc__commune_udi */
udi AS (
    SELECT
        inseecommune AS commune_code_insee,
        cdreseau,
        de_partition AS year,
        -- Prenons toujours le même nom de commune pour une inseecommune donnée
        MIN(nomcommune) AS nomcommune,
        -- Agréger les différentes valeurs de quartier en une liste sans doublons
        STRING_AGG(DISTINCT quartier, ', ') FILTER (WHERE quartier IS NOT NULL AND quartier != '') AS quartiers,
        -- Agréger les différentes valeurs de nomreseau en une liste sans doublons
        STRING_AGG(DISTINCT nomreseau, ', ') FILTER (WHERE nomreseau IS NOT NULL AND nomreseau != '') AS nomreseaux,
        -- Prendre la première date de début d'alimentation
        MIN(debutalim) AS debutalim
    FROM 
       edc_communes
    GROUP BY
        inseecommune,
        cdreseau,
        de_partition
),
/*
cog AS (
    SELECT
      DEP AS code_departement,
      REG AS code_region,
      COM AS commune_code_insee,
    FROM 
      cog_communes
),

int_edc__commune_udi AS (
    SELECT
      udi.*,
      cog.code_departement,
      cog.code_region
    FROM
      udi
    LEFT JOIN 
      cog
    ON 
      udi.commune_code_insee = cog.commune_code_insee
),*/
/* END int_edc__commune_udi */
    
    
/* LIST_REF_UDI_YEAR  */
annees AS (
    SELECT unnest(generate_series(2020, 2024)) as annee
    ),

cat AS (
    SELECT categorie FROM int__mapping_category_simple GROUP BY 1 
    ),

year_cat AS (   
    SELECT
     annee, categorie
    FROM 
    annees
    CROSS JOIN
    cat 
),

/*udi AS (
    SELECT
      de_partition AS year,
      inseecommune AS commune_code_insee,
      cdreseau,
    FROM
      edc_communes
    GROUP BY 
    1,2,3
), */

LIST_REF_UDI_YEAR AS (    
SELECT DISTINCT
    annee, 
    categorie,
    commune_code_insee ,
 FROM
      udi
 FULL OUTER JOIN
       year_cat
 ON
    udi.year = year_cat.annee
),
/* END  LIST_REF_UDI_YEAR  */

    
   
prelevement AS (
	SELECT
	  referenceprel,
	  cdreseau,
      dateprel,
	FROM   
      edc_prelevements
),

resultats AS (
    SELECT
      referenceprel,
      cdparametresiseeaux,
      valtraduite,
      limitequal,
      CAST(regexp_extract(REPLACE(limitequal, ',', '.'), '-?\d+(\.\d+)?') AS FLOAT) AS limitequal_float,
      regexp_extract(limitequal, '[a-zA-Zµg]+/?[a-zA-Z/L]+$') AS unite,
    FROM  
        edc_resultats 
    ),

mesures_cat AS (
    SELECT
        resultats.*,
        int__mapping_category_simple.categorie
    FROM 
        resultats
    LEFT JOIN 
        int__mapping_category_simple 
    ON
        resultats.cdparametresiseeaux = int__mapping_category_simple .cdparametresiseeaux
),
/* END mesures_cat*/    


 mesures_cat_communes AS (
    SELECT
        mesures_cat.*,
        prelevement.dateprel,
        prelevement.cdreseau,
        udi.commune_code_insee AS commune_code_insee,
    FROM 
        mesures_cat 
    LEFT JOIN
    	prelevement
    ON
       mesures_cat.referenceprel = prelevement.referenceprel
    LEFT JOIN
        udi
    ON
    	udi.cdreseau = prelevement.cdreseau
        AND udi.year = extract( YEAR FROM prelevement.dateprel)
    ),


    
 mesures_cat_communes_year AS(
    SELECT 
     extract( YEAR FROM mesures_cat_communes.dateprel) as annee,
     categorie,
     commune_code_insee,
     cdreseau,
     SUM(1) AS nb_analyses,
     SUM(case
            when limitequal_float is not NULL and valtraduite >= limitequal_float then 1
            else 0
        end ) AS nb_analyses_not_ok,
     SUM(case
            when limitequal_float is not NULL and valtraduite < limitequal_float then 1
            else 0
        end ) AS nb_analyses_ok,    
    FROM
     mesures_cat_communes
    GROUP BY 
        1,2,3,4
    )

SELECT
    LIST_REF_UDI_YEAR.annee,
    LIST_REF_UDI_YEAR.commune_code_insee,
    LIST_REF_UDI_YEAR.categorie,
    udi.nomcommune,
    udi.cdreseau,
    coalesce(nb_analyses,0) AS nb_analyses,
    coalesce(nb_analyses_not_ok,0) AS nb_analyses_not_ok ,
    coalesce(nb_analyses_ok,0) AS nb_analyses_ok,
    CASE WHEN coalesce(nb_analyses,0) = 0 THEN 'Pas recherché'
         WHEN coalesce(nb_analyses,0) > 0  AND coalesce(nb_analyses_ok,0) = 0 THEN 'jamais quantifié'
         WHEN coalesce(nb_analyses,0) > 0  AND coalesce(nb_analyses_not_ok,0) > 1 THEN '> 0,5 µg/L'
         WHEN coalesce(nb_analyses,0) > 0  AND coalesce(nb_analyses_ok,0) > 0 THEN '<= 0,5 µg/L'
         ELSE 'check case when'
    END AS resultat
FROM  
 LIST_REF_UDI_YEAR
LEFT JOIN 
 mesures_cat_communes_year
ON
    LIST_REF_UDI_YEAR.annee =  mesures_cat_communes_year.annee
    AND LIST_REF_UDI_YEAR.categorie =  mesures_cat_communes_year.categorie
    AND  LIST_REF_UDI_YEAR.commune_code_insee  =  mesures_cat_communes_year.commune_code_insee
LEFT JOIN 
 udi
ON
    mesures_cat_communes_year.annee =  udi.year
    AND  mesures_cat_communes_year.commune_code_insee  =  udi.commune_code_insee    
    AND  mesures_cat_communes_year.cdreseau  =  udi.cdreseau 
WHERE
 LIST_REF_UDI_YEAR.categorie = 'cvm'

annee,commune_code_insee,categorie,nomcommune,cdreseau,nb_analyses,nb_analyses_not_ok,nb_analyses_ok,resultat
2020,83050,cvm,DRAGUIGNAN,83000152,2,0,2,"<= 0,5 µg/L"
2020,83032,cvm,CARCES,83000190,1,0,1,"<= 0,5 µg/L"
2020,83062,cvm,GARDE (LA),83000329,7,0,7,"<= 0,5 µg/L"
2020,83069,cvm,HYERES,83000383,2,0,2,"<= 0,5 µg/L"
2020,83123,cvm,SANARY-SUR-MER,83000477,5,0,5,"<= 0,5 µg/L"
2020,83059,cvm,FORCALQUEIRET,83000488,3,0,3,"<= 0,5 µg/L"
2020,83128,cvm,SILLANS-LA-CASCADE,83000711,2,0,2,"<= 0,5 µg/L"
2020,83085,cvm,MOTTE (LA),83000799,3,0,3,"<= 0,5 µg/L"
2020,83090,cvm,OLLIOULES,83000912,2,0,2,"<= 0,5 µg/L"
2020,83023,cvm,BRIGNOLES,83000947,4,0,4,"<= 0,5 µg/L"


# Check

In [23]:
%%sql
SELECT
    de_partition as year,
    COUNT(DISTINCT inseecommune)
FROM 
 edc_communes
GROUP BY 
    1 
ORDER BY 
   1

year,count(DISTINCT inseecommune)
2020,34788
2021,34833
2022,34874
2023,34852
2024,34809


In [24]:
%%sql --with mesures_cat_communes_year_resultat  
 SELECT 
    annee,
    COUNT(DISTINCT commune_code_insee)
 FROM
   mesures_cat_communes_year_resultat
GROUP BY 
    1

annee,count(DISTINCT commune_code_insee)
2022,34874
2024,34809
2020,34788
2021,34833
2023,34852


In [25]:
%%sql --with mesures_cat_communes_year 
 SELECT 
   *
 FROM
   mesures_cat_communes_year
WHERE
    commune_code_insee = '07194'
    AND categorie = 'cvm'
ORDER BY 
    annee

annee,categorie,commune_code_insee,nb_analyses,nb_analyses_not_ok,nb_analyses_ok
2020,cvm,7194,3,0,3
2021,cvm,7194,3,0,3
2022,cvm,7194,11,8,3
2023,cvm,7194,16,11,5
2024,cvm,7194,10,5,5


In [26]:
%%sql --with mesures_cat_communes_year_resultat 
 SELECT 
   *
 FROM
   mesures_cat_communes_year_resultat
WHERE
    commune_code_insee = '07194'
    AND categorie = 'cvm'
ORDER BY 
    annee

annee,commune_code_insee,categorie,nb_analyses,nb_analyses_not_ok,nb_analyses_ok,resultat
2020,7194,cvm,3,0,3,"<= 0,5 µg/L"
2021,7194,cvm,3,0,3,"<= 0,5 µg/L"
2022,7194,cvm,11,8,3,"> 0,5 µg/L"
2023,7194,cvm,16,11,5,"> 0,5 µg/L"
2024,7194,cvm,10,5,5,"> 0,5 µg/L"


In [27]:
%%sql --with mesures_cat_communes_year_cvm_with_cog 
 SELECT 
   *
 FROM
   mesures_cat_communes_year_cvm_with_cog
WHERE
    commune_code_insee = '07194'
    AND categorie = 'cvm'
ORDER BY 
    annee

annee,commune_code_insee,categorie,nomcommune,nb_analyses,nb_analyses_not_ok,nb_analyses_ok,resultat
2020,7194,cvm,ROCHESSAUVE,3,0,3,"<= 0,5 µg/L"
2020,7194,cvm,ROCHESSAUVE,3,0,3,"<= 0,5 µg/L"
2021,7194,cvm,ROCHESSAUVE,3,0,3,"<= 0,5 µg/L"
2021,7194,cvm,ROCHESSAUVE,3,0,3,"<= 0,5 µg/L"
2022,7194,cvm,ROCHESSAUVE,11,8,3,"> 0,5 µg/L"
2022,7194,cvm,ROCHESSAUVE,11,8,3,"> 0,5 µg/L"
2023,7194,cvm,ROCHESSAUVE,16,11,5,"> 0,5 µg/L"
2023,7194,cvm,ROCHESSAUVE,16,11,5,"> 0,5 µg/L"
2024,7194,cvm,ROCHESSAUVE,10,5,5,"> 0,5 µg/L"
2024,7194,cvm,ROCHESSAUVE,10,5,5,"> 0,5 µg/L"


**Il y a encore des doublon avec le dernier JOIN : plusieurs nom de commune par inseecommune car plusieurs cdreaux**

Faisons le JOIN par communes ET cdreseau : mesures_cat_communes_cdreseau_year_cvm_with_cog

In [28]:
%%sql --with mesures_cat_communes_cdreseau_year_cvm_with_cog 
 SELECT 
   *
 FROM
   mesures_cat_communes_cdreseau_year_cvm_with_cog
WHERE
    commune_code_insee = '07194'
    AND categorie = 'cvm'
ORDER BY 
    annee

annee,commune_code_insee,categorie,nomcommune,cdreseau,nb_analyses,nb_analyses_not_ok,nb_analyses_ok,resultat
2020,7194,cvm,ROCHESSAUVE,7001388,2,0,2,"<= 0,5 µg/L"
2020,7194,cvm,ROCHESSAUVE,7001617,1,0,1,"<= 0,5 µg/L"
2021,7194,cvm,ROCHESSAUVE,7001388,2,0,2,"<= 0,5 µg/L"
2021,7194,cvm,ROCHESSAUVE,7001617,1,0,1,"<= 0,5 µg/L"
2022,7194,cvm,ROCHESSAUVE,7001617,9,8,1,"> 0,5 µg/L"
2022,7194,cvm,ROCHESSAUVE,7001388,2,0,2,"<= 0,5 µg/L"
2023,7194,cvm,ROCHESSAUVE,7001388,2,0,2,"<= 0,5 µg/L"
2023,7194,cvm,ROCHESSAUVE,7001617,14,11,3,"> 0,5 µg/L"
2024,7194,cvm,ROCHESSAUVE,7001617,8,5,3,"> 0,5 µg/L"
2024,7194,cvm,ROCHESSAUVE,7001388,2,0,2,"<= 0,5 µg/L"


In [40]:
%%sql --with mesures_cat_communes_cdreseau_year_cvm_with_cog 
SELECT 
   annee,
   commune_code_insee,	
    categorie,	
    nomcommune,	
    SUM(nb_analyses) AS nb_analyses,
    SUM(nb_analyses_not_ok) AS nb_analyses_not_ok,
    SUM(nb_analyses_ok) AS nb_analyses_ok,
    (CASE WHEN SUM(nb_analyses) = 0 THEN 'Pas recherché'
         WHEN SUM(nb_analyses) > 0  AND SUM(nb_analyses_ok) = 0 THEN 'jamais quantifié'
         WHEN SUM(nb_analyses) > 0  AND SUM(nb_analyses_not_ok) > 1 THEN '> 0,5 µg/L'
         WHEN SUM(nb_analyses) > 0  AND SUM(nb_analyses_ok) > 0 THEN '<= 0,5 µg/L'
         ELSE 'check case when'
    END) AS resultat  
FROM
   mesures_cat_communes_cdreseau_year_cvm_with_cog
WHERE
    commune_code_insee = '07194'
    AND categorie = 'cvm'
GROUP BY  
   1,2,3,4
ORDER BY 
  annee

annee,commune_code_insee,categorie,nomcommune,nb_analyses,nb_analyses_not_ok,nb_analyses_ok,resultat
2020,7194,cvm,ROCHESSAUVE,3,0,3,"<= 0,5 µg/L"
2021,7194,cvm,ROCHESSAUVE,3,0,3,"<= 0,5 µg/L"
2022,7194,cvm,ROCHESSAUVE,11,8,3,"> 0,5 µg/L"
2023,7194,cvm,ROCHESSAUVE,16,11,5,"> 0,5 µg/L"
2024,7194,cvm,ROCHESSAUVE,10,5,5,"> 0,5 µg/L"


**Il faudra voir aussi voir comment ajouter les informations COG UTILES**