# Calcul du nombre de prélèvements CVM non conforme par commune et par année

L'objectif de ce notebook est de partir de la liste de communes cog_communes, et pour chaque commune et chaque année, calculer le nombre de prélèvements non conformes pour le CVM.

Il y aura plusieurs aggrégations à faire :

- commune (inseecommune) peut avoir plusieurs UDIs (cdreseau) **ET** un UDI peut avoir plusieurs communes (inseecommune) 
- un prélèvement (referenceprel) peut être rattaché à plusieurs UDIs (cdreseau)
- un prélèvement (referenceprel) peut être composé de plusieurs paramètres (cdparametresiseeaux) ; mais dans le cas du CVM, il y a un seul paramètre selon la catégorisation de Pauline, donc c'est plus simple




In [1]:
%load_ext sql
%sql duckdb:///../../database/data.duckdb
%config SqlMagic.displaylimit = 10

In [4]:
%%sql
SELECT * FROM int__resultats_udi_communes
LIMIT 5

referenceprel,cdparametresiseeaux,valtraduite,limitequal,de_partition,limitequal_float,unite,categorie,cdreseau,inseecommune,datetimeprel
400121459,CHINE,0.0,"<=0,1 µg/L",2020,0.1000000014901161,µg/L,pesticides,4000129,4058,2020-04-09 11:49:00
400121459,CHINOME,0.0,"<=0,1 µg/L",2020,0.1000000014901161,µg/L,pesticides,4000129,4058,2020-04-09 11:49:00
400121459,CHLORB,0.0,"<=0,1 µg/L",2020,0.1000000014901161,µg/L,pesticides,4000129,4058,2020-04-09 11:49:00
400121459,CHLPM,0.0,"<=0,1 µg/L",2020,0.1000000014901161,µg/L,pesticides,4000129,4058,2020-04-09 11:49:00
400121459,CHLX,0.0,"<=0,1 µg/L",2020,0.1000000014901161,µg/L,pesticides,4000129,4058,2020-04-09 11:49:00


## Faire la liste complètes des communes (pour chaque catégorie et années)

In [5]:
%%sql --save list_communes
WITH
annees AS (
    SELECT unnest(generate_series(2020, 2024)) as annee
    ),

cat AS (
    SELECT categorie FROM int__mapping_category_simple GROUP BY 1 
    ),

year_cat AS (   
    SELECT
     annee, categorie
    FROM 
    annees
    CROSS JOIN
    cat 
)
    
SELECT DISTINCT
    year_cat.annee AS annee, 
    year_cat.categorie AS categorie,
    inseecommune ,
 FROM
      int__lien_commune_cdreseau
 FULL OUTER JOIN
       year_cat
 ON
    int__lien_commune_cdreseau.de_partition = year_cat.annee

annee,categorie,inseecommune
2020,nitrite,56090
2020,nitrite,59026
2020,nitrite,60100
2020,nitrite,60241
2020,nitrite,60311
2020,nitrite,61344
2020,nitrite,61450
2020,nitrite,62180
2020,nitrite,63010
2020,nitrite,63134


In [6]:
%%sql --with list_communes
SELECT
    *
FROM
   list_communes
WHERE
    inseecommune = '07194'
    AND categorie = 'cvm'

annee,categorie,inseecommune
2022,cvm,7194
2024,cvm,7194
2021,cvm,7194
2023,cvm,7194
2020,cvm,7194


## Test cog pour le final

In [7]:
%%sql 
SELECT 
    *
FROM 
  cog_communes
WHERE
    TYPECOM = 'COM'

TYPECOM,COM,REG,DEP,CTCD,ARR,TNCC,NCC,NCCENR,LIBELLE,CAN,COMPARENT,de_partition,de_ingestion_date,de_dataset_datetime
COM,1001,84,1,01D,12,5,ABERGEMENT CLEMENCIAT,Abergement-Clémenciat,L'Abergement-Clémenciat,108,,2024,2025-03-16,20240220
COM,1002,84,1,01D,11,5,ABERGEMENT DE VAREY,Abergement-de-Varey,L'Abergement-de-Varey,101,,2024,2025-03-16,20240220
COM,1004,84,1,01D,11,1,AMBERIEU EN BUGEY,Ambérieu-en-Bugey,Ambérieu-en-Bugey,101,,2024,2025-03-16,20240220
COM,1005,84,1,01D,12,1,AMBERIEUX EN DOMBES,Ambérieux-en-Dombes,Ambérieux-en-Dombes,122,,2024,2025-03-16,20240220
COM,1006,84,1,01D,11,1,AMBLEON,Ambléon,Ambléon,104,,2024,2025-03-16,20240220
COM,1007,84,1,01D,11,1,AMBRONAY,Ambronay,Ambronay,101,,2024,2025-03-16,20240220
COM,1008,84,1,01D,11,1,AMBUTRIX,Ambutrix,Ambutrix,101,,2024,2025-03-16,20240220
COM,1009,84,1,01D,11,1,ANDERT ET CONDON,Andert-et-Condon,Andert-et-Condon,104,,2024,2025-03-16,20240220
COM,1010,84,1,01D,11,1,ANGLEFORT,Anglefort,Anglefort,110,,2024,2025-03-16,20240220
COM,1011,84,1,01D,14,1,APREMONT,Apremont,Apremont,114,,2024,2025-03-16,20240220


In [8]:
%%sql 
WITH
cog AS (
    SELECT 
      DEP AS code_departement,
      REG AS code_region,
      COM AS inseecommune,
      LIBELLE AS name_commune,
    FROM 
      cog_communes
    WHERE
        TYPECOM = 'COM'
)
    
SELECT
        inseecommune,
        COUNT(code_departement) AS nb_code_departement,
        COUNT(code_region) AS nb_code_region,
        COUNT(name_commune) AS nb_name_commune,
FROM
    cog
GROUP BY 
       inseecommune
HAVING 
    nb_code_departement > 1 OR nb_code_region >1 OR nb_name_commune>1

inseecommune,nb_code_departement,nb_code_region,nb_name_commune


## Les résultats par communes - Bilan annuel

###  SQL resultats_ALL_communes : Join avec list_communes_uid pour avoir la liste complète

###  Join avec cog pour avoir le nom des communes

In [18]:
%%sql --save resultats_ALL_communes_name
WITH 
/* list_communes_uid  */
annees AS (
    SELECT unnest(generate_series(2020, 2024)) as annee
    ),

cat AS (
    SELECT categorie FROM int__mapping_category_simple GROUP BY 1 
    ),

year_cat AS (   
    SELECT
     annee, categorie
    FROM 
    annees
    CROSS JOIN
    cat 
),

list_communes_uid AS (    
SELECT DISTINCT
    year_cat.annee AS annee, 
    year_cat.categorie AS categorie,
    inseecommune ,
 FROM
      int__lien_commune_cdreseau
 FULL OUTER JOIN
       year_cat
 ON
    int__lien_commune_cdreseau.de_partition = year_cat.annee
),
/* END  list_communes_uid  */


/* resultats_udi_communes */
    communes_year AS (
    SELECT 
     de_partition as annee,
     categorie,
     inseecommune,
     SUM(1) AS nb_analyses,
     SUM(case
            when limitequal_float is not NULL and valtraduite >= limitequal_float then 1
            else 0
        end ) AS nb_analyses_not_ok,
     SUM(case
            when limitequal_float is not NULL and valtraduite < limitequal_float then 1
            else 0
        end ) AS nb_analyses_ok,    
    FROM
      int__resultats_udi_communes
    GROUP BY 
        1,2,3
),

resultats_communes AS ( 
    SELECT
    annee,
    inseecommune,
    categorie,
    coalesce(nb_analyses,0) AS nb_analyses,
    coalesce(nb_analyses_not_ok,0) AS nb_analyses_not_ok ,
    coalesce(nb_analyses_ok,0) AS nb_analyses_ok,
    CASE WHEN coalesce(nb_analyses,0) = 0 THEN 'Pas recherché'
         WHEN coalesce(nb_analyses,0) > 0  AND coalesce(nb_analyses_ok,0) = 0 THEN 'jamais quantifié'
         WHEN categorie='cvm' AND coalesce(nb_analyses,0) > 0  AND coalesce(nb_analyses_not_ok,0) > 1 THEN '> 0,5 µg/L'
         WHEN categorie='cvm' AND coalesce(nb_analyses,0) > 0  AND coalesce(nb_analyses_ok,0) > 0 THEN '<= 0,5 µg/L'
         ELSE 'Not CVM - other'
    END AS resultat
FROM  
    communes_year
ORDER BY 
    annee
    ),
/* resultats_udi_communes END */


    
/* resultats_ALL_communes */
resultats_ALL_communes AS (
SELECT
    list_communes_uid.annee,
    list_communes_uid.inseecommune,
    list_communes_uid.categorie,
    coalesce(nb_analyses,0) AS nb_analyses,
    coalesce(nb_analyses_not_ok,0) AS nb_analyses_not_ok ,
    coalesce(nb_analyses_ok,0) AS nb_analyses_ok,
    coalesce(resultat,'Pas recherché') AS resultat
FROM  
 list_communes_uid
LEFT JOIN 
 resultats_communes
ON
    list_communes_uid.annee =  resultats_communes.annee
    AND list_communes_uid.categorie =  resultats_communes.categorie
    AND  list_communes_uid.inseecommune  =  resultats_communes.inseecommune
),
/* resultats_ALL_communes END */

cog AS (
    SELECT 
      DEP AS code_departement,
      REG AS code_region,
      COM AS inseecommune,
      LIBELLE AS name_commune,
    FROM 
      cog_communes
    WHERE
        TYPECOM = 'COM'
)
    
SELECT
  resultats_ALL_communes.*,
  code_departement,
  code_region,
  name_commune,
FROM
  resultats_ALL_communes
LEFT JOIN  
  cog
ON 
  resultats_ALL_communes.inseecommune = cog.inseecommune

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

annee,inseecommune,categorie,nb_analyses,nb_analyses_not_ok,nb_analyses_ok,resultat,code_departement,code_region,name_commune
2020,56090,nitrite,246,0,246,Not CVM - other,56,53,Inzinzac-Lochrist
2020,59026,nitrite,14,0,14,Not CVM - other,59,32,Aubigny-au-Bac
2020,60100,nitrite,16,0,16,Not CVM - other,60,32,Brasseuse
2020,60241,nitrite,22,0,22,Not CVM - other,60,32,Fontaine-Chaalis
2020,60311,nitrite,6,0,6,Not CVM - other,60,32,La Hérelle
2020,61344,nitrite,44,0,44,Not CVM - other,61,28,Rânes
2020,61450,nitrite,32,0,32,Not CVM - other,61,28,Saint-Quentin-de-Blavou
2020,62180,nitrite,7,0,7,Not CVM - other,62,32,Brias
2020,63010,nitrite,15,0,15,Not CVM - other,63,84,Arlanc
2020,63134,nitrite,7,0,7,Not CVM - other,63,84,Dauzat-sur-Vodable


In [19]:
%%sql --with resultats_ALL_communes_name
SELECT 
    *
FROM
    resultats_ALL_communes_name
WHERE
     categorie = 'cvm'

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

annee,inseecommune,categorie,nb_analyses,nb_analyses_not_ok,nb_analyses_ok,resultat,code_departement,code_region,name_commune
2020,54018,cvm,7,0,7,"<= 0,5 µg/L",54,44,Anoux
2020,54027,cvm,2,0,2,"<= 0,5 µg/L",54,44,Atton
2020,54054,cvm,2,0,2,"<= 0,5 µg/L",54,44,Bayon
2020,54071,cvm,3,0,3,"<= 0,5 µg/L",54,44,Bezange-la-Grande
2020,54113,cvm,4,0,4,"<= 0,5 µg/L",54,44,Champenoux
2020,54153,cvm,3,0,3,"<= 0,5 µg/L",54,44,Dampvitoux
2020,54154,cvm,1,0,1,"<= 0,5 µg/L",54,44,Deneuvre
2020,54164,cvm,1,0,1,"<= 0,5 µg/L",54,44,Dommarie-Eulmont
2020,54175,cvm,3,0,3,"<= 0,5 µg/L",54,44,Einvaux
2020,54183,cvm,3,0,3,"<= 0,5 µg/L",54,44,Essey-la-Côte


# Check

In [20]:
%%sql
SELECT
    de_partition as year,
    COUNT(DISTINCT inseecommune) AS nb_inseecommune
FROM 
 edc_communes
GROUP BY 
    1 
ORDER BY 
   1

year,nb_inseecommune
2020,34788
2021,34833
2022,34874
2023,34852
2024,34809


In [22]:
%%sql --with resultats_ALL_communes_name  
 SELECT 
    annee,
    COUNT(DISTINCT inseecommune) AS nb_inseecommune
 FROM
   resultats_ALL_communes_name
GROUP BY 
    1
ORDER BY 
   1

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

annee,nb_inseecommune
2020,34788
2021,34833
2022,34874
2023,34852
2024,34809


#### commune_code_insee = '07194'

In [33]:
%%sql
 SELECT 
   de_partition, 
   COUNT(*)
 FROM
   int__resultats_udi_communes
WHERE
    inseecommune = '07194'
    AND categorie = 'cvm'
GROUP BY 
    de_partition
ORDER BY 
    de_partition

de_partition,count_star()
2020,3
2021,3
2022,11
2023,16
2024,10


In [25]:
%%sql --with resultats_ALL_communes_name 
 SELECT 
   *
 FROM
   resultats_ALL_communes_name
WHERE
    inseecommune = '07194'
    AND categorie = 'cvm'
ORDER BY 
    annee

annee,inseecommune,categorie,nb_analyses,nb_analyses_not_ok,nb_analyses_ok,resultat,code_departement,code_region,name_commune
2020,7194,cvm,3,0,3,"<= 0,5 µg/L",7,84,Rochessauve
2021,7194,cvm,3,0,3,"<= 0,5 µg/L",7,84,Rochessauve
2022,7194,cvm,11,8,3,"> 0,5 µg/L",7,84,Rochessauve
2023,7194,cvm,16,11,5,"> 0,5 µg/L",7,84,Rochessauve
2024,7194,cvm,10,5,5,"> 0,5 µg/L",7,84,Rochessauve


In [26]:
%%sql --with resultats_ALL_communes_name 
 SELECT 
   resultat, MIN(inseecommune)
 FROM
   resultats_ALL_communes_name
WHERE 
    categorie = 'cvm'
GROUP BY 1

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

resultat,min(inseecommune)
"> 0,5 µg/L",4018
Pas recherché,1002
"<= 0,5 µg/L",1001
jamais quantifié,8031


#### commune_code_insee = '01002'

In [27]:
%%sql 
 SELECT 
   *
 FROM
   int__resultats_udi_communes
WHERE
    inseecommune = '01002'
    AND categorie = 'cvm'
ORDER BY 
    de_partition

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

referenceprel,cdparametresiseeaux,valtraduite,limitequal,de_partition,limitequal_float,unite,categorie,cdreseau,inseecommune,datetimeprel
100130490,CLVYL,0.0,<=0.5 µg/L,2022,0.5,µg/L,cvm,1000369,1002,2022-04-26 09:56:00
100139937,CLVYL,0.0,<=0.5 µg/L,2024,0.5,µg/L,cvm,1000369,1002,2024-03-26 15:10:00


In [28]:
%%sql --with resultats_ALL_communes_name 
 SELECT 
   *
 FROM
   resultats_ALL_communes_name
WHERE
    inseecommune = '01002'
    AND categorie = 'cvm'
ORDER BY 
    annee

annee,inseecommune,categorie,nb_analyses,nb_analyses_not_ok,nb_analyses_ok,resultat,code_departement,code_region,name_commune
2020,1002,cvm,0,0,0,Pas recherché,1,84,L'Abergement-de-Varey
2021,1002,cvm,0,0,0,Pas recherché,1,84,L'Abergement-de-Varey
2022,1002,cvm,1,0,1,"<= 0,5 µg/L",1,84,L'Abergement-de-Varey
2023,1002,cvm,0,0,0,Pas recherché,1,84,L'Abergement-de-Varey
2024,1002,cvm,1,0,1,"<= 0,5 µg/L",1,84,L'Abergement-de-Varey


#### commune_code_insee = '08031'

In [29]:
%%sql 
 SELECT 
   *
 FROM
   int__resultats_udi_communes
WHERE
    inseecommune = '08031'
    AND categorie = 'cvm'
ORDER BY 
    de_partition

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

referenceprel,cdparametresiseeaux,valtraduite,limitequal,de_partition,limitequal_float,unite,categorie,cdreseau,inseecommune,datetimeprel
800092855,CLVYL,0.0,<=0.5 µg/L,2020,0.5,µg/L,cvm,8000543,8031,2020-12-17 12:45:00
800092483,CLVYL,0.7,<=0.5 µg/L,2020,0.5,µg/L,cvm,8000543,8031,2020-11-17 12:44:00
800094118,CLVYL,0.0,<=0.5 µg/L,2021,0.5,µg/L,cvm,8000543,8031,2021-05-27 09:19:00
800102498,CLVYL,0.0,<=0.5 µg/L,2023,0.5,µg/L,cvm,8000543,8031,2023-09-22 14:14:00
800106109,CLVYL,0.9,<=0.5 µg/L,2024,0.5,µg/L,cvm,8000543,8031,2024-09-24 12:24:00
800105054,CLVYL,0.8,<=0.5 µg/L,2024,0.5,µg/L,cvm,8000543,8031,2024-06-10 12:21:00


In [30]:
%%sql --with resultats_ALL_communes_name 
 SELECT 
   *
 FROM
   resultats_ALL_communes_name
WHERE
    inseecommune = '08031'
    AND categorie = 'cvm'
ORDER BY 
    annee

annee,inseecommune,categorie,nb_analyses,nb_analyses_not_ok,nb_analyses_ok,resultat,code_departement,code_region,name_commune
2020,8031,cvm,2,1,1,"<= 0,5 µg/L",8,44,Aure
2021,8031,cvm,1,0,1,"<= 0,5 µg/L",8,44,Aure
2022,8031,cvm,0,0,0,Pas recherché,8,44,Aure
2023,8031,cvm,1,0,1,"<= 0,5 µg/L",8,44,Aure
2024,8031,cvm,2,2,0,jamais quantifié,8,44,Aure


# Final : ana__resultats_communes

In [10]:
%%sql
WITH
annees AS (
    SELECT unnest(generate_series(2020, 2024)) AS annee
),

cat AS (
    SELECT DISTINCT categorie
    FROM
        int__mapping_category_simple
),

year_cat AS (
    SELECT
        annees.annee,
        cat.categorie
    FROM
        annees
    CROSS JOIN
        cat
),

list_communes_uid AS (
    SELECT DISTINCT
        year_cat.annee,
        year_cat.categorie,
        com.inseecommune
    FROM
        int__lien_commune_cdreseau AS com
    FULL OUTER JOIN
        year_cat
        ON
            com.de_partition = year_cat.annee
),

int_communes_year AS (
    SELECT
        de_partition AS annee,
        categorie,
        inseecommune,
        sum(1) AS nb_analyses,
        sum(CASE
            WHEN
                limitequal_float IS NOT NULL AND valtraduite >= limitequal_float
                THEN 1
            ELSE 0
        END) AS nb_analyses_not_ok,
        sum(CASE
            WHEN
                limitequal_float IS NOT NULL AND valtraduite < limitequal_float
                THEN 1
            ELSE 0
        END) AS nb_analyses_ok
    FROM
       int__resultats_udi_communes
    GROUP BY
        annee,
        categorie,
        inseecommune
),

int__resultats_communes AS (
    SELECT
        annee,
        inseecommune,
        categorie,
        coalesce(nb_analyses, 0) AS nb_analyses,
        coalesce(nb_analyses_not_ok, 0) AS nb_analyses_not_ok,
        coalesce(nb_analyses_ok, 0) AS nb_analyses_ok,
        CASE
            WHEN coalesce(nb_analyses, 0) = 0 THEN 'Pas recherché'
            WHEN
                coalesce(nb_analyses, 0) > 0 AND coalesce(nb_analyses_ok, 0) = 0
                THEN 'jamais quantifié'
            WHEN
                categorie = 'cvm'
                AND coalesce(nb_analyses, 0) > 0
                AND coalesce(nb_analyses_not_ok, 0) > 1
                THEN '> 0,5 µg/L'
            WHEN
                categorie = 'cvm'
                AND coalesce(nb_analyses, 0) > 0
                AND coalesce(nb_analyses_ok, 0) > 0
                THEN '<= 0,5 µg/L'
            ELSE 'Not CVM - other'
        END AS resultat
    FROM
        int_communes_year
),

int__resultats_all_communes AS (
    SELECT
        list_communes_uid.annee,
        list_communes_uid.inseecommune,
        list_communes_uid.categorie,
        coalesce(int__resultats_communes.nb_analyses, 0) AS nb_analyses,
        coalesce(int__resultats_communes.nb_analyses_not_ok, 0)
            AS nb_analyses_not_ok,
        coalesce(int__resultats_communes.nb_analyses_ok, 0) AS nb_analyses_ok,
        coalesce(int__resultats_communes.resultat, 'Pas recherché') AS resultat
    FROM
        list_communes_uid
    LEFT JOIN
        int__resultats_communes
        ON
            list_communes_uid.annee = int__resultats_communes.annee
            AND list_communes_uid.categorie = int__resultats_communes.categorie
            AND list_communes_uid.inseecommune
            = int__resultats_communes.inseecommune
),

cog AS (
    SELECT
        dep AS code_departement,
        reg AS code_region,
        com AS inseecommune,
        libelle AS commune_nom
    FROM
        stg_communes__cog
    WHERE
        typecom = 'COM'
)

SELECT
    int__resultats_all_communes.inseecommune,
    cog.commune_nom,
    'bilan annuel' AS periode,
    int__resultats_all_communes.categorie,
    int__resultats_all_communes.resultat,
    CAST(CONCAT(int__resultats_all_communes.annee, '-01-01') AS DATE) AS date_prvl
FROM
    int__resultats_all_communes
LEFT JOIN
    cog
    ON
        int__resultats_all_communes.inseecommune = cog.inseecommune


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

inseecommune,commune_nom,periode,categorie,resultat,date_prvl
54488,Saint-Sauveur,bilan annuel,nitrite,Not CVM - other,2020-01-01
56248,Surzur,bilan annuel,nitrite,Not CVM - other,2020-01-01
57030,Arry,bilan annuel,nitrite,Not CVM - other,2020-01-01
57305,Havange,bilan annuel,nitrite,Not CVM - other,2020-01-01
59097,Boursies,bilan annuel,nitrite,Not CVM - other,2020-01-01
59285,Haspres,bilan annuel,nitrite,Not CVM - other,2020-01-01
62012,Agnières,bilan annuel,nitrite,Not CVM - other,2020-01-01
62360,Fréthun,bilan annuel,nitrite,Not CVM - other,2020-01-01
62372,Givenchy-le-Noble,bilan annuel,nitrite,Not CVM - other,2020-01-01
62670,Preures,bilan annuel,nitrite,Not CVM - other,2020-01-01
