In [None]:
import sqlite3 as sql3
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from pandas import DataFrame

%load_ext sql

In [None]:
# Open a connection to the SQLite database file; the relative path is resolved
# against the current working directory.
conn = sql3.connect('data/prtr_en.db')

In [None]:
# Execute an empty SQL string on the open connection and keep the returned cursor.
# NOTE(review): the empty statement looks like a placeholder -- confirm whether a
# real query belongs here or whether `conn.cursor()` was intended.
cursor = conn.execute('')

### precondition 1 - to retrieve data for air pollutants, the compartment must be set to 'Air'

In [None]:
# example:

In [None]:
-- NOx releases to air in 2018 (annual_load converted from kg to kt) for
-- facilities that have an activity in the chemical industry sector.
-- The explicit join condition ties each release to the activities of the same
-- facility; a bare "FROM releases, activities" pairs every release row with
-- every activity row of every facility.
SELECT
    substance_name,
    annual_load/1000000 AS "annual_load/1000000"
FROM releases
    JOIN activities ON releases.facility_ID = activities.facility_ID
WHERE substance_name = 'Nitrogen oxides (NOx/NO2)'
    AND releases.year = 2018
    AND business_sector = 'Chemical industry'
    -- string literals take single quotes; a double-quoted token is looked up as an identifier first
    AND compartment = 'Air'
ORDER BY annual_load

### precondition 2 - facilities.year == releases.year for all joins on facilities.id

In [None]:
# Precondition check: the year stored in the table facilities is expected to equal
# the year stored in the table releases for every pair of rows joined on facilities.id.
# The two queries below differ only in the table the year column is taken from
# (facilities.YEAR vs. releases.YEAR), so they should return the same results.

SELECT NAME, facilities.YEAR
FROM facilities
    JOIN releases ON facilities.id = releases.facility_ID
    JOIN activities ON releases.facility_ID = activities.facility_ID
ORDER BY name


SELECT NAME, releases.YEAR
FROM facilities
    JOIN releases ON facilities.id = releases.facility_ID
    JOIN activities ON releases.facility_ID = activities.facility_ID
ORDER BY name

### precondition 3 - all substance names are correctly registered, without misspellings

In [None]:
# Substance names as they are expected to be registered in the database
# (precondition 3); filters on substance_name compare against these exact strings.
'''
substance_names:

Nitrogen oxides (NOx/NO2)
Non-methane volatile organic compounds (NMVOC)
Sulphur oxides (SOx/SO2)
Ammonia (NH3)
Particulate matter (PM2.5)
Particulate matter (PM10)
Carbon monoxide (CO)
Lead and compounds (as Pb)
Cadmium and compounds (as Cd)
Mercury and compounds (as Hg)
PCDD + PCDF (dioxins + furans)(as Teq)
Polycyclic aromatic hydrocarbons (PAHs)
Hexachlorobenzene (HCB)
Polychlorinated biphenyls (PCBs)
'''

### precondition 4 - values of columns with data type TEXT must be enclosed in single quotation marks ('') in data queries

In [None]:
# example: values compared against TEXT columns are written as string literals
# in single quotes

business_sector = 'Chemical industry'
name = 'afu GmbH'
substance_name = 'Nitrogen oxides (NOx/NO2)'

### precondition 5 - all annual release loads are given in kg

In [None]:
# annual_load is given in kg; dividing by 1000 converts it to tonnes (t):

annual_load/1000

# dividing by 1000000 converts it to kilotonnes (kt):

annual_load/1000000

### precondition 6 - GROUP BY facilities.id and filter on main_activity = 1

In [None]:
# When selecting the LPS and their associated data, the result must be grouped by facilities.id
# and restricted to main_activity = 1; otherwise the same LPS appears more than once in the output.

In [None]:
%%sql sqlite:///data/prtr_en.db sql_query_result << 

-- One row per facility (LPS) for the reporting year supplied through the year
-- bind parameter; the result is stored in sql_query_result.
-- Only releases to the compartment Air are summed, and only the activity
-- flagged with main_activity = 1 is joined, so a facility is not repeated once
-- per activity.
-- annual_load is given in kg and is converted per pollutant to the unit named
-- in the column alias (kt, t, g or kg).
SELECT 
    facilities.name AS LPS, 
    activities.business_sector AS business_sector, 
    activities.prtr_key AS 'PRTR activity',
    nace_code || ': ' || nace_text AS 'NACE',
    -- NOTE(review) GNFR currently repeats prtr_key - confirm whether a mapping is still missing
    activities.prtr_key AS 'GNFR', 
    facilities.administrative_number AS 'E-PRTR/PRTR Facility ID', 
    -- placeholder value, the height class is not derived yet
    'TODO' AS 'Height class',
    facilities.wgs84_x AS 'Longitude (deg)', 
    facilities.wgs84_y AS 'Latitude (deg)',

    -- main pollutants and CO, kg to kt
    SUM(releases.annual_load/1000000) FILTER (WHERE substance_name = 'Nitrogen oxides (NOx/NO2)') AS "NOx (as NO2) (kt)",
    SUM(releases.annual_load/1000000) FILTER (WHERE substance_name = 'Non-methane volatile organic compounds (NMVOC)') 
    AS "NMVOC (kt)",
    SUM(releases.annual_load/1000000) FILTER (WHERE substance_name = 'Sulphur oxides (SOx/SO2)') AS "SOx (as SO2) (kt)",
    SUM(releases.annual_load/1000000) FILTER (WHERE substance_name = 'Ammonia (NH3)') AS "NH3 (kt)",
    SUM(releases.annual_load/1000000) FILTER (WHERE substance_name = 'Particulate matter (PM2.5)') AS "PM2.5 (kt)",
    SUM(releases.annual_load/1000000) FILTER (WHERE substance_name = 'Particulate matter (PM10)') AS "PM10 (kt)",
    SUM(releases.annual_load/1000000) FILTER (WHERE substance_name = 'Carbon monoxide (CO)') AS "CO (kt)",
    -- heavy metals, kg to t
    SUM(releases.annual_load/1000) FILTER (WHERE substance_name = 'Lead and compounds (as Pb)') AS "Pb (t)",
    SUM(releases.annual_load/1000) FILTER (WHERE substance_name = 'Cadmium and compounds (as Cd)') AS "Cd (t)",
    SUM(releases.annual_load/1000) FILTER (WHERE substance_name = 'Mercury and compounds (as Hg)') AS "Hg (t)",
    -- dioxins and furans, kg to g
    SUM(releases.annual_load*1000) FILTER (WHERE substance_name = 'PCDD + PCDF (dioxins + furans)(as Teq)') 
    AS "PCDD/ PCDF (dioxins/ furans)(g I-Teq)",
    SUM(releases.annual_load/1000) FILTER (WHERE substance_name = 'Polycyclic aromatic hydrocarbons (PAHs)') AS "PAHs (t)",
    -- HCB and PCBs stay in kg
    SUM(releases.annual_load) FILTER (WHERE substance_name = 'Hexachlorobenzene (HCB)') AS "HCB (kg)",
    -- the registered substance name carries the (PCBs) suffix; without it the filter matches no row
    SUM(releases.annual_load) FILTER (WHERE substance_name = 'Polychlorinated biphenyls (PCBs)') AS "PCBs (kg)"

FROM facilities
    JOIN releases ON facilities.id = releases.facility_ID
    JOIN activities ON releases.facility_ID = activities.facility_ID

WHERE facilities.year = :year AND releases.compartment = 'Air' AND activities.main_activity = 1
GROUP BY facilities.id
ORDER BY facilities.name

### precondition 7 - use '.' (not ',') as the decimal separator when filtering on columns with datatype REAL

In [None]:
# REAL columns such as wgs84_x, wgs84_y and annual_load take numeric literals
# written with a decimal point, never a decimal comma.
# example: facilities whose wgs84_x value lies between 6.0 and 6.5 (BETWEEN includes both bounds)

SELECT wgs84_x, name
FROM facilities
WHERE wgs84_x BETWEEN 6.0 AND 6.5