In [None]:
from pathlib import Path

# import DataFrame from pandas in order to convert the sql query in a DataFrame
from pandas import DataFrame

In [None]:
# Load the ipython-sql extension with the %load_ext IPython magic
%load_ext sql

In [None]:
# Reporting year to extract; the SQL query filters facilities on it via the :year bind parameter.
year = "2018"

In [None]:
%%sql sqlite:///data/prtr_en.db sql_query_result << 

SELECT
    -- Facility description columns. GNFR and Height class are still placeholders.
    facilities.name AS LPS,
    activities.business_sector AS business_sector,
    activities.prtr_key AS "PRTR activity",
    nace_code || ': ' || nace_text AS "NACE",
    'TODO' AS "GNFR",
    facilities.administrative_number AS "E-PRTR/PRTR Facility ID",
    'TODO' AS "Height class",
    facilities.wgs84_x AS "Longitude (deg)",
    facilities.wgs84_y AS "Latitude (deg)",

    -- One summed column per pollutant. annual_load is used unscaled for the (kg)
    -- columns, so it is taken to be in kg and rescaled to kt, t or g elsewhere.
    -- The divisors are REAL literals so that loads stored as integers are not
    -- truncated to zero by integer division.
    SUM(releases.annual_load / 1000000.0) FILTER (WHERE substance_name = 'Nitrogen oxides (NOx/NO2)')
        AS "NOx (as NO2) (kt)",
    SUM(releases.annual_load / 1000000.0) FILTER (WHERE substance_name = 'Non-methane volatile organic compounds (NMVOC)')
        AS "NMVOC (kt)",
    SUM(releases.annual_load / 1000000.0) FILTER (WHERE substance_name = 'Sulphur oxides (SOx/SO2)')
        AS "SOx (as SO2) (kt)",
    SUM(releases.annual_load / 1000000.0) FILTER (WHERE substance_name = 'Ammonia (NH3)')
        AS "NH3 (kt)",
    SUM(releases.annual_load / 1000000.0) FILTER (WHERE substance_name = 'Particulate matter (PM2.5)')
        AS "PM2.5 (kt)",
    SUM(releases.annual_load / 1000000.0) FILTER (WHERE substance_name = 'Particulate matter (PM10)')
        AS "PM10 (kt)",
    SUM(releases.annual_load / 1000000.0) FILTER (WHERE substance_name = 'Carbon monoxide (CO)')
        AS "CO (kt)",
    SUM(releases.annual_load / 1000.0) FILTER (WHERE substance_name = 'Lead and compounds (as Pb)')
        AS "Pb (t)",
    SUM(releases.annual_load / 1000.0) FILTER (WHERE substance_name = 'Cadmium and compounds (as Cd)')
        AS "Cd (t)",
    SUM(releases.annual_load / 1000.0) FILTER (WHERE substance_name = 'Mercury and compounds (as Hg)')
        AS "Hg (t)",
    SUM(releases.annual_load * 1000) FILTER (WHERE substance_name = 'PCDD + PCDF (dioxins + furans)(as Teq)')
        AS "PCDD/ PCDF (dioxins/ furans)(g I-Teq)",
    SUM(releases.annual_load / 1000.0) FILTER (WHERE substance_name = 'Polycyclic aromatic hydrocarbons (PAHs)')
        AS "PAHs (t)",
    SUM(releases.annual_load) FILTER (WHERE substance_name = 'Hexachlorobenzene (HCB)')
        AS "HCB (kg)",
    SUM(releases.annual_load) FILTER (WHERE substance_name = 'Polychlorinated biphenyls')
        AS "PCBs (kg)"

FROM facilities
    JOIN releases ON facilities.id = releases.facility_ID
    JOIN activities ON releases.facility_ID = activities.facility_ID

-- Air releases only, joined to the main activity of each facility, for the
-- year bound from the notebook variable named year.
WHERE facilities.year = :year
    AND releases.compartment = 'Air'
    AND activities.main_activity = 1

-- NOTE(review) assumes one main activity row per facility. With several such
-- rows each release would be summed once per activity row. Confirm against the data.
GROUP BY facilities.id
ORDER BY facilities.name

In [None]:
# Turn the ipython-sql result set into a pandas DataFrame ...
data = sql_query_result.DataFrame()
# ... then replace missing values (e.g. pollutant sums that came back NULL)
# with empty strings.
data = data.fillna("")
# Bare last expression so the notebook renders the table.
data

In [None]:
# Columns written to the CLRTAP LPS file, in output order. The query's
# 'business_sector', 'PRTR activity' and 'NACE' columns are not exported.
export_columns = [
    'LPS',
    'GNFR',
    'E-PRTR/PRTR Facility ID',
    'Height class',
    'Longitude (deg)',
    'Latitude (deg)',
    'NOx (as NO2) (kt)',
    'NMVOC (kt)',
    'SOx (as SO2) (kt)',
    'NH3 (kt)',
    'PM2.5 (kt)',
    'PM10 (kt)',
    'CO (kt)',
    'Pb (t)',
    'Cd (t)',
    'Hg (t)',
    'PCDD/ PCDF (dioxins/ furans)(g I-Teq)',
    'PAHs (t)',
    'HCB (kg)',
    'PCBs (kg)',
]

output_path = Path('output/Convert ePRTR data to CLRTAP LPS.csv')
# to_csv does not create missing directories, so make sure 'output/' exists
# (it may be absent on a fresh checkout).
output_path.parent.mkdir(parents=True, exist_ok=True)

# Pipe-separated export without the DataFrame index.
data.to_csv(output_path, sep='|', index=False, columns=export_columns)