In [None]:
import sqlite3 as sql3

In [None]:
# Open the PRTR SQLite database and keep the connection in `conn`.
#
# A plain sqlite3.connect('data/prtr_en.db') silently creates a new, empty
# database file when the path is wrong, and the mistake only shows up later as
# "no such table: facilities". Opening through a URI with mode=rw requires the
# file to exist already, so a wrong path fails right here with
# sqlite3.OperationalError instead.
conn = sql3.connect('file:data/prtr_en.db?mode=rw', uri=True)

In [None]:
# `conn` is the connection object returned by sql3.connect(); it represents the
# link to the database. Evaluating the bare name as the last expression of the
# cell displays its repr.
conn

In [None]:
# Create a database cursor with the connection object's cursor() method and
# run a first statement on it. The cursor is then shown as the cell output;
# no rows are fetched here.
cursor = conn.cursor()
cursor.execute("SELECT id from facilities")
cursor

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from pandas import DataFrame

In [None]:
# Reference copy of the LPS (large point source) query in plain SQL, e.g. for
# pasting into a SQLite client. The SQL is stored in a Python string so that
# this cell is valid Python and "Restart & Run All" does not stop here with a
# SyntaxError; running the cell only defines the string, it does not touch the
# database.
#
# What the query does:
#   * one output row per facility (GROUP BY); without the GROUP BY the SUM()
#     aggregates would collapse every facility into a single row
#   * one column per pollutant, built with SUM(CASE WHEN ... END); rows of
#     other substances contribute NULL, which SUM ignores
#   * unit conversion: the column labels imply annual_load is in kg, so
#     /1000000 gives kt, /1000 gives t and *1000 gives g
#   * the divisors are written as 1000000.0 / 1000.0 so the division is never
#     an integer division (guards against annual_load being stored as INTEGER
#     - TODO confirm the column type)
#   * string literals use single quotes and column aliases use double quotes
#     (standard SQL quoting)
#   * TRIM() on the PCB comparison tolerates trailing whitespace in the stored
#     substance name - TODO confirm the exact spelling with
#     SELECT DISTINCT substance_name FROM releases
#
# NOTE(review): only a.year is filtered. If `releases` carries its own year
# column and facility ids are reused across years, r.year must be filtered as
# well - verify against the schema.
LPS_QUERY_SQL = """
SELECT
    f.name                  AS LPS,
    a.business_sector       AS GNFR,
    f.administrative_number AS E_PRTR_ID,
    f.wgs84_x               AS Longitude,
    f.wgs84_y               AS Latitude,

    SUM(CASE WHEN substance_name = 'Nitrogen oxides (NOx/NO2)' THEN r.annual_load / 1000000.0 ELSE NULL END)
        AS "NOx/NO2 (kt)",
    SUM(CASE WHEN substance_name = 'Non-methane volatile organic compounds (NMVOC)' THEN r.annual_load / 1000000.0 ELSE NULL END)
        AS "Non-methane volatile organic compounds (NMVOC) (kt)",
    SUM(CASE WHEN substance_name = 'Sulphur oxides (SOx/SO2)' THEN r.annual_load / 1000000.0 ELSE NULL END)
        AS "SOx/SO2 (kt)",
    SUM(CASE WHEN substance_name = 'Ammonia (NH3)' THEN r.annual_load / 1000000.0 ELSE NULL END)
        AS "NH3 (kt)",
    SUM(CASE WHEN substance_name = 'Particulate matter (PM2.5)' THEN r.annual_load / 1000000.0 ELSE NULL END)
        AS "PM2.5 (kt)",
    SUM(CASE WHEN substance_name = 'Particulate matter (PM10)' THEN r.annual_load / 1000000.0 ELSE NULL END)
        AS "PM10 (kt)",
    SUM(CASE WHEN substance_name = 'Carbon monoxide (CO)' THEN r.annual_load / 1000000.0 ELSE NULL END)
        AS "CO (kt)",
    SUM(CASE WHEN substance_name = 'Lead and compounds (as Pb)' THEN r.annual_load / 1000.0 ELSE NULL END)
        AS "Pb (t)",
    SUM(CASE WHEN substance_name = 'Cadmium and compounds (as Cd)' THEN r.annual_load / 1000.0 ELSE NULL END)
        AS "Cd (t)",
    SUM(CASE WHEN substance_name = 'Mercury and compounds (as Hg)' THEN r.annual_load / 1000.0 ELSE NULL END)
        AS "Hg (t)",
    SUM(CASE WHEN substance_name = 'PCDD + PCDF (dioxins + furans)(as Teq)' THEN r.annual_load * 1000 ELSE NULL END)
        AS "PCDD/ PCDF (dioxins/ furans)(g I-Teq)",
    SUM(CASE WHEN substance_name = 'Polycyclic aromatic hydrocarbons (PAHs)' THEN r.annual_load / 1000.0 ELSE NULL END)
        AS "PAHs (t)",
    SUM(CASE WHEN substance_name = 'Hexachlorobenzene (HCB)' THEN r.annual_load ELSE NULL END)
        AS "HCB (kg)",
    SUM(CASE WHEN TRIM(substance_name) = 'Polychlorinated biphenyls' THEN r.annual_load ELSE NULL END)
        AS "PCBs (kg)"

FROM facilities AS f
JOIN releases   AS r ON f.id = r.facility_ID
JOIN activities AS a ON r.facility_ID = a.facility_ID

WHERE a.year = 2015
  AND r.compartment = 'Air'
  AND a.main_activity = 1

GROUP BY f.id, f.name, a.business_sector, f.administrative_number, f.wgs84_x, f.wgs84_y
"""

In [None]:
# Run the LPS (large point source) query and load the result directly into a
# DataFrame with pandas.read_sql_query.
#
# Notes on the SQL:
#   * GROUP BY produces one row per facility. Without it the SUM() aggregates
#     collapse all facilities into a single row.
#   * SUM(...) FILTER (WHERE ...) builds one column per pollutant; the FILTER
#     clause on aggregates needs SQLite 3.30 or newer.
#   * String literals are written in single quotes and column aliases in
#     double quotes. SQLite only accepts double-quoted string literals as a
#     legacy fallback, which can be disabled at build time.
#   * Unit conversion: the column labels imply annual_load is in kg, so
#     /1000000 gives kt, /1000 gives t and *1000 gives g. The divisors are
#     written as floats so the division is never an integer division (guards
#     against annual_load being stored as INTEGER - TODO confirm column type).
#   * TRIM() on the PCB comparison tolerates trailing whitespace in the stored
#     substance name - TODO confirm the exact spelling with
#     SELECT DISTINCT substance_name FROM releases.
#
# NOTE(review): only a.year is filtered. If `releases` carries its own year
# column and facility ids are reused across years, r.year must be filtered as
# well - verify against the schema.
lps_query = """
SELECT
    f.name                  AS LPS,
    a.business_sector       AS GNFR,
    f.administrative_number AS E_PRTR_ID,
    f.wgs84_x               AS Longitude,
    f.wgs84_y               AS Latitude,
    SUM(r.annual_load / 1000000.0) FILTER (WHERE substance_name = 'Nitrogen oxides (NOx/NO2)')
        AS "NOx/NO2 (kt)",
    SUM(r.annual_load / 1000000.0) FILTER (WHERE substance_name = 'Non-methane volatile organic compounds (NMVOC)')
        AS "Non-methane volatile organic compounds (NMVOC) (kt)",
    SUM(r.annual_load / 1000000.0) FILTER (WHERE substance_name = 'Sulphur oxides (SOx/SO2)')
        AS "SOx/SO2 (kt)",
    SUM(r.annual_load / 1000000.0) FILTER (WHERE substance_name = 'Ammonia (NH3)')
        AS "NH3 (kt)",
    SUM(r.annual_load / 1000000.0) FILTER (WHERE substance_name = 'Particulate matter (PM2.5)')
        AS "PM2.5 (kt)",
    SUM(r.annual_load / 1000000.0) FILTER (WHERE substance_name = 'Particulate matter (PM10)')
        AS "PM10 (kt)",
    SUM(r.annual_load / 1000000.0) FILTER (WHERE substance_name = 'Carbon monoxide (CO)')
        AS "CO (kt)",
    SUM(r.annual_load / 1000.0) FILTER (WHERE substance_name = 'Lead and compounds (as Pb)')
        AS "Pb (t)",
    SUM(r.annual_load / 1000.0) FILTER (WHERE substance_name = 'Cadmium and compounds (as Cd)')
        AS "Cd (t)",
    SUM(r.annual_load / 1000.0) FILTER (WHERE substance_name = 'Mercury and compounds (as Hg)')
        AS "Hg (t)",
    SUM(r.annual_load * 1000) FILTER (WHERE substance_name = 'PCDD + PCDF (dioxins + furans)(as Teq)')
        AS "PCDD/ PCDF (dioxins/ furans)(g I-Teq)",
    SUM(r.annual_load / 1000.0) FILTER (WHERE substance_name = 'Polycyclic aromatic hydrocarbons (PAHs)')
        AS "PAHs (t)",
    SUM(r.annual_load) FILTER (WHERE substance_name = 'Hexachlorobenzene (HCB)')
        AS "HCB (kg)",
    SUM(r.annual_load) FILTER (WHERE TRIM(substance_name) = 'Polychlorinated biphenyls')
        AS "PCBs (kg)"
FROM facilities AS f
INNER JOIN releases   AS r ON f.id = r.facility_ID
INNER JOIN activities AS a ON r.facility_ID = a.facility_ID
WHERE a.year = 2015
  AND r.compartment = 'Air'
  AND a.main_activity = 1
GROUP BY f.id, f.name, a.business_sector, f.administrative_number, f.wgs84_x, f.wgs84_y
"""

data = pd.read_sql_query(lps_query, conn)
data

In [None]:
# Show the table with empty strings in place of NaN. fillna() returns a new
# DataFrame, so this only affects what is displayed here; `data` itself is
# left unchanged.
data.fillna(value='')

In [None]:
# Export the query result to a CSV file; index=False keeps the DataFrame's row
# index out of the file.
# DataFrame.to_csv does not create missing directories, so make sure the
# target folder exists first (otherwise the export fails on a fresh checkout).
import os

os.makedirs('output', exist_ok=True)
data.to_csv('output/LPS_query_data.csv', index=False)

In [None]:
# Close the database connection once all queries have run. After this call
# `conn` can no longer be used for queries; re-run the connect cell to open a
# new connection.
conn.close()