# Fetching air quality data from the FMI open data timeseries API

[API documentation](https://github.com/fmidev/smartmet-plugin-timeseries/blob/master/docs/Using-the-Timeseries-API.md),
[API examples](https://github.com/fmidev/smartmet-plugin-timeseries/blob/master/docs/Examples.md),
[JSON API example call](https://opendata.fmi.fi/timeseries?format=json&groupareas=0&producer=airquality_urban&area=Helsinki&param=time,fmisid,PM10_PT1H_avg,PM25_PT1H_avg,O3_PT1H_avg,CO_PT1H_avg,SO2_PT1H_avg,NO2_PT1H_avg,TRSC_PT1H_avg).

In [1]:
import datetime
from pathlib import Path

import numpy as np
import pandas as pd
import requests

In [2]:
# Query window: the 24 hours ending now, in UTC.
# datetime.utcnow() is deprecated since Python 3.12, so take a timezone-aware
# "now" and strip tzinfo again; isoformat() below then keeps producing the same
# offset-free timestamps as before.
end_time = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
start_time = end_time - datetime.timedelta(days=1)

# Requested columns mapped to the dtype each one is cast to after download.
# The keys double as the value of the 'param' request parameter below.
aq_fields = {
    'fmisid': np.int32,
    # Explicit unit: pandas >= 2.0 refuses to cast to the unit-less np.datetime64.
    'time': 'datetime64[ns]',
    'AQINDEX_PT1H_avg': np.float64,
    'PM10_PT1H_avg': np.float64,
    'PM25_PT1H_avg': np.float64,
    'O3_PT1H_avg': np.float64,
    'CO_PT1H_avg': np.float64,
    'SO2_PT1H_avg': np.float64,
    'NO2_PT1H_avg': np.float64,
    'TRSC_PT1H_avg': np.float64,
}

url = 'https://opendata.fmi.fi/timeseries'

params = {
    'format': 'json',
    'precision': 'double',
    'groupareas': '0',
    'producer': 'airquality_urban',
    'area': 'Uusimaa',
    'param': ','.join(aq_fields.keys()),
    'starttime': start_time.isoformat(timespec="seconds"),
    'endtime': end_time.isoformat(timespec="seconds"),
    'tz': 'UTC',
}

# A timeout stops the cell from hanging on an unresponsive server, and
# raise_for_status() surfaces HTTP errors here instead of letting an error
# page reach the JSON parser.
response = requests.get(url, params=params, timeout=30)
response.raise_for_status()
data = response.json()

In [3]:
# Turn the JSON payload into a DataFrame and cast every column to the dtype
# declared for it in aq_fields.
# Optional: index by (fmisid, time) with df.set_index(['fmisid', 'time']).
df = (
    pd.DataFrame(data)
    .astype(aq_fields)
)
df.head(10)

Unnamed: 0,fmisid,time,AQINDEX_PT1H_avg,PM10_PT1H_avg,PM25_PT1H_avg,O3_PT1H_avg,CO_PT1H_avg,SO2_PT1H_avg,NO2_PT1H_avg,TRSC_PT1H_avg
0,100662,2022-08-25 09:00:00,2.0,8.4,4.1,61.9,,1.2,5.3,
1,100662,2022-08-25 10:00:00,2.0,10.1,5.9,60.2,,1.1,7.4,
2,100662,2022-08-25 11:00:00,1.0,10.2,5.8,54.2,,0.9,6.4,
3,100662,2022-08-25 12:00:00,1.0,10.6,5.3,44.9,,1.1,8.0,
4,100662,2022-08-25 13:00:00,1.0,13.3,4.6,41.2,,1.2,8.5,
5,100662,2022-08-25 14:00:00,1.0,12.8,4.4,42.0,,1.7,9.3,
6,100662,2022-08-25 15:00:00,1.0,6.8,4.2,46.7,,1.4,8.4,
7,100662,2022-08-25 16:00:00,1.0,5.6,2.8,49.2,,1.2,7.4,
8,100662,2022-08-25 17:00:00,1.0,6.8,3.9,44.1,,1.2,11.5,
9,100662,2022-08-25 18:00:00,1.0,6.6,3.8,50.2,,1.1,8.2,


In [4]:
# Persist the frame as zstd-compressed Parquet. The target directory is
# created first so the write does not fail on a fresh checkout where
# 'data/' does not exist yet.
parquet_path = Path('data/airquality.parquet')
parquet_path.parent.mkdir(parents=True, exist_ok=True)
df.to_parquet(parquet_path, compression='zstd')

# DuckDB

In [5]:
import duckdb

# Open a connection to an in-memory DuckDB database.
con = duckdb.connect(':memory:')

In [6]:
# Copy the pandas frame into a DuckDB table; the query refers to the
# notebook variable `df` by name.
create_table_sql = 'CREATE OR REPLACE TABLE airquality_urban AS SELECT * FROM df'
con.execute(create_table_sql)

<duckdb.DuckDBPyConnection at 0x7f138a8f5d30>

In [7]:
# Read the table back into pandas and show ten randomly chosen rows.
select_all_sql = 'SELECT * FROM airquality_urban'
df2 = con.execute(select_all_sql).fetchdf()
df2.sample(n=10)

Unnamed: 0,fmisid,time,AQINDEX_PT1H_avg,PM10_PT1H_avg,PM25_PT1H_avg,O3_PT1H_avg,CO_PT1H_avg,SO2_PT1H_avg,NO2_PT1H_avg,TRSC_PT1H_avg
306,104083,2022-08-26 07:00:00,1.0,12.8,5.8,,,,17.1,
253,104048,2022-08-26 02:00:00,1.0,5.1,3.3,,,,1.9,
121,100763,2022-08-25 11:00:00,1.0,7.6,4.0,,,,8.1,
52,100723,2022-08-25 13:00:00,1.0,4.4,2.4,42.3,,0.5,1.2,
302,104083,2022-08-26 03:00:00,1.0,7.9,5.7,,,,5.7,
239,104048,2022-08-25 12:00:00,1.0,7.2,3.1,,,,9.2,
334,107399,2022-08-25 11:00:00,1.0,8.6,4.3,,,,5.4,
179,103139,2022-08-25 21:00:00,1.0,,,38.0,,-0.2,1.2,
200,103140,2022-08-25 19:00:00,1.0,,,,,0.2,,-0.1
206,103140,2022-08-26 01:00:00,1.0,,,,,0.2,,0.2
