### 1. Import pyebas

In [1]:
from pyebas import *

### 2. Download EBAS data (.nc files)

In [2]:
# Selection filters for the download.
# Set `conditions = None` instead if you need the whole EBAS database.
conditions = {
    "start_year": 1990,
    "end_year": 2021,
    "site": ["ES0010R", "ES0011R"],
    "matrix": ["air"],
    "components": ["NOx"],
}

# Local storage path; the later cells reuse this directory.
db_dir = "ebas_db"
downloader = EbasDownloader(loc=db_dir)

# The download relies on multiprocessing, which may cause errors in some
# environments; run it from the command line or a Jupyter Notebook to avoid them.
downloader.get_raw_files(conditions=conditions, download=True)

Make data folder ebas_db\raw_data...
0 raw data (*.nc) files have been downloaded.
Requesting data from ebas sever...
13126 files found on ftp server.
0 files need to be deleted...


selecting ftp files...: 100%|██████████| 13126/13126 [00:00<00:00, 375040.43it/s]

5 files need to be downloaded...
Start downloading files...
Using 5 threads...






Download completed.


### 3. Export to .csv file

In [3]:
# Convert all downloaded .nc files found under the storage path to .csv.
# Caution: the resulting .csv file might be very large.
csv_exporter = csvExporter(loc=db_dir)
csv_exporter.export_csv("export.csv")

Processing files...: 100%|██████████| 5/5 [00:00<00:00, 19.52it/s]


Exporting...
Data has been exported to ebas_db\export.csv.


### 4. Create local database

In [4]:
# Local storage path; it must be the same path that was used for the download.
db_dir = "ebas_db"
# Handle to the local database.
db = EbasDB(
    dir=db_dir,
    dump="xz",
    detailed=True,
)
# Create the database, or update it with newly downloaded files.
db.update_db()

Make data folder ebas_db\dumps...
Gathering site information...
Using 5 threads...


100%|██████████| 5/5 [00:01<00:00,  3.76it/s]


Collected site number: 2
No bad files were found.
creating value index...
Dumping data to to 'value_index.xz'...
Dumping data to to 'site_index.xz'...
Importing datafile for each site...
Using 2 threads...


100%|██████████| 2/2 [00:01<00:00,  1.31it/s]


### 5. Open local database

In [2]:
# Local storage path of the database.
db_dir = "ebas_db"
# Handle to the local database.
db = EbasDB(
    dir=db_dir,
    dump="xz",
    detailed=True,
)
# Open the database, provided it has already been created.
db.init_db()

  0%|          | 0/2 [00:00<?, ?it/s]

init database...
Load value index...
Load site index...
Load site data...
Using 2 threads...


100%|██████████| 2/2 [00:01<00:00,  1.03it/s]


2          sites included in current database.
3          components included in current database.
1          matrix included in current database.
1          country included in current database.
Database is loaded.


### 6. Query data from local database as pandas.DataFrame

In [11]:
# No explicit numpy import is visible in this notebook; `np` presumably leaks in
# through `from pyebas import *`. Import it explicitly so this cell does not
# depend on that side effect (or on leftover kernel state).
import numpy as np

# Query filters passed to the local database.
condition = {
    "id": ["AM0001R", "EE0009R", "ES0010R", "ES0011R"],
    "component": ["NOx", "nitrate", "nitric_acid"],
    "matrix": ["air", "aerosol"],
    "stat": ["arithmetic mean", "median"],
    # presumably the start / end of the queried time window -- confirm in pyebas docs
    "st": np.datetime64("1970-01-01"),
    "ed": np.datetime64("2021-10-01"),
    # if you want to include all values for a key, just remove that condition
    # "country": ["Denmark", "France"],
}
# The query result is returned as a pandas.DataFrame.
df = db.query(condition, use_number_indexing=False)
# Show at most the first 20 rows.
df.head(20)

seraching...: 100%|██████████| 2/2 [00:00<?, ?it/s]


Gathering data to dataframe...


100%|██████████| 2/2 [00:00<00:00, 333.34it/s]


Unnamed: 0,st,ed,val,site,component,unit,matrix
0,2004-01-01 00:00:00,2004-01-01 01:00:00,1.27,ES0010R,NOx,ug N/m3,air
1,2004-01-01 01:00:00,2004-01-01 02:00:00,1.3,ES0010R,NOx,ug N/m3,air
2,2004-01-01 02:00:00,2004-01-01 03:00:00,1.17,ES0010R,NOx,ug N/m3,air
3,2004-01-01 03:00:00,2004-01-01 04:00:00,0.88,ES0010R,NOx,ug N/m3,air
4,2004-01-01 04:00:00,2004-01-01 05:00:00,1.09,ES0010R,NOx,ug N/m3,air
5,2004-01-01 05:00:00,2004-01-01 06:00:00,2.65,ES0010R,NOx,ug N/m3,air
6,2004-01-01 06:00:00,2004-01-01 07:00:00,3.07,ES0010R,NOx,ug N/m3,air
7,2004-01-01 07:00:00,2004-01-01 08:00:00,1.1,ES0010R,NOx,ug N/m3,air
8,2004-01-01 08:00:00,2004-01-01 09:00:00,0.85,ES0010R,NOx,ug N/m3,air
9,2004-01-01 09:00:00,2004-01-01 10:00:00,0.66,ES0010R,NOx,ug N/m3,air


### 7. Access detail information

In [10]:
# access information for one site
# NOTE: a notebook cell only displays the value of its last expression, so the
# first two lookups below are evaluated but produce no visible output here.
db.site_index["ES0011R"]  # entry stored for site ES0011R
db.site_index["ES0011R"]["components"].keys()  # keys of that site's "components" entry
db.site_index["ES0011R"]["files"].keys()  # keys of that site's "files" entry (the .nc file names)

dict_keys(['ES0011R.20130101000000.20150109112917.chemiluminescence_photolytic.NOx.air.1y.1h.ES04L_TIN-009.ES04L_chemilum..nc'])

### 8. Get summary

In [3]:
# get summary information
# NOTE: a notebook cell only displays the value of its last expression, so only
# the result of the final list_sites(...) call appears as output.
db.list_sites()
# possible keys are: "id","name","country","station_setting", "lat", "lon","alt","land_use", "file_num","components"
db.list_sites(keys=["name","lat","lon"])
# if components are selected, set list_time=True to see the starting and ending time
# (each listed component then carries two numpy.datetime64 values, as in the output)
db.list_sites(keys=["name", "components"], list_time=True)

[{'name': 'Cabo de Creus',
  'components': [('NOx',
    'air',
    '1h',
    numpy.datetime64('2004-01-01T00:00:00.000000000'),
    numpy.datetime64('2008-01-01T00:00:00.000000000')),
   ('NOx',
    'air',
    '1h',
    numpy.datetime64('2008-01-01T00:00:00.000000000'),
    numpy.datetime64('2011-01-01T00:00:00.000000000')),
   ('NOx',
    'air',
    '1h',
    numpy.datetime64('2013-01-01T00:00:00.000000000'),
    numpy.datetime64('2014-01-01T00:00:00.000000000')),
   ('NOx',
    'air',
    '1h',
    numpy.datetime64('2014-01-01T00:00:00.000000000'),
    numpy.datetime64('2015-01-01T00:00:00.000000000'))]},
 {'name': 'Barcarrota',
  'components': [('NOx',
    'air',
    '1h',
    numpy.datetime64('2013-01-01T00:00:00.000000000'),
    numpy.datetime64('2014-01-01T00:00:00.000000000'))]}]