# VarnaAir - Open Programme

## Introduction ℹ️


## Libraries 📚
First, let's import the libraries used throughout this notebook.

In [1]:
import os

import pandas
import requests

## Data provisioning 📦

In [2]:
# Look up every OpenAQ monitoring location within 5 km of the centre of Varna.
url = "https://api.openaq.org/v3/locations"

# The API key is a credential, so it is read from the environment instead of
# being committed with the notebook.
# NOTE(review): the key that used to be hardcoded here is exposed in the
# notebook's history and should be rotated.
api_key = os.environ.get("OPENAQ_API_KEY")
if not api_key:
    raise RuntimeError(
        "Set the OPENAQ_API_KEY environment variable to your OpenAQ API key."
    )

# `headers` is reused by every later request in the notebook.
headers = {
    "X-API-Key": api_key
}
params = {
    "coordinates": "43.224389,27.915733",  # center of Varna
    "radius": 5000,                        # 5 km radius
    "limit": 100,
}

# A timeout keeps a stalled connection from hanging the kernel forever.
response = requests.get(url, headers=headers, params=params, timeout=30)
# Fail loudly on HTTP errors (bad key, rate limit, ...) like the later cells do.
response.raise_for_status()

data = response.json().get("results", [])
df_nearby_stations = pandas.DataFrame(data)
# reindex() selects the same three columns but does not raise KeyError when the
# API returned no locations and the frame therefore has no columns.
print(df_nearby_stations.reindex(columns=["id", "name", "locality"]))

        id                         name              locality
0     8843  AMS SOU Angel Kanchev-Varna                 Варна
1  2162113           AMS Chaika - Varna  National air network


## Angel Kanchev-Varna

In [3]:
# Most recent reading of every sensor at location 8843 (Angel Kanchev).
url = "https://api.openaq.org/v3/locations/8843/latest"

response = requests.get(url, headers=headers)
response.raise_for_status()

data = response.json()
results = data.get("results", [])

if not results:
    # Nothing was reported: keep the name defined with an empty frame.
    df_latest1 = pandas.DataFrame()
else:
    # Flatten the nested `datetime` and `coordinates` dicts into plain columns,
    # then drop the nested originals.
    df_latest1 = (
        pandas.DataFrame(results)
        .assign(
            sensor_id=lambda frame: frame['sensorsId'],
            utc_time=lambda frame: frame['datetime'].apply(lambda stamp: stamp['utc']),
            local_time=lambda frame: frame['datetime'].apply(lambda stamp: stamp['local']),
            latitude=lambda frame: frame['coordinates'].apply(lambda point: point['latitude']),
            longitude=lambda frame: frame['coordinates'].apply(lambda point: point['longitude']),
        )
        .drop(columns=['datetime', 'coordinates'])
    )

df_latest1

Unnamed: 0,value,sensorsId,locationsId,sensor_id,utc_time,local_time,latitude,longitude
0,59.7,25777,8843,25777,2025-06-18T07:00:00Z,2025-06-18T10:00:00+03:00,43.224389,27.915733
1,10.75,25778,8843,25778,2025-06-18T07:00:00Z,2025-06-18T10:00:00+03:00,43.224389,27.915733
2,28.91,25776,8843,25776,2025-06-18T07:00:00Z,2025-06-18T10:00:00+03:00,43.224389,27.915733
3,360.0,25779,8843,25779,2025-06-18T07:00:00Z,2025-06-18T10:00:00+03:00,43.224389,27.915733
4,10.47,25774,8843,25774,2025-06-18T07:00:00Z,2025-06-18T10:00:00+03:00,43.224389,27.915733
5,7.42,25775,8843,25775,2025-06-18T07:00:00Z,2025-06-18T10:00:00+03:00,43.224389,27.915733
6,2.47,4272879,8843,4272879,2024-03-11T11:00:00Z,2024-03-11T13:00:00+02:00,43.224389,27.915733


In [4]:
# Download the measurements of each Angel Kanchev sensor and stack them into
# one long table (one row per sensor and measurement period).
base_url = "https://api.openaq.org/v3/sensors/{sensor_id}/measurements"

# sensor ids for Angel Kanchev (the `sensorsId` values returned by the
# /locations/8843/latest request)
sensor_ids = [25775, 25778, 25776, 25779, 25774, 25777, 4272879]

# NOTE(review): no `limit`, `page` or date-range parameters are sent, so each
# request returns only what the API serves by default. The recorded run shows
# roughly 100 rows per sensor, and from different periods (2020 vs 2023) —
# confirm this sample is what the analysis needs.
sensor_data_frames = []

for sensor_id in sensor_ids:
    url = base_url.format(sensor_id=sensor_id)
    # A timeout keeps a stalled connection from hanging the kernel forever.
    response = requests.get(url, headers=headers, timeout=30)
    response.raise_for_status()

    data = response.json()
    results = data.get("results", [])

    # Sensors without any measurements contribute nothing.
    if results:
        df1 = pandas.DataFrame(results)

        # Tag the rows with their sensor, flatten the nested `period` and
        # `parameter` dicts, and drop the nested/unused columns.
        df1 = df1.assign(
            sensor_id=sensor_id,
            datetime_from_utc=df1['period'].apply(lambda x: x['datetimeFrom']['utc'] if x else None),
            datetime_from_local=df1['period'].apply(lambda x: x['datetimeFrom']['local'] if x else None),
            datetime_to_utc=df1['period'].apply(lambda x: x['datetimeTo']['utc'] if x else None),
            datetime_to_local=df1['period'].apply(lambda x: x['datetimeTo']['local'] if x else None),
            parameter_name=df1['parameter'].apply(lambda x: x.get('name') if isinstance(x, dict) else None),
            parameter_units=df1['parameter'].apply(lambda x: x.get('units') if isinstance(x, dict) else None)
        ).drop(columns=['flagInfo', 'parameter', 'period', 'summary', 'coverage', 'coordinates'])

        sensor_data_frames.append(df1)

# pandas.concat raises ValueError on an empty list, so fall back to an empty
# frame with the usual columns when no sensor returned any measurements.
if sensor_data_frames:
    combined_df1 = pandas.concat(sensor_data_frames, ignore_index=True)
else:
    combined_df1 = pandas.DataFrame(
        columns=[
            'value', 'sensor_id',
            'datetime_from_utc', 'datetime_from_local',
            'datetime_to_utc', 'datetime_to_local',
            'parameter_name', 'parameter_units',
        ]
    )
combined_df1

Unnamed: 0,value,sensor_id,datetime_from_utc,datetime_from_local,datetime_to_utc,datetime_to_local,parameter_name,parameter_units
0,18.38,25775,2020-04-20T17:00:00Z,2020-04-20T20:00:00+03:00,2020-04-20T18:00:00Z,2020-04-20T21:00:00+03:00,pm25,µg/m³
1,19.34,25775,2020-04-20T18:00:00Z,2020-04-20T21:00:00+03:00,2020-04-20T19:00:00Z,2020-04-20T22:00:00+03:00,pm25,µg/m³
2,18.33,25775,2020-04-20T19:00:00Z,2020-04-20T22:00:00+03:00,2020-04-20T20:00:00Z,2020-04-20T23:00:00+03:00,pm25,µg/m³
3,17.23,25775,2020-04-20T20:00:00Z,2020-04-20T23:00:00+03:00,2020-04-20T21:00:00Z,2020-04-21T00:00:00+03:00,pm25,µg/m³
4,19.66,25775,2020-04-20T21:00:00Z,2020-04-21T00:00:00+03:00,2020-04-20T22:00:00Z,2020-04-21T01:00:00+03:00,pm25,µg/m³
...,...,...,...,...,...,...,...,...
695,14.49,4272879,2023-04-02T16:00:00Z,2023-04-02T19:00:00+03:00,2023-04-02T17:00:00Z,2023-04-02T20:00:00+03:00,no,µg/m³
696,1.68,4272879,2023-04-02T17:00:00Z,2023-04-02T20:00:00+03:00,2023-04-02T18:00:00Z,2023-04-02T21:00:00+03:00,no,µg/m³
697,1.76,4272879,2023-04-02T18:00:00Z,2023-04-02T21:00:00+03:00,2023-04-02T19:00:00Z,2023-04-02T22:00:00+03:00,no,µg/m³
698,2.85,4272879,2023-04-02T19:00:00Z,2023-04-02T22:00:00+03:00,2023-04-02T20:00:00Z,2023-04-02T23:00:00+03:00,no,µg/m³


In [5]:
# Reshape the long Angel Kanchev table to wide form: one row per local start
# time and one column per parameter. pivot_table uses its default aggregation
# (mean) for values that land in the same cell.
pivot_df1 = (
    combined_df1
    .pivot_table(values='value', index='datetime_from_local', columns='parameter_name')
    .reset_index()
)

pivot_df1

parameter_name,datetime_from_local,co,no,no2,o3,pm10,pm25,so2
0,2020-04-20T20:00:00+03:00,400.0,,23.33,56.93,24.27,18.38,20.16
1,2020-04-20T21:00:00+03:00,530.0,,51.60,25.44,24.27,19.34,19.85
2,2020-04-20T22:00:00+03:00,560.0,,44.01,23.04,23.85,18.33,20.77
3,2020-04-20T23:00:00+03:00,500.0,,27.78,30.51,21.48,17.23,20.10
4,2020-04-21T00:00:00+03:00,480.0,,25.94,25.95,21.46,19.66,19.95
...,...,...,...,...,...,...,...,...
197,2023-04-02T19:00:00+03:00,,14.49,,,,,
198,2023-04-02T20:00:00+03:00,,1.68,,,,,
199,2023-04-02T21:00:00+03:00,,1.76,,,,,
200,2023-04-02T22:00:00+03:00,,2.85,,,,,


## AMS Chaika - Varna

In [6]:
# Most recent reading of every sensor at location 2162113 (AMS Chaika).
url = "https://api.openaq.org/v3/locations/2162113/latest"

response = requests.get(url, headers=headers)
response.raise_for_status()

data = response.json()
results = data.get("results", [])

if not results:
    # Nothing was reported: keep the name defined with an empty frame.
    df_latest2 = pandas.DataFrame()
else:
    # Flatten the nested `datetime` and `coordinates` dicts into plain columns,
    # then drop the nested originals.
    df_latest2 = (
        pandas.DataFrame(results)
        .assign(
            sensor_id=lambda frame: frame['sensorsId'],
            utc_time=lambda frame: frame['datetime'].apply(lambda stamp: stamp['utc']),
            local_time=lambda frame: frame['datetime'].apply(lambda stamp: stamp['local']),
            latitude=lambda frame: frame['coordinates'].apply(lambda point: point['latitude']),
            longitude=lambda frame: frame['coordinates'].apply(lambda point: point['longitude']),
        )
        .drop(columns=['datetime', 'coordinates'])
    )

df_latest2

Unnamed: 0,value,sensorsId,locationsId,sensor_id,utc_time,local_time,latitude,longitude
0,2.46,7774820,2162113,7774820,2024-03-11T11:00:00Z,2024-03-11T13:00:00+02:00,43.21728,27.93596
1,-1.0,7774916,2162113,7774916,2025-06-18T07:00:00Z,2025-06-18T10:00:00+03:00,43.21728,27.93596
2,470.0,7775044,2162113,7775044,2025-06-18T07:00:00Z,2025-06-18T10:00:00+03:00,43.21728,27.93596
3,7.03,7775045,2162113,7775045,2025-06-18T07:00:00Z,2025-06-18T10:00:00+03:00,43.21728,27.93596
4,-1.0,8613114,2162113,8613114,2024-06-06T06:00:00Z,2024-06-06T09:00:00+03:00,43.21728,27.93596


In [7]:
# Download the measurements of each Chaika sensor and stack them into one long
# table (one row per sensor and measurement period).
base_url = "https://api.openaq.org/v3/sensors/{sensor_id}/measurements"

# sensor ids for Chaika (the `sensorsId` values returned by the
# /locations/2162113/latest request)
sensor_ids = [7774820, 7774916, 7775044, 7775045, 8613114]

# NOTE(review): no `limit`, `page` or date-range parameters are sent, so each
# request returns only what the API serves by default. The recorded run shows
# roughly 100 rows per sensor, starting at different dates — confirm this
# sample is what the analysis needs.
sensor_data_frames = []

for sensor_id in sensor_ids:
    url = base_url.format(sensor_id=sensor_id)
    # A timeout keeps a stalled connection from hanging the kernel forever.
    response = requests.get(url, headers=headers, timeout=30)
    response.raise_for_status()

    data = response.json()
    results = data.get("results", [])

    # Sensors without any measurements contribute nothing.
    if results:
        df2 = pandas.DataFrame(results)

        # Tag the rows with their sensor, flatten the nested `period` and
        # `parameter` dicts, and drop the nested/unused columns.
        df2 = df2.assign(
            sensor_id=sensor_id,
            datetime_from_utc=df2['period'].apply(lambda x: x['datetimeFrom']['utc'] if x else None),
            datetime_from_local=df2['period'].apply(lambda x: x['datetimeFrom']['local'] if x else None),
            datetime_to_utc=df2['period'].apply(lambda x: x['datetimeTo']['utc'] if x else None),
            datetime_to_local=df2['period'].apply(lambda x: x['datetimeTo']['local'] if x else None),
            parameter_name=df2['parameter'].apply(lambda x: x.get('name') if isinstance(x, dict) else None),
            parameter_units=df2['parameter'].apply(lambda x: x.get('units') if isinstance(x, dict) else None)
        ).drop(columns=['flagInfo', 'parameter', 'period', 'summary', 'coverage', 'coordinates'])

        sensor_data_frames.append(df2)

# pandas.concat raises ValueError on an empty list, so fall back to an empty
# frame with the usual columns when no sensor returned any measurements.
if sensor_data_frames:
    combined_df2 = pandas.concat(sensor_data_frames, ignore_index=True)
else:
    combined_df2 = pandas.DataFrame(
        columns=[
            'value', 'sensor_id',
            'datetime_from_utc', 'datetime_from_local',
            'datetime_to_utc', 'datetime_to_local',
            'parameter_name', 'parameter_units',
        ]
    )
combined_df2

Unnamed: 0,value,sensor_id,datetime_from_utc,datetime_from_local,datetime_to_utc,datetime_to_local,parameter_name,parameter_units
0,1.51,7774820,2024-01-28T22:00:00Z,2024-01-29T00:00:00+02:00,2024-01-28T23:00:00Z,2024-01-29T01:00:00+02:00,no,µg/m³
1,1.65,7774820,2024-01-28T23:00:00Z,2024-01-29T01:00:00+02:00,2024-01-29T00:00:00Z,2024-01-29T02:00:00+02:00,no,µg/m³
2,1.54,7774820,2024-01-29T00:00:00Z,2024-01-29T02:00:00+02:00,2024-01-29T01:00:00Z,2024-01-29T03:00:00+02:00,no,µg/m³
3,1.62,7774820,2024-01-29T01:00:00Z,2024-01-29T03:00:00+02:00,2024-01-29T02:00:00Z,2024-01-29T04:00:00+02:00,no,µg/m³
4,1.66,7774820,2024-01-29T02:00:00Z,2024-01-29T04:00:00+02:00,2024-01-29T03:00:00Z,2024-01-29T05:00:00+02:00,no,µg/m³
...,...,...,...,...,...,...,...,...
495,-1.00,8613114,2024-03-29T19:00:00Z,2024-03-29T21:00:00+02:00,2024-03-29T20:00:00Z,2024-03-29T22:00:00+02:00,pm10,µg/m³
496,-1.00,8613114,2024-03-29T20:00:00Z,2024-03-29T22:00:00+02:00,2024-03-29T21:00:00Z,2024-03-29T23:00:00+02:00,pm10,µg/m³
497,-1.00,8613114,2024-03-29T21:00:00Z,2024-03-29T23:00:00+02:00,2024-03-29T22:00:00Z,2024-03-30T00:00:00+02:00,pm10,µg/m³
498,-1.00,8613114,2024-03-29T22:00:00Z,2024-03-30T00:00:00+02:00,2024-03-29T23:00:00Z,2024-03-30T01:00:00+02:00,pm10,µg/m³


In [8]:
# Reshape the long Chaika table to wide form: one row per local start time and
# one column per parameter. pivot_table uses its default aggregation (mean)
# for values that land in the same cell.
# NOTE(review): the recorded output contains -1.0 readings (e.g. pm10), which
# look like missing-data placeholders — confirm whether they should be masked
# before pivoting.
pivot_df2 = (
    combined_df2
    .pivot_table(values='value', index='datetime_from_local', columns='parameter_name')
    .reset_index()
)

pivot_df2

parameter_name,datetime_from_local,co,no,no2,pm10,so2
0,2024-01-29T00:00:00+02:00,320.0,1.51,14.48,,8.58
1,2024-01-29T01:00:00+02:00,320.0,1.65,14.48,,8.46
2,2024-01-29T02:00:00+02:00,310.0,1.54,14.02,,8.53
3,2024-01-29T03:00:00+02:00,310.0,1.62,14.40,,8.35
4,2024-01-29T04:00:00+02:00,310.0,1.66,14.19,,8.13
...,...,...,...,...,...,...
195,2024-03-29T21:00:00+02:00,,,,-1.0,
196,2024-03-29T22:00:00+02:00,,,,-1.0,
197,2024-03-29T23:00:00+02:00,,,,-1.0,
198,2024-03-30T00:00:00+02:00,,,,-1.0,


In [9]:
# Stack the two wide station tables row-wise with a fresh 0..n-1 index; columns
# present in only one table are filled with NaN in the other's rows.
# NOTE(review): neither table has a column naming its station, so after
# stacking a row's origin can no longer be told — consider adding one.
combined_df = pandas.concat(
    objs=[pivot_df1, pivot_df2],
    ignore_index=True,
)

In [10]:
# Sanity check: (number of rows, number of columns) of the stacked table.
combined_df.shape

(402, 8)