In [1]:
import pandas as pd

In [2]:
# Load the hourly air quality data that was previously extracted via the API.
# The path is relative to the directory the notebook is run from.
airquality_data = pd.read_csv("./AQ_APIdata_localtime.csv")

In [3]:
# Preview the raw hourly data exactly as loaded from the CSV
display(airquality_data)

Unnamed: 0.1,Unnamed: 0,pm10,pm2_5,nitrogen_dioxide,sulphur_dioxide,ozone,localtime
0,0,85.9,76.6,38.5,4.5,2.0,2019-12-31 01:00:00
1,1,84.5,76.9,35.2,4.9,4.0,2019-12-31 02:00:00
2,2,83.0,76.4,35.1,5.1,5.0,2019-12-31 03:00:00
3,3,80.7,74.9,33.5,4.8,2.0,2019-12-31 04:00:00
4,4,77.6,73.1,29.6,3.9,3.0,2019-12-31 05:00:00
...,...,...,...,...,...,...,...
35082,35082,35.9,33.6,26.4,1.8,7.0,2023-12-31 19:00:00
35083,35083,38.1,31.8,27.2,2.0,2.0,2023-12-31 20:00:00
35084,35084,32.1,29.4,31.0,2.0,1.0,2023-12-31 21:00:00
35085,35085,34.6,30.6,32.2,2.2,1.0,2023-12-31 22:00:00


In [4]:
# Daily aggregation needs a calendar-day key and a time-of-day value:
# parse the timestamp column once and derive both from the parsed values.
local_timestamps = pd.to_datetime(airquality_data['localtime'])
airquality_data['localtime'] = local_timestamps
airquality_data['date'] = local_timestamps.dt.date
airquality_data['time'] = local_timestamps.dt.time

In [5]:
# Check that the new 'date' and 'time' columns were added
display(airquality_data)

Unnamed: 0.1,Unnamed: 0,pm10,pm2_5,nitrogen_dioxide,sulphur_dioxide,ozone,localtime,date,time
0,0,85.9,76.6,38.5,4.5,2.0,2019-12-31 01:00:00,2019-12-31,01:00:00
1,1,84.5,76.9,35.2,4.9,4.0,2019-12-31 02:00:00,2019-12-31,02:00:00
2,2,83.0,76.4,35.1,5.1,5.0,2019-12-31 03:00:00,2019-12-31,03:00:00
3,3,80.7,74.9,33.5,4.8,2.0,2019-12-31 04:00:00,2019-12-31,04:00:00
4,4,77.6,73.1,29.6,3.9,3.0,2019-12-31 05:00:00,2019-12-31,05:00:00
...,...,...,...,...,...,...,...,...,...
35082,35082,35.9,33.6,26.4,1.8,7.0,2023-12-31 19:00:00,2023-12-31,19:00:00
35083,35083,38.1,31.8,27.2,2.0,2.0,2023-12-31 20:00:00,2023-12-31,20:00:00
35084,35084,32.1,29.4,31.0,2.0,1.0,2023-12-31 21:00:00,2023-12-31,21:00:00
35085,35085,34.6,30.6,32.2,2.2,1.0,2023-12-31 22:00:00,2023-12-31,22:00:00


In [6]:
# Drop the unused "Unnamed: 0" column (in the preview above it just repeats the row index).
# errors="ignore" keeps the cell safe to re-run once the column is already gone.
airquality_data = airquality_data.drop(columns=["Unnamed: 0"], errors="ignore")

In [7]:
#now we want to aggregate the hourly air quality data to daily resolution; the goal is to integrate it with weather data

In [8]:
def daily_pollutant_extremes(df,pollutant):
    """
    Find the time of day at which the daily maximum and minimum of a pollutant occurred.

    Parameters:
        df (pd.DataFrame): A pandas DataFrame with (at least) the following columns:
            - 'date': The calendar day each reading belongs to (used as the grouping key).
            - 'time': The time of day of each reading.
            - pollutant: The pollutant readings (numeric).
        pollutant (str): Name of the pollutant column to analyse.

    Returns:
        pd.DataFrame: One row per date that has at least one non-missing reading, with columns:
                      - 'date': the grouping key taken from df['date']
                      - 'max_time_<pollutant>': the 'time' of the row holding the daily maximum.
                      - 'min_time_<pollutant>': the 'time' of the row holding the daily minimum.
                      When several readings tie, the first one in df order is reported.
    """
    max_column = 'max_time_' + pollutant
    min_column = 'min_time_' + pollutant

    # Days whose readings are all missing have no extreme to locate: idxmax/idxmin cannot
    # return a valid row label for them, which would make the .loc lookups below fail.
    valid = df.dropna(subset=[pollutant])
    by_day = valid.groupby('date')[pollutant]

    # idxmax/idxmin give the row label of each day's extreme; keep only that row's date and time
    maxs = valid.loc[by_day.idxmax(), ['date', 'time']].rename(columns={'time': max_column})
    mins = valid.loc[by_day.idxmin(), ['date', 'time']].rename(columns={'time': min_column})

    # Merge maxs and mins to return a single object
    return pd.merge(maxs, mins, on='date')

In [9]:
def daily_pollutant_values(data,pollutant):
    """
    Calculate the daily average, maximum, and minimum of a given pollutant.

    Parameters:
        data (pd.DataFrame): A pandas DataFrame with (at least) the following columns:
            - 'date': The calendar day each reading belongs to (used as the grouping key).
            - pollutant: The pollutant readings (numeric).
        pollutant (str): Name of the pollutant column to aggregate.

    Returns:
        pd.DataFrame: One row per date with the columns 'date',
                      'average_daily_<pollutant>', 'max_daily_<pollutant>' and
                      'min_daily_<pollutant>'.
    """
    # Named aggregation computes the statistics and labels the output columns in one step
    daily_stats = data.groupby('date')[pollutant].agg(**{
        'average_daily_' + pollutant: 'mean',
        'max_daily_' + pollutant: 'max',
        'min_daily_' + pollutant: 'min',
    })

    # Turn the 'date' group key back into a regular column
    return daily_stats.reset_index()

In [10]:
# Confirm the frame no longer contains the dropped "Unnamed: 0" column
display(airquality_data)

Unnamed: 0,pm10,pm2_5,nitrogen_dioxide,sulphur_dioxide,ozone,localtime,date,time
0,85.9,76.6,38.5,4.5,2.0,2019-12-31 01:00:00,2019-12-31,01:00:00
1,84.5,76.9,35.2,4.9,4.0,2019-12-31 02:00:00,2019-12-31,02:00:00
2,83.0,76.4,35.1,5.1,5.0,2019-12-31 03:00:00,2019-12-31,03:00:00
3,80.7,74.9,33.5,4.8,2.0,2019-12-31 04:00:00,2019-12-31,04:00:00
4,77.6,73.1,29.6,3.9,3.0,2019-12-31 05:00:00,2019-12-31,05:00:00
...,...,...,...,...,...,...,...,...
35082,35.9,33.6,26.4,1.8,7.0,2023-12-31 19:00:00,2023-12-31,19:00:00
35083,38.1,31.8,27.2,2.0,2.0,2023-12-31 20:00:00,2023-12-31,20:00:00
35084,32.1,29.4,31.0,2.0,1.0,2023-12-31 21:00:00,2023-12-31,21:00:00
35085,34.6,30.6,32.2,2.2,1.0,2023-12-31 22:00:00,2023-12-31,22:00:00


In [11]:
# Pollutants to aggregate. Order matters: the merge cell treats the first entry as the seed of
# the daily table, and the last two entries (pm2_5, pm10) are later sliced off with
# pollutants[:-2] because their EAQI levels come from rolling averages, not plain daily averages.
pollutants = ['nitrogen_dioxide','ozone',"sulphur_dioxide",'pm2_5','pm10']

In the following cell, we get the new daily values for nitrogen_dioxide and merge them on date with the new daily values for the other pollutants.

The result is a daily dataset with aggregated values for each pollutant

In [12]:
# Build one daily table per pollutant: aggregated values plus the times of the daily extremes.
# Everything is driven by the `pollutants` list, so no pollutant name has to be repeated here
# and kept in sync with the list by hand.
per_pollutant_daily = [
    pd.merge(
        daily_pollutant_values(airquality_data, pollutant),
        daily_pollutant_extremes(airquality_data, pollutant),
        on='date',
    )
    for pollutant in pollutants
]

# Seed the result with the first pollutant, then merge the remaining ones on date
daily_aq = per_pollutant_daily[0]
for pollutant_daily in per_pollutant_daily[1:]:
    daily_aq = pd.merge(daily_aq, pollutant_daily, on='date')

In [13]:
# Inspect the merged daily table (one row per date, all pollutants side by side)
display(daily_aq)

Unnamed: 0,date,average_daily_nitrogen_dioxide,max_daily_nitrogen_dioxide,min_daily_nitrogen_dioxide,max_time_nitrogen_dioxide,min_time_nitrogen_dioxide,average_daily_ozone,max_daily_ozone,min_daily_ozone,max_time_ozone,...,average_daily_pm2_5,max_daily_pm2_5,min_daily_pm2_5,max_time_pm2_5,min_time_pm2_5,average_daily_pm10,max_daily_pm10,min_daily_pm10,max_time_pm10,min_time_pm10
0,2019-12-31,45.673913,65.1,29.6,18:00:00,05:00:00,6.565217,17.0,2.0,15:00:00,...,71.995652,82.3,59.3,20:00:00,16:00:00,79.034783,85.9,70.4,01:00:00,15:00:00
1,2020-01-01,50.612500,69.3,35.9,19:00:00,07:00:00,5.458333,25.0,0.0,15:00:00,...,44.391667,69.6,30.0,02:00:00,16:00:00,54.550000,76.3,38.7,22:00:00,06:00:00
2,2020-01-02,56.670833,69.5,45.0,22:00:00,12:00:00,3.875000,16.0,0.0,14:00:00,...,45.808333,57.8,34.0,00:00:00,15:00:00,59.916667,73.5,45.9,00:00:00,06:00:00
3,2020-01-03,59.312500,80.0,42.5,20:00:00,07:00:00,1.958333,7.0,0.0,14:00:00,...,49.833333,61.6,39.9,23:00:00,15:00:00,57.645833,66.5,43.1,19:00:00,05:00:00
4,2020-01-04,49.558333,66.7,40.4,18:00:00,06:00:00,4.500000,15.0,0.0,15:00:00,...,26.741667,62.6,10.3,00:00:00,23:00:00,30.941667,66.1,11.8,00:00:00,23:00:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1457,2023-12-27,38.341667,49.3,25.9,20:00:00,13:00:00,3.583333,17.0,0.0,13:00:00,...,36.820833,47.6,23.3,23:00:00,13:00:00,45.125000,55.3,29.2,21:00:00,12:00:00
1458,2023-12-28,38.650000,46.8,30.5,16:00:00,07:00:00,0.958333,4.0,0.0,11:00:00,...,39.220833,52.8,32.3,01:00:00,18:00:00,46.720833,58.3,42.2,02:00:00,15:00:00
1459,2023-12-29,47.962500,56.5,38.9,21:00:00,06:00:00,0.833333,3.0,0.0,14:00:00,...,40.816667,45.1,36.3,23:00:00,05:00:00,44.525000,50.6,38.8,00:00:00,05:00:00
1460,2023-12-30,39.741667,55.1,32.0,20:00:00,12:00:00,2.708333,13.0,0.0,13:00:00,...,42.775000,57.5,34.0,02:00:00,18:00:00,45.391667,50.2,40.2,23:00:00,17:00:00


In [14]:
# Check the column dtypes of the daily table
daily_aq.dtypes

date                               object
average_daily_nitrogen_dioxide    float64
max_daily_nitrogen_dioxide        float64
min_daily_nitrogen_dioxide        float64
max_time_nitrogen_dioxide          object
min_time_nitrogen_dioxide          object
average_daily_ozone               float64
max_daily_ozone                   float64
min_daily_ozone                   float64
max_time_ozone                     object
min_time_ozone                     object
average_daily_sulphur_dioxide     float64
max_daily_sulphur_dioxide         float64
min_daily_sulphur_dioxide         float64
max_time_sulphur_dioxide           object
min_time_sulphur_dioxide           object
average_daily_pm2_5               float64
max_daily_pm2_5                   float64
min_daily_pm2_5                   float64
max_time_pm2_5                     object
min_time_pm2_5                     object
average_daily_pm10                float64
max_daily_pm10                    float64
min_daily_pm10                    

 Now we want to compute the EAQI according to the EEA definition

Note: to compute the EAQI we need the pollutant values at a particular moment, but for pm10 and pm2_5 we need to use a 24hr rolling average instead.

In [15]:
# 24-row rolling mean for each particulate-matter column; the first 23 rows of each
# new column are NaN because the window is not yet full.
for pm_column in ('pm2_5', 'pm10'):
    airquality_data[pm_column + '_24hr_avg'] = airquality_data[pm_column].rolling(window=24).mean()
display(airquality_data)

Unnamed: 0,pm10,pm2_5,nitrogen_dioxide,sulphur_dioxide,ozone,localtime,date,time,pm2_5_24hr_avg,pm10_24hr_avg
0,85.9,76.6,38.5,4.5,2.0,2019-12-31 01:00:00,2019-12-31,01:00:00,,
1,84.5,76.9,35.2,4.9,4.0,2019-12-31 02:00:00,2019-12-31,02:00:00,,
2,83.0,76.4,35.1,5.1,5.0,2019-12-31 03:00:00,2019-12-31,03:00:00,,
3,80.7,74.9,33.5,4.8,2.0,2019-12-31 04:00:00,2019-12-31,04:00:00,,
4,77.6,73.1,29.6,3.9,3.0,2019-12-31 05:00:00,2019-12-31,05:00:00,,
...,...,...,...,...,...,...,...,...,...,...
35082,35.9,33.6,26.4,1.8,7.0,2023-12-31 19:00:00,2023-12-31,19:00:00,38.879167,44.200000
35083,38.1,31.8,27.2,2.0,2.0,2023-12-31 20:00:00,2023-12-31,20:00:00,38.533333,43.954167
35084,32.1,29.4,31.0,2.0,1.0,2023-12-31 21:00:00,2023-12-31,21:00:00,38.004167,43.350000
35085,34.6,30.6,32.2,2.2,1.0,2023-12-31 22:00:00,2023-12-31,22:00:00,37.462500,42.916667


In [None]:
#The first 23 hours (Dec 31st 2019) contain null rolling avgs, but we don't need them.
#Only the rolling-average columns are checked, so rows are not discarded because of a gap in some other column.
airquality_data = airquality_data.dropna(subset=['pm2_5_24hr_avg', 'pm10_24hr_avg'])
display(airquality_data)

Unnamed: 0,pm10,pm2_5,nitrogen_dioxide,sulphur_dioxide,ozone,localtime,date,time,pm2_5_24hr_avg,pm10_24hr_avg
23,71.4,69.3,54.2,6.6,5.0,2020-01-01 00:00:00,2020-01-01,00:00:00,71.883333,78.716667
24,72.7,58.9,50.8,5.9,0.0,2020-01-01 01:00:00,2020-01-01,01:00:00,71.145833,78.166667
25,71.2,69.6,50.4,4.6,0.0,2020-01-01 02:00:00,2020-01-01,02:00:00,70.841667,77.612500
26,56.4,52.0,47.1,5.5,1.0,2020-01-01 03:00:00,2020-01-01,03:00:00,69.825000,76.504167
27,51.8,48.3,41.3,5.8,0.0,2020-01-01 04:00:00,2020-01-01,04:00:00,68.716667,75.300000
...,...,...,...,...,...,...,...,...,...,...
35082,35.9,33.6,26.4,1.8,7.0,2023-12-31 19:00:00,2023-12-31,19:00:00,38.879167,44.200000
35083,38.1,31.8,27.2,2.0,2.0,2023-12-31 20:00:00,2023-12-31,20:00:00,38.533333,43.954167
35084,32.1,29.4,31.0,2.0,1.0,2023-12-31 21:00:00,2023-12-31,21:00:00,38.004167,43.350000
35085,34.6,30.6,32.2,2.2,1.0,2023-12-31 22:00:00,2023-12-31,22:00:00,37.462500,42.916667


Explanation of our "Daily EAQI" metric:
- The goal is to compare daily pollutant values with the EAQI level thresholds
- For each pollutant (except pm2.5 and pm10), the daily average is used
- For pm2.5 and pm10, the daily average of the 24hr rolling averages is used (we call this variable avgdRolls)

In [17]:
#Now we can express daily PM averages in a relevant way for computing the daily EAQI
avgd_rolls_names = {'pm2_5_24hr_avg': 'pm2_5_avgdRolls', 'pm10_24hr_avg': 'pm10_avgdRolls'}

# Group 24hr averages by day and compute the mean, renaming for clarity before merging
pm_avgd_rolls = (
    airquality_data.groupby('date')[list(avgd_rolls_names)]
    .mean()
    .rename(columns=avgd_rolls_names)
    .reset_index()
)

# Drop avgdRolls columns left over from a previous run of this cell, so that re-running it
# cannot produce duplicated column names
daily_aq = (
    daily_aq.drop(columns=list(avgd_rolls_names.values()), errors='ignore')
    .merge(pm_avgd_rolls, on='date')
)
daily_aq

Unnamed: 0,date,average_daily_nitrogen_dioxide,max_daily_nitrogen_dioxide,min_daily_nitrogen_dioxide,max_time_nitrogen_dioxide,min_time_nitrogen_dioxide,average_daily_ozone,max_daily_ozone,min_daily_ozone,max_time_ozone,...,min_daily_pm2_5,max_time_pm2_5,min_time_pm2_5,average_daily_pm10,max_daily_pm10,min_daily_pm10,max_time_pm10,min_time_pm10,pm2_5_avgdRolls,pm10_avgdRolls
0,2020-01-01,50.612500,69.3,35.9,19:00:00,07:00:00,5.458333,25.0,0.0,15:00:00,...,30.0,02:00:00,16:00:00,54.550000,76.3,38.7,22:00:00,06:00:00,57.906944,65.789931
1,2020-01-02,56.670833,69.5,45.0,22:00:00,12:00:00,3.875000,16.0,0.0,14:00:00,...,34.0,00:00:00,15:00:00,59.916667,73.5,45.9,00:00:00,06:00:00,45.204687,57.765972
2,2020-01-03,59.312500,80.0,42.5,20:00:00,07:00:00,1.958333,7.0,0.0,14:00:00,...,39.9,23:00:00,15:00:00,57.645833,66.5,43.1,19:00:00,05:00:00,47.180903,58.237326
3,2020-01-04,49.558333,66.7,40.4,18:00:00,06:00:00,4.500000,15.0,0.0,15:00:00,...,10.3,00:00:00,23:00:00,30.941667,66.1,11.8,00:00:00,23:00:00,41.885417,48.472917
4,2020-01-05,54.425000,83.8,38.5,21:00:00,03:00:00,8.791667,34.0,0.0,15:00:00,...,4.6,22:00:00,04:00:00,24.233333,51.3,4.9,21:00:00,04:00:00,15.741493,19.141493
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1456,2023-12-27,38.341667,49.3,25.9,20:00:00,13:00:00,3.583333,17.0,0.0,13:00:00,...,23.3,23:00:00,13:00:00,45.125000,55.3,29.2,21:00:00,12:00:00,36.530903,42.035937
1457,2023-12-28,38.650000,46.8,30.5,16:00:00,07:00:00,0.958333,4.0,0.0,11:00:00,...,32.3,01:00:00,18:00:00,46.720833,58.3,42.2,02:00:00,15:00:00,38.868229,46.771701
1458,2023-12-29,47.962500,56.5,38.9,21:00:00,06:00:00,0.833333,3.0,0.0,14:00:00,...,36.3,23:00:00,05:00:00,44.525000,50.6,38.8,00:00:00,05:00:00,38.611979,44.421181
1459,2023-12-30,39.741667,55.1,32.0,20:00:00,12:00:00,2.708333,13.0,0.0,13:00:00,...,34.0,02:00:00,18:00:00,45.391667,50.2,40.2,23:00:00,17:00:00,43.038542,45.834028


In [18]:
# Thresholds for pollutants based on European Environment Agency table
# Each pollutant maps to an ordered list of (low, high) bands. get_aqi treats a band as
# low <= value < high and returns its 1-based position in the list as the index level.
# NOTE(review): concentrations are presumably in µg/m³ — confirm against the units returned by the API.
THRESHOLDS = {
    "pm2_5": [(0, 10), (10, 20), (20, 25), (25, 50), (50, 75), (75, 800)],
    "pm10": [(0, 20), (20, 40), (40, 50), (50, 100), (100, 150), (150, 1200)],
    "nitrogen_dioxide": [(0, 40), (40, 90), (90, 120), (120, 230), (230, 340), (340, 1000)],
    "ozone": [(0, 50), (50, 100), (100, 130), (130, 240), (240, 380), (380, 800)],
    "sulphur_dioxide": [(0, 100), (100, 200), (200, 350), (350, 500), (500, 750), (750, 1250)],
}

In [19]:
# Function to compute AQI for a single pollutant
def get_aqi(value, thresholds):
    """
    Map a pollutant value to its 1-based index level.

    Parameters:
        value: The pollutant value to classify.
        thresholds: Ordered list of (low, high) bands; a value belongs to a band when
                    low <= value < high.

    Returns:
        The 1-based position of the matching band, len(thresholds) + 1 when the value falls
        outside every band, or NaN when the value itself is missing.
    """
    # A missing value fails every comparison below and would otherwise be reported as the
    # out-of-range level, i.e. as worse than the worst real level.
    if pd.isna(value):
        return float('nan')
    for level, (low, high) in enumerate(thresholds, start=1):
        if low <= value < high:
            return level
    return len(thresholds) + 1  # Value lies outside every band; the data contains no instances of this

# Function to compute the overall AQI for a row
def compute_aqi(row):
    """
    Compute the overall index level for one row of the daily table.

    Parameters:
        row: A mapping/Series providing 'pm2_5_avgdRolls', 'pm10_avgdRolls',
             'average_daily_nitrogen_dioxide', 'average_daily_ozone' and
             'average_daily_sulphur_dioxide'.

    Returns:
        The highest level among the pollutants that have a value, or NaN when none has.
    """
    # Compute AQI for each pollutant
    pollutant_levels = [
        get_aqi(row['pm2_5_avgdRolls'], THRESHOLDS["pm2_5"]),
        get_aqi(row['pm10_avgdRolls'], THRESHOLDS["pm10"]),
        get_aqi(row["average_daily_nitrogen_dioxide"], THRESHOLDS["nitrogen_dioxide"]),
        get_aqi(row["average_daily_ozone"], THRESHOLDS["ozone"]),
        get_aqi(row["average_daily_sulphur_dioxide"], THRESHOLDS["sulphur_dioxide"]),
    ]

    # Leave out pollutants without data: a NaN would make the result of max() depend on
    # the order of its arguments
    available_levels = [level for level in pollutant_levels if not pd.isna(level)]

    # Overall AQI is the maximum AQI value among all pollutants
    return max(available_levels) if available_levels else float('nan')

In [20]:
# Same pollutant list as defined earlier in the notebook. The last two entries must stay the
# PM columns, because the next cell slices them off with pollutants[:-2].
pollutants = ['nitrogen_dioxide','ozone',"sulphur_dioxide",'pm2_5','pm10']

In the next cell:
- We compute the overall daily EAQI from all daily averages and avgdRolls; the value chosen is the highest level reached by any pollutant
- For every pollutant, we compute its specific daily EAQI level (using avgs or avgdRolls), to get an idea of how severe it individually was on a given day
- The overall daily EAQI will be in the new column "eaqi", while the single-pollutant EAQIs will be in the "<pollutant>_eaqi" columns

In [21]:
#get a daily EAQI considering all pollutants
daily_aq['eaqi'] = daily_aq.apply(lambda row: compute_aqi(row), axis=1) 

for pollutant in pollutants[:-2]: #excluding rolled pollutants, get EAQI levels for singular pollutants
    daily_aq[pollutant+'_eaqi'] = daily_aq.apply(lambda row: get_aqi(row['average_daily_'+pollutant], THRESHOLDS[pollutant]), axis=1)
    
#Now consider the daily average of rolling averages, to compute the EAQI level for pm10 and pm2.5 pollutants
daily_aq['pm2_5'+'_eaqi'] = daily_aq.apply(lambda row: get_aqi(row['pm2_5_avgdRolls'], THRESHOLDS['pm2_5']), axis=1)
daily_aq['pm10'+'_eaqi'] = daily_aq.apply(lambda row: get_aqi(row['pm10_avgdRolls'], THRESHOLDS['pm10']), axis=1)
daily_aq.columns

Index(['date', 'average_daily_nitrogen_dioxide', 'max_daily_nitrogen_dioxide',
       'min_daily_nitrogen_dioxide', 'max_time_nitrogen_dioxide',
       'min_time_nitrogen_dioxide', 'average_daily_ozone', 'max_daily_ozone',
       'min_daily_ozone', 'max_time_ozone', 'min_time_ozone',
       'average_daily_sulphur_dioxide', 'max_daily_sulphur_dioxide',
       'min_daily_sulphur_dioxide', 'max_time_sulphur_dioxide',
       'min_time_sulphur_dioxide', 'average_daily_pm2_5', 'max_daily_pm2_5',
       'min_daily_pm2_5', 'max_time_pm2_5', 'min_time_pm2_5',
       'average_daily_pm10', 'max_daily_pm10', 'min_daily_pm10',
       'max_time_pm10', 'min_time_pm10', 'pm2_5_avgdRolls', 'pm10_avgdRolls',
       'eaqi', 'nitrogen_dioxide_eaqi', 'ozone_eaqi', 'sulphur_dioxide_eaqi',
       'pm2_5_eaqi', 'pm10_eaqi'],
      dtype='object')

In [22]:
# Inspect the daily table with the overall and per-pollutant EAQI columns
daily_aq

Unnamed: 0,date,average_daily_nitrogen_dioxide,max_daily_nitrogen_dioxide,min_daily_nitrogen_dioxide,max_time_nitrogen_dioxide,min_time_nitrogen_dioxide,average_daily_ozone,max_daily_ozone,min_daily_ozone,max_time_ozone,...,max_time_pm10,min_time_pm10,pm2_5_avgdRolls,pm10_avgdRolls,eaqi,nitrogen_dioxide_eaqi,ozone_eaqi,sulphur_dioxide_eaqi,pm2_5_eaqi,pm10_eaqi
0,2020-01-01,50.612500,69.3,35.9,19:00:00,07:00:00,5.458333,25.0,0.0,15:00:00,...,22:00:00,06:00:00,57.906944,65.789931,5,2,1,1,5,4
1,2020-01-02,56.670833,69.5,45.0,22:00:00,12:00:00,3.875000,16.0,0.0,14:00:00,...,00:00:00,06:00:00,45.204687,57.765972,4,2,1,1,4,4
2,2020-01-03,59.312500,80.0,42.5,20:00:00,07:00:00,1.958333,7.0,0.0,14:00:00,...,19:00:00,05:00:00,47.180903,58.237326,4,2,1,1,4,4
3,2020-01-04,49.558333,66.7,40.4,18:00:00,06:00:00,4.500000,15.0,0.0,15:00:00,...,00:00:00,23:00:00,41.885417,48.472917,4,2,1,1,4,3
4,2020-01-05,54.425000,83.8,38.5,21:00:00,03:00:00,8.791667,34.0,0.0,15:00:00,...,21:00:00,04:00:00,15.741493,19.141493,2,2,1,1,2,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1456,2023-12-27,38.341667,49.3,25.9,20:00:00,13:00:00,3.583333,17.0,0.0,13:00:00,...,21:00:00,12:00:00,36.530903,42.035937,4,1,1,1,4,3
1457,2023-12-28,38.650000,46.8,30.5,16:00:00,07:00:00,0.958333,4.0,0.0,11:00:00,...,02:00:00,15:00:00,38.868229,46.771701,4,1,1,1,4,3
1458,2023-12-29,47.962500,56.5,38.9,21:00:00,06:00:00,0.833333,3.0,0.0,14:00:00,...,00:00:00,05:00:00,38.611979,44.421181,4,2,1,1,4,3
1459,2023-12-30,39.741667,55.1,32.0,20:00:00,12:00:00,2.708333,13.0,0.0,13:00:00,...,23:00:00,17:00:00,43.038542,45.834028,4,1,1,1,4,3


In [23]:
# List the final columns of the daily table
daily_aq.columns

Index(['date', 'average_daily_nitrogen_dioxide', 'max_daily_nitrogen_dioxide',
       'min_daily_nitrogen_dioxide', 'max_time_nitrogen_dioxide',
       'min_time_nitrogen_dioxide', 'average_daily_ozone', 'max_daily_ozone',
       'min_daily_ozone', 'max_time_ozone', 'min_time_ozone',
       'average_daily_sulphur_dioxide', 'max_daily_sulphur_dioxide',
       'min_daily_sulphur_dioxide', 'max_time_sulphur_dioxide',
       'min_time_sulphur_dioxide', 'average_daily_pm2_5', 'max_daily_pm2_5',
       'min_daily_pm2_5', 'max_time_pm2_5', 'min_time_pm2_5',
       'average_daily_pm10', 'max_daily_pm10', 'min_daily_pm10',
       'max_time_pm10', 'min_time_pm10', 'pm2_5_avgdRolls', 'pm10_avgdRolls',
       'eaqi', 'nitrogen_dioxide_eaqi', 'ozone_eaqi', 'sulphur_dioxide_eaqi',
       'pm2_5_eaqi', 'pm10_eaqi'],
      dtype='object')

In [24]:
# Export is disabled: uncomment the next line to write the daily dataset to CSV
#daily_aq.to_csv("./dailyAQ_APIdata.csv")