This notebook is used to determine the best approach to calculating the Air Quality Index (AQI). The methods developed here will be used in the pipeline.

In [251]:
# import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sqlite3  
from aqipy import aqi_us
from aqipy import aqi_cn
from aqipy import aqi_in
from aqipy import aqi_au
from aqipy import caqi_eu
from aqipy import cai_kr
from aqipy import daqi_uk
from aqipy import aqhi_hk
from aqipy import aqhi_ca
from aqipy import psi_sg

In [252]:
# Load data
import os

# Read the whole aqi_data table into a DataFrame. The connection is closed in
# a finally block so the database handle is released even if the query fails
# (the original cell left the connection open and created an unused cursor).
conn = sqlite3.connect('./data/bkk_aqi.db')
try:
    aqi_data = pd.read_sql_query("SELECT * FROM aqi_data", conn)
finally:
    conn.close()

# Parse the timestamp column and use it as the index so that later cells can
# select rows by time.
aqi_data['time_iso'] = pd.to_datetime(aqi_data['time_iso'])
aqi_data = aqi_data.set_index('time_iso')

print(aqi_data.head())

                           id  aqi   co     h  no2    o3  pressure  pm10  \
time_iso                                                                   
2025-02-13 20:00:00+07:00   1  138  0.1  48.0  2.4  19.4    1010.0    58   
2025-02-13 20:00:00+07:00   2  138  0.1  48.0  2.4  19.4    1010.0    58   
2025-02-13 20:00:00+07:00   3  138  0.1  48.0  2.4  19.4    1010.0    58   
2025-02-13 20:00:00+07:00   4  138  0.1  48.0  2.4  19.4    1010.0    58   
2025-02-13 20:00:00+07:00   5  138  0.1  48.0  2.4  19.4    1010.0    58   

                           pm25  so2  temperature  wind  city_geo_lat  \
time_iso                                                                
2025-02-13 20:00:00+07:00   138  0.6         30.0   2.0     13.756331   
2025-02-13 20:00:00+07:00   138  0.6         30.0   2.0     13.756331   
2025-02-13 20:00:00+07:00   138  0.6         30.0   2.0     13.756331   
2025-02-13 20:00:00+07:00   138  0.6         30.0   2.0     13.756331   
2025-02-13 20:00:00+07:00   1

In [253]:
# Time-windowed pollutant statistics used as inputs to the AQI calculators.
#
# Every window is anchored on the newest timestamp in aqi_data and keeps the
# rows whose index is >= (newest timestamp - N hours), so both ends of the
# window are included.
#
# NOTE(review): the sample printed after loading shows several rows sharing
# one timestamp; if those are duplicates they weight the means below —
# confirm upstream.
# NOTE(review): in that same sample pm25 equals aqi (138), so these columns
# may already be sub-index values rather than concentrations — confirm the
# units before trusting the calculator results.


def recent_window(hours):
    """Return the rows of ``aqi_data`` from the most recent ``hours`` hours.

    Parameters
    ----------
    hours : int or float
        Length of the look-back window, in hours.

    Returns
    -------
    pandas.DataFrame
        Rows whose index is at or after the newest index value minus
        ``hours`` hours.
    """
    cutoff = aqi_data.index.max() - pd.Timedelta(hours=hours)
    return aqi_data[aqi_data.index >= cutoff]


# Most recent 1 hour: mean and max of o3, no2 and so2
recent_data_1h = recent_window(1)
mean_o3_1h = recent_data_1h['o3'].mean()
mean_no2_1h = recent_data_1h['no2'].mean()
mean_so2_1h = recent_data_1h['so2'].mean()
max_o3_1h = recent_data_1h['o3'].max()
max_no2_1h = recent_data_1h['no2'].max()
max_so2_1h = recent_data_1h['so2'].max()

# Most recent 3 hours: mean of pm10, pm25, o3, so2, no2 and co
recent_data_3h = recent_window(3)
mean_pm10_3h = recent_data_3h['pm10'].mean()
mean_pm25_3h = recent_data_3h['pm25'].mean()
mean_o3_3h = recent_data_3h['o3'].mean()
mean_so2_3h = recent_data_3h['so2'].mean()
mean_no2_3h = recent_data_3h['no2'].mean()
mean_co_3h = recent_data_3h['co'].mean()

# Most recent 4 hours: mean of o3
recent_data_4h = recent_window(4)
mean_o3_4h = recent_data_4h['o3'].mean()

# Most recent 8 hours: mean of o3 and co
recent_data_8h = recent_window(8)
mean_o3_8h = recent_data_8h['o3'].mean()
mean_co_8h = recent_data_8h['co'].mean()

# Most recent 24 hours: mean of pm10, pm25 and so2
recent_data_24h = recent_window(24)
mean_pm10_24h = recent_data_24h['pm10'].mean()
mean_pm25_24h = recent_data_24h['pm25'].mean()
mean_so2_24h = recent_data_24h['so2'].mean()

In [254]:
# Singapore PSI: feed the windowed averages to the sg calculator
sg_inputs = dict(
    o3_8h=mean_o3_8h,
    co_8h=mean_co_8h,
    pm10_24h=mean_pm10_24h,
    pm25_24h=mean_pm25_24h,
    so2_24h=mean_so2_24h,
    no2_1h=mean_no2_1h,
)
aqi_sg, aqi_data_sg = psi_sg.get_aqi(**sg_inputs)

print('Singapore PSI:', aqi_sg)

# Pick the pollutant whose entry has the largest first element
# (presumably its sub-index; positions 1 and 2 are printed as messages below)
max_pollutant = max(aqi_data_sg, key=lambda name: aqi_data_sg[name][0])
sg_worst_entry = aqi_data_sg[max_pollutant]

# Report that pollutant together with its general and risk messages
print('Pollutant with maximum AQI:', max_pollutant)
print('Singapore PSI General Message:', sg_worst_entry[1])
print('Singapore PSI Risk Message:', sg_worst_entry[2])

Singapore PSI: 500
Pollutant with maximum AQI: o3_8h
Singapore PSI General Message: Healthy people may experience adverse symptoms that affect normal activity.
Singapore PSI Risk Message: PSI levels above 400 may be life-threatening to ill and elderly persons


In [255]:
# Get AQI using the US calculations.
# The result is stored in `aqi_us_value` (not `aqi_us`) so the imported
# `aqi_us` module is not overwritten; rebinding the module name made this
# cell fail on a second run because the name then pointed at the result,
# which has no `get_aqi` attribute.
aqi_us_value, aqi_data_us = aqi_us.get_aqi(
    o3_8h=mean_o3_8h,
    co_8h=mean_co_8h,
    pm10_24h=mean_pm10_24h,
    pm25_24h=mean_pm25_24h,
    so2_24h=mean_so2_24h,
    no2_1h=mean_no2_1h,
    o3_1h=max_o3_1h,
)
print('US AQI:', aqi_us_value)

# Find the pollutant whose entry has the largest first element
# (presumably its sub-index — confirm against the aqipy return format)
max_pollutant = max(aqi_data_us, key=lambda k: aqi_data_us[k][0])

# Print the general message and risk message for that pollutant
print('Pollutant with maximum AQI:', max_pollutant)
print('US AQI General Message:', aqi_data_us[max_pollutant][1])
print('US AQI Risk Message:', aqi_data_us[max_pollutant][2])


US AQI: 500
Pollutant with maximum AQI: o3_1h
US AQI General Message: Severe respiratory effects and impaired breathing likely in people with lung disease (such as asthma), children, older adults, people who are active outdoors (including outdoor workers), people with certain genetic variants, and people with diets limited in certain nutrients; increasingly severe respiratory effects likely in general population.
US AQI Risk Message: Everyone should avoid all outdoor exertion.


In [256]:
# Hong Kong AQHI: computed from the 3-hour pollutant means
hk_inputs = dict(
    pm25_3h=mean_pm25_3h,
    pm10_3h=mean_pm10_3h,
    o3_3h=mean_o3_3h,
    no2_3h=mean_no2_3h,
    so2_3h=mean_so2_3h,
)
# Only the first two returned items are kept; any further items go to `_`
aqhi_hk_value, aqhi_data_hk_dict, *_ = aqhi_hk.get_aqhi(**hk_inputs)
print('Hong Kong AQHI:', aqhi_hk_value)



Hong Kong AQHI: 11


In [257]:
# AQI using the Australia calculations.
# The result is stored in `aqi_au_value` (not `aqi_au`) so the imported
# `aqi_au` module is not overwritten; rebinding the module name made this
# cell fail on a second run because the name then pointed at the result,
# which has no `get_aqi` attribute.
aqi_au_value, aqi_data_au = aqi_au.get_aqi(
    pm10_24h=mean_pm10_24h,
    pm25_24h=mean_pm25_24h,
    so2_24h=mean_so2_24h,
    no2_1h=mean_no2_1h,
    o3_1h=max_o3_1h,
    o3_4h=mean_o3_4h,
    co_8h=mean_co_8h,
)
print('Australia AQI:', aqi_au_value)

Australia AQI: 201
