# This notebook is being used to determine the best approach to calculating AQI.  The methods developed here will be used in the pipeline.  This is not a final product!  This is a work in progress.

In [122]:
# import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sqlite3  
from aqipy import aqi_us
from aqipy import aqi_cn
from aqipy import aqi_au
from aqipy import caqi_eu
from aqipy import cai_kr
from aqipy import daqi_uk
from aqipy import aqhi_ca
from aqipy import psi_sg

In [123]:
# Load every stored reading from the SQLite database into a DataFrame indexed by timestamp.
#
# The database is opened read-only through a URI: a plain sqlite3.connect(path) silently
# creates a new, empty database when the path is wrong (it never raises FileNotFoundError),
# whereas mode=ro raises sqlite3.OperationalError for a missing file.
conn = None  # defined up front so the finally block never sees a stale or missing name
try:
    conn = sqlite3.connect('file:./data/bkk_aqi.db?mode=ro', uri=True)
    aqi_data = pd.read_sql_query("SELECT * FROM aqi_data", conn)
    aqi_data['time_iso'] = pd.to_datetime(aqi_data['time_iso'])
    aqi_data = aqi_data.set_index('time_iso')
    print(aqi_data.head())
except sqlite3.Error as e:
    print(f"Database error: {e}")
    raise  # re-raise: later cells must not run against a missing or stale aqi_data
except Exception as e:  # anything else (e.g. a missing time_iso column)
    print(f"An error occurred: {e}")
    raise
finally:
    # Close the connection whether or not the load succeeded
    if conn is not None:
        conn.close()

                           id  aqi   co     h  no2    o3  pressure  pm10  \
time_iso                                                                   
2025-02-13 20:00:00+07:00   1  138  0.1  48.0  2.4  19.4    1010.0    58   
2025-02-13 20:00:00+07:00   2  138  0.1  48.0  2.4  19.4    1010.0    58   
2025-02-13 20:00:00+07:00   3  138  0.1  48.0  2.4  19.4    1010.0    58   
2025-02-13 20:00:00+07:00   4  138  0.1  48.0  2.4  19.4    1010.0    58   
2025-02-13 20:00:00+07:00   5  138  0.1  48.0  2.4  19.4    1010.0    58   

                           pm25  so2  temperature  wind  city_geo_lat  \
time_iso                                                                
2025-02-13 20:00:00+07:00   138  0.6         30.0   2.0     13.756331   
2025-02-13 20:00:00+07:00   138  0.6         30.0   2.0     13.756331   
2025-02-13 20:00:00+07:00   138  0.6         30.0   2.0     13.756331   
2025-02-13 20:00:00+07:00   138  0.6         30.0   2.0     13.756331   
2025-02-13 20:00:00+07:00   1

In [124]:
# Note: the following builds every averaging window needed by the different countries'
# AQI calculations.  Each window is kept as its own DataFrame so that it can be plotted
# later if I elect to do so.
#
# NOTE(review): in the sample rows printed by the load cell, pm25 equals aqi, which
# suggests the stored values may be sub-index values rather than concentrations.
# The units expected by the aqipy functions should be confirmed -- TODO verify.

assert not aqi_data.empty, "aqi_data contains no rows"

# Work on a time-ordered view with exactly one row per timestamp:
#  * the SQL query has no ORDER BY, so "most recent" must not rely on row position;
#  * the table holds repeated rows for the same timestamp, which would otherwise weight
#    some hours more heavily than others in the means below.
# The stable sort keeps the original row order within a timestamp, so keep='last'
# retains the row that came last in the query result (presumably the newest insert).
readings = aqi_data.sort_index(kind='mergesort')
readings = readings[~readings.index.duplicated(keep='last')]
latest_time = readings.index.max()


def recent_window(hours):
    """Return the rows of `readings` stamped within the last `hours` hours.

    The window is right-closed, (latest_time - hours, latest_time], so for hourly
    readings an N-hour window holds N timestamps.  An inclusive lower bound would
    also pull in the reading stamped exactly N hours earlier (N + 1 timestamps).
    """
    return readings[readings.index > latest_time - pd.Timedelta(hours=hours)]


# Pull the most recent so2 reading
recent_so2 = readings['so2'].iloc[-1]

# Pull the most recent 1 hour of data
recent_data_1h = recent_window(1)

# Calculate the mean of the most recent 1 hour of data for o3, no2, so2, pm10, pm25 and co
mean_o3_1h = recent_data_1h['o3'].mean()
mean_no2_1h = recent_data_1h['no2'].mean()
mean_so2_1h = recent_data_1h['so2'].mean()
mean_pm10_1h = recent_data_1h['pm10'].mean()
mean_pm25_1h = recent_data_1h['pm25'].mean()
mean_co_1h = recent_data_1h['co'].mean()

# Calculate the max of the most recent 1 hour of data for o3, no2, and so2
max_o3_1h = recent_data_1h['o3'].max()
max_no2_1h = recent_data_1h['no2'].max()
max_so2_1h = recent_data_1h['so2'].max()

# Pull the most recent 3 hours of data
recent_data_3h = recent_window(3)

# Calculate the mean of the recent 3 hours of data for pm10, pm25, o3, so2, no2 and co
mean_pm10_3h = recent_data_3h['pm10'].mean()
mean_pm25_3h = recent_data_3h['pm25'].mean()
mean_o3_3h = recent_data_3h['o3'].mean()
mean_so2_3h = recent_data_3h['so2'].mean()
mean_no2_3h = recent_data_3h['no2'].mean()
mean_co_3h = recent_data_3h['co'].mean()

# Pull the most recent 4 hours of data
recent_data_4h = recent_window(4)

# Calculate the mean of the recent 4 hours of data for o3
mean_o3_4h = recent_data_4h['o3'].mean()

# Pull the most recent 8 hours of data
recent_data_8h = recent_window(8)

# Calculate the mean of the recent 8 hours of data for o3 and co
mean_o3_8h = recent_data_8h['o3'].mean()
mean_co_8h = recent_data_8h['co'].mean()

# Pull the most recent 24 hours of data
recent_data_24h = recent_window(24)

# Calculate the mean of the recent 24 hours of data for pm10, pm25, so2, co and no2
mean_pm10_24h = recent_data_24h['pm10'].mean()
mean_pm25_24h = recent_data_24h['pm25'].mean()
mean_so2_24h = recent_data_24h['so2'].mean()
mean_co_24h = recent_data_24h['co'].mean()
mean_no2_24h = recent_data_24h['no2'].mean()  # 24-hour NO2 input for formulas that take no2_24h

In [125]:
# Singapore PSI: feed the pre-computed window statistics to psi_sg.get_aqi
psi_inputs = {
    'o3_8h': mean_o3_8h,
    'co_8h': mean_co_8h,
    'pm10_24h': mean_pm10_24h,
    'pm25_24h': mean_pm25_24h,
    'so2_24h': mean_so2_24h,
    'no2_1h': mean_no2_1h,
}
aqi_sg, aqi_data_sg = psi_sg.get_aqi(**psi_inputs)

print('Singapore PSI:', aqi_sg)

# The dominant pollutant is the key whose entry has the largest first element
max_pollutant = max(aqi_data_sg, key=lambda name: aqi_data_sg[name][0])
dominant_entry = aqi_data_sg[max_pollutant]

# Elements 1 and 2 of that entry are printed as the general and risk messages
print('Pollutant with maximum PSI:', max_pollutant)
print('Singapore PSI General Message:', dominant_entry[1])
print('Singapore PSI Risk Message:', dominant_entry[2])

Singapore PSI: 500
Pollutant with maximum PSI: o3_8h
Singapore PSI General Message: Healthy people may experience adverse symptoms that affect normal activity.
Singapore PSI Risk Message: PSI levels above 400 may be life-threatening to ill and elderly persons


In [126]:
# Get AQI using the US calculations.
#
# The result is bound to aqi_value_us, not aqi_us: assigning to aqi_us replaced the
# imported aqipy module with the returned index value, so running this cell a second
# time failed with AttributeError (the value has no get_aqi).
aqi_value_us, aqi_data_us = aqi_us.get_aqi(
    o3_8h=mean_o3_8h,
    co_8h=mean_co_8h,
    pm10_24h=mean_pm10_24h,
    pm25_24h=mean_pm25_24h,
    so2_24h=mean_so2_24h,
    no2_1h=mean_no2_1h,
    o3_1h=mean_o3_1h,
)
print('US AQI:', aqi_value_us)

# Find the pollutant with the maximum AQI value (first element of each entry)
max_pollutant = max(aqi_data_us, key=lambda k: aqi_data_us[k][0])

# Print the general message and risk message for the pollutant with the maximum AQI value
print('Pollutant with maximum AQI:', max_pollutant)
print('US AQI General Message:', aqi_data_us[max_pollutant][1])
print('US AQI Risk Message:', aqi_data_us[max_pollutant][2])


US AQI: 500
Pollutant with maximum AQI: o3_1h
US AQI General Message: Severe respiratory effects and impaired breathing likely in people with lung disease (such as asthma), children, older adults, people who are active outdoors (including outdoor workers), people with certain genetic variants, and people with diets limited in certain nutrients; increasingly severe respiratory effects likely in general population.
US AQI Risk Message: Everyone should avoid all outdoor exertion.


In [127]:
# AQI using the Australia calculations.
#
# The result is bound to aqi_value_au, not aqi_au: assigning to aqi_au replaced the
# imported aqipy module with the returned index value, so running this cell a second
# time failed with AttributeError (the value has no get_aqi).
aqi_value_au, aqi_data_au = aqi_au.get_aqi(
    pm10_24h=mean_pm10_24h,
    pm25_24h=mean_pm25_24h,
    so2_24h=mean_so2_24h,
    no2_1h=mean_no2_1h,
    o3_1h=max_o3_1h,
    o3_4h=mean_o3_4h,
    co_8h=mean_co_8h,
)
print('Australia AQI:', aqi_value_au)

# Find the pollutant with the maximum AQI value (first element of each entry)
max_pollutant = max(aqi_data_au, key=lambda k: aqi_data_au[k][0])

# Print the general message and risk message for the pollutant with the maximum AQI value
print('Pollutant with maximum AQI:', max_pollutant)
print('Australia AQI General Message:', aqi_data_au[max_pollutant][1])
print('Australia AQI Risk Message:', aqi_data_au[max_pollutant][2])

Australia AQI: 201
Pollutant with maximum AQI: o3_1h
Australia AQI General Message: Adults should avoid strenuous outdoor activities.
Australia AQI Risk Message: Sensitive groups should avoid all outdoor activities.


In [128]:
# AQI using the EU calculations.
#
# The result is bound to caqi_value_eu, not caqi_eu: assigning to caqi_eu replaced the
# imported aqipy module with the returned index value, so running this cell a second
# time failed with AttributeError (the value has no get_caqi).
caqi_value_eu, aqi_data_eu = caqi_eu.get_caqi(
    pm10_24h=mean_pm10_24h,
    pm10_1h=mean_pm10_1h,
    pm25_24h=mean_pm25_24h,
    pm25_1h=mean_pm25_1h,
    so2_max_1h=max_so2_1h,
    no2_max_1h=max_no2_1h,
    o3_max_1h=max_o3_1h,
    co_1h=mean_co_1h,
)
print('EU CAQI:', caqi_value_eu)

# note: EU doesn't have general or risk messages


EU CAQI: 100


In [129]:
# AQI using the China calculations.
#
# Two fixes relative to the earlier version of this cell:
#  * no2_24h now receives a 24-hour NO2 mean; it was previously fed the 3-hour mean.
#  * the result is bound to aqi_value_cn, not aqi_cn: assigning to aqi_cn replaced the
#    imported aqipy module with the returned index value, so running this cell a second
#    time failed with AttributeError (the value has no get_aqi).

# 24-hour NO2 mean, computed here from the 24-hour window so this cell has the input it needs
mean_no2_24h = recent_data_24h['no2'].mean()

aqi_value_cn, aqi_data_cn = aqi_cn.get_aqi(
    pm10_24h=mean_pm10_24h,
    pm25_24h=mean_pm25_24h,
    so2_24h=mean_so2_24h,
    no2_24h=mean_no2_24h,
    o3_8h=mean_o3_8h,
    co_24h=mean_co_24h,
    o3_1h=mean_o3_1h,
)
print('China AQI:', aqi_value_cn)

# Find the pollutant with the maximum AQI value (first element of each entry)
max_pollutant = max(aqi_data_cn, key=lambda k: aqi_data_cn[k][0])

# Print the general message and risk message for the pollutant with the maximum AQI value
print('Pollutant with maximum AQI:', max_pollutant)
print('China AQI General Message:', aqi_data_cn[max_pollutant][1])
print('China AQI Risk Message:', aqi_data_cn[max_pollutant][2])


China AQI: 300
Pollutant with maximum AQI: o3_1h
China AQI General Message: Healthy people will experience reduced endurance in activities and may also show noticeably strong symptoms. Other illnesses may be triggered in healthy people. Elders and the sick should remain indoors and avoid exercise. Healthy individuals should avoid outdoor activities.
China AQI Risk Message: Children, seniors and the sick should stay indoors and avoid physical exertion. General population should avoid outdoor activities.


In [130]:
# DAQI using the UK calculations.
#
# The result is bound to daqi_value_uk, not daqi_uk: assigning to daqi_uk replaced the
# imported aqipy module with the returned index value, so running this cell a second
# time failed with AttributeError (the value has no get_daqi).
daqi_value_uk, aqi_data_uk = daqi_uk.get_daqi(
    pm10_24h=mean_pm10_24h,
    pm25_24h=mean_pm25_24h,
    so2_15m=recent_so2,  # latest single so2 reading stands in for the 15-minute value
    no2_1h=mean_no2_1h,
    o3_1h=mean_o3_1h,
)
print('UK DAQI:', daqi_value_uk)

# Find the pollutant with the maximum DAQI value (first element of each entry)
max_pollutant = max(aqi_data_uk, key=lambda k: aqi_data_uk[k][0])

# Print the general message and risk message for the pollutant with the maximum DAQI value
print('Pollutant with maximum DAQI:', max_pollutant)
print('UK DAQI General Message:', aqi_data_uk[max_pollutant][1])
print('UK DAQI Risk Message:', aqi_data_uk[max_pollutant][2])

UK DAQI: 10
Pollutant with maximum DAQI: o3_1h
UK DAQI General Message: Reduce physical exertion, particularly outdoors, especially if you experience symptoms such as cough or sore throat.
UK DAQI Risk Message: Adults and children with lung problems, adults with heart problems, and older people, should avoid strenuous physical activity. People with asthma may find they need to use their reliever inhaler more often.


In [131]:
# CAI using the Korean calculations.
#
# The result is bound to cai_value_kr, not cai_kr: assigning to cai_kr replaced the
# imported aqipy module with the returned index value, so running this cell a second
# time failed with AttributeError (the value has no get_aqi).
cai_value_kr, aqi_data_kr = cai_kr.get_aqi(
    pm10_24h=mean_pm10_24h,
    pm25_24h=mean_pm25_24h,
    no2_1h=mean_no2_1h,
    so2_1h=mean_so2_1h,
    o3_1h=mean_o3_1h,
)
print('Korean CAI:', cai_value_kr)

# Find the pollutant with the maximum CAI value (first element of each entry)
max_pollutant = max(aqi_data_kr, key=lambda k: aqi_data_kr[k][0])

# Print the general message and risk message for the pollutant with the maximum CAI value
print('Pollutant with maximum CAI:', max_pollutant)
print('Korean CAI General Message:', aqi_data_kr[max_pollutant][1])
print('Korean CAI Risk Message:', aqi_data_kr[max_pollutant][2])

Korean CAI: 500
Pollutant with maximum CAI: o3_1h
Korean CAI General Message: A level which may need to take emergency measures for patients and members of sensitive groups and have harmful impacts on the general public
Korean CAI Risk Message: A level which may need to take emergency measures for patients and members of sensitive groups and have harmful impacts on the general public


In [132]:
# AQHI using the Canadian calculations.
#
# The result is bound to aqhi_value_ca, not aqhi_ca: assigning to aqhi_ca replaced the
# imported aqipy module with the returned index value, so running this cell a second
# time failed with AttributeError (the value has no get_aqhi).
# The third return value is not used in this cell; it gets a descriptive throwaway name
# instead of `_`, which IPython reserves for the previous cell's output.
aqhi_value_ca, aqi_data_ca, _aqhi_unused_ca = aqhi_ca.get_aqhi(
    pm10_3h=mean_pm10_3h,
    pm25_3h=mean_pm25_3h,
    no2_3h=mean_no2_3h,
    o3_3h=mean_o3_3h,
)
print('Canadian AQHI:', aqhi_value_ca)



Canadian AQHI: 11
