In [2]:
import pandas as pd

# Sample data: nine consecutive daily readings (1-9 June 2019), one column
# per pollutant. Column order matters for the printed table further down.
data = {
    "Date": [f"{day}-Jun-2019" for day in range(1, 10)],
    "PM2.5": [222, 170, 223, 153, 157, 129, 149, 156, 160],
    "PM10": [190, 153, 263, 126, 130, 77, 78, 112, 112],
    "O3": [121, 85, 94, 110, 111, 114, 97, 99, 93],
    "NO2": [108, 93, 74, 41, 40, 26, 61, 75, 78],
    "SO2": [42, 26, 37, 4, 8, 2, 5, 35, 19],
}

# One row per day; the dict keys become the column labels.
df = pd.DataFrame(data=data)

# Define breakpoints for each pollutant based on NEQS.
#
# Maps a pollutant column name to an ascending list of 4-tuples
#     (c_low, c_high, i_low, i_high)
# where [c_low, c_high] is a concentration range and [i_low, i_high] is the
# AQI index range it maps onto (this is the order calculate_aqi unpacks).
#
# NOTE(review): concentration units are not stated anywhere in this file;
# presumably they match the units of the readings in `data` - confirm
# against the standard these values were taken from.
# NOTE: bounds are whole numbers and adjacent rows are not contiguous for
# fractional values (e.g. one row ends at 35 and the next starts at 36).
breakpoints = {
    'PM2.5': [
        (0, 35, 0, 50),
        (36, 75, 51, 100),
        (76, 115, 101, 150),
        (116, 150, 151, 200),
        (151, 250, 201, 300),
        (251, 350, 301, 400),
        (351, 500, 401, 500),
    ],
    'PM10': [
        (0, 50, 0, 50),
        (51, 100, 51, 100),
        (101, 250, 101, 200),
        (251, 350, 201, 300),
        (351, 430, 301, 400),
        (431, 500, 401, 500),
    ],
    # O3 has only five rows: its table stops at index 400 (no 401-500 row).
    'O3': [
        (0, 50, 0, 50),
        (51, 100, 51, 100),
        (101, 168, 101, 200),
        (169, 208, 201, 300),
        (209, 748, 301, 400),
    ],
    'NO2': [
        (0, 40, 0, 50),
        (41, 80, 51, 100),
        (81, 180, 101, 200),
        (181, 280, 201, 300),
        (281, 400, 301, 400),
        (401, 1000, 401, 500),
    ],
    'SO2': [
        (0, 40, 0, 50),
        (41, 80, 51, 100),
        (81, 380, 101, 200),
        (381, 800, 201, 300),
        (801, 1600, 301, 400),
        (1601, 2000, 401, 500),
    ],
}

def calculate_aqi(concentration, bp):
    """Convert a pollutant concentration into an AQI sub-index.

    The sub-index is obtained by linear interpolation inside the breakpoint
    row whose concentration range contains ``concentration``::

        I = (i_high - i_low) / (c_high - c_low) * (C - c_low) + i_low

    Breakpoint tables with whole-number bounds leave a gap between adjacent
    rows (e.g. ``... 35`` and ``36 ...``). A fractional reading that falls
    in such a gap is treated as belonging to the lower row and receives that
    row's top index, which is equivalent to truncating the reading to the
    table's precision. Without this, such readings would yield ``None`` and
    be dropped by callers.

    Args:
        concentration: Measured concentration (int or float).
        bp: Iterable of ``(c_low, c_high, i_low, i_high)`` tuples. Gap
            handling assumes the rows are listed in ascending order.

    Returns:
        The rounded sub-index as an ``int``, or ``None`` when the value is
        below the first row, above the last row, or not comparable (NaN).
    """
    rows = list(bp)

    # Pass 1: exact range match (unchanged behaviour for in-range values).
    for c_low, c_high, i_low, i_high in rows:
        if c_low <= concentration <= c_high:
            slope = (i_high - i_low) / (c_high - c_low)
            return round(slope * (concentration - c_low) + i_low)

    # Pass 2: value sits strictly between two adjacent rows. NaN fails both
    # comparisons and therefore falls through to ``None``.
    for lower, upper in zip(rows, rows[1:]):
        lower_c_high, lower_i_high = lower[1], lower[3]
        upper_c_low = upper[0]
        if lower_c_high < concentration < upper_c_low:
            return round(lower_i_high)

    return None

# Overall AQI for a day is the worst (largest) pollutant sub-index.
pollutant_columns = ['PM2.5', 'PM10', 'O3', 'NO2', 'SO2']

aqi_values = []
for _, day in df.iterrows():
    # Sub-index for every pollutant; None marks a reading that matched no
    # breakpoint row and is left out of the maximum.
    candidates = (
        calculate_aqi(day[name], breakpoints[name])
        for name in pollutant_columns
    )
    usable = [value for value in candidates if value is not None]
    # A day with no usable sub-index at all gets None rather than an error.
    aqi_values.append(max(usable, default=None))

df['AQI'] = aqi_values

print(df[['Date', *pollutant_columns, 'AQI']])


         Date  PM2.5  PM10   O3  NO2  SO2  AQI
0  1-Jun-2019    222   190  121  108   42  272
1  2-Jun-2019    170   153   85   93   26  220
2  3-Jun-2019    223   263   94   74   37  273
3  4-Jun-2019    153   126  110   41    4  203
4  5-Jun-2019    157   130  111   40    8  207
5  6-Jun-2019    129    77  114   26    2  170
6  7-Jun-2019    149    78   97   61    5  199
7  8-Jun-2019    156   112   99   75   35  206
8  9-Jun-2019    160   112   93   78   19  210
