## Initialization

In [None]:
import pandas as pd
import numpy as np

Read the file

In [None]:
# Destination directory for this step and the input CSV it reads from
path_to_dir_destination = './05-Indice-Qualidade-Ar/'
path_to_file = './04-estacoes-selecionadas/medicoes-longo_seleccionadas.csv'

# Read the CSV into a DataFrame and show it
df = pd.read_csv(filepath_or_buffer=path_to_file)
df

Convert the date column to datetime type

In [None]:
# Parse the 'data' column as datetimes and use it as the index.
# After the first run 'data' is the index and no longer a column, so the guard
# lets this cell be re-run without raising KeyError on df['data'].
if df.index.name != 'data':
    # assign/set_index return a new frame instead of mutating in place
    df = df.assign(data=pd.to_datetime(df['data'])).set_index('data')
print(df.index)
df

Add a `dia` (calendar day) column so that the data can be grouped by station, pollutant and day to calculate the daily air quality index

In [None]:
# Store the calendar date of each row (taken from the datetime index) in a 'dia' column;
# 'dia' is one of the grouping keys used for the daily aggregation below
df['dia'] = df.index.date
df

Calculate the daily air quality index using the appropriate statistic for each pollutant (station-days with fewer than 18 measurements, i.e. below the 75% threshold, are skipped):

- NO2, O3 and SO2: daily maximum value
- PM10 and PM2.5: daily mean value

In [None]:
# Grouping keys: one AQI value per station, pollutant and calendar day
group_cols = ['Station', 'Poluente', 'dia']

# Minimum number of valid (non-missing) measurements required per day.
# 18 is 75% of 24, which assumes hourly data — TODO confirm.
MIN_MEDICOES_DIA = 18

# Daily statistic applied to each pollutant
POLUENTES_MEDIA = ['PM10', 'PM2.5']        # daily mean
POLUENTES_MAXIMO = ['SO2', 'NO2', 'O3']    # daily maximum

# One record per (station, pollutant, day) that passes the coverage check
resultados = []

for (station, poluente, dia), grupo in df.groupby(group_cols):
    # Count only real measurements: rows with a missing 'Valor' must not help a
    # day reach the coverage threshold, since mean()/max() silently skip them.
    valores = grupo['Valor'].dropna()
    if len(valores) < MIN_MEDICOES_DIA:
        continue  # Ignore if it does not meet the 75% threshold

    if poluente in POLUENTES_MEDIA:
        aqi = valores.mean()
    elif poluente in POLUENTES_MAXIMO:
        aqi = valores.max()
    else:
        continue  # Not a target pollutant

    resultados.append({
        'Station': station,
        'Poluente': poluente,
        'dia': dia,
        'AQI': aqi
    })

# Create the final DataFrame; explicit columns keep the expected schema even
# when no group passed the checks above (empty result).
aqi_diario = pd.DataFrame(resultados, columns=['Station', 'Poluente', 'dia', 'AQI'])


Value ranges for air quality index categories:

| Classification | Numerical classification | PM10      | PM2.5    | NO2       | O3        | SO2       |
|----------------|--------------------------|-----------|----------|-----------|-----------|-----------|
| Very Good      | 0                        | 0–20      | 0–10     | 0–40      | 0–80      | 0–100     |
| Good           | 1                        | 21–35     | 11–20    | 41–100    | 81–100    | 101–200   |
| Moderate       | 2                        | 36–50     | 21–25    | 101–200   | 101–180   | 201–350   |
| Poor           | 3                        | 51–100    | 26–50    | 201–400   | 181–240   | 351–500   |
| Bad            | 4                        | 101–1200  | 51–800   | 401–1000  | 241–600   | 501–1250  |


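Based on the table, define the AQI intervals for each pollutant. Each upper bound is inclusive (e.g. a PM10 value of 20.5 falls in class 1); values above the highest bound, missing values and unlisted pollutants get no classification: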

In [None]:
def classificar_aqi(poluente, valor):
    """Return the numeric AQI class (0-4) of a daily value for a pollutant.

    Parameters
    ----------
    poluente : str
        Pollutant name: 'PM10', 'PM2.5', 'NO2', 'O3' or 'SO2'.
    valor : float
        Daily value to classify.

    Returns
    -------
    int or None
        Index of the first class whose inclusive upper bound is >= ``valor``
        (0 = best, 4 = worst). ``None`` when ``valor`` is missing, exceeds the
        highest bound, or ``poluente`` is not one of the listed pollutants.
    """
    if pd.isna(valor):
        return None

    # Inclusive upper bound of classes 0..4, per pollutant
    limites_superiores = {
        'PM10': (20, 35, 50, 100, 1200),
        'PM2.5': (10, 20, 25, 50, 800),
        'NO2': (40, 100, 200, 400, 1000),
        'O3': (80, 100, 180, 240, 600),
        'SO2': (100, 200, 350, 500, 1250),
    }

    # Unknown pollutants yield an empty tuple, so the loop is skipped entirely
    for classe, limite in enumerate(limites_superiores.get(poluente, ())):
        if valor <= limite:
            return classe

    return None


# Label for each numeric AQI class
mapa_classificacao = {0: 'Muito Bom', 1: 'Bom', 2: 'Médio', 3: 'Fraco', 4: 'Mau'}


def _classificar_linha(linha):
    """Classify one row of aqi_diario from its 'Poluente' and 'AQI' fields."""
    return classificar_aqi(linha['Poluente'], linha['AQI'])


# Numeric class per row, then its label
aqi_diario['Classificacao_AQI'] = aqi_diario.apply(_classificar_linha, axis=1)
aqi_diario['Qualidade do Ar'] = aqi_diario['Classificacao_AQI'].map(mapa_classificacao)

Resulting AQI and classification for each pollutant, by station and by day

In [None]:
# Show the daily AQI table built in the previous cells
aqi_diario

Preview the result and export it to CSV

In [None]:
# Quick look at the first rows (not rendered: it is not the cell's last expression)
aqi_diario.head(5)

# Export without the row index.
# NOTE(review): the file is written to the current working directory;
# path_to_dir_destination is defined earlier but never used — confirm the intended location.
aqi_diario.to_csv(path_or_buf="aqi_diario.csv", index=False)
