In [1]:
import pandas as pd
import warnings
# Suppress every Python warning raised after this point.
# NOTE(review): the blanket "ignore" filter also hides deprecation and
# data-quality warnings emitted by pandas; consider narrowing it to the
# specific category that is noisy once that is known.
warnings.filterwarnings("ignore")

### AQI Breakpoints (Pakistan NEQS-adapted)

In [2]:
# Concentration -> AQI lookup tables, keyed by pollutant column name.
#
# Every tuple is (c_low, c_high, i_low, i_high): a concentration band
# [c_low, c_high] and the AQI band [i_low, i_high] it is linearly mapped onto.
# Bands are listed in ascending order of concentration.
#
# NOTE(review): band edges are integers, so adjacent bands leave a gap between
# them (e.g. 35 and 36 for PM2.5); how a fractional reading inside such a gap
# is treated is decided by the code that consumes this table.
# NOTE(review): concentration units are not stated here -- presumably they
# match the units of the input CSV; confirm against the NEQS source.
breakpoints = {
    'PM2.5': [
        (0, 35, 0, 50),
        (36, 75, 51, 100),
        (76, 115, 101, 150),
        (116, 150, 151, 200),
        (151, 250, 201, 300),
        (251, 350, 301, 400),
        (351, 500, 401, 500),
    ],
    'PM10': [
        (0, 50, 0, 50),
        (51, 100, 51, 100),
        (101, 250, 101, 200),
        (251, 350, 201, 300),
        (351, 430, 301, 400),
        (431, 500, 401, 500),
    ],
    # Unlike the other pollutants, the O3 table has no 401-500 index band;
    # its highest band ends at index 400.
    'O3': [
        (0, 50, 0, 50),
        (51, 100, 51, 100),
        (101, 168, 101, 200),
        (169, 208, 201, 300),
        (209, 748, 301, 400),
    ],
    'NO2': [
        (0, 40, 0, 50),
        (41, 80, 51, 100),
        (81, 180, 101, 200),
        (181, 280, 201, 300),
        (281, 400, 301, 400),
        (401, 1000, 401, 500),
    ],
    'SO2': [
        (0, 40, 0, 50),
        (41, 80, 51, 100),
        (81, 380, 101, 200),
        (381, 800, 201, 300),
        (801, 1600, 301, 400),
        (1601, 2000, 401, 500),
    ],
}

### AQI Calculation Function

In [3]:
def calculate_aqi(concentration, bp):
    """Convert one pollutant concentration into its AQI sub-index.

    The sub-index is obtained by linear interpolation inside the breakpoint
    band that contains the concentration:

        I = (i_high - i_low) / (c_high - c_low) * (C - c_low) + i_low

    Breakpoint bands are defined on integer edges (e.g. 0-35, 36-75), which
    leaves a gap between consecutive bands. A fractional reading that falls in
    such a gap (e.g. 35.5) is assigned to the band above it and clamped to that
    band's lower edge, so it is never silently dropped.

    Args:
        concentration: Measured pollutant concentration (int or float).
        bp: Iterable of ``(c_low, c_high, i_low, i_high)`` tuples describing
            each concentration band and the AQI band it maps onto.

    Returns:
        The rounded sub-index as an ``int``, or ``None`` when the
        concentration is NaN, lies below the lowest band, or lies above the
        highest band (or when ``bp`` is empty).
    """
    bands = sorted(bp)

    # Written as `not (x >= low)` instead of `x < low` on purpose: NaN fails
    # every comparison, so this form rejects NaN together with values that are
    # below the first band.
    if not bands or not (concentration >= bands[0][0]):
        return None

    for c_low, c_high, i_low, i_high in bands:
        if concentration <= c_high:
            # A reading between the previous band's upper edge and this band's
            # lower edge is pulled up to c_low; readings already inside the
            # band are used unchanged.
            effective = max(concentration, c_low)
            return round(((i_high - i_low) / (c_high - c_low)) * (effective - c_low) + i_low)

    # Above the highest defined band: no sub-index is defined.
    return None

### Main Function with Missing Value Handling

In [4]:
def process_aqi(input_csv_path, output_csv_path):
    """Read pollutant readings from a CSV, add an overall ``AQI`` column, save.

    Steps:
      1. Load the CSV and verify that every pollutant column is present.
      2. Coerce the pollutant columns to numeric; entries that cannot be
         parsed as numbers become NaN and are treated as missing.
      3. Print the per-column missing counts, fill missing values with the
         column mean, and print the counts again.
      4. For each row, compute every pollutant's sub-index with
         ``calculate_aqi`` and keep the largest one as the row's AQI
         (``None`` when no pollutant yields a sub-index).
      5. Write the augmented table to ``output_csv_path`` without the index.

    Args:
        input_csv_path: Path of the CSV holding the pollutant columns.
        output_csv_path: Destination path for the CSV with the AQI column.

    Raises:
        ValueError: If one or more required pollutant columns are missing.
    """
    df = pd.read_csv(input_csv_path)

    # Ensure column names are consistent; report every missing column at once.
    required_cols = ['PM2.5', 'PM10', 'O3', 'NO2', 'SO2']
    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        raise ValueError(f"Missing required column: {', '.join(missing_cols)}")

    # Placeholder strings in a pollutant column would otherwise make the mean
    # or the breakpoint comparison fail; coerce them to NaN so they are counted
    # and imputed like any other missing value.
    df[required_cols] = df[required_cols].apply(pd.to_numeric, errors="coerce")

    # Step 1: Identify and display missing values
    print("Missing values before handling:\n", df[required_cols].isnull().sum())

    # Step 2: Handle missing values by filling with column mean
    df[required_cols] = df[required_cols].fillna(df[required_cols].mean())

    print("\nMissing values after handling:\n", df[required_cols].isnull().sum())

    # Step 3: Calculate AQI -- the overall index is the worst (largest)
    # pollutant sub-index of the row. Iterating over the zipped columns avoids
    # building a Series per row as iterrows() does.
    aqi_values = []
    for readings in zip(*(df[col] for col in required_cols)):
        candidates = (
            calculate_aqi(concentration, breakpoints[pollutant])
            for pollutant, concentration in zip(required_cols, readings)
        )
        sub_indices = [value for value in candidates if value is not None]
        aqi_values.append(max(sub_indices) if sub_indices else None)

    df['AQI'] = aqi_values

    # Step 4: Save to CSV
    df.to_csv(output_csv_path, index=False)
    print(f"\n✅ AQI calculation complete. Output saved to: {output_csv_path}")



# --------- Example Usage ---------

In [5]:
# Source CSV that holds the pollutant feature columns.
input_file = "final AQI 19-23.csv"
# Destination CSV: the same features plus the computed AQI values.
output_file = "air_quality_with_AQI.csv"

process_aqi(input_csv_path=input_file, output_csv_path=output_file)

Missing values before handling:
 PM2.5    0
PM10     0
O3       0
NO2      0
SO2      0
dtype: int64

Missing values after handling:
 PM2.5    0
PM10     0
O3       0
NO2      0
SO2      0
dtype: int64

✅ AQI calculation complete. Output saved to: air_quality_with_AQI.csv
