In [16]:
import pandas as pd

# Location of the uploaded air-quality CSV, relative to the working directory.
file_path = "preprocessed_air_quality.csv"

# Read the whole file into the DataFrame that the following cells operate on.
df = pd.read_csv(filepath_or_buffer=file_path)

In [17]:
# --- Data preprocessing ---

# Parse 'Date' when the column is present; entries that cannot be parsed
# are coerced to NaT instead of raising.
if 'Date' in df.columns:
    df['Date'] = pd.to_datetime(df['Date'], errors='coerce')

# Impute missing values using the per-column medians of the numeric columns.
df = df.fillna(value=df.median(numeric_only=True))

# Feature engineering: split the parsed date into calendar components.
if 'Date' in df.columns:
    df = df.assign(
        Year=df['Date'].dt.year,
        Month=df['Date'].dt.month,
        Day=df['Date'].dt.day,
    )
# Categorize AQI into levels if AQI column exists
def categorize_aqi(aqi):
    """Map a scalar AQI reading to its descriptive level.

    Bands (upper bounds are inclusive):
        <= 50   'Good'
        <= 100  'Moderate'
        <= 150  'Unhealthy for Sensitive'
        <= 200  'Unhealthy'
        <= 300  'Very Unhealthy'
        > 300   'Hazardous'

    Args:
        aqi: A single AQI value (not a Series or array).

    Returns:
        The level name as a string, or None when ``aqi`` is missing
        (None, NaN, or pd.NA).
    """
    # Every comparison against NaN evaluates to False, so without this guard a
    # missing reading would fall through every band and be labelled
    # 'Hazardous'; None would raise a TypeError on the first comparison.
    if pd.isna(aqi):
        return None

    # Ordered from the lowest band upward; the first bound that contains the
    # reading decides the label.
    bands = (
        (50, 'Good'),
        (100, 'Moderate'),
        (150, 'Unhealthy for Sensitive'),
        (200, 'Unhealthy'),
        (300, 'Very Unhealthy'),
    )
    for upper_bound, level in bands:
        if aqi <= upper_bound:
            return level
    return 'Hazardous'

# Label every row with its AQI level, but only when an 'AQI' column is present.
if 'AQI' in df.columns:
    df['AQI_Level'] = df['AQI'].map(categorize_aqi)

# Descriptive statistics across all columns (numeric and non-numeric alike).
summary_stats = df.describe(include='all')

# Cell output: the first rows of the data and of the summary table, as a pair.
(df.head(), summary_stats.head())

(        Date         City Country  AQI  PM2.5 (µg/m³)  PM10 (µg/m³)  \
 0 2024-01-01     New York     USA   38          120.0         182.9   
 1 2024-01-01  Los Angeles     USA  280           38.4          46.9   
 2 2024-01-01       London      UK  117          168.1          34.3   
 3 2024-01-01      Beijing   China  197           96.8          35.4   
 4 2024-01-01        Delhi   India  187           76.2         226.8   
 
    NO2 (ppb)  SO2 (ppb)  CO (ppm)  O3 (ppb)  Temperature (°C)  Humidity (%)  \
 0       24.3       26.0      9.10     153.3              18.6            40   
 1       41.8       34.7      3.78     190.7              -2.2            59   
 2       81.5        8.2      3.67     105.4              36.3            62   
 3       18.5       39.4      9.51      92.8              29.9            32   
 4       46.9       17.2      1.02      68.4               9.9            55   
 
    Wind Speed (m/s)  Year  Month  Day                AQI_Level  
 0              13