## Environment: Air Quality Prediction

Design AI models to predict air quality levels in urban areas using data from traffic patterns, 
industrial emissions, and weather conditions. This would help policymakers implement measures 
to reduce pollution and safeguard public health.

In [3]:
# =======================================================================
# ENVIRONMENT - AIR QUALITY PREDICTION
# =======================================================================

In [6]:
import pandas as pd
import random

#balancing using smote
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
#model training
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import classification_report, accuracy_score

In [7]:
# Step 1: Generate synthetic air quality data
def generate_environment_data(num_rows):
    data = {
        "Region": [f"Region_{i}" for i in range(1, num_rows + 1)],
        "Traffic_Density": [random.uniform(0.1, 1.0) for _ in range(num_rows)],  # Traffic congestion level
        "Industrial_Emissions": [random.uniform(0, 100) for _ in range(num_rows)],  # Emissions in units
        "Temperature": [random.uniform(15, 45) for _ in range(num_rows)],  # Celsius
        "Air_Quality_Index": [random.choice(["Good", "Moderate", "Unhealthy"]) for _ in range(num_rows)]  # Labels
    }
    return pd.DataFrame(data)

In [8]:
# Generate environment dataset
environment_data = generate_environment_data(100000)
environment_data.to_csv("AssignData/synthetic_environment_data.csv", index=False)
print("Environment dataset generated!")

Environment dataset generated!


In [9]:
# Step 2: Preprocess and Balance Data using SMOTE
# Convert Air_Quality_Index labels to numeric for modeling
environment_data["Air_Quality_Index"] = environment_data["Air_Quality_Index"].map({"Good": 0, "Moderate": 1, "Unhealthy": 2})

# Features and target
X_env = environment_data[["Traffic_Density", "Industrial_Emissions", "Temperature"]]
y_env = environment_data["Air_Quality_Index"]

In [10]:
# Standardize features
scaler_env = StandardScaler()
X_env_scaled = scaler_env.fit_transform(X_env)

# Train-test split
X_env_train, X_env_test, y_env_train, y_env_test = train_test_split(X_env_scaled, y_env, test_size=0.2, random_state=42)

# Balance data using SMOTE
smote_env = SMOTE(random_state=42)
X_env_resampled, y_env_resampled = smote_env.fit_resample(X_env_train, y_env_train)
print("Environment data balanced using SMOTE!")

Environment data balanced using SMOTE!


In [12]:
# Step 3: Train and Evaluate the Model
rf_env_model = RandomForestClassifier(random_state=42)
rf_env_model.fit(X_env_resampled, y_env_resampled)

In [13]:
y_env_pred = rf_env_model.predict(X_env_test)
print(f"Environment Model Accuracy: {accuracy_score(y_env_test, y_env_pred):.2f}")
print(classification_report(y_env_test, y_env_pred))

Environment Model Accuracy: 0.33
              precision    recall  f1-score   support

           0       0.33      0.34      0.33      6660
           1       0.32      0.33      0.33      6565
           2       0.34      0.33      0.33      6775

    accuracy                           0.33     20000
   macro avg       0.33      0.33      0.33     20000
weighted avg       0.33      0.33      0.33     20000

