In [None]:
import os
import pandas as pd

# Root folder of the dataset; every sub-directory is treated as one city.
BASE_DIR = r"../datasets/Air Pollution Image Dataset"

# One metadata frame per city, concatenated once after the loop.
all_dfs = []

# os.listdir() returns entries in arbitrary order; sorting keeps the row order
# of `data` identical across machines and kernel restarts.
for city in sorted(os.listdir(BASE_DIR)):
    city_path = os.path.join(BASE_DIR, city)
    if not os.path.isdir(city_path):
        continue  # skip stray files sitting next to the city folders

    # Each city folder is expected to hold "<city>_AQI_ALL_info.csv".
    csv_path = os.path.join(city_path, f"{city}_AQI_ALL_info.csv")
    df = pd.read_csv(csv_path)
    df["city"] = city

    # Build <city_path>/<AQI_Class>/<Filename> from each row's own values.
    # Pairing the two columns directly (instead of looking the class up by
    # filename) is linear in the number of rows and stays correct when the
    # same filename appears in more than one row.
    df["image_path"] = [
        os.path.join(city_path, str(aqi_class), filename)
        for aqi_class, filename in zip(df["AQI_Class"], df["Filename"])
    ]
    all_dfs.append(df)

# Single combined frame with a fresh 0..n-1 index.
data = pd.concat(all_dfs, ignore_index=True)


In [None]:
import matplotlib.pyplot as plt

# Dimensions of the combined frame as (rows, columns).
print(data.shape)

# Column index, to confirm by eye that the expected fields are present.
print(data.columns)

# Rows whose image_path does not exist on disk.
path_exists = data["image_path"].apply(os.path.exists)
broken = data[~path_exists]
print("Broken images:", len(broken))

# Per-class row counts: printed first, then drawn as a bar chart.
class_counts = data["AQI_Class"].value_counts()
print(class_counts)

class_counts.plot.bar(title="AQI Class Distribution")
plt.show()


In [None]:
# Count of distinct AQI_Class labels; missing values are not counted.
NUM_CLASSES = data["AQI_Class"].nunique(dropna=True)
print("Number of AQI classes:", NUM_CLASSES)

In [None]:
# Show the column names as a plain Python list.
print(list(data.columns))


In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras import layers, models, callbacks

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import classification_report


In [None]:
# Root folder of the dataset; every sub-directory is treated as one city.
BASE_DIR = r"../datasets/Air Pollution Image Dataset"

# One metadata frame per city, concatenated once after the loop.
all_dfs = []

# os.listdir() returns entries in arbitrary order; sorting makes the row order
# of `data` reproducible, so any later seeded shuffle or split sees the same
# input on every run.
for city in sorted(os.listdir(BASE_DIR)):
    city_path = os.path.join(BASE_DIR, city)
    if not os.path.isdir(city_path):
        continue  # skip stray files sitting next to the city folders

    # Each city folder is expected to hold "<city>_AQI_ALL_info.csv".
    csv_path = os.path.join(city_path, f"{city}_AQI_ALL_info.csv")
    df = pd.read_csv(csv_path)
    df["city"] = city

    # Build <city_path>/<AQI_Class>/<Filename> for every row. Iterating the
    # two columns directly avoids constructing a Series per row (as a
    # row-wise DataFrame.apply does) and is well defined for a zero-row CSV.
    df["image_path"] = [
        os.path.join(city_path, str(aqi_class), filename)
        for aqi_class, filename in zip(df["AQI_Class"], df["Filename"])
    ]

    all_dfs.append(df)

# Single combined frame with a fresh 0..n-1 index.
data = pd.concat(all_dfs, ignore_index=True)
