In [None]:
!pip install ucimlrepo

In [None]:
from ucimlrepo import fetch_ucirepo
import pandas as pd
import numpy as np

# Load dataset id 360 from the UCI ML repository.
air_quality = fetch_ucirepo(id=360)

# All data is in .features (NO targets table)
df = air_quality.data.features

# Peek at the first rows.
print(df.head())

# DataFrame.info() writes its report to stdout itself and returns None,
# so it is called directly; wrapping it in print() emits a stray "None".
df.info()

print("Columns:", df.columns.tolist())

In [None]:
# Remove the Date and Time columns. errors="ignore" keeps this cell safe to
# re-run: on a second execution the columns are already gone from `df`, and
# a plain drop() would raise KeyError.
df = df.drop(columns=["Date", "Time"], errors="ignore")

# Define target variable
target_col = "CO(GT)"     # Predict CO as AQI proxy

# Features = all except target
X = df.drop(columns=[target_col])
y = df[target_col]

In [None]:
# Remember the feature columns before features and target are merged.
feature_cols = X.columns

# Join features and target side by side, turn every -200 entry into NaN,
# then discard each row that still contains a missing value.
data = (
    pd.concat([X, y], axis=1)
    .replace(-200, np.nan)
    .dropna()
)

# Split the cleaned frame back into features and target.
X = data[feature_cols]
y = data[target_col]


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split repeatable.
split = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split

# Fit the scaler on the training features only, then apply that same
# fitted transformation to both partitions.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [None]:
import tensorflow as tf
from tensorflow.keras import models, layers

# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialisation (and any later shuffling during training) is repeatable.
tf.keras.utils.set_random_seed(42)

n_features = X_train_scaled.shape[1]

# Fully connected regressor: three ReLU hidden layers (128 -> 64 -> 32)
# followed by a single linear output unit.
model = models.Sequential([
    # Explicit Input layer instead of passing input_shape= to the first
    # Dense layer, which Keras 3 warns about in Sequential models.
    layers.Input(shape=(n_features,)),
    layers.Dense(128, activation='relu'),
    layers.Dense(64, activation='relu'),
    layers.Dense(32, activation='relu'),
    layers.Dense(1)
])

# Optimise mean squared error; RMSE is tracked as an additional metric.
model.compile(
    optimizer='adam',
    loss='mse',
    metrics=[tf.keras.metrics.RootMeanSquaredError()]
)

model.summary()

In [None]:
from tensorflow.keras.callbacks import EarlyStopping

# Stop once validation loss has gone 10 epochs without improving, and
# restore the weights from the best epoch seen.
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Train for at most 200 epochs in batches of 64, holding out 20% of the
# training data for validation.
history = model.fit(
    x=X_train_scaled,
    y=y_train,
    batch_size=64,
    epochs=200,
    verbose=1,
    callbacks=[early_stop],
    validation_split=0.2,
)


In [None]:

from sklearn.metrics import mean_squared_error, r2_score
import numpy as np # Import numpy for sqrt

# Predict on the held-out features; flatten() collapses the model output
# to 1-D so it lines up with y_test.
y_pred = model.predict(X_test_scaled).flatten()

mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)  # RMSE is the square root of MSE
r2 = r2_score(y_test, y_pred)

print("Test RMSE:", rmse)
# Label fixed: the original string was mojibake ("RÂ²") for "R²".
print("Test R²:", r2)

In [None]:
import matplotlib.pyplot as plt

# Learning curves: training vs. validation loss per epoch.
fig, ax = plt.subplots()
for key, label in (('loss', 'Train Loss'), ('val_loss', 'Val Loss')):
    ax.plot(history.history[key], label=label)
ax.set_xlabel("Epochs")
ax.set_ylabel("MSE Loss")
ax.legend()
plt.show()


In [None]:
# Predicted vs. actual target values on the test set.
fig, ax = plt.subplots()
ax.scatter(y_test, y_pred, alpha=0.5)
ax.set_xlabel("Actual CO(GT)")
ax.set_ylabel("Predicted CO(GT)")
ax.set_title("Actual vs Predicted")

# Diagonal y = x reference line spanning the combined range of both series.
lowest = min(y_test.min(), y_pred.min())
highest = max(y_test.max(), y_pred.max())
lims = [lowest, highest]
ax.plot(lims, lims)
plt.show()