In [None]:
# Importing necessary libraries for the task
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense


In [None]:
# Load the raw dataset, impute missing numeric values, build the feature
# matrix / target vector, split into train and test sets, and standardize.
df = pd.read_csv('data.csv')

# Report how many values are missing in each column before imputation
print(df.isnull().sum())

# Fill missing numeric values with the column mean.
# numeric_only=True is required here: the frame still contains the 'date'
# column at this point, and pandas >= 2.0 raises TypeError when asked to
# average non-numeric columns. Non-numeric columns are left untouched.
# Assigning the result (instead of inplace=True) keeps the step explicit.
# NOTE(review): the means are computed over the full dataset (before the
# train/test split) and are applied to every numeric column, which presumably
# includes the 'pm2_5' target -- confirm this is acceptable for the evaluation.
df = df.fillna(df.mean(numeric_only=True))

# Features: every column except the target and the raw date column
X = df.drop(columns=['pm2_5', 'date'])
# Target: PM2.5
y = df['pm2_5']

# One-hot encode any remaining categorical (object/category) columns
X = pd.get_dummies(X)

# Split data into training and testing sets (80% training, 20% testing);
# random_state is fixed so the split is repeatable across runs
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features (important for neural networks).
# The scaler is fitted on the training split only and then applied to the
# test split, so test rows do not influence the scaling statistics.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

print("Data Preprocessing Complete!")

In [None]:
# Build and compile a small fully connected regression network.

# Seed Python's `random`, NumPy and TensorFlow in one call so that weight
# initialization (and therefore the reported metrics) is repeatable on a
# Restart & Run All. Note this reseeds those global generators.
tf.keras.utils.set_random_seed(42)

# Declare the input width with an explicit Input object instead of passing
# `input_dim` to the first Dense layer; the latter is deprecated and triggers
# a warning in current Keras releases.
model = Sequential([
    tf.keras.Input(shape=(X_train.shape[1],)),  # one input per feature column
    Dense(64, activation='relu'),  # Hidden layer 1
    Dense(64, activation='relu'),  # Hidden layer 2
    Dense(1),  # Output layer (regression, linear activation)
])

# Compile the model: Adam optimizer, mean squared error loss
model.compile(optimizer='adam', loss='mean_squared_error')

print("Model Built and Compiled!")

In [None]:
# Fit the network on the training split. The hyperparameters are gathered in
# one place; validation_split=0.2 asks Keras to reserve 20% of the training
# data for validation, which is what populates 'val_loss' in the history.
fit_options = dict(epochs=50, batch_size=32, validation_split=0.2, verbose=1)
history = model.fit(X_train, y_train, **fit_options)

print("Model Training Complete!")

In [None]:
# Score the trained model on the held-out test split.
y_pred = model.predict(X_test)

# Apply both regression metrics to the same (actual, predicted) pair
mse, r2 = (metric(y_test, y_pred) for metric in (mean_squared_error, r2_score))

print(f"Mean Squared Error: {mse}")
print(f"R-squared: {r2}")

In [None]:
# Figure 1: learning curves (training vs. validation loss per epoch)
fig, ax = plt.subplots()
ax.plot(history.history['loss'], label='Training Loss')
ax.plot(history.history['val_loss'], label='Validation Loss')
ax.set_title('Training and Validation Loss Over Epochs')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.legend()
plt.show()

# Figure 2: actual vs. predicted target values on the test split.
# The dashed diagonal spans the observed range of y_test and marks where
# predicted == actual.
fig, ax = plt.subplots()
lo, hi = y_test.min(), y_test.max()
ax.scatter(y_test, y_pred)
ax.plot([lo, hi], [lo, hi], 'k--', lw=2)
ax.set_title('Actual vs Predicted PM2.5')
ax.set_xlabel('Actual')
ax.set_ylabel('Predicted')
plt.show()