In [1]:
import numpy as np

# Generate synthetic air pollution data (replace with real data if available)
num_samples = 1000  # Adjust based on available data
no2_values = np.random.uniform(10, 100, num_samples)
o3_values = np.random.uniform(10, 100, num_samples)
pm25_values = np.random.uniform(5, 200, num_samples)
pm10_values = np.random.uniform(10, 300, num_samples)
so2_values = np.random.uniform(2, 50, num_samples)
co_values = np.random.uniform(0.1, 10, num_samples)

# Stack input values for each model
input_1 = np.column_stack((no2_values, o3_values))  # NO2 & O3
input_2 = pm25_values.reshape(-1, 1)  # PM2.5
input_3 = pm10_values.reshape(-1, 1)  # PM10
input_4 = np.column_stack((so2_values, co_values))  # SO2 & CO

print("Synthetic data generated successfully!")


Synthetic data generated successfully!


In [2]:
from tensorflow.keras.models import load_model

# Load trained models
model_1 = load_model("mlp1.keras")  # NO2 & O3 → Output
model_2 = load_model("mlp2.keras")  # PM2.5 → Output
model_3 = load_model("mlp3.keras")  # PM10 → Output
model_4 = load_model("mlp4.keras")  # SO2 & CO → Output

# Get predictions
output_1 = model_1.predict(input_1)
output_2 = model_2.predict(input_2)
output_3 = model_3.predict(input_3)
output_4 = model_4.predict(input_4)

# Stack all outputs together as input for Model-5
X = np.column_stack((output_1, output_2, output_3, output_4))

print("Predictions from Models 1-4 generated!")
print(f"Shape of X: {X.shape}")  # Should be (num_samples, 4) if each model gives one output


Predictions from Models 1-4 generated!
Shape of X: (1000, 24)


In [3]:
# Example: Define pollution categories based on thresholds (customize as needed)
def categorize_air_quality(outputs):
    pollution_level = np.mean(outputs, axis=1)  # Average all model outputs
    categories = np.digitize(pollution_level, bins=[0.2, 0.4, 0.6, 0.8])  # 5 categories (0-4)
    return categories

y = categorize_air_quality(X)  # Generate labels

# Convert to categorical format (if classification)
from tensorflow.keras.utils import to_categorical
y = to_categorical(y)

print("Final labels generated!")
print(f"Shape of y: {y.shape}")


Final labels generated!
Shape of y: (1000, 1)


In [4]:
np.save("final_input_features.npy", X)
np.save("final_output_labels.npy", y)

print("Training data saved successfully! 🎉")


Training data saved successfully! 🎉


In [5]:
import numpy as np
import pandas as pd

# Number of synthetic samples
num_samples = 1000  # You can adjust this

# Function to generate synthetic data with noise
def generate_data(num_samples, feature_size):
    data = np.zeros((num_samples, feature_size))
    
    for i in range(num_samples):
        # Randomly activate one of the last two columns (simulating categorical pollution presence)
        idx = np.random.choice([4, 5])
        data[i, idx] = 1.0

        # Add small noise to simulate sensor readings
        data[i, :] += np.random.normal(0, 1e-9, size=(feature_size,))
    
    return data

# Generate synthetic data for each model
data_model_1 = generate_data(num_samples, 6)  # NO2 & O3
data_model_2 = generate_data(num_samples, 6)  # PM2.5
data_model_3 = generate_data(num_samples, 6)  # PM10
data_model_4 = generate_data(num_samples, 6)  # SO2 & CO

# Convert to DataFrames
df1 = pd.DataFrame(data_model_1, columns=[f"Feature_{i+1}" for i in range(6)])
df2 = pd.DataFrame(data_model_2, columns=[f"Feature_{i+1}" for i in range(6)])
df3 = pd.DataFrame(data_model_3, columns=[f"Feature_{i+1}" for i in range(6)])
df4 = pd.DataFrame(data_model_4, columns=[f"Feature_{i+1}" for i in range(6)])

# Save to CSV (optional)
df1.to_csv("synthetic_data_model_1.csv", index=False)
df2.to_csv("synthetic_data_model_2.csv", index=False)
df3.to_csv("synthetic_data_model_3.csv", index=False)
df4.to_csv("synthetic_data_model_4.csv", index=False)

print("Synthetic data generated and saved!")


Synthetic data generated and saved!
