In [None]:
!pip3 install pandas statsmodels matplotlib

In [None]:
# Importing necessary libraries
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
import matplotlib.pyplot as plt
import numpy as np

# Load dataset
data = pd.read_csv('./data/history_data.csv')

# Convert 'DateTime' to datetime format and set as index
data['DateTime'] = pd.to_datetime(data['DateTime'])
data.set_index('DateTime', inplace=True)

# Define pollutants of interest
pollutants = ['PM10', 'PM2.5', 'CO', 'SO2', 'NO2', 'O3']

# Function to forecast next 8 hours using ARIMA
def forecast_pollutant(data, pollutant, current_value, hours=8):
    model = ARIMA(data[pollutant], order=(5,1,0)) # ARIMA with p=5, d=1, q=0 (adjust based on the pollutant)
    model_fit = model.fit()
    
    # Forecast the next 'hours' values
    forecast = model_fit.forecast(steps=hours)
    
    # Replace the first forecast value with the current input value (to align)
    forecast[0] = current_value
    return forecast

# Given current pollutant values
current_values = {
    "PM10": 39.37,
    "PM2.5": 33.67,
    "SO2": 2.38,
    "NO2": 6.25,
    "CO": 13.35,
    "O3": 0.10
}

# Forecast for the next 8 hours for each pollutant
forecasts = {}
for pollutant in pollutants:
    forecast = forecast_pollutant(data, pollutant, current_values[pollutant])
    forecasts[pollutant] = forecast

# Display forecast results
forecasts_df = pd.DataFrame(forecasts)
# print("\n\nForecast\n")
print(forecasts_df)

# Plotting forecast for each pollutant
forecasts_df.plot(figsize=(10,6), marker='o')
plt.title('Pollutant Forecasts for Next 8 Hours')
plt.ylabel('Pollutant Levels')
plt.xlabel('Hours Ahead')
plt.grid(True)
plt.show()


In [None]:
# Cell 1: Importing necessary libraries
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
import matplotlib.pyplot as plt
import numpy as np

# Cell 2: Load dataset
data = pd.read_csv('./data/history_data.csv')  # Update the path to your local dataset

# Convert 'DateTime' to datetime format and set frequency to hourly
data['DateTime'] = pd.to_datetime(data['DateTime'])
data.set_index('DateTime', inplace=True)
data = data.asfreq('h')  # Ensure continuous hourly frequency

# Fill any missing data after setting frequency
data = data.ffill()

# Cell 3: Define pollutants of interest
pollutants = ['PM10', 'PM2.5', 'CO', 'SO2', 'NO2', 'O3']

# Cell 4: Function to apply ARIMA model and forecast next 8 hours
def forecast_pollutant(data, pollutant, current_value, hours=8):
    ts = data[pollutant]
    
    # Ensure there's enough data to fit the model
    if len(ts) > 10:
        try:
            # Fit ARIMA model with (5,1,0)
            model = ARIMA(ts, order=(5,1,0))
            model_fit = model.fit()
        except:
            # If error occurs, adjust ARIMA parameters and retry
            model = ARIMA(ts, order=(1,1,0))
            model_fit = model.fit()
        
        # Forecast the next 'hours' values
        forecast = model_fit.forecast(steps=hours)
        
        # Replace the first forecast value with the current input value (to align)
        forecast.iloc[0] = current_value
        return forecast
    else:
        print(f"Not enough data to fit model for {pollutant}")
        return np.array([current_value]*hours)

# Cell 5: Given current pollutant values
current_values = {
    "PM10": 89.37,
    "PM2.5": 60.67,
    "SO2": 2.38,
    "NO2": 6.25,
    "CO": 13.35,
    "O3": 0.10
}

# Cell 6: Forecast for the next 8 hours for each pollutant
forecasts = {}
for pollutant in pollutants:
    forecast = forecast_pollutant(data, pollutant, current_values[pollutant])
    forecasts[pollutant] = forecast

# Display forecast results
forecasts_df = pd.DataFrame(forecasts)
print(forecasts_df)

# Cell 7: Plotting forecast for each pollutant
forecasts_df.plot(figsize=(10,6), marker='o')
plt.title('Pollutant Forecasts for Next 8 Hours')
plt.ylabel('Pollutant Levels')
plt.xlabel('Hours Ahead')
plt.grid(True)
plt.show()


In [None]:
# Cell 1: Import necessary libraries
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
import matplotlib.pyplot as plt
import numpy as np
import joblib  # For saving the model and scaler

# Cell 2: Load dataset
data = pd.read_csv('./data/history_data.csv')  # Update the path to your local dataset

# Convert 'DateTime' to datetime format and set frequency to hourly
data['DateTime'] = pd.to_datetime(data['DateTime'])
data.set_index('DateTime', inplace=True)
data = data.asfreq('h')  # Ensure continuous hourly frequency

# Fill any missing data after setting frequency
data = data.ffill()

# Cell 3: Define pollutants of interest
pollutants = ['PM10', 'PM2.5', 'CO', 'SO2', 'NO2', 'O3']

# Cell 4: Function to apply ARIMA model and forecast next 8 hours
# Fine-tuned ARIMA with short memory
def forecast_pollutant(current_value, ts, hours=8):
    # Model ARIMA with short memory (p=1, d=1, q=1)
    model = ARIMA(ts, order=(1,1,1))
    model_fit = model.fit()
    
    # Save the model to a file
    model_filename = f"arima_model_{pollutant}.pkl"
    joblib.dump(model_fit, model_filename)  # Saving model

    # Forecast the next 'hours' values based on the current trend
    forecast = model_fit.forecast(steps=hours)
    
    # Adjust the forecast to start from the current value
    forecast = forecast + (current_value - forecast[0])
    
    return forecast

# Cell 5: Given current pollutant values
current_values = {
    "PM10": 55.37,  # Updated value for PM10
    "PM2.5": 33.67,
    "SO2": 2.38,
    "NO2": 6.25,
    "CO": 13.35,
    "O3": 0.10
}

# Cell 6: Forecast for the next 8 hours for each pollutant
forecasts = {}
for pollutant in pollutants:
    # Get the historical time series for the pollutant
    ts = data[pollutant]

    # Use the current value provided as the base for forecasting
    forecast = forecast_pollutant(current_values[pollutant], ts)
    forecasts[pollutant] = forecast

# Display forecast results
forecasts_df = pd.DataFrame(forecasts)
print(forecasts_df)

# Cell 7: Plotting forecast for each pollutant
forecasts_df.plot(figsize=(10,6), marker='o')
plt.title('Pollutant Forecasts for Next 8 Hours Based on Current Values')
plt.ylabel('Pollutant Levels')
plt.xlabel('Hours Ahead')
plt.grid(True)
plt.show()


In [None]:
# Cell 8: Loading the saved ARIMA model and using it for forecasting
import joblib

# Cell 9: Load the ARIMA model from a file
pollutant = 'PM10'  # Change this to load the model for any other pollutant
model_filename = f"arima_model_{pollutant}.pkl"
loaded_model = joblib.load(model_filename)

# Cell 10: Forecast using the loaded model
# You can change the `current_value` if needed
current_value = 52

# Perform forecast for the next 8 hours
forecast = loaded_model.forecast(steps=8)

# Adjust the forecast to start from the current value
forecast = forecast + (current_value - forecast[0])

print(f"Forecast for {pollutant}:")
print(forecast)

In [None]:
# Cell 1: Importing necessary libraries
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
import matplotlib.pyplot as plt
import numpy as np

# Cell 2: Load dataset
data = pd.read_csv('./data/history_data.csv')  # Update the path to your local dataset

# Convert 'DateTime' to datetime format and set frequency to hourly
data['DateTime'] = pd.to_datetime(data['DateTime'])
data.set_index('DateTime', inplace=True)
data = data.asfreq('h')  # Ensure continuous hourly frequency

# Fill any missing data after setting frequency
data = data.ffill()

# Cell 3: Given current pollutant values (these are the inputs for forecasting)
current_values = {
    "PM10": 55.37,  # Updated value for PM10
    "PM2.5": 33.67,
    "SO2": 2.38,
    "NO2": 6.25,
    "CO": 13.35,
    "O3": 0.10
}

# Cell 4: Function to apply ARIMA model and forecast next 8 hours
# Tuned ARIMA model with more sensitivity to past fluctuations
def forecast_pollutant(current_value, ts, hours=8):
    # Model ARIMA with larger p and q values to capture more fluctuations
    model = ARIMA(ts, order=(3,1,3))  # Increased p and q to add variability
    model_fit = model.fit()

    # Forecast the next 'hours' values based on the current trend
    forecast = model_fit.forecast(steps=hours)

    # Adjust the forecast to start from the current value
    forecast = forecast + (current_value - forecast[0])
    
    return forecast

# Cell 5: Forecast for the next 8 hours for each pollutant
pollutant = 'PM10'  # Let's focus on PM10 first
ts = data[pollutant]

# Use the current value provided as the base for forecasting
forecast = forecast_pollutant(current_values[pollutant], ts)

# Display forecast results
forecast.index = pd.date_range(start=pd.to_datetime('2024-09-03 20:00:00'), periods=len(forecast), freq='H')
print(f"Forecast for {pollutant}:")
print(forecast)

# Cell 6: Plotting forecast for the pollutant
# forecast.plot(figsize=(10,6), marker='o')
# plt.title(f'Forecast for {pollutant} (Next 8 Hours)')
# plt.ylabel(f'{pollutant} Level')
# plt.xlabel('Time')
# plt.grid(True)
# plt.show()


In [None]:
import numpy as np
import pandas as pd
import requests
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# Fetch data from API
data = requests.get("https://blogcontent.site/projects/aqi24.php").json()

# Load API data into pandas DataFrame
df_api = pd.DataFrame(data)

# Load historical data
file_path = './data/history_data.csv'  # Change this to the correct file path
df_history = pd.read_csv(file_path)

# Convert 'time' in API data to datetime and set it as index
df_api['time'] = pd.to_datetime(df_api['time'])
df_api.set_index('time', inplace=True)

# Combine API AQI data and historical AQI data for training
aqi_history = df_history[['AQI']].values
aqi_api = df_api[['aqi_val']].values

# Combine both AQI data
aqi_combined = np.concatenate((aqi_history, aqi_api), axis=0)

# Scale the AQI data using MinMaxScaler
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_aqi = scaler.fit_transform(aqi_combined)

# Create sequences for LSTM
def create_sequences(data, time_steps=24):
    X, y = [], []
    for i in range(len(data) - time_steps):
        X.append(data[i:i + time_steps])
        y.append(data[i + time_steps])
    return np.array(X), np.array(y)

# Set time steps as 24 (for 24-hour sequences)
time_steps = 24
X, y = create_sequences(scaled_aqi, time_steps)

# Split the data into training and testing sets
train_size = int(len(X) * 0.8)
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

# Build the LSTM model
model = Sequential()

# Adding LSTM layer with 50 units
model.add(LSTM(50, return_sequences=True, input_shape=(time_steps, 1)))
model.add(Dropout(0.2))

# Adding another LSTM layer
model.add(LSTM(50, return_sequences=False))
model.add(Dropout(0.2))

# Adding Dense output layer for predicting AQI
model.add(Dense(1))

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
model.fit(X_train, y_train, epochs=10, batch_size=64, validation_data=(X_test, y_test), verbose=1)

# Forecasting the next 8 hours
n_forecast = 8
X_input = scaled_aqi[-time_steps:].reshape(1, time_steps, 1)
forecast = []

for i in range(n_forecast):
    next_pred = model.predict(X_input)
    forecast.append(next_pred[0, 0])
    
    # Update the input sequence for the next prediction
    X_input = np.append(X_input[:, 1:, :], next_pred.reshape(1, 1, 1), axis=1)

# Inverse transform the forecasted values to original scale
forecast = scaler.inverse_transform(np.array(forecast).reshape(-1, 1))

# Print the forecasted AQI values for the next 8 hours
print("Forecasted AQI values for the next 8 hours:", forecast)

# Plot the forecasted AQI values
forecast_index = pd.date_range(start=df_api.index[-1], periods=n_forecast+1, freq='H')[1:]
plt.plot(forecast_index, forecast, label='Forecasted AQI', color='red')
plt.title("AQI Forecast for Next 8 Hours")
plt.xlabel("Time")
plt.ylabel("AQI")
plt.legend()
plt.show()


In [None]:
import numpy as np
import pandas as pd
import requests
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# Fetch the most recent API data
data = requests.get("https://blogcontent.site/projects/aqi24.php").json()
# data = data[::-1]  # Reverse the data to maintain correct chronological order
df_recent = pd.DataFrame(data)
df_recent['time'] = pd.to_datetime(df_recent['time'])
df_recent.set_index('time', inplace=True)

# Load historical data
file_path = './data/history_data.csv'  # Change this to the correct file path
df_history = pd.read_csv(file_path)

# Combine recent and historical data
df_history['DateTime'] = pd.to_datetime(df_history['DateTime'])
df_history.set_index('DateTime', inplace=True)

# Only use the AQI column
df_combined = pd.concat([df_history[['AQI']], df_recent[['aqi_val']].rename(columns={'aqi_val': 'AQI'})])

# Convert AQI column to numeric
df_combined['AQI'] = pd.to_numeric(df_combined['AQI'], errors='coerce')

# Apply differencing to make the series stationary
df_combined['AQI_diff'] = df_combined['AQI'].diff().dropna()

# Scale the differenced AQI data using MinMaxScaler
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_aqi = scaler.fit_transform(df_combined[['AQI_diff']].dropna())

# Create sequences for LSTM
def create_sequences(data, time_steps=24):
    X, y = [], []
    for i in range(len(data) - time_steps):
        X.append(data[i:i + time_steps])
        y.append(data[i + time_steps])
    return np.array(X), np.array(y)

# Set time steps as 24 (for 24-hour sequences)
time_steps = 24
X, y = create_sequences(scaled_aqi, time_steps)

# Split the data into training and testing sets
train_size = int(len(X) * 0.8)
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

# Build the LSTM model
model = Sequential()

# Adding LSTM layer with 64 units
model.add(LSTM(64, return_sequences=True, input_shape=(time_steps, 1)))
model.add(Dropout(0.1))  # Reduced dropout

# Adding another LSTM layer
model.add(LSTM(64, return_sequences=False))
model.add(Dropout(0.1))  # Reduced dropout

# Adding Dense output layer for predicting AQI
model.add(Dense(1))

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model with more epochs
model.fit(X_train, y_train, epochs=20, batch_size=64, validation_data=(X_test, y_test), verbose=1)

# Forecasting the next 8 hours
n_forecast = 8
X_input = scaled_aqi[-time_steps:].reshape(1, time_steps, 1)
forecast_diff = []

for i in range(n_forecast):
    next_pred = model.predict(X_input)
    forecast_diff.append(next_pred[0, 0])
    
    # Update the input sequence for the next prediction
    X_input = np.append(X_input[:, 1:, :], next_pred.reshape(1, 1, 1), axis=1)

# Inverse transform the forecasted values to original scale
forecast_diff = np.array(forecast_diff).reshape(-1, 1)
forecast_diff = scaler.inverse_transform(forecast_diff)

# Revert differencing to get actual AQI forecast values
last_known_aqi = df_combined['AQI'].iloc[-1]
forecast = last_known_aqi + np.cumsum(forecast_diff)

# Print the forecasted AQI values for the next 8 hours
print("Forecasted AQI values for the next 8 hours:", forecast)

# Plot the forecasted AQI values
forecast_index = pd.date_range(start=df_recent.index[-1], periods=n_forecast+1, freq='H')[1:]
plt.plot(forecast_index, forecast, label='Forecasted AQI', color='red')
plt.title("AQI Forecast for Next 8 Hours")
plt.xlabel("Time")
plt.ylabel("AQI")
plt.legend()
plt.show()


In [None]:
print(data)

In [None]:
import numpy as np
import pandas as pd
import requests
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional

# Fetch the most recent API data
data = requests.get("https://blogcontent.site/projects/aqi24.php").json()
data = data[::-1]  # Reverse the data to maintain correct chronological order
df_recent = pd.DataFrame(data)
df_recent['time'] = pd.to_datetime(df_recent['time'])
df_recent.set_index('time', inplace=True)

# Load historical data
file_path = './data/history_data.csv'  # Change this to the correct file path
df_history = pd.read_csv(file_path)

# Combine recent and historical data
df_history['DateTime'] = pd.to_datetime(df_history['DateTime'])
df_history.set_index('DateTime', inplace=True)

# Only use the AQI column
df_combined = pd.concat([df_history[['AQI']], df_recent[['aqi_val']].rename(columns={'aqi_val': 'AQI'})])

# Convert AQI column to numeric
df_combined['AQI'] = pd.to_numeric(df_combined['AQI'], errors='coerce')

# Apply differencing to make the series stationary
df_combined['AQI_diff'] = df_combined['AQI'].diff().dropna()

# Create a trend feature (1 for increase, 0 for decrease)
df_combined['trend'] = np.where(df_combined['AQI_diff'] > 0, 1, 0)

# Scale the differenced AQI data and trend using MinMaxScaler
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_aqi = scaler.fit_transform(df_combined[['AQI_diff', 'trend']].dropna())

# Create sequences for LSTM
def create_sequences(data, time_steps=24):
    X, y = [], []
    for i in range(len(data) - time_steps):
        X.append(data[i:i + time_steps])
        y.append(data[i + time_steps][0])  # Predicting the AQI_diff
    return np.array(X), np.array(y)

# Set time steps as 24 (for 24-hour sequences)
time_steps = 24
X, y = create_sequences(scaled_aqi, time_steps)

# Split the data into training and testing sets
train_size = int(len(X) * 0.8)
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

# Build the LSTM model
model = Sequential()

# Adding Bidirectional LSTM layer with 64 units
model.add(Bidirectional(LSTM(64, return_sequences=True), input_shape=(time_steps, 2)))
model.add(Dropout(0.1))

# Adding another Bidirectional LSTM layer
model.add(Bidirectional(LSTM(64, return_sequences=False)))
model.add(Dropout(0.1))

# Adding Dense output layer for predicting AQI_diff
model.add(Dense(1))

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model with more epochs
model.fit(X_train, y_train, epochs=30, batch_size=64, validation_data=(X_test, y_test), verbose=1)

# Forecasting the next 8 hours
n_forecast = 8
X_input = scaled_aqi[-time_steps:].reshape(1, time_steps, 2)
forecast_diff = []

for i in range(n_forecast):
    next_pred = model.predict(X_input)
    forecast_diff.append(next_pred[0, 0])
    
    # Update the input sequence for the next prediction
    new_trend = 1 if next_pred > 0 else 0  # Update trend based on prediction
    next_input = np.array([[next_pred[0, 0], new_trend]]).reshape(1, 1, 2)
    X_input = np.append(X_input[:, 1:, :], next_input, axis=1)

# Inverse transform the forecasted values to original scale
forecast_diff = np.array(forecast_diff).reshape(-1, 1)
forecast_diff = scaler.inverse_transform(np.hstack([forecast_diff, np.zeros_like(forecast_diff)]))[:, 0]

# Revert differencing to get actual AQI forecast values
last_known_aqi = df_combined['AQI'].iloc[-1]
forecast = last_known_aqi + np.cumsum(forecast_diff)

# Print the forecasted AQI values for the next 8 hours
print("Forecasted AQI values for the next 8 hours:", forecast)

# Plot the forecasted AQI values
forecast_index = pd.date_range(start=df_recent.index[-1], periods=n_forecast+1, freq='H')[1:]
plt.plot(forecast_index, forecast, label='Forecasted AQI', color='red')
plt.title("AQI Forecast for Next 8 Hours")
plt.xlabel("Time")
plt.ylabel("AQI")
plt.legend()
plt.show()

In [2]:
import numpy as np
import pandas as pd
import requests
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional

# Fetch the most recent API data
data = requests.get("https://blogcontent.site/projects/aqi24.php").json()
data = data[::-1]  # Reverse the data to maintain correct chronological order
df_recent = pd.DataFrame(data)
df_recent['time'] = pd.to_datetime(df_recent['time'])
df_recent.set_index('time', inplace=True)

# Load historical data
file_path = './data/history_data.csv'  # Change this to the correct file path
df_history = pd.read_csv(file_path)

# Combine recent and historical data
df_history['DateTime'] = pd.to_datetime(df_history['DateTime'])
df_history.set_index('DateTime', inplace=True)

# Only use the AQI column
df_combined = pd.concat([df_history[['AQI']], df_recent[['aqi_val']].rename(columns={'aqi_val': 'AQI'})])

# Convert AQI column to numeric
df_combined['AQI'] = pd.to_numeric(df_combined['AQI'], errors='coerce')

# Apply differencing to make the series stationary
df_combined['AQI_diff'] = df_combined['AQI'].diff().dropna()

# Create a trend feature (1 for increase, 0 for decrease)
df_combined['trend'] = np.where(df_combined['AQI_diff'] > 0, 1, 0)

# Scale the differenced AQI data and trend using MinMaxScaler
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_aqi = scaler.fit_transform(df_combined[['AQI_diff', 'trend']].dropna())

# Create sequences for LSTM
def create_sequences(data, time_steps=24):
    X, y = [], []
    for i in range(len(data) - time_steps):
        X.append(data[i:i + time_steps])
        y.append(data[i + time_steps][0])  # Predicting the AQI_diff
    return np.array(X), np.array(y)

# Set time steps as 24 (for 24-hour sequences)
time_steps = 24
X, y = create_sequences(scaled_aqi, time_steps)

# Split the data into training and testing sets
train_size = int(len(X) * 0.8)
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

# Build the LSTM model
model = Sequential()

# Adding Bidirectional LSTM layer with 64 units
model.add(Bidirectional(LSTM(64, return_sequences=True), input_shape=(time_steps, 2)))
model.add(Dropout(0.1))

# Adding another Bidirectional LSTM layer
model.add(Bidirectional(LSTM(64, return_sequences=False)))
model.add(Dropout(0.1))

# Adding Dense output layer for predicting AQI_diff
model.add(Dense(1))

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model with more epochs
model.fit(X_train, y_train, epochs=30, batch_size=64, validation_data=(X_test, y_test), verbose=1)

# Save the model as model.h5 (HDF5 format)
model.save('model.h5')

# Save the scaler for future use
import joblib
joblib.dump(scaler, 'scaler_model.pkl')

print("Model and Scaler saved successfully!")


Epoch 1/30


  super().__init__(**kwargs)


[1m409/409[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 27ms/step - loss: 1.3933e-04 - val_loss: 1.8183e-04
Epoch 2/30
[1m409/409[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 29ms/step - loss: 3.2515e-05 - val_loss: 1.8191e-04
Epoch 3/30
[1m409/409[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 29ms/step - loss: 3.0010e-05 - val_loss: 1.8547e-04
Epoch 4/30
[1m409/409[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 30ms/step - loss: 2.9592e-05 - val_loss: 1.8671e-04
Epoch 5/30
[1m409/409[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 29ms/step - loss: 2.8733e-05 - val_loss: 1.8234e-04
Epoch 6/30
[1m409/409[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 29ms/step - loss: 2.8569e-05 - val_loss: 1.8289e-04
Epoch 7/30
[1m409/409[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 29ms/step - loss: 2.8479e-05 - val_loss: 1.8079e-04
Epoch 8/30
[1m409/409[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 29ms/step - loss: 3.040



Model and Scaler saved successfully!


In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import load_model
import joblib
import requests

# Load the trained model and scaler
model = load_model('model.h5')
scaler = joblib.load('scaler_model.pkl')

# Input new data
new_data = requests.get("https://blogcontent.site/projects/aqi24.php").json()
new_data = new_data[::-1]

# Convert the input data to a DataFrame
df_new = pd.DataFrame(new_data)
df_new['time'] = pd.to_datetime(df_new['time'])
df_new.set_index('time', inplace=True)

# Convert 'aqi_val' column to numeric
df_new['aqi_val'] = pd.to_numeric(df_new['aqi_val'])

# Create difference and trend columns
df_new['AQI_diff'] = df_new['aqi_val'].diff().dropna()
df_new['trend'] = np.where(df_new['AQI_diff'] > 0, 1, 0)

# Scale the input data using the saved scaler
scaled_input = scaler.transform(df_new[['AQI_diff', 'trend']].dropna())

# Prepare input for LSTM (handle smaller data cases)
time_steps = 24  # Ensure this matches the training setup

if len(scaled_input) < time_steps:
    # Pad the input with zeros if there are fewer than `time_steps` data points
    padding = np.zeros((time_steps - len(scaled_input), 2))
    X_input = np.vstack([padding, scaled_input])
else:
    # Use the last `time_steps` data points for prediction
    X_input = scaled_input[-time_steps:]

# Reshape the input to fit the model input shape
X_input = X_input.reshape(1, time_steps, 2)

# Forecast the next 8 hours
n_forecast = 8
forecast_diff = []
for i in range(n_forecast):
    next_pred = model.predict(X_input)
    forecast_diff.append(next_pred[0, 0])

    # Update the input sequence for the next prediction
    new_trend = 1 if next_pred > 0 else 0  # Update trend based on prediction
    next_input = np.array([[next_pred[0, 0], new_trend]]).reshape(1, 1, 2)
    X_input = np.append(X_input[:, 1:, :], next_input, axis=1)

# Revert scaling
forecast_diff = np.array(forecast_diff).reshape(-1, 1)
forecast_diff = scaler.inverse_transform(np.hstack([forecast_diff, np.zeros_like(forecast_diff)]))[:, 0]

# Revert differencing to get actual AQI forecast values
last_known_aqi = df_new['aqi_val'].iloc[-1]
forecast = last_known_aqi + np.cumsum(forecast_diff)

# Print the forecasted AQI values for the next 8 hours
print("Forecasted AQI values for the next 8 hours:", forecast)




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 152ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
Forecasted AQI values for the next 8 hours: [62.18741  62.42733  62.71162  63.003086 63.243217 63.395035 63.50635
 63.574474]


In [None]:
# Input new data
new_data = requests.get("https://blogcontent.site/projects/aqi24.php").json()
new_data = new_data[::-1]

In [2]:
print(new_data)

[{'aqi_name': 'PM10', 'aqi_val': '52.74', 'time': '2024-09-17 01:38:33.943268'}, {'aqi_name': 'PM10', 'aqi_val': '53.05', 'time': '2024-09-17 02:18:41.744855'}, {'aqi_name': 'PM10', 'aqi_val': '53.34', 'time': '2024-09-17 03:19:04.697004'}, {'aqi_name': 'PM10', 'aqi_val': '53.9', 'time': '2024-09-17 04:19:27.582172'}, {'aqi_name': 'PM10', 'aqi_val': '54.58', 'time': '2024-09-17 05:19:50.580556'}, {'aqi_name': 'PM10', 'aqi_val': '55.1', 'time': '2024-09-17 06:00:06.157689'}, {'aqi_name': 'PM10', 'aqi_val': '55.99', 'time': '2024-09-17 07:00:29.402049'}, {'aqi_name': 'PM10', 'aqi_val': '57.18', 'time': '2024-09-17 08:00:52.227380'}, {'aqi_name': 'PM10', 'aqi_val': '58.55', 'time': '2024-09-17 09:01:15.922779'}, {'aqi_name': 'PM10', 'aqi_val': '59.99', 'time': '2024-09-17 10:01:39.000063'}, {'aqi_name': 'PM10', 'aqi_val': '60.98', 'time': '2024-09-17 11:02:02.099594'}, {'aqi_name': 'PM10', 'aqi_val': '61.75', 'time': '2024-09-17 12:02:25.180874'}, {'aqi_name': 'PM10', 'aqi_val': '62.51', 

In [3]:
df_new

Unnamed: 0_level_0,aqi_name,aqi_val,AQI_diff,trend
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2024-09-17 01:38:33.943268,PM10,52.74,,0
2024-09-17 02:18:41.744855,PM10,53.05,0.31,1
2024-09-17 03:19:04.697004,PM10,53.34,0.29,1
2024-09-17 04:19:27.582172,PM10,53.9,0.56,1
2024-09-17 05:19:50.580556,PM10,54.58,0.68,1
2024-09-17 06:00:06.157689,PM10,55.1,0.52,1
2024-09-17 07:00:29.402049,PM10,55.99,0.89,1
2024-09-17 08:00:52.227380,PM10,57.18,1.19,1
2024-09-17 09:01:15.922779,PM10,58.55,1.37,1
2024-09-17 10:01:39.000063,PM10,59.99,1.44,1


In [None]:
# Load the dataset (use your local file or API)
input_data = requests.get("https://blogcontent.site/projects/aqi24.php").json()
input_data = input_data[::-1]  # Reverse the order of the data

In [7]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt

input_data = requests.get("https://blogcontent.site/projects/aqi24.php").json()
input_data = input_data[::-1]  # Reverse the order of the data

# Convert input data to DataFrame
input_df = pd.DataFrame(input_data)
# Explicitly convert the 'time' column to datetime format
input_df['time'] = pd.to_datetime(input_df['time'], errors='coerce')

# Convert 'aqi_val' to float
input_df['aqi_val'] = input_df['aqi_val'].astype(float)

# Filter out any rows with invalid 'time' values (e.g., NaT values)
input_df = input_df.dropna(subset=['time'])

# Continue with relevant columns
input_df = input_df[['time', 'aqi_val']]
input_df.columns = ['DateTime', 'PM10']


# Load additional historical data (if available)
historical_data = pd.read_csv('./data/history_data.csv')
combined_data = pd.concat([historical_data, input_df], ignore_index=True)

# combined_data = input_df.copy()  # Assuming input_df is the entire dataset for now
combined_data = combined_data.sort_values(by='DateTime')

# Normalize the data
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(combined_data['PM10'].values.reshape(-1, 1))

# Define a function to create a dataset with a sliding window approach
def create_dataset(dataset, time_step=8):
    X, Y = [], []
    for i in range(len(dataset) - time_step):
        X.append(dataset[i:i + time_step, 0])
        Y.append(dataset[i + time_step, 0])
    return np.array(X), np.array(Y)

# Define time step
time_step = 8

# Create training and testing datasets by removing the last 8 time steps for testing
train_size = len(scaled_data) - 8
train_data = scaled_data[:train_size]
test_data = scaled_data[train_size - time_step:]  # Last 8 steps are for testing

# Generate training dataset
X_train, y_train = create_dataset(train_data, time_step)

# Reshape for LSTM input [samples, time steps, features]
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)

# Generate test dataset
X_test, y_test = create_dataset(test_data, time_step)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

# Build LSTM Model with more units
model = Sequential()
model.add(LSTM(64, return_sequences=True, input_shape=(time_step, 1)))  # Increased units
model.add(LSTM(64, return_sequences=False))  # Increased units
model.add(Dense(50))
model.add(Dense(1))

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Early stopping to prevent overfitting
early_stop = EarlyStopping(monitor='val_loss', patience=5)

# Train the model with validation split
history = model.fit(X_train, y_train, batch_size=1, epochs=100, validation_split=0.2, callbacks=[early_stop])

# Predict using the model
predictions = model.predict(X_test)

# Inverse transform to original scale
predicted_values = scaler.inverse_transform(predictions)
actual_values = scaler.inverse_transform(y_test.reshape(-1, 1))

# Evaluation Metrics
mae = mean_absolute_error(actual_values, predicted_values)
mse = mean_squared_error(actual_values, predicted_values)
rmse = np.sqrt(mse)

print(f"Mean Absolute Error (MAE): {mae}")
print(f"Mean Squared Error (MSE): {mse}")
print(f"Root Mean Squared Error (RMSE): {rmse}")

# Plot the actual vs predicted values
plt.figure(figsize=(10, 6))
plt.plot(actual_values, label='Actual PM10')
plt.plot(predicted_values, label='Predicted PM10', linestyle='--')
plt.title('PM10 Prediction vs Actual')
plt.xlabel('Time steps')
plt.ylabel('PM10')
plt.legend()
plt.show()

# Optional: Plot loss during training to observe performance
plt.figure(figsize=(10, 6))
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Training and Validation Loss Over Epochs')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()


TypeError: '<' not supported between instances of 'Timestamp' and 'str'