In [None]:
import pandas as pd
import numpy as np
from sklearnex import patch_sklearn, config_context
patch_sklearn()
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Input, Concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.utils import to_categorical
import matplotlib.pyplot as plt
import intel_extension_for_tensorflow as itex
from tensorflow.python.framework.ops import disable_eager_execution
disable_eager_execution()

In [None]:
# Read the daily per-city measurements and parse the Date column.
df = pd.read_csv('city_day.csv')
df['Date'] = pd.to_datetime(df['Date'])

# Derive calendar features from the parsed date in a single step.
df = df.assign(
    DayOfYear=df['Date'].dt.dayofyear,
    Month=df['Date'].dt.month,
    Year=df['Date'].dt.year,
)

#['PM2.5','PM10', 'NO', 'NO2', 'NOx', 'CO', 'SO2', 'O3', 'Benzene','AQI']

In [None]:
# Columns that are not used as model inputs or targets.
unused_columns = ['NH3', 'Toluene', 'Xylene', 'AQI', 'AQI_Bucket']
df = df.drop(columns=unused_columns)

In [None]:
columns_to_fill = ['PM2.5','PM10', 'NO', 'NO2', 'NOx', 'CO', 'SO2', 'O3', 'Benzene']

# Replace missing readings with the mean of the respective column,
# computed over the whole frame.
column_means = df[columns_to_fill].mean()
df[columns_to_fill] = df[columns_to_fill].fillna(column_means)

In [None]:
features = ['PM2.5','PM10', 'NO', 'NO2', 'NOx', 'CO', 'SO2', 'O3', 'Benzene']

# Standardise the pollutant columns; `scaler` keeps the fitted statistics.
scaler = StandardScaler()
scaled_features = scaler.fit_transform(df[features])
df[features] = scaled_features

In [None]:
# Standardise the calendar features with their own scaler. Refitting the
# shared `scaler` here would overwrite the pollutant statistics it learned,
# and those are needed to map predictions back to the original units.
date_columns = ['DayOfYear', 'Month', 'Year']
date_scaler = StandardScaler()
df[date_columns] = date_scaler.fit_transform(df[date_columns])

In [None]:
# Assign each distinct city an integer id in order of first appearance.
cities = df['City'].unique()
cities_to_index = dict(zip(cities, range(len(cities))))
num_cities = len(cities)

# Integer city id for every row.
df['CityIndex'] = df['City'].map(cities_to_index)

In [None]:
def create_sequences(data, city_data, seq_length, num_targets=None):
    """Build sliding-window samples for next-step prediction.

    For every start offset, the window ``data[start:start + seq_length]`` is
    an input sample and the first ``num_targets`` columns of the row that
    follows the window are its target. The city id of that target row is
    returned alongside.

    Windows whose rows do not all carry the same city id as the target row
    are skipped, so a sample never mixes measurements from two cities.

    Parameters
    ----------
    data : array-like, 2-D
        Rows are time steps, columns are input features.
    city_data : array-like, 1-D
        City id for each row of ``data``.
    seq_length : int
        Number of consecutive rows in each input window.
    num_targets : int, optional
        Number of leading columns used as the prediction target. Defaults to
        ``len(features)`` read from the module-level ``features`` list.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X, y, X_city)``: the input windows, the targets and the city id of
        each target row.
    """
    data = np.asarray(data)
    city_data = np.asarray(city_data)
    if num_targets is None:
        num_targets = len(features)

    X, y, X_city = [], [], []
    for start in range(len(data) - seq_length):
        stop = start + seq_length
        target_city = city_data[stop]
        # Skip windows that straddle a boundary between two cities.
        if not np.all(city_data[start:stop] == target_city):
            continue
        X.append(data[start:stop, :])
        y.append(data[stop, :num_targets])
        X_city.append(target_city)
    return np.array(X), np.array(y), np.array(X_city)


In [None]:
seq_length = 30

# Model inputs: the pollutant columns followed by the three calendar columns.
sequence_columns = features + ['DayOfYear', 'Month', 'Year']
X, y, X_city = create_sequences(
    df[sequence_columns].to_numpy(),
    df['CityIndex'].to_numpy(),
    seq_length,
)

In [None]:
# One-hot encode the integer city id of every sample.
# Note: this rebinds X_city, so the cell is meant to run once per kernel.
X_city = to_categorical(
    X_city,
    num_classes=num_cities,
)

In [None]:
# Hold out 20% of the samples; the three arrays are split with the same
# shuffled indices, fixed by random_state.
(
    X_train, X_test,
    y_train, y_test,
    X_city_train, X_city_test,
) = train_test_split(
    X, y, X_city,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Two inputs: the feature window and the one-hot city vector.
input_seq = Input(shape=(seq_length, X.shape[2]))
input_city = Input(shape=(num_cities,))

# Stacked LSTMs: the first returns the full sequence so the second can
# consume it; the second returns only its final output.
lstm_sequence = LSTM(64, activation='relu', return_sequences=True)(input_seq)
lstm_out = LSTM(32, activation='relu')(lstm_sequence)

# Join the sequence encoding with the city vector, then regress one value
# per pollutant column.
merged = Concatenate()([lstm_out, input_city])
hidden = Dense(32, activation='relu')(merged)
output = Dense(len(features))(hidden)

model = Model(inputs=[input_seq, input_city], outputs=output)
model.compile(optimizer='adam', loss='mse')


In [None]:
# Pass the compiled model through the Intel extension before fitting.
itex_model = itex.optimize(model)

# Training settings; 10% of the training samples are held back for validation.
fit_options = dict(
    epochs=100,
    batch_size=32,
    validation_split=0.1,
    verbose=1,
)

# NOTE(review): config_context is imported from sklearnex; presumably it only
# governs scikit-learn estimators - confirm it has an effect on this Keras fit.
with config_context(target_offload="gpu:0"):
    history = itex_model.fit([X_train, X_city_train], y_train, **fit_options)

In [None]:
# Write the trained model to disk so the inference cells can reload it.
model_path = 'my_model.h5'
itex_model.save(model_path)

In [None]:
# Rebuild the preprocessing state used for inference. The CSV is re-read and
# put through the same steps as before training (calendar columns, mean
# imputation, standardisation), so that:
#   * both scalers below are actually fitted before predict_future uses them;
#   * `df` carries the DayOfYear/Month/Year columns, in scaled form, that the
#     later cells select from it.
df = pd.read_csv('city_day.csv')
df['Date'] = pd.to_datetime(df['Date'])
df['DayOfYear'] = df['Date'].dt.dayofyear
df['Month'] = df['Date'].dt.month
df['Year'] = df['Date'].dt.year

cities = df['City'].unique()
cities_to_index = {city: index for index, city in enumerate(cities)}

pollutant_columns = ['PM2.5','PM10', 'NO', 'NO2', 'NOx', 'CO', 'SO2', 'O3', 'Benzene']
date_columns = ['DayOfYear', 'Month', 'Year']
df[pollutant_columns] = df[pollutant_columns].fillna(df[pollutant_columns].mean())

# Separate scalers: one for the pollutant columns (also used to undo the
# scaling of predictions) and one for the three calendar columns.
feature_scaler = StandardScaler()
df[pollutant_columns] = feature_scaler.fit_transform(df[pollutant_columns].to_numpy())
date_scaler = StandardScaler()
df[date_columns] = date_scaler.fit_transform(df[date_columns].to_numpy())

# Keep the previous module-level name bound to a fitted scaler.
scaler = feature_scaler


def predict_future(model, last_sequence, future_date, city):
    """Predict the pollutant values for ``future_date`` in ``city``.

    Parameters
    ----------
    model
        Object whose ``predict`` accepts ``[sequence_batch, city_one_hot]``.
    last_sequence : numpy.ndarray
        2-D array of scaled rows whose last three columns are the calendar
        features (day of year, month, year). It is copied, not modified.
    future_date : pandas.Timestamp
        Date to predict for; its ``dayofyear``, ``month`` and ``year`` are
        scaled and written into the last row of the copied sequence.
    city : str
        City name; must be a key of ``cities_to_index``.

    Returns
    -------
    numpy.ndarray
        The first (only) prediction row, mapped back to original units with
        the pollutant scaler.

    Raises
    ------
    KeyError
        If ``city`` is not present in ``cities_to_index``.
    """
    input_seq = last_sequence.copy()

    # Scale the calendar features with the scaler fitted on those columns.
    future_parts = np.array(
        [[future_date.dayofyear, future_date.month, future_date.year]],
        dtype=float,
    )
    input_seq[-1, -3:] = date_scaler.transform(future_parts)[0]

    city_index = cities_to_index[city]
    city_one_hot = to_categorical([city_index], num_classes=len(cities_to_index))

    prediction = model.predict([input_seq.reshape(1, seq_length, -1), city_one_hot])
    # Undo the standardisation with the pollutant scaler, whose column count
    # matches the model output.
    return feature_scaler.inverse_transform(prediction)[0]

In [None]:
features = ['PM2.5','PM10', 'NO', 'NO2', 'NOx', 'CO', 'SO2', 'O3', 'Benzene']
seq_length = 30

# Same column layout as the training windows: pollutants, then calendar parts.
sequence_columns = features + ['DayOfYear', 'Month', 'Year']

# NOTE(review): this takes the final seq_length rows of the whole frame,
# whichever city they belong to - confirm that is the intended history.
last_sequence = df[sequence_columns].to_numpy()[-seq_length:]

In [None]:
# Date and city to forecast for.
future_date = pd.Timestamp(year=2025, month=10, day=5)
city = 'Chennai'

In [None]:
from tensorflow.keras.models import load_model

# Reload the model written by the training section.
saved_model_path = 'my_model.h5'
loaded_model = load_model(saved_model_path)

In [None]:
# Forecast the pollutant values for the chosen date and city.
future_prediction = predict_future(
    model=loaded_model,
    last_sequence=last_sequence,
    future_date=future_date,
    city=city,
)

In [None]:
# One-row table: one column per pollutant, indexed by the forecast date.
result_df = pd.DataFrame([future_prediction], columns=features, index=[future_date])

# `city` is a city name (it is looked up in the mapping built from the City
# column), so the message labels it as a city rather than a station id.
print(f"Predicted values for {future_date.date()} in {city}:")
print(result_df)