<a href="https://colab.research.google.com/github/biswajitmohanty/genai/blob/main/Weather_Forecasting_with_LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [20]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
import datetime

In [21]:
# Load Mumbai weather dataset
data = pd.read_csv("weather.csv")  # Ensure correct file path
data['datetime'] = pd.to_datetime(data['datetime'])  # Convert to datetime format
data = data.sort_values(by='datetime')  # Ensure chronological order

# Selecting temperature column
data_temp = data[['datetime', 'temp']].set_index('datetime')

In [22]:
# Plot temperature trend using Plotly
fig = go.Figure()
fig.add_trace(go.Scatter(x=data_temp.index, y=data_temp['temp'], mode='lines', name="Temperature"))
fig.update_layout(title="Bangalore Temperature Over Time", xaxis_title="Date", yaxis_title="Temperature (°C)")
fig.show()

In [24]:
# Normalize the temperature data
scaler = MinMaxScaler(feature_range=(0, 1))
data_scaled = scaler.fit_transform(data_temp)

In [25]:
# Function to create sequences for LSTM
def create_sequences(data, seq_length):
    X, y = [], []
    for i in range(len(data) - seq_length):
        X.append(data[i : i + seq_length])
        y.append(data[i + seq_length])
    return np.array(X), np.array(y)

SEQ_LENGTH = 30  # Use past 30 days to predict next day
X, y = create_sequences(data_scaled, SEQ_LENGTH)

In [26]:
# Split into training and testing sets
train_size = int(len(X) * 0.8)  # 80% training, 20% testing
X_train, y_train = X[:train_size], y[:train_size]
X_test, y_test = X[train_size:], y[train_size:]

# Reshape data for LSTM model (Samples, Time Steps, Features)
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1)) #(samples, time steps, 1)
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

In [27]:
# Build LSTM model
model = Sequential([
    LSTM(64, return_sequences=True, input_shape=(SEQ_LENGTH, 1)),
    Dropout(0.2),
    LSTM(64, return_sequences=False),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dense(1)  # Output layer for temperature prediction
])

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')

# Train the model with early stopping
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

history = model.fit(
    X_train, y_train,
    epochs=100,
    batch_size=16,
    validation_data=(X_test, y_test),
    callbacks=[early_stop]
)

Epoch 1/100



Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 40ms/step - loss: 0.1061 - val_loss: 0.0129
Epoch 2/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step - loss: 0.0185 - val_loss: 0.0100
Epoch 3/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step - loss: 0.0127 - val_loss: 0.0091
Epoch 4/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step - loss: 0.0124 - val_loss: 0.0088
Epoch 5/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - loss: 0.0106 - val_loss: 0.0086
Epoch 6/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - loss: 0.0079 - val_loss: 0.0087
Epoch 7/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 22ms/step - loss: 0.0088 - val_loss: 0.0080
Epoch 8/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - loss: 0.0085 - val_loss: 0.0079
Epoch 9/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━

In [28]:
# Plot training loss using Plotly
fig = go.Figure()
fig.add_trace(go.Scatter(y=history.history['loss'], mode='lines', name='Train Loss'))
fig.add_trace(go.Scatter(y=history.history['val_loss'], mode='lines', name='Validation Loss'))
fig.update_layout(title="Model Training Loss", xaxis_title="Epochs", yaxis_title="Loss")
fig.show()

In [29]:
# Predict on test data
predictions = model.predict(X_test)
predictions = scaler.inverse_transform(predictions)  # Convert back to original scale

# Convert actual test values back to original scale
y_test_actual = scaler.inverse_transform(y_test)

# Plot actual vs predicted temperatures using Plotly
fig = go.Figure()
fig.add_trace(go.Scatter(y=y_test_actual.flatten(), mode='lines', name="Actual Temperature"))
fig.add_trace(go.Scatter(y=predictions.flatten(), mode='lines', name="Predicted Temperature", line=dict(dash='dot')))
fig.update_layout(title="Mumbai Temperature Prediction (LSTM)", xaxis_title="Days", yaxis_title="Temperature (°C)")
fig.show()

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 113ms/step


In [30]:
# Function to predict future temperature with dates
def predict_future_temperature(last_seq, model, start_date, days=7):
    last_seq = np.array(last_seq).reshape(1, SEQ_LENGTH, 1)
    future_predictions = []
    future_dates = [start_date + datetime.timedelta(days=i) for i in range(1, days + 1)]

    for _ in range(days):
        next_temp = model.predict(last_seq)[0]
        future_predictions.append(next_temp)

        # Update sequence with new prediction
        last_seq = np.roll(last_seq, -1)
        last_seq[0, -1, 0] = next_temp  # Add new prediction to sequence

    return future_dates, scaler.inverse_transform(np.array(future_predictions))

In [31]:
# Get last 30 days of data
last_seq = data_scaled[-SEQ_LENGTH:]

# Predict next 7 days temperature
start_date = data_temp.index[-1]  # Last date in dataset
future_dates, future_temps = predict_future_temperature(last_seq, model, start_date, days=7)

# Convert predicted temperatures into a dataframe
future_df = pd.DataFrame({"Date": future_dates, "Predicted Temperature": future_temps.flatten()})

# Print predicted temperatures
print(future_df)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step



Conversion of an array with ndim > 0 to a scalar is deprecated, and will error in future. Ensure you extract a single element from your array before performing this operation. (Deprecated NumPy 1.25.)



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
        Date  Predicted Temperature
0 2025-03-12              24.579807
1 2025-03-13              24.478540
2 2025-03-14              24.452074
3 2025-03-15              24.457058
4 2025-03-16              24.478468
5 2025-03-17              24.508152
6 2025-03-18              24.540590


In [32]:
# Plot future predictions using Plotly
fig = go.Figure()
fig.add_trace(go.Scatter(x=future_dates, y=future_temps.flatten(), mode='markers+lines', name="Predicted Temperature", line=dict(dash='dot')))
fig.update_layout(title="Predicted Temperature for Next 7 Days", xaxis_title="Date", yaxis_title="Temperature (°C)")
fig.show()