# Weather forecasting model using Long Short-Term Memory Networks (LSTMs)

In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Dataset

In [None]:
!wget https://raw.githubusercontent.com/RobotGyal/Weather-Prediction/master/data/weatherHistory.csv
filepath = '/content/weatherHistory.csv'
data = pd.read_csv(filepath)
temp_data = data.iloc[:, [0,3]]  # Time and temperature columns
temp_data[:10]

--2024-08-03 17:40:38--  https://raw.githubusercontent.com/RobotGyal/Weather-Prediction/master/data/weatherHistory.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.109.133, 185.199.111.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 16294377 (16M) [text/plain]
Saving to: ‘weatherHistory.csv.13’


2024-08-03 17:40:38 (132 MB/s) - ‘weatherHistory.csv.13’ saved [16294377/16294377]



Unnamed: 0,Formatted Date,Temperature (C)
0,2006-04-01 00:00:00.000 +0200,9.472222
1,2006-04-01 01:00:00.000 +0200,9.355556
2,2006-04-01 02:00:00.000 +0200,9.377778
3,2006-04-01 03:00:00.000 +0200,8.288889
4,2006-04-01 04:00:00.000 +0200,8.755556
5,2006-04-01 05:00:00.000 +0200,9.222222
6,2006-04-01 06:00:00.000 +0200,7.733333
7,2006-04-01 07:00:00.000 +0200,8.772222
8,2006-04-01 08:00:00.000 +0200,10.822222
9,2006-04-01 09:00:00.000 +0200,13.772222


In [None]:
# Load and preprocess data
def load_and_preprocess_data(filepath, sequence_length, column=3):
    """Load one CSV column, min-max scale it and cut it into sliding windows.

    Args:
        filepath: Path of the CSV file, read with ``pd.read_csv``.
        sequence_length: Number of consecutive time steps in each input
            window. Must be at least 1.
        column: Positional index of the column to use. Defaults to 3, the
            index this function previously hard-coded for the temperature
            column.

    Returns:
        A ``(features, targets, scaler)`` tuple. ``features`` has shape
        ``(n_windows, sequence_length, 1)``; ``targets`` has shape
        ``(n_windows, 1)`` and holds the scaled value that immediately
        follows each window; ``scaler`` is the fitted ``MinMaxScaler``.
        ``n_windows`` is ``max(n_rows - sequence_length, 0)``.

    Raises:
        ValueError: If ``sequence_length`` is smaller than 1.
    """
    if sequence_length < 1:
        raise ValueError(f'sequence_length must be >= 1, got {sequence_length}')

    data = pd.read_csv(filepath)

    # Keep the selection 2-D, shape (n_rows, 1), as the scaler expects.
    temp_data = data.iloc[:, [column]].values

    # NOTE: the scaler is fitted on every row of the file, so any split made
    # afterwards shares its min/max statistics between the resulting parts.
    scaler = MinMaxScaler()
    temp_data = scaler.fit_transform(temp_data)

    n_windows = len(temp_data) - sequence_length
    if n_windows <= 0:
        # Too few rows for even one (window, next value) pair: return empty
        # arrays that still carry the documented trailing dimensions.
        return np.empty((0, sequence_length, 1)), np.empty((0, 1)), scaler

    # The final row is only ever a target, so windows are taken over
    # everything before it; window i covers rows i .. i + sequence_length - 1.
    windows = np.lib.stride_tricks.sliding_window_view(
        temp_data[:-1, 0], sequence_length
    )
    features = windows[..., np.newaxis].copy()  # own, writable (n, seq, 1) array
    targets = temp_data[sequence_length:].copy()

    return features, targets, scaler


# Window length: how many consecutive readings feed each prediction
sequence_length = 10
features, targets, scaler = load_and_preprocess_data(filepath, sequence_length)

# Ordered 80/20 split: shuffle=False keeps the windows in file order
X_train, X_test, y_train, y_test = train_test_split(
    features,
    targets,
    test_size=0.2,
    shuffle=False,
)

# Wrap float32 tensors of each split in a dataset
train_dataset, test_dataset = (
    torch.utils.data.TensorDataset(
        torch.tensor(inputs, dtype=torch.float32),
        torch.tensor(labels, dtype=torch.float32),
    )
    for inputs, labels in ((X_train, y_train), (X_test, y_test))
)

# Training batches are reshuffled every epoch; validation batches keep their order
train_loader = torch.utils.data.DataLoader(train_dataset, shuffle=True, batch_size=64)
val_loader = torch.utils.data.DataLoader(test_dataset, shuffle=False, batch_size=64)

# Define the LSTM model

In [None]:
class LSTMModel(nn.Module):
    """LSTM encoder followed by a linear layer applied to the last time step."""

    def __init__(self, input_size, hidden_size, num_layers, output_size):
        """Build the recurrent stack (batch-first) and the linear read-out."""
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Return the linear projection of the LSTM output at the final step."""
        # Only the per-step outputs are needed; the (h_n, c_n) state is dropped.
        step_outputs = self.lstm(x)[0]
        final_step = step_outputs[:, -1, :]
        return self.fc(final_step)

# Train model

In [None]:
def train_model(model, criterion, optimizer, train_loader, val_loader, num_epochs):
    """Train ``model`` and print the mean training/validation loss per epoch.

    Args:
        model: Module to optimise; called as ``model(inputs)``.
        criterion: Loss callable taking ``(outputs, labels)``.
        optimizer: Optimizer that updates ``model``'s parameters.
        train_loader: Iterable of ``(inputs, labels)`` training batches.
        val_loader: Iterable of ``(inputs, labels)`` validation batches.
        num_epochs: Number of passes over ``train_loader``.

    Both reported losses are the mean of the per-batch losses of that epoch.
    The training figure is accumulated while the weights are being updated,
    so it is a running average over the epoch, not the loss of the final
    weights. A loader that yields no batches is reported as ``nan``.
    """
    for epoch in range(num_epochs):
        model.train()
        train_total, train_batches = 0.0, 0
        for inputs, labels in train_loader:
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # Accumulate every batch so the report is not just the last
            # (arbitrary, possibly smaller) mini-batch of the epoch.
            train_total += loss.item()
            train_batches += 1
        train_loss = train_total / train_batches if train_batches else float('nan')

        # Validation
        model.eval()
        val_total, val_batches = 0.0, 0
        with torch.no_grad():
            for val_inputs, val_labels in val_loader:
                val_outputs = model(val_inputs)
                val_total += criterion(val_outputs, val_labels).item()
                val_batches += 1
        # Count batches while iterating so an empty loader cannot divide by zero.
        val_loss = val_total / val_batches if val_batches else float('nan')

        print(f'Epoch [{epoch+1}/{num_epochs}], Training Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}')




# Network dimensions: one input feature, 50 hidden units, a single LSTM layer
# and one predicted value
input_size, hidden_size, num_layers, output_size = 1, 50, 1, 1
num_epochs = 10

model = LSTMModel(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    output_size=output_size,
)

# Mean-squared-error objective optimised with Adam
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

train_model(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    train_loader=train_loader,
    val_loader=val_loader,
    num_epochs=num_epochs,
)

Epoch [1/10], Training Loss: 0.0009, Validation Loss: 0.0007
Epoch [2/10], Training Loss: 0.0010, Validation Loss: 0.0006
Epoch [3/10], Training Loss: 0.0020, Validation Loss: 0.0004
Epoch [4/10], Training Loss: 0.0008, Validation Loss: 0.0004
Epoch [5/10], Training Loss: 0.0004, Validation Loss: 0.0004
Epoch [6/10], Training Loss: 0.0003, Validation Loss: 0.0004
Epoch [7/10], Training Loss: 0.0005, Validation Loss: 0.0004
Epoch [8/10], Training Loss: 0.0003, Validation Loss: 0.0004
Epoch [9/10], Training Loss: 0.0005, Validation Loss: 0.0004
Epoch [10/10], Training Loss: 0.0012, Validation Loss: 0.0004


# Evaluate model

In [None]:
# Forecasting future values function
def forecast(model, initial_seq, scaler, sequence_length, future_steps):
    """Roll the model forward autoregressively and return unscaled forecasts.

    Each prediction is appended to the input window (dropping the oldest
    step) and fed back to produce the next one.

    Args:
        model: Module called on a float32 tensor built from the window with a
            leading batch dimension of 1; its output must be a single value.
        initial_seq: Array-like seed window, in the scaled space the model
            works in. It is copied, never modified.
        scaler: Object whose ``inverse_transform`` maps an ``(n, 1)`` array of
            scaled predictions back to original units.
        sequence_length: Unused; the window length is taken from
            ``initial_seq``. Kept so existing call sites keep working.
        future_steps: Number of steps to forecast.

    Returns:
        Array of shape ``(future_steps, 1)`` as returned by
        ``scaler.inverse_transform``, or an empty ``(0, 1)`` array when
        ``future_steps`` is not positive.
    """
    model.eval()
    if future_steps <= 0:
        # Nothing to predict; skip the scaler, which may reject empty input.
        return np.empty((0, 1))

    forecasts = []
    # Private float32 copy so the caller's array is never touched.
    current_seq = np.array(initial_seq, dtype=np.float32)

    with torch.no_grad():
        for _ in range(future_steps):
            input_seq = torch.tensor(current_seq, dtype=torch.float32).unsqueeze(0)
            prediction = model(input_seq).item()
            forecasts.append(prediction)
            # Slide the window one step along the time axis, then overwrite
            # the wrapped-around oldest entry with the new prediction.
            current_seq = np.roll(current_seq, -1, axis=0)
            current_seq[-1] = prediction

    return scaler.inverse_transform(np.array(forecasts).reshape(-1, 1))

# One-step-ahead predictions for every window of the held-out split
model.eval()
with torch.no_grad():
    y_pred = model(torch.tensor(X_test, dtype=torch.float32)).numpy()
# y_test and y_pred are both min-max scaled values, so this MSE is expressed
# in scaled units rather than in degrees.
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error on test set: {mse:.4f}')

# Forecast future values
# Seed with the most recent `sequence_length` observations. features[-1] ends
# one step before the end of the series (its target, targets[-1], is the final
# observation), so seeding with it alone would make the first "forecast" a
# re-prediction of an already known value instead of the first future step.
initial_seq = np.concatenate([features[-1][1:], targets[-1:]])
future_steps = 10
forecasts = forecast(model, initial_seq, scaler, sequence_length, future_steps)
print(f'\n Forecasted future values: \n{forecasts}')

Mean Squared Error on test set: 0.0004

 Forecasted future values: 
[[20.53229707]
 [19.68614401]
 [18.9436137 ]
 [18.38037751]
 [18.01809527]
 [17.80747223]
 [17.69387868]
 [17.65323019]
 [17.65679908]
 [17.71437217]]
