In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (


In [2]:
data = pd.read_csv('tmax.csv')

data['date'] = pd.to_datetime(data['date'])

scaler = MinMaxScaler(feature_range=(0, 1))
data['tmax'] = scaler.fit_transform(data[['tmax']])

train_data, test_data = train_test_split(data, test_size=0.2, shuffle=False)

In [3]:
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.hidden_size = 64
        self.lstm = nn.LSTM(input_size=1, hidden_size=self.hidden_size, num_layers=2, batch_first=True)
        self.fc = nn.Linear(self.hidden_size, 1)

    def forward(self, x):
        out, _ = self.lstm(x)
        out = self.fc(out[:, -1, :])
        return out

In [4]:
def prepare_data(data, sequence_length):
    sequences = []
    targets = []
    for i in range(len(data) - sequence_length):
        seq = data[i:i+sequence_length][['tmax']].values
        target = data.iloc[i+sequence_length]['tmax']
        sequences.append(seq)
        targets.append(target)
    return np.array(sequences), np.array(targets)

sequence_length = 10
X_train, y_train = prepare_data(train_data, sequence_length)
X_test, y_test = prepare_data(test_data, sequence_length)

X_train = torch.Tensor(X_train)
y_train = torch.Tensor(y_train).unsqueeze(-1)
X_test = torch.Tensor(X_test)
y_test = torch.Tensor(y_test)

X_train.shape,y_train.shape

(torch.Size([6659, 10, 1]), torch.Size([6659, 1]))

In [5]:
model = Model()
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [6]:
num_epochs = 100
for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()
    outputs = model(X_train)
    loss = criterion(outputs, y_train)
    loss.backward()
    optimizer.step()
    if (epoch+1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [10/100], Loss: 0.0559
Epoch [20/100], Loss: 0.0313
Epoch [30/100], Loss: 0.0253
Epoch [40/100], Loss: 0.0215
Epoch [50/100], Loss: 0.0205
Epoch [60/100], Loss: 0.0187
Epoch [70/100], Loss: 0.0158
Epoch [80/100], Loss: 0.0109
Epoch [90/100], Loss: 0.0078
Epoch [100/100], Loss: 0.0077


In [7]:
model.eval()
with torch.no_grad():
    outputs = model(X_test)
    test_loss = criterion(outputs, y_test)
    print(f'Test Loss: {test_loss.item():.4f}')

Test Loss: 0.0482


  return F.mse_loss(input, target, reduction=self.reduction)


In [11]:
def predict_temperature(model, date, sequence_length, scaler, weather_data):
    sequence_dates = pd.date_range(start=date - pd.Timedelta(days=sequence_length), end=date - pd.Timedelta(days=1))
    
    sequence_data = weather_data.loc[weather_data['date'].isin(sequence_dates), 'tmax'].values
    
    scaled_sequence = scaler.transform(sequence_data.reshape(-1, 1))
    
    sequence_tensor = torch.Tensor(scaled_sequence).unsqueeze(0)
    
    with torch.no_grad():
        prediction = model(sequence_tensor)
        prediction = scaler.inverse_transform(prediction).item()
    
    return prediction

In [16]:
input_date = pd.to_datetime('2022-04-10')
predicted_temperature = predict_temperature(model, input_date, 10, scaler, data)
print(f'Predicted temperature for {input_date}: {predicted_temperature:.2f}')

Predicted temperature for 2022-04-10 00:00:00: 5.87


