In [1]:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


## Data Preparation

### About Dataset
[Weather in Szeged 2006-2016](https://www.kaggle.com/datasets/budincsevity/szeged-weather)
<br>2006-04-01 00:00 ~ 2016-09-09 23:00

- Context <br>
    This is a dataset for a larger project I have been working on. My idea is to analyze and compare real historical weather with weather folklore.

- Content <br>
    The CSV file includes an hourly/daily summary for the Szeged, Hungary area, between 2006 and 2016.

    Data available in the hourly response:
    - time
    - summary
    - precipType
    - temperature
    - apparentTemperature
    - humidity
    - windSpeed
    - windBearing
    - visibility
    - cloudCover
    - pressure

In [17]:
# Columns kept from the raw CSV; every other column in the file is ignored.
WEATHER_COLUMNS = ['Formatted Date', 'Temperature (C)', 'Humidity', 'Wind Speed (km/h)']

# `usecols` makes the parser skip the unused columns instead of loading and
# then discarding them. `usecols` returns columns in file order, so indexing
# with the same list afterwards pins the order to the one declared above.
weather = pd.read_csv('weatherHistory.csv', usecols=WEATHER_COLUMNS)[WEATHER_COLUMNS]

print(weather.shape)
weather.head(5)

(96453, 4)


Unnamed: 0,Formatted Date,Temperature (C),Humidity,Wind Speed (km/h)
0,2006-04-01 00:00:00.000 +0200,9.472222,0.89,14.1197
1,2006-04-01 01:00:00.000 +0200,9.355556,0.86,14.2646
2,2006-04-01 02:00:00.000 +0200,9.377778,0.89,3.9284
3,2006-04-01 03:00:00.000 +0200,8.288889,0.83,14.1036
4,2006-04-01 04:00:00.000 +0200,8.755556,0.83,11.0446


In [18]:
# 轉換'Formatted Date' (刪除時區)
# Drop the trailing 6-character UTC offset (e.g. " +0200") from each timestamp
# string, then parse what remains as a timezone-naive datetime.
date_text = weather['Formatted Date'].str[:-6]
weather['Formatted Date'] = pd.to_datetime(date_text)

# Rename the column and rebind `weather` to the renamed DataFrame.
weather = weather.rename(columns={'Formatted Date': 'Date'})

weather.head()

Unnamed: 0,Date,Temperature (C),Humidity,Wind Speed (km/h)
0,2006-04-01 00:00:00,9.472222,0.89,14.1197
1,2006-04-01 01:00:00,9.355556,0.86,14.2646
2,2006-04-01 02:00:00,9.377778,0.89,3.9284
3,2006-04-01 03:00:00,8.288889,0.83,14.1036
4,2006-04-01 04:00:00,8.755556,0.83,11.0446


## LSTM
[pytorch LSTM多变量预测温度](https://blog.51cto.com/u_16213349/7157643)

In [16]:
class LSTM(nn.Module):
    """Single-layer LSTM followed by a linear read-out applied at every time step.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the LSTM hidden/cell state.
        output_size: Number of values produced per time step.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size, hidden_size)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, input):
        """Run one sequence through the LSTM and the linear layer.

        Args:
            input: Tensor whose first dimension is the time axis. All remaining
                dimensions are flattened into the feature axis and the tensor
                is processed as a single sequence (batch size 1).

        Returns:
            Tensor of shape ``(len(input), output_size)``, one row per time step.
        """
        seq_len = len(input)
        # No initial state is passed on purpose: nn.LSTM then starts from zero
        # hidden/cell states created with the input's device and dtype, so the
        # module keeps working after .to(device) or .double(). Hand-built
        # torch.zeros(...) states would stay on CPU/float32 and break there.
        # '_' is the final (h_n, c_n) state, which is not needed here.
        output, _ = self.lstm(input.view(seq_len, 1, -1))
        output = self.fc(output.view(seq_len, -1))
        return output