In [1]:
import pandas as pd
import numpy as np
df = pd.read_csv(r"./Aleppo2017_processed.csv",encoding='utf-8')
print(df.head())

    id                 time     gl
0  183  2015-05-16 05:35:41  162.0
1  183  2015-05-16 05:30:41  164.0
2  183  2015-05-16 05:25:41  168.0
3  183  2015-05-16 05:20:41  169.0
4  183  2015-05-16 05:15:41  170.0


In [2]:
ID = df['id'].value_counts().index
print(ID)

Index([263,  77, 193, 277, 229, 245, 155, 251, 111, 164,
       ...
       128, 264, 186, 162, 249,  39,  52, 266, 223, 289],
      dtype='int64', name='id', length=226)


In [10]:
all_data = []
for i in ID[:100]:
    data = df[df['id']==i]
    data = data.sort_values(by='time')   
    data['target'] = data['gl'].shift(-1)
    
    # 使用了shift函数，在最后必然是有缺失值的，这里去掉缺失值所在行
    data.dropna()                     
    data = data[['gl','target']].astype(np.float32)  # 修改数据类型
    data = data.gl.values.astype(float)
    all_data.append(data)
    break

In [11]:
import numpy as np
all_data = np.array(all_data)
print(all_data.shape)
# 将2000条数据的最后24条用作测试集
test_data_size = 100
train_data = all_data[:,:-test_data_size]
test_data = all_data[:,-test_data_size:]

(1, 94680)


In [13]:
print(train_data.shape)
print(test_data.shape)

(1, 94580)
(1, 100)


In [14]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(-1, 1))
train_data_normalized = scaler.fit_transform(train_data.reshape(-1, 1))
print(train_data_normalized[:5])
print(train_data_normalized[-5:])
print(train_data_normalized.shape)

[[-0.72375691]
 [-0.73480663]
 [-0.75138122]
 [-0.75690608]
 [-0.76243094]]
[[-0.3480663 ]
 [-0.35911602]
 [-0.36464088]
 [-0.37016575]
 [-0.38674033]]
(94580, 1)


In [15]:
import torch
import torch.nn as nn
train_data_normalized = torch.FloatTensor(train_data_normalized).view(-1)

In [16]:
train_data_normalized.shape

torch.Size([94580])

In [17]:
train_window = 24
def create_inout_sequences(input_data, tw):
    inout_seq = []
    L = len(input_data)
    for i in range(L-tw):
        train_seq = input_data[i:i+tw]
        train_label = input_data[i+tw:i+tw+1]
        inout_seq.append((train_seq ,train_label))
    return inout_seq

In [18]:
train_inout_seq = create_inout_sequences(train_data_normalized, train_window)

In [19]:
print(len(train_inout_seq))
train_inout_seq[:5]

94556


[(tensor([-0.7238, -0.7348, -0.7514, -0.7569, -0.7624, -0.7680, -0.7680, -0.7680,
          -0.7680, -0.7624, -0.7569, -0.7624, -0.7680, -0.7735, -0.7680, -0.7680,
          -0.7680, -0.7680, -0.7680, -0.7735, -0.7790, -0.7845, -0.7790, -0.7790]),
  tensor([-0.7790])),
 (tensor([-0.7348, -0.7514, -0.7569, -0.7624, -0.7680, -0.7680, -0.7680, -0.7680,
          -0.7624, -0.7569, -0.7624, -0.7680, -0.7735, -0.7680, -0.7680, -0.7680,
          -0.7680, -0.7680, -0.7735, -0.7790, -0.7845, -0.7790, -0.7790, -0.7790]),
  tensor([-0.7735])),
 (tensor([-0.7514, -0.7569, -0.7624, -0.7680, -0.7680, -0.7680, -0.7680, -0.7624,
          -0.7569, -0.7624, -0.7680, -0.7735, -0.7680, -0.7680, -0.7680, -0.7680,
          -0.7680, -0.7735, -0.7790, -0.7845, -0.7790, -0.7790, -0.7790, -0.7735]),
  tensor([-0.7735])),
 (tensor([-0.7569, -0.7624, -0.7680, -0.7680, -0.7680, -0.7680, -0.7624, -0.7569,
          -0.7624, -0.7680, -0.7735, -0.7680, -0.7680, -0.7680, -0.7680, -0.7680,
          -0.7735, -0.7790

In [21]:
class LSTM(nn.Module):
    def __init__(self, input_size=1, hidden_layer_size=64, output_size=1):
        super().__init__()
        self.hidden_layer_size = hidden_layer_size

        self.lstm = nn.LSTM(input_size, hidden_layer_size)

    def forward(self, input_seq):
        lstm_out, self.hidden_cell = self.lstm(input_seq.view(len(input_seq) ,1, -1), self.hidden_cell)
        predictions = self.linear(lstm_out.view(len(input_seq), -1))
        return predictions[-1]

In [22]:
model = LSTM()
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
model.add_module('linear',nn.Linear(64,1))

In [23]:
print(model)

LSTM(
  (lstm): LSTM(1, 64)
  (linear): Linear(in_features=64, out_features=1, bias=True)
)


In [24]:
for seq, labels in train_inout_seq:
    print(seq.shape)
    print(labels.shape)
    break

torch.Size([24])
torch.Size([1])


In [None]:
epochs = 150

for i in range(epochs):
    for seq, labels in train_inout_seq:
        optimizer.zero_grad()
        model.hidden_cell = (torch.zeros(1, 1, model.hidden_layer_size),
                        torch.zeros(1, 1, model.hidden_layer_size))
        
        y_pred = model(seq)

        single_loss = loss_function(y_pred, labels)
        single_loss.backward()
        optimizer.step()

    if i%25 == 1:
        print(f'epoch: {i:3} loss: {single_loss.item():10.8f}')

print(f'epoch: {i:3} loss: {single_loss.item():10.10f}')

epoch:   1 loss: 0.00019356
epoch:  26 loss: 0.00021001
epoch:  51 loss: 0.05988961
epoch:  76 loss: 0.01351678
