In [1]:
import torch
import torch.nn as nn

import seaborn as sns
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
df=pd.read_csv('housing.csv')
print(df.shape)
print(df.head()) #默认打印前五行

(21597, 3)
         date     price  yr_built
0   3/12/2015  530000.0      1900
1  11/21/2014  740500.0      1900
2   8/18/2014  625000.0      1900
3   12/4/2014  595000.0      1900
4  12/19/2014  485000.0      1900


In [3]:
all_data = df['price'].values.astype(float) #取值后 将类型改为float
print(all_data[:10])

[530000. 740500. 625000. 595000. 485000. 565000. 352950. 440000. 712000.
 490000.]


In [4]:
#将后面的1597条作为测试，前面的2w条作为训练
test_data_size = 1597

train_data = all_data[:-test_data_size]
test_data = all_data[-test_data_size:]

In [5]:
from sklearn.preprocessing import MinMaxScaler
#将训练集进行归一化
scaler = MinMaxScaler(feature_range=(-1, 1))
train_data_normalized = scaler.fit_transform(train_data.reshape(-1, 1))
print(train_data_normalized[:5])
print(train_data_normalized[-5:])

[[-0.88139596]
 [-0.82616111]
 [-0.85646812]
 [-0.86434007]
 [-0.89320388]]
[[-0.60062976]
 [-0.14143269]
 [-0.82366833]
 [-0.54815009]
 [-0.88926791]]


In [6]:
#将训练样本进行转换成tensor
train_data_normalized = torch.FloatTensor(train_data_normalized).view(-1)
print(train_data_normalized.shape)

torch.Size([20000])


In [7]:
#选择窗口大小
train_window = 15 #自己设置 用前15个预测第16个
def create_inout_sequences(input_data, tw):
    inout_seq = []
    L = len(input_data)
    for i in range(L-tw):
        train_seq = input_data[i:i+tw]
        train_label = input_data[i+tw:i+tw+1]
        inout_seq.append((train_seq,train_label))
    return inout_seq

train_inout_seq = create_inout_sequences(train_data_normalized, train_window)
print(train_inout_seq[:2])

[(tensor([-0.8814, -0.8262, -0.8565, -0.8643, -0.8932, -0.8722, -0.9279, -0.9050,
        -0.8336, -0.8919, -0.7528, -0.9089, -0.8895, -0.8987, -0.9076]), tensor([-0.8407])), (tensor([-0.8262, -0.8565, -0.8643, -0.8932, -0.8722, -0.9279, -0.9050, -0.8336,
        -0.8919, -0.7528, -0.9089, -0.8895, -0.8987, -0.9076, -0.8407]), tensor([-0.8213]))]


In [8]:
from torch.utils.data import DataLoader

In [9]:
train_loader = DataLoader(dataset=train_inout_seq, 
                          batch_size=64, #批量大小
                          shuffle=True)
train_loader

<torch.utils.data.dataloader.DataLoader at 0x2842513bdc8>

In [10]:
class RNN(nn.Module):
    def __init__(self, input_size=15, hidden_layer_size=64, output_size=1):
        super().__init__()
        self.hidden_layer_size = hidden_layer_size #看一下隐层结果

        self.rnn = nn.RNN(input_size, hidden_layer_size)

        self.linear = nn.Linear(hidden_layer_size, output_size)

        
#         self.batch_size = batch_size
                            

    def forward(self, input_seq):
        self.hidden_cell = torch.zeros(1,input_seq.shape[0],self.hidden_layer_size) #1：层数，1 64 64 hidden cell
        rnn_out, self.hidden_cell = self.rnn(input_seq.view(-1,input_seq.shape[0],input_seq.shape[1]), self.hidden_cell)
        predictions = self.linear(rnn_out.view(len(input_seq), -1))
        return predictions[-1]
model = RNN()
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
print(model)

RNN(
  (rnn): RNN(15, 64)
  (linear): Linear(in_features=64, out_features=1, bias=True)
)


In [11]:
#训练模型
epochs = 10

for i in range(epochs):
    for seq, labels in train_loader:
#         print(seq.shape,len(labels))
        optimizer.zero_grad()
        model.hidden_cell = torch.zeros(1, seq.shape[0], model.hidden_layer_size)

        y_pred = model(seq)

        single_loss = loss_function(y_pred, labels)
        single_loss.backward()
        optimizer.step()

    if i%2 == 1:
        print(f'epoch: {i:3} loss: {single_loss.item():10.8f}')

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


epoch:   1 loss: 0.00725872
epoch:   3 loss: 0.00607895
epoch:   5 loss: 0.00403136
epoch:   7 loss: 0.00572256
epoch:   9 loss: 0.00407876


In [12]:
fut_pred = test_data_size

test_inputs = train_data_normalized[-train_window:].tolist()
print(test_inputs)

[-0.9181317090988159, -0.8367882370948792, -0.8979270458221436, -0.9057990312576294, -0.7790606021881104, -0.8472973108291626, -0.882707953453064, -0.8813959360122681, -0.9194437265396118, -0.9242324829101562, -0.6006297469139099, -0.14143268764019012, -0.8236683011054993, -0.5481500625610352, -0.8892679214477539]


In [13]:
model.eval()

for i in range(fut_pred):
    seq = torch.FloatTensor(test_inputs[-train_window:])
    seq = seq.view(-1,len(seq))
#     print(seq.shape)
    with torch.no_grad():
        model.hidden = torch.zeros(1, seq.shape[0], model.hidden_layer_size) # (num_layers, batch_size, hidden_size)
                        
        test_inputs.append(model(seq).item())
#         print(test_inputs)
#         print('====================================')


In [14]:
actual_predictions = scaler.inverse_transform(np.array(test_inputs[train_window:] ).reshape(-1, 1))
actual_predictions

array([[712458.02408457],
       [732146.99077606],
       [721866.03236198],
       ...,
       [598638.09227943],
       [598638.09227943],
       [598638.09227943]])