In [1]:
import torch
import torch.nn as nn
import seaborn as sns
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
df=pd.read_csv('housing.csv')
print(df.shape)
print(df.head())

(21597, 3)
         date     price  yr_built
0   3/12/2015  530000.0      1900
1  11/21/2014  740500.0      1900
2   8/18/2014  625000.0      1900
3   12/4/2014  595000.0      1900
4  12/19/2014  485000.0      1900


In [3]:
all_data = df['price'].values.astype(float)
print(all_data[:10])

[530000. 740500. 625000. 595000. 485000. 565000. 352950. 440000. 712000.
 490000.]


In [4]:
#将后面的1597条作为测试，前面的2w条作为训练
test_data_size = 1597

train_data = all_data[:-test_data_size]
test_data = all_data[-test_data_size:]

In [18]:
from sklearn.preprocessing import MinMaxScaler

#将训练集进行归一化
scaler = MinMaxScaler(feature_range=(-1, 1))
train_data_normalized = scaler.fit_transform(train_data.reshape(-1, 1))
print(train_data_normalized[:5])
print(train_data_normalized[-5:])

[[-0.88139596]
 [-0.82616111]
 [-0.85646812]
 [-0.86434007]
 [-0.89320388]]
[[-0.60062976]
 [-0.14143269]
 [-0.82366833]
 [-0.54815009]
 [-0.88926791]]


In [19]:
#将训练样本进行转换成tensor
train_data_normalized = torch.FloatTensor(train_data_normalized).view(-1)
print(train_data_normalized.shape)

torch.Size([20000])


In [20]:
#选择窗口大小
train_window = 15 #自己设置
def create_inout_sequences(input_data, tw):
    inout_seq = []
    L = len(input_data)
    for i in range(L-tw):
        train_seq = input_data[i:i+tw]
        train_seq = train_seq.unsqueeze(0)
        #print(train_seq.shape)
        train_label = input_data[i+tw:i+tw+1]
        inout_seq.append((train_seq,train_label))
    return inout_seq

train_inout_seq = create_inout_sequences(train_data_normalized, train_window)
print(train_inout_seq[:2])

[(tensor([[-0.8814, -0.8262, -0.8565, -0.8643, -0.8932, -0.8722, -0.9279, -0.9050,
         -0.8336, -0.8919, -0.7528, -0.9089, -0.8895, -0.8987, -0.9076]]), tensor([-0.8407])), (tensor([[-0.8262, -0.8565, -0.8643, -0.8932, -0.8722, -0.9279, -0.9050, -0.8336,
         -0.8919, -0.7528, -0.9089, -0.8895, -0.8987, -0.9076, -0.8407]]), tensor([-0.8213]))]


In [21]:
from torch.utils.data import DataLoader

train_loader = DataLoader(dataset=train_inout_seq, 
                          batch_size=64, 
                          shuffle=True)
train_loader

<torch.utils.data.dataloader.DataLoader at 0x1b6deb3a848>

In [22]:
help(nn.Sequential)

Help on class Sequential in module torch.nn.modules.container:

class Sequential(torch.nn.modules.module.Module)
 |  Sequential(*args)
 |  
 |  A sequential container.
 |  Modules will be added to it in the order they are passed in the constructor.
 |  Alternatively, an ordered dict of modules can also be passed in.
 |  
 |  To make it easier to understand, here is a small example::
 |  
 |      # Example of using Sequential
 |      model = nn.Sequential(
 |                nn.Conv2d(1,20,5),
 |                nn.ReLU(),
 |                nn.Conv2d(20,64,5),
 |                nn.ReLU()
 |              )
 |  
 |      # Example of using Sequential with OrderedDict
 |      model = nn.Sequential(OrderedDict([
 |                ('conv1', nn.Conv2d(1,20,5)),
 |                ('relu1', nn.ReLU()),
 |                ('conv2', nn.Conv2d(20,64,5)),
 |                ('relu2', nn.ReLU())
 |              ]))
 |  
 |  Method resolution order:
 |      Sequential
 |      torch.nn.modules.module.Modul




In [23]:
#定义模型
class CNN_Series(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1=nn.Sequential(
            nn.Conv1d(
                in_channels=1,
                out_channels=64,
                kernel_size=3,
            ),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2)
        )
        self.conv2=nn.Sequential(
            nn.Conv1d(
                in_channels=64,
                out_channels=32,
                kernel_size=3,
            ),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2)
        )
#         print(self.conv2.shape)
        self.fc=nn.Linear(64,1)
        
    def forward(self,indata):
        x=self.conv1(indata)
        x=self.conv2(x)
        x=x.view(x.size(0),-1)
        out=self.fc(x)
        return out

In [24]:
# 定义损失函数，优化方法
# import torch.optim as optim

model = CNN_Series()
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
print(model)

CNN_Series(
  (conv1): Sequential(
    (0): Conv1d(1, 64, kernel_size=(3,), stride=(1,))
    (1): ReLU()
    (2): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv2): Sequential(
    (0): Conv1d(64, 32, kernel_size=(3,), stride=(1,))
    (1): ReLU()
    (2): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc): Linear(in_features=64, out_features=1, bias=True)
)


In [25]:
epochs = 10

for i in range(epochs):
    for seq, labels in train_loader:
        # print(seq.shape,len(labels))
        optimizer.zero_grad()
        # model.hidden_cell = torch.zeros(1, seq.shape[0], model.hidden_layer_size)

        y_pred = model(seq)

        single_loss = loss_function(y_pred, labels)
        single_loss.backward()
        optimizer.step()

    if i%2 == 1:
        print(f'epoch: {i:3} loss: {single_loss.item():10.8f}')

epoch:   1 loss: 0.00502253
epoch:   3 loss: 0.00467997
epoch:   5 loss: 0.00356072
epoch:   7 loss: 0.00435790
epoch:   9 loss: 0.00415384


In [26]:
fut_pred = test_data_size
print(fut_pred)
test_inputs = train_data_normalized[-train_window:].tolist()
print(test_inputs)

1597
[-0.9181317090988159, -0.8367882370948792, -0.8979270458221436, -0.9057990312576294, -0.7790606021881104, -0.8472973108291626, -0.882707953453064, -0.8813959360122681, -0.9194437265396118, -0.9242324829101562, -0.6006297469139099, -0.14143268764019012, -0.8236683011054993, -0.5481500625610352, -0.8892679214477539]


In [27]:
model.eval()

for i in range(fut_pred):
    seq = torch.FloatTensor(test_inputs[-train_window:])
    seq = seq.unsqueeze(0)
    seq = seq.unsqueeze(0)
    # print(seq.shape)
    with torch.no_grad():
    # model.hidden = torch.zeros(1, seq.shape[0], model.hidden_layer_size) # (num_layers, batch_size, hidden_size)
                        
        test_inputs.append(model(seq).item())
    # print(test_inputs)
    # print('====================================')


In [28]:
len(test_inputs)

1612

In [29]:
actual_predictions = scaler.inverse_transform(np.array(test_inputs[train_window:] ).reshape(-1, 1))
actual_predictions

array([[737125.28252602],
       [729104.72661257],
       [805464.58035707],
       ...,
       [821175.86559057],
       [821175.86559057],
       [821175.86559057]])

In [17]:
actual_predictions.shape

(1597, 1)