## 全连接神经网络

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

import numpy as np
import matplotlib.pyplot as plt

In [8]:
import os
import requests

DATASET_URL = 'https://raw.githubusercontent.com/ZhengDWu/NFSYSU-Wu-Zhan/master/dataset/stock_2.csv' # remote location of the raw CSV; default source for fetch_housing_data()
DATASET_PATH = './datasets' # local directory the CSV is saved into; default target for fetch_housing_data()

def fetch_housing_data(dataset_url=DATASET_URL, dataset_path=DATASET_PATH):
    """Download the dataset CSV into ``dataset_path`` unless it is already there.

    Despite the name, the file handled here is ``stock_2.csv``; the name is
    kept so existing calls keep working.

    Args:
        dataset_url: URL the CSV is downloaded from.
        dataset_path: Directory the file is stored in; created when missing.

    Raises:
        requests.HTTPError: If the server answers with an error status.
            Previously a failed download was silently ignored and the
            notebook carried on without a file.
        requests.RequestException: On connection problems or timeout.
    """
    # Create the target folder on first use.
    if not os.path.exists(dataset_path):
        os.makedirs(dataset_path)
    else:
        print("Folder exists")

    dataset_file = os.path.join(dataset_path, "stock_2.csv")
    # Nothing to do when a previous run already fetched the file.
    if os.path.isfile(dataset_file):
        print('File exists')
        return

    print('Downloading File')
    # A timeout keeps the cell from hanging forever on a dead connection.
    response = requests.get(dataset_url, timeout=30)
    # Fail loudly on 4xx/5xx instead of continuing without data.
    response.raise_for_status()

    # Write to a temporary name first and rename afterwards, so an interrupted
    # write never leaves a truncated file that later looks like "File exists".
    partial_file = dataset_file + '.part'
    with open(partial_file, 'wb') as file:
        file.write(response.content)
    os.replace(partial_file, dataset_file)
    print('Download finish...')

# Run the download once; the function skips the request when the file is already on disk.
fetch_housing_data()

Folder exists
Downloading File
Download finish...


### 导入数据

In [3]:
import pandas as pd

# Read every column as text; the numeric columns are converted to float32
# later, after the identifier and date columns have been dropped.
# The path has to match where the download cell stores the file
# ('./datasets'); the previous './dataset' directory is never created by
# this notebook.
dataset = pd.read_csv('./datasets/stock_2.csv', delimiter=',', dtype='str')

In [4]:
dataset.head(5)

Unnamed: 0,index_code,date,open,close,low,high,volume,money,change,label
0,sh000001,1990/12/20,104.3,104.39,99.98,104.39,197000,85000,0.044108822,109.13
1,sh000001,1990/12/21,109.07,109.13,103.73,109.13,28000,16100,0.045406648,114.55
2,sh000001,1990/12/24,113.57,114.55,109.13,114.55,32000,31100,0.049665537,120.25
3,sh000001,1990/12/25,120.09,120.25,114.55,120.25,15000,6500,0.04975993,125.27
4,sh000001,1990/12/26,125.27,125.27,120.25,125.27,100000,53700,0.041746362,125.28


### 第二种导入方式：numpy

In [6]:
# np.loadtxt does not treat the first line specially, so row 0 of the result
# holds the column names. The path has to match where the download cell
# stores the file ('./datasets').
dataset_numpy = np.loadtxt('./datasets/stock_2.csv', delimiter=",", dtype='str')

In [7]:
dataset_numpy[:5,:]

array([['index_code', 'date', 'open', 'close', 'low', 'high', 'volume',
        'money', 'change', 'label'],
       ['sh000001', '1990/12/20', '104.3', '104.39', '99.98', '104.39',
        '197000', '85000', '0.044108822', '109.13'],
       ['sh000001', '1990/12/21', '109.07', '109.13', '103.73', '109.13',
        '28000', '16100', '0.045406648', '114.55'],
       ['sh000001', '1990/12/24', '113.57', '114.55', '109.13', '114.55',
        '32000', '31100', '0.049665537', '120.25'],
       ['sh000001', '1990/12/25', '120.09', '120.25', '114.55', '120.25',
        '15000', '6500', '0.04975993', '125.27']], dtype='<U12')

### 用 pandas 导入的数据来处理

In [9]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6109 entries, 0 to 6108
Data columns (total 10 columns):
index_code    6109 non-null object
date          6109 non-null object
open          6109 non-null object
close         6109 non-null object
low           6109 non-null object
high          6109 non-null object
volume        6109 non-null object
money         6109 non-null object
change        6109 non-null object
label         6109 non-null object
dtypes: object(10)
memory usage: 477.3+ KB


### pandas 转 numpy，网络只能输入矩阵，不能输入 pandas 的数据类型

In [10]:
# Take the underlying ndarray out of the DataFrame; it has object dtype because every column was read as str.
dataset_np = dataset.values

In [11]:
dataset_np[:5,:] # 查看前面 5 行

array([['sh000001', '1990/12/20', '104.3', '104.39', '99.98', '104.39',
        '197000', '85000', '0.044108822', '109.13'],
       ['sh000001', '1990/12/21', '109.07', '109.13', '103.73', '109.13',
        '28000', '16100', '0.045406648', '114.55'],
       ['sh000001', '1990/12/24', '113.57', '114.55', '109.13', '114.55',
        '32000', '31100', '0.049665537', '120.25'],
       ['sh000001', '1990/12/25', '120.09', '120.25', '114.55', '120.25',
        '15000', '6500', '0.04975993', '125.27'],
       ['sh000001', '1990/12/26', '125.27', '125.27', '120.25', '125.27',
        '100000', '53700', '0.041746362', '125.28']], dtype=object)

In [12]:
dataset_np.shape

(6109, 10)

In [13]:
dataset_np.dtype

dtype('O')

### 去掉前两列（index_code 和 date）

In [14]:
# Drop the first two columns (index_code, date); the remaining eight columns are numeric strings.
dataset_array = dataset_np[:,2:] 

In [15]:
dataset_array.shape

(6109, 8)

In [16]:
dataset_array[:5,:]

array([['104.3', '104.39', '99.98', '104.39', '197000', '85000',
        '0.044108822', '109.13'],
       ['109.07', '109.13', '103.73', '109.13', '28000', '16100',
        '0.045406648', '114.55'],
       ['113.57', '114.55', '109.13', '114.55', '32000', '31100',
        '0.049665537', '120.25'],
       ['120.09', '120.25', '114.55', '120.25', '15000', '6500',
        '0.04975993', '125.27'],
       ['125.27', '125.27', '120.25', '125.27', '100000', '53700',
        '0.041746362', '125.28']], dtype=object)

### 将 object 类型强制转换为 float32 类型

In [17]:
# Parse the numeric strings into float32 values.
dataset_array = dataset_array.astype('float32')

In [18]:
dataset_array.dtype

dtype('float32')

In [19]:
dataset_array.shape

(6109, 8)

In [20]:
dataset_array[:5,:]

array([[1.0430000e+02, 1.0439000e+02, 9.9980003e+01, 1.0439000e+02,
        1.9700000e+05, 8.5000000e+04, 4.4108823e-02, 1.0913000e+02],
       [1.0907000e+02, 1.0913000e+02, 1.0373000e+02, 1.0913000e+02,
        2.8000000e+04, 1.6100000e+04, 4.5406647e-02, 1.1455000e+02],
       [1.1357000e+02, 1.1455000e+02, 1.0913000e+02, 1.1455000e+02,
        3.2000000e+04, 3.1100000e+04, 4.9665537e-02, 1.2025000e+02],
       [1.2009000e+02, 1.2025000e+02, 1.1455000e+02, 1.2025000e+02,
        1.5000000e+04, 6.5000000e+03, 4.9759932e-02, 1.2527000e+02],
       [1.2527000e+02, 1.2527000e+02, 1.2025000e+02, 1.2527000e+02,
        1.0000000e+05, 5.3700000e+04, 4.1746363e-02, 1.2528000e+02]],
      dtype=float32)

### 把 X 和 y 给切出来

In [21]:
# Features: every column except the last one (open, close, low, high, volume, money, change).
X = dataset_array[:, :-1]
# Target: the last column ('label'); the -1: slice keeps it 2-D, i.e. shape (n, 1).
y = dataset_array[:, -1:]

In [22]:
print(X.shape)
print(y.shape)

(6109, 7)
(6109, 1)


### split training set and testing set

In [24]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; random_state pins the split so it is repeatable.
# NOTE(review): the rows are date-ordered and train_test_split shuffles by default — confirm a shuffled split is intended.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=19)

### 标准化

In [25]:
from sklearn.preprocessing import StandardScaler

standard = StandardScaler()
# Fit on the training split only, so no statistics from the test split leak into training.
standard.fit(X_train)

# Apply the training-set statistics to both splits.
X_train_standard = standard.transform(X_train)
X_test_standard = standard.transform(X_test)

In [26]:
print(X_train_standard.shape)
print(X_test_standard.shape)

(4887, 7)
(1222, 7)


In [27]:
print(y_train.shape)
print(y_test.shape)

(4887, 1)
(1222, 1)


### 搭建模型---全连接神经网络

In [28]:
class Net(nn.Module):
    """Fully connected regressor: 7 inputs -> 64 -> 32 -> 10 -> 1 output.

    Every hidden layer is followed by a ReLU; the output layer stays linear.
    """

    def __init__(self):
        super().__init__()

        # Hidden stack; the input has shape [-1, 7].
        self.hidden1 = nn.Linear(in_features=7, out_features=64)   # [-1, 7]  -> [-1, 64]
        self.hidden2 = nn.Linear(in_features=64, out_features=32)  # [-1, 64] -> [-1, 32]
        self.hidden3 = nn.Linear(in_features=32, out_features=10)  # [-1, 32] -> [-1, 10]

        # Output layer: one value per sample.
        self.pred = nn.Linear(in_features=10, out_features=1)      # [-1, 10] -> [-1, 1]

    def forward(self, x):
        """Run ``x`` through the three ReLU-activated hidden layers and the linear head."""
        for hidden_layer in (self.hidden1, self.hidden2, self.hidden3):
            # ReLU provides the non-linearity between the linear maps.
            x = F.relu(hidden_layer(x))
        return self.pred(x)

In [29]:
# Seed torch's RNG before the layers are created: the initial weights are
# random, so without a seed the loss curve differs on every Restart & Run All.
torch.manual_seed(19)  # same seed value as the train/test split
net = Net()
net

Net(
  (hidden1): Linear(in_features=7, out_features=64, bias=True)
  (hidden2): Linear(in_features=64, out_features=32, bias=True)
  (hidden3): Linear(in_features=32, out_features=10, bias=True)
  (pred): Linear(in_features=10, out_features=1, bias=True)
)

### 定义优化器（Adam）和损失函数（MSE）

In [30]:
# Adam optimizer over all parameters of `net`, learning rate 0.01.
optimizer = torch.optim.Adam(net.parameters(), lr=0.01)
# Mean squared error between prediction and label.
loss_func = nn.MSELoss()

### 训练神经网络

In [32]:
EPOCH = 1000
LOG_EVERY = 50  # print the training loss once every this many epochs

# The training data is the same in every epoch, so build the float32 tensors
# once up front. Plain tensors are enough; the Variable wrapper is deprecated.
input_x = torch.tensor(X_train_standard, dtype=torch.float32)
input_y = torch.tensor(y_train, dtype=torch.float32)

for epoch in range(EPOCH):
    y_predict = net(input_x)  # forward pass over the whole training set

    # nn.MSELoss takes (input, target): the prediction first, the label second.
    loss = loss_func(y_predict, input_y)

    optimizer.zero_grad()  # clear the gradients left over from the previous step
    loss.backward()        # backpropagation: compute the gradient of the loss w.r.t. every parameter
    optimizer.step()       # apply the Adam update to the parameters

    if (epoch+1) % LOG_EVERY == 0:
        # .item() extracts the scalar loss as a plain Python float.
        print('Epoch: ', (epoch+1), "Loss: ", loss.item())

Epoch:  50 Loss:  620170.5
Epoch:  100 Loss:  65354.62
Epoch:  150 Loss:  18175.773
Epoch:  200 Loss:  6535.317
Epoch:  250 Loss:  3406.4531
Epoch:  300 Loss:  2320.0774
Epoch:  350 Loss:  1811.5177
Epoch:  400 Loss:  1526.9849
Epoch:  450 Loss:  1345.066
Epoch:  500 Loss:  1222.3618
Epoch:  550 Loss:  1137.8502
Epoch:  600 Loss:  1079.0999
Epoch:  650 Loss:  1033.4211
Epoch:  700 Loss:  1004.6772
Epoch:  750 Loss:  986.0038
Epoch:  800 Loss:  971.4063
Epoch:  850 Loss:  959.3853
Epoch:  900 Loss:  948.88367
Epoch:  950 Loss:  939.2487
Epoch:  1000 Loss:  930.22296


### 用测试集预测结果