## 全连接神经网络

In [32]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

import numpy as np
import matplotlib.pyplot as plt



## 导入数据

In [33]:

dataset_numpy = np.loadtxt('./dataset/stock_2.csv', delimiter=",", dtype='str')

In [34]:
dataset_numpy[:5,:]

array([['index_code', 'date', 'open', 'close', 'low', 'high', 'volume',
        'money', 'change', 'label'],
       ['sh000001', '1990/12/20', '104.3', '104.39', '99.98', '104.39',
        '197000', '85000', '0.044108822', '109.13'],
       ['sh000001', '1990/12/21', '109.07', '109.13', '103.73', '109.13',
        '28000', '16100', '0.045406648', '114.55'],
       ['sh000001', '1990/12/24', '113.57', '114.55', '109.13', '114.55',
        '32000', '31100', '0.049665537', '120.25'],
       ['sh000001', '1990/12/25', '120.09', '120.25', '114.55', '120.25',
        '15000', '6500', '0.04975993', '125.27']], dtype='<U12')

In [35]:
import pandas as pd
dataset = pd.read_csv('./dataset/stock_2.csv',delimiter=',',dtype='str')
dataset.headd(5)

AttributeError: 'DataFrame' object has no attribute 'headd'

### 用pandas 导入的数据处理

In [36]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6109 entries, 0 to 6108
Data columns (total 10 columns):
index_code    6109 non-null object
date          6109 non-null object
open          6109 non-null object
close         6109 non-null object
low           6109 non-null object
high          6109 non-null object
volume        6109 non-null object
money         6109 non-null object
change        6109 non-null object
label         6109 non-null object
dtypes: object(10)
memory usage: 477.3+ KB


In [37]:
#pandas 转 numpy ,网络只能输入矩阵，不能输入pandas的数据类型
dataset_np = dataset.values
dataset_np[:5, :] #查看前面5行

array([['sh000001', '1990/12/20', '104.3', '104.39', '99.98', '104.39',
        '197000', '85000', '0.044108822', '109.13'],
       ['sh000001', '1990/12/21', '109.07', '109.13', '103.73', '109.13',
        '28000', '16100', '0.045406648', '114.55'],
       ['sh000001', '1990/12/24', '113.57', '114.55', '109.13', '114.55',
        '32000', '31100', '0.049665537', '120.25'],
       ['sh000001', '1990/12/25', '120.09', '120.25', '114.55', '120.25',
        '15000', '6500', '0.04975993', '125.27'],
       ['sh000001', '1990/12/26', '125.27', '125.27', '120.25', '125.27',
        '100000', '53700', '0.041746362', '125.28']], dtype=object)

### 去掉前面两个特征

In [38]:
dataset_np.shape

(6109, 10)

In [39]:
dataset_array = dataset_np[:, 2:] #去掉前面两列
dataset_array.shape

(6109, 8)

In [40]:
dataset_array[:5,:]

array([['104.3', '104.39', '99.98', '104.39', '197000', '85000',
        '0.044108822', '109.13'],
       ['109.07', '109.13', '103.73', '109.13', '28000', '16100',
        '0.045406648', '114.55'],
       ['113.57', '114.55', '109.13', '114.55', '32000', '31100',
        '0.049665537', '120.25'],
       ['120.09', '120.25', '114.55', '120.25', '15000', '6500',
        '0.04975993', '125.27'],
       ['125.27', '125.27', '120.25', '125.27', '100000', '53700',
        '0.041746362', '125.28']], dtype=object)

### 将object类型强转为float32类型

In [41]:
dataset_array = dataset_array.astype('float32')

In [42]:
dataset_array.dtype

dtype('float32')

In [43]:
dataset_array.shape

(6109, 8)

In [44]:
dataset_array[:5,:]

array([[1.0430000e+02, 1.0439000e+02, 9.9980003e+01, 1.0439000e+02,
        1.9700000e+05, 8.5000000e+04, 4.4108823e-02, 1.0913000e+02],
       [1.0907000e+02, 1.0913000e+02, 1.0373000e+02, 1.0913000e+02,
        2.8000000e+04, 1.6100000e+04, 4.5406647e-02, 1.1455000e+02],
       [1.1357000e+02, 1.1455000e+02, 1.0913000e+02, 1.1455000e+02,
        3.2000000e+04, 3.1100000e+04, 4.9665537e-02, 1.2025000e+02],
       [1.2009000e+02, 1.2025000e+02, 1.1455000e+02, 1.2025000e+02,
        1.5000000e+04, 6.5000000e+03, 4.9759932e-02, 1.2527000e+02],
       [1.2527000e+02, 1.2527000e+02, 1.2025000e+02, 1.2527000e+02,
        1.0000000e+05, 5.3700000e+04, 4.1746363e-02, 1.2528000e+02]],
      dtype=float32)

## 分割数据集

### 把x 和 y 给切出来

In [45]:
X = dataset_array[:, :-1]
y = dataset_array[:, :-1]

In [46]:
from sklearn.model_selection import train_test_split

X_train,y_train,X_test,y_test= train_test_split(X,y,test_size=0.2,random_state=12)

### 标准化

In [47]:
from sklearn.preprocessing import StandardScaler

standard = StandardScaler()
standard.fit(X_train)

X_train_standard = standard.transform(X_train)
X_test_standard = standard.transform(X_test)

In [48]:
print(X_train_standard.shape)
print(X_test_standard.shape)

(4887, 7)
(4887, 7)


## 搭建模型 CNN

In [55]:
class Net(nn.Module):
    def __init__(self):
        super(Net,self).__init__()
        #input[-1,7]
        self.hidden1 = nn.Linear(7,64)#64个神经元 [-1,7]*[7,64]=[-1,64]
        self.hidden2 = nn.Linear(64,32)#[-1,64]*[64,32]=[-1,32]
        self.hidden3 = nn.Linear(32,10)#[-1,32]*[]
        
        #output
        self.pred = nn.Linear(10,1)#[-1,10]*[10,1]=[-1,1]
        
    def forward(self,x):
        
        x = F.relu(self.hidden1(x))
        x = F.relu(self.hidden2(x))
        x = F.relu(self.hidden3(x))
        out = self.pred(x)
        
        return out

In [56]:
Net()

Net(
  (hidden1): Linear(in_features=7, out_features=64, bias=True)
  (hidden2): Linear(in_features=64, out_features=32, bias=True)
  (hidden3): Linear(in_features=32, out_features=10, bias=True)
  (pred): Linear(in_features=10, out_features=1, bias=True)
)

In [58]:
net = Net()
net

Net(
  (hidden1): Linear(in_features=7, out_features=64, bias=True)
  (hidden2): Linear(in_features=64, out_features=32, bias=True)
  (hidden3): Linear(in_features=32, out_features=10, bias=True)
  (pred): Linear(in_features=10, out_features=1, bias=True)
)

### gradient descent

In [60]:
optimizer = torch.optim.SGD(net.parameters(),lr=0.01)
loss_function = nn.MSELoss()



In [None]:
EPOCH = 1000

for epoch in range(EPOCH):
    #将array to tensor
    input_x = Variable(torch.FloatTensor(X_train_standard))
    input_y = Variable(torch.FloatTensor(y_train))
    
    y_predit= net(input_x) #将输入数据 扔进 神经网络 去预测出一个值
    loss = loss_function(input_y,y_predit) #计算MSE
    optimizer.zero_gard() #梯度归零
    loss.backward() #反向传播，梯度下降
    optimizer.step() #找到新的梯度
    
    print('EPOCH:',epoch , "loss:",loss.data)
    
    