In [349]:
import os
import random
import time
import pandas as pd
import numpy as np
import mxnet as mx
from mxnet import autograd, gluon, image, init, nd
from mxnet.gluon import data as gdata, loss as gloss, nn, utils as gutils

In [395]:
# --- Configuration ------------------------------------------------------
# Every label-related column present in the raw frames. load_data() drops
# all of them from the frame to obtain the feature matrix.
labelnames = (
    'ret_1|ret_1_alpha|ret_1_label|ret_1_weight|'
    'ret_2|ret_2_alpha|ret_2_label|ret_2_weight|'
    'ret_3|ret_3_alpha|ret_3_label|ret_3_weight|'
    'ret_4|ret_4_alpha|ret_4_label|ret_4_weight|'
    'ret_5|ret_5_alpha|ret_5_label|ret_5_weight'
).split("|")

# Columns returned by load_data() as the target and as the sample weight.
label, weight = 'ret_1_label', 'ret_1_weight'

# Date windows in YYYYMMDD form; load_data() includes the start date and
# excludes the end date.
start_train, end_train = '20181227', '20181231'
start_test, end_test = '20190104', '20190107'

# Path template for the daily files; load_data() fills in {YYYYMMDD}.
datadir = '~\\Documents\\NewData\\data4nn_pv\\data4nn_pv_{YYYYMMDD}.h5'

# Training hyper-parameters and the device everything runs on.
batch_size, lr, num_epochs = 512, 0.05, 5
ctx = mx.cpu()

In [363]:
def data_iter(batch_size, features, labels):
    """Yield shuffled mini-batches of ``(features, labels)``.

    This is a generator, so it supports a single pass: once it has been
    consumed, call ``data_iter`` again to get a new (re-shuffled) pass.

    The row indices ``0 .. len(features) - 1`` are shuffled with
    ``random.shuffle`` and then consumed in chunks of ``batch_size``; each
    chunk is gathered from ``features`` and ``labels`` along axis 0.
    """
    total = len(features)
    order = list(range(total))
    random.shuffle(order)
    for start in range(0, total, batch_size):
        # List slicing clamps at the end of the list, so the final batch is
        # simply shorter when `total` is not a multiple of `batch_size`.
        picked = nd.array(order[start:start + batch_size])
        batch_features = features.take(picked, axis=0)
        batch_labels = labels.take(picked, axis=0)
        yield batch_features, batch_labels

In [364]:
def load_data(startDate, endDate, labelnames, label, weight, datadir):
    """Read the daily HDF5 files of a date window and load them into memory.

    Parameters
    ----------
    startDate, endDate : str or datetime-like
        Bounds of the window, handed to ``pandas.date_range``. ``startDate``
        is included; ``endDate`` is excluded.
    labelnames : list of str
        Columns dropped from the concatenated frame to obtain the features.
    label : str
        Column returned as the labels.
    weight : str
        Column returned as the sample weights.
    datadir : str
        Path template containing ``{YYYYMMDD}``; a leading ``~`` is expanded.
        Days for which no file exists are skipped.

    Returns
    -------
    tuple
        ``(features, labels, weight)``, each converted with ``nd.array``.

    Raises
    ------
    ValueError
        If no file exists for any day of the window.
    """
    path = os.path.expanduser(datadir)

    # Half-open window [startDate, endDate). The end point is filtered out
    # explicitly rather than with date_range(closed='left'), because the
    # `closed` keyword no longer exists in pandas >= 2.0.
    days = pd.date_range(startDate, endDate)
    days = days[days < pd.Timestamp(endDate)]

    frames = []
    for day in days:
        file = path.format(YYYYMMDD=day.strftime("%Y%m%d"))
        if os.path.exists(file):
            frames.append(pd.read_hdf(file))

    # pd.concat([]) would fail with an unhelpful "No objects to concatenate";
    # report what was actually searched for instead.
    if not frames:
        raise ValueError(
            "No data files found for window [%s, %s) using template %r"
            % (startDate, endDate, path))

    rawdata = pd.concat(frames)
    # Separate local names keep the `label` / `weight` column-name arguments
    # from being overwritten by the data they select.
    label_values = rawdata[label]
    weight_values = rawdata[weight]
    feature_values = rawdata.drop(columns=labelnames)
    return nd.array(feature_values), nd.array(label_values), nd.array(weight_values)

In [396]:
# Train and test sets are read with the same label/weight columns and the
# same path template; only the date window differs.
features_train, labels_train, weight_train = load_data(
    start_train, end_train, labelnames, label, weight, datadir)
features_test, labels_test, weight_test = load_data(
    start_test, end_test, labelnames, label, weight, datadir)

In [366]:
# Reshape every sample to (6, 60), giving arrays of shape (batch, 6, 60).
# The network's first Conv1D reports "6 -> 16" channels on this input, i.e.
# axis 1 is treated as the channel axis.
features_train, features_test = (
    flat.reshape((-1, 6, 60)) for flat in (features_train, features_test)
)

In [370]:
class CovNet(nn.Block):
    """Convolution stage: BatchNorm -> Conv1D -> BatchNorm -> ReLU -> MaxPool1D.

    Parameters
    ----------
    channels : int
        Number of output channels of the convolution.
    kernel_size : int
        Width of the convolution kernel.
    strides : int, default 1
        Stride of the convolution.
    **kwargs
        Forwarded to ``nn.Block``.
    """

    def __init__(self, channels, kernel_size, strides=1, **kwargs):
        super(CovNet, self).__init__(**kwargs)
        self.net = nn.Sequential()
        stages = [
            nn.BatchNorm(),
            # Padding is fixed at 1, so the sequence length is preserved only
            # for kernel_size=3 with strides=1.
            nn.Conv1D(channels, kernel_size, padding=1, strides=strides),
            nn.BatchNorm(),
            nn.Activation('relu'),
            # Window 3, stride 1, padding 1: pooling keeps the length.
            nn.MaxPool1D(pool_size=3, strides=1, padding=1),
        ]
        self.net.add(*stages)

    def forward(self, X):
        """Apply the five stacked layers to ``X`` and return the result."""
        return self.net(X)

In [371]:
class WideNet(nn.Block):
    """Four parallel dense branches whose outputs are concatenated.

    ``c1`` .. ``c4`` give the number of units of the fully connected layers
    in branch 1 .. 4: element 0 is the width of the branch's first Dense
    layer and element 1 the width of its second. Both layers use ReLU.
    The notebook instantiates it with c1=[512, 1], c2=[256, 2],
    c3=[128, 3] and c4=[64, 4].
    """

    def __init__(self, c1, c2, c3, c4, **kwargs):
        super(WideNet, self).__init__(**kwargs)
        # Branches 1 to 4, built from c1 to c4 in that order.
        self.p1 = self._branch(c1)
        self.p2 = self._branch(c2)
        self.p3 = self._branch(c3)
        self.p4 = self._branch(c4)

    @staticmethod
    def _branch(units):
        """Build one branch: Dense(units[0], relu) then Dense(units[1], relu)."""
        branch = nn.Sequential()
        branch.add(nn.Dense(units[0], activation='relu'),
                   nn.Dense(units[1], activation='relu'))
        return branch

    def forward(self, x):
        """Feed ``x`` to every branch and concatenate the results on axis 1."""
        outputs = [branch(x) for branch in (self.p1, self.p2, self.p3, self.p4)]
        return nd.concat(*outputs, dim=1)

In [386]:
# Model: two convolution stages -> flatten -> four parallel dense branches
# -> 2-unit output layer.
net = nn.Sequential()
net.add(CovNet(16, 3),
        CovNet(32, 3),
        nn.Flatten(),
        WideNet(c1=[512, 1], c2=[256, 2], c3=[128, 3], c4=[64, 4]),
        # The output layer emits raw scores (no activation). train_ms() feeds
        # them to SoftmaxCrossEntropyLoss, which applies the softmax itself;
        # passing them through a sigmoid first would confine both scores to
        # (0, 1) and cap the predicted class probability at about 0.73.
        nn.Dense(2),
        )
# Parameter shapes that depend on the input stay unknown (0) until the first
# forward pass, which is why the listing printed below shows zeros.
net.initialize(force_reinit=True, init=init.Xavier())
print(net.collect_params())
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})

sequential116_ (
  Parameter batchnorm66_gamma (shape=(0,), dtype=<class 'numpy.float32'>)
  Parameter batchnorm66_beta (shape=(0,), dtype=<class 'numpy.float32'>)
  Parameter batchnorm66_running_mean (shape=(0,), dtype=<class 'numpy.float32'>)
  Parameter batchnorm66_running_var (shape=(0,), dtype=<class 'numpy.float32'>)
  Parameter conv40_weight (shape=(16, 0, 3), dtype=<class 'numpy.float32'>)
  Parameter conv40_bias (shape=(16,), dtype=<class 'numpy.float32'>)
  Parameter batchnorm67_gamma (shape=(0,), dtype=<class 'numpy.float32'>)
  Parameter batchnorm67_beta (shape=(0,), dtype=<class 'numpy.float32'>)
  Parameter batchnorm67_running_mean (shape=(0,), dtype=<class 'numpy.float32'>)
  Parameter batchnorm67_running_var (shape=(0,), dtype=<class 'numpy.float32'>)
  Parameter batchnorm68_gamma (shape=(0,), dtype=<class 'numpy.float32'>)
  Parameter batchnorm68_beta (shape=(0,), dtype=<class 'numpy.float32'>)
  Parameter batchnorm68_running_mean (shape=(0,), dtype=<class 'numpy.float

In [373]:
def evaluate_accuracy(data_iter, net, ctx=[mx.cpu()]):
    """Evaluate accuracy of a model on the given data set.

    Parameters
    ----------
    data_iter : iterable
        Yields ``(features, labels)`` batches.
    net : callable
        Model; ``net(X).argmax(axis=1)`` is taken as the predicted class.
    ctx : mx.Context or list of mx.Context
        Device(s) on which each batch is evaluated. A single context is
        wrapped into a list.

    Returns
    -------
    float
        Fraction of samples whose predicted class equals the label, or
        ``nan`` when ``data_iter`` yields no samples.
    """
    if isinstance(ctx, mx.Context):
        ctx = [ctx]
    acc_sum, n = nd.array([0]), 0
    for features, labels in data_iter:
        # Distribute the batch over the devices. This is done inline so the
        # notebook does not depend on an external `_get_batch` helper that
        # is neither defined nor imported here.
        if labels.dtype != features.dtype:
            labels = labels.astype(features.dtype)
        # even_split=False tolerates a batch whose size is not a multiple
        # of the number of devices.
        feature_parts = gutils.split_and_load(features, ctx, even_split=False)
        label_parts = gutils.split_and_load(labels, ctx, even_split=False)
        for X, y in zip(feature_parts, label_parts):
            y = y.astype('float32')
            acc_sum += (net(X).argmax(axis=1) == y).sum().copyto(mx.cpu())
            n += y.size
        acc_sum.wait_to_read()
    if n == 0:
        # Nothing was evaluated (e.g. an already-consumed generator); report
        # "no value" instead of failing with ZeroDivisionError.
        return float('nan')
    return acc_sum.asscalar() / n

In [374]:
def train_ms(net, train_iter, test_iter, batch_size, trainer, num_epochs, ctx=mx.cpu()):
    """Train and evaluate a model with CPU or GPU.

    Parameters
    ----------
    net : callable
        Model to train.
    train_iter, test_iter : iterable
        Yield ``(X, y)`` batches. One-shot iterators such as generators are
        accepted: their batches are cached up front so that they can be
        replayed on every epoch (in the same order each time).
    batch_size : int
        Value passed to ``trainer.step``.
    trainer : gluon.Trainer
        Optimizer wrapper that updates the parameters of ``net``.
    num_epochs : int
        Number of passes over ``train_iter``.
    ctx : mx.Context
        Device the training batches are moved to.

    Prints, once per epoch, the mean training loss, the training accuracy,
    the test accuracy and the elapsed time.

    Raises
    ------
    ValueError
        If ``train_iter`` yields no samples.
    """
    from collections.abc import Iterator

    # A generator can be consumed only once. Without caching, every epoch
    # after the first would see no data and the per-epoch averages below
    # would divide by zero.
    if isinstance(train_iter, Iterator):
        train_iter = list(train_iter)
    if isinstance(test_iter, Iterator):
        test_iter = list(test_iter)

    loss = gloss.SoftmaxCrossEntropyLoss()
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n, start = 0.0, 0.0, 0, time.time()
        for X, y in train_iter:
            X, y = X.as_in_context(ctx), y.as_in_context(ctx)
            with autograd.record():
                y_hat = net(X)
                l = loss(y_hat, y).sum()
            l.backward()
            trainer.step(batch_size)
            y = y.astype('float32')
            train_l_sum += l.asscalar()
            train_acc_sum += (y_hat.argmax(axis=1) == y).sum().asscalar()
            n += y.size
        if n == 0:
            raise ValueError('train_iter yielded no samples; nothing to train on')
        test_acc = evaluate_accuracy(test_iter, net, ctx)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, '
              'time %.1f sec'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc,
                 time.time() - start))

In [375]:
# Plain SGD over all parameters of `net`, with learning rate `lr`.
trainer = gluon.Trainer(
    params=net.collect_params(),
    optimizer='sgd',
    optimizer_params={'learning_rate': lr},
)

In [376]:
class _EpochBatches:
    """Re-iterable source of mini-batches backed by ``data_iter``.

    ``data_iter`` returns a generator, which is exhausted after one pass.
    Training loops over the data once per epoch, so a bare generator would
    leave every epoch after the first without data. Each ``iter()`` on this
    wrapper starts a new, freshly shuffled pass instead.
    """

    def __init__(self, batch_size, features, labels):
        self.batch_size = batch_size
        self.features = features
        self.labels = labels

    def __iter__(self):
        return data_iter(self.batch_size, self.features, self.labels)


train_iter = _EpochBatches(batch_size=batch_size, features=features_train, labels=labels_train)
test_iter = _EpochBatches(batch_size=batch_size, features=features_test, labels=labels_test)

In [397]:
# Train for `num_epochs` epochs on `ctx`; progress is printed per epoch.
train_ms(
    net=net,
    train_iter=train_iter,
    test_iter=test_iter,
    batch_size=batch_size,
    trainer=trainer,
    num_epochs=num_epochs,
    ctx=ctx,
)

KeyboardInterrupt: 

In [292]:
# Push a tiny batch through the network one layer at a time and print the
# shape after each layer.
# The input is taken from the training features: the name `feature` used
# here before is not defined anywhere in the notebook and fails on a fresh
# kernel. Three samples match the batch size of 3 in the recorded output.
X = features_train[:3]
for layer in net:
    X = layer(X)
    print(layer.name, 'output shape:\t', X.shape)

Conv1D(6 -> 16, kernel_size=(3,), stride=(1,), padding=(1,))
covnet23 output shape:	 (3, 16, 60)
Conv1D(16 -> 32, kernel_size=(3,), stride=(1,), padding=(1,))
covnet24 output shape:	 (3, 32, 60)


In [124]:
# Show the six columns whose names end in `_1`.
# NOTE(review): `features_back` is not assigned in any visible cell -- this
# relies on earlier kernel state; confirm where it comes from.
features_back[[
    'open_1', 'high_1', 'low_1',
    'avg_1', 'close_1', 'volume_1',
]]

Unnamed: 0_level_0,Unnamed: 1_level_0,open_1,high_1,low_1,avg_1,close_1,volume_1
dates,instruments,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2018-11-05,000001.SZ,11.04,11.16,10.83,11.0224,11.09,2212922.87
2018-11-05,000002.SZ,25.20,25.25,24.00,24.5443,24.62,652339.22
2018-11-05,000004.SZ,15.50,15.94,15.39,15.5786,15.76,12594.87
2018-11-05,000005.SZ,2.73,2.76,2.73,2.7462,2.76,47221.45
2018-11-05,000006.SZ,5.10,5.15,5.06,5.1127,5.14,143410.08
2018-11-05,000007.SZ,7.39,7.50,7.13,7.2262,7.17,234180.19
2018-11-05,000008.SZ,4.37,4.43,4.31,4.3564,4.37,405368.64
2018-11-05,000009.SZ,4.10,4.15,4.09,4.1274,4.14,148234.88
2018-11-05,000010.SZ,4.36,4.39,4.30,4.3374,4.38,39959.00
2018-11-05,000011.SZ,9.00,9.06,8.90,8.9913,9.06,46312.51


In [113]:
# Display the `features_back` frame.
# NOTE(review): `features_back` is not assigned in any visible cell -- this
# relies on earlier kernel state; confirm before re-running the notebook.
features_back

Unnamed: 0_level_0,Unnamed: 1_level_0,open_0,open_1,open_2,open_3,open_4,open_5,open_6,open_7,open_8,open_9,...,volume_50,volume_51,volume_52,volume_53,volume_54,volume_55,volume_56,volume_57,volume_58,volume_59
dates,instruments,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
2018-11-05,000001.SZ,10.95,11.04,10.99,10.95,10.78,11.20,11.29,10.80,10.90,11.20,...,649476.56,638272.55,670442.85,430240.36,818491.12,424168.36,806783.77,578953.16,690423.19,554010.08
2018-11-05,000002.SZ,24.48,25.20,24.90,23.68,23.31,24.28,23.71,22.18,22.23,22.88,...,424160.44,490384.44,472553.50,486784.81,489494.76,1201163.03,896200.17,410720.18,451653.09,315702.05
2018-11-05,000004.SZ,16.00,15.50,15.63,15.50,15.49,17.49,15.90,15.90,15.90,15.90,...,8149.00,15696.24,5622.00,5364.00,8594.46,17253.09,46074.31,50710.92,15929.09,19858.91
2018-11-05,000005.SZ,2.76,2.73,2.74,2.69,2.66,2.72,2.75,2.70,2.76,2.86,...,39300.00,55993.82,69249.00,101070.00,52695.27,56844.76,73136.04,99062.24,73806.24,87308.51
2018-11-05,000006.SZ,5.12,5.10,5.13,5.00,4.92,5.08,5.05,4.86,5.06,5.11,...,61919.36,67965.44,106777.56,129730.18,106356.62,151639.23,90436.49,65022.92,66732.73,82971.49
2018-11-05,000007.SZ,7.20,7.39,7.25,7.13,6.99,7.39,7.72,7.77,8.17,8.47,...,7341.00,6252.51,9919.00,4385.00,3706.35,8694.50,16457.97,10446.74,24227.28,23116.77
2018-11-05,000008.SZ,4.41,4.37,4.43,4.29,4.25,4.27,4.20,3.97,4.04,4.03,...,161600.00,200172.10,356897.35,367111.38,408715.77,787934.71,1970198.73,123521.00,60909.00,0.00
2018-11-05,000009.SZ,4.15,4.10,4.05,4.00,3.92,4.04,3.96,3.86,3.92,3.96,...,67049.97,45735.97,64944.41,65345.22,75110.20,70069.85,103612.10,71571.11,81454.27,73157.12
2018-11-05,000010.SZ,4.37,4.36,4.38,4.38,4.34,4.40,4.33,4.40,4.40,4.37,...,39849.55,23439.00,34221.10,43129.20,42916.20,19711.83,49412.87,45476.08,34991.04,22500.50
2018-11-05,000011.SZ,9.02,9.00,8.90,8.78,8.72,8.95,8.84,8.63,8.82,9.10,...,41925.00,46305.13,89276.20,110553.64,91825.94,54925.00,33895.01,39922.53,30049.67,56341.06


In [88]:
# List the column names of `features`.
# NOTE(review): no visible cell assigns a global `features` (load_data()
# returns NDArrays under other names) -- presumably leftover kernel state
# holding a DataFrame; confirm before re-running the notebook.
features.columns

Index(['open_0', 'open_1', 'open_2', 'open_3', 'open_4', 'open_5', 'open_6',
       'open_7', 'open_8', 'open_9',
       ...
       'volume_50', 'volume_51', 'volume_52', 'volume_53', 'volume_54',
       'volume_55', 'volume_56', 'volume_57', 'volume_58', 'volume_59'],
      dtype='object', length=360)