### import modules

In [None]:
import datetime
import timeit
import gzip
import pandas as pd
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten

# Number of price fields kept per 1-minute bar; loadInput compares each
# day's array shape against (240, K_DIM), so this must equal len(K_HEADER).
K_DIM = 4
# Columns selected from every 1-minute bar, in feature order.
K_HEADER = ['open','high','low','close']

### 读取日K线数据，并计算次日涨幅

In [None]:
def loadDayKLine(filepath):
    """Load daily K-line records and attach the next-day opening rise.

    The file is a gzip-compressed text file with one record per line:
    ``symbol,datetime,open,high,low,close,...``. The original notebook
    comment lists the quote fields as
    ``[open, high, low, close, volume, amount, turnover]``. A literal
    ``None`` field is read as ``0.0``.

    Records of one symbol are assumed to appear in chronological order,
    because each record's ``'rise'`` is filled in when the next record of
    the same symbol is read.

    Args:
        filepath: Path to the gzip-compressed daily K-line file.

    Returns:
        A nested dict of the form::

            {
                symbol: {
                    date: {
                        'quote': [open, high, low, close, ...],
                        'rise': next_day_open / close - 1,
                    },
                    ...
                },
                ...
            }

        ``'rise'`` is absent for the last record of each symbol, and for
        any day where the close or the following open is missing (0), since
        no meaningful ratio exists there.
    """
    dayKLines = {}
    prevDayData = {}
    with gzip.open(filepath, 'rb') as f:
        # Stream the file instead of materialising every line at once.
        for rawLine in f:
            line = rawLine.decode()
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline).
                continue

            symbol, dateTime, quoteString = line.split(',', 2)
            quote = [float(field) for field in quoteString.replace('None', '0').split(',')]
            date = dateTime.split()[0]
            openPrice = quote[0]

            previous = prevDayData.get(symbol)
            if previous is not None:
                lastClose = previous['quote'][3]
                # Missing prices were mapped to 0 above: dividing by a zero
                # close would raise ZeroDivisionError, and a zero open would
                # produce a bogus -100% rise, so skip both cases.
                if lastClose > 0 and openPrice > 0:
                    previous['rise'] = openPrice / lastClose - 1

            record = {'quote': quote}
            dayKLines.setdefault(symbol, {})[date] = record
            prevDayData[symbol] = record

    return dayKLines

# Load the daily K-line file once; loadInput and loadOutput read this
# module-level dict as dayKline[symbol][date].
dayKline = loadDayKLine('data/2016日K线.gz')

In [None]:
# Spot check: show quote field 3 (the close, per the loader's field order)
# for one symbol on one date.
dayKline['SZ002353']['2016-03-03']['quote'][3]

### 生成训练数据和验证数据

训练数据：

* train_x：一只股票一个交易日的240根K线，(240 * K_DIM)
* train_y：次日开盘涨幅

验证数据：

* test_x
* test_y

In [None]:
import datetime

def loadInput(filepath):
    """Build per-day feature rows from a gzip-compressed 1-minute K-line CSV.

    The file has no header row; its columns are read as
    ``symbol, datetime, open, high, low, close, volume, amount``. The date
    is the part of ``datetime`` before the first space.

    A (symbol, date) pair becomes one sample only when:

    * the module-level ``dayKline`` has a ``'rise'`` value for it, and
    * the day has exactly 240 bars with ``K_DIM`` columns (``K_HEADER``).

    Pairs that are missing from ``dayKline`` are skipped.

    Args:
        filepath: Path to the gzip-compressed minute-bar CSV file.

    Returns:
        A tuple ``(features, table)``. ``features`` is a float array of
        shape ``(n_samples, 240 * K_DIM)`` with each day's bars flattened
        in row order. ``table`` is the list of ``(symbol, date)`` tuples in
        the same order as the rows of ``features``.
    """
    names = ['symbol', 'datetime', 'open', 'high', 'low', 'close', 'volume', 'amount']
    df = pd.read_csv(filepath, compression='gzip', names=names, header=None)
    print("process file: " + filepath, datetime.datetime.now().time())
    # Upper bound on storage: every kept day contributes 240 of the file's rows.
    train_input = np.zeros(shape=(len(df), K_DIM))
    print(len(df))
    table = []

    for processedStock, (symbol, stockRows) in enumerate(df.groupby('symbol')):
        if processedStock % 300 == 0:
            print("data processing...【" + str(processedStock // 300) + "0%】", datetime.datetime.now().time())

        symbolDays = dayKline.get(symbol)
        if symbolDays is None:
            # No daily data for this symbol, so there is no label to train on.
            continue

        # Use keyword `n` and index the split result directly: positional
        # `n` and unpacking of the `.str` accessor are rejected by recent
        # pandas releases.
        dates = stockRows['datetime'].str.split(' ', n=1).str[0]

        for date, dayRows in stockRows.groupby(dates):
            dayRecord = symbolDays.get(date)
            if dayRecord is None or 'rise' not in dayRecord:
                continue
            dayQuote = dayRows[K_HEADER].values
            if dayQuote.shape != (240, K_DIM):
                # Incomplete trading day.
                continue
            sampleIndex = len(table)
            train_input[sampleIndex * 240 : (sampleIndex + 1) * 240] = dayQuote
            table.append((symbol, date))

    return train_input[ : len(table) * 240].reshape(-1, 240 * K_DIM), table

# The November 2016 minute bars feed the training set and the December 2016
# bars feed the test set; each call also returns the (symbol, date) table
# that loadOutput later uses to look up labels.
train_x_raw, train_table = loadInput('data/1分钟K线(2016年11月).gz')
test_x_raw, test_table = loadInput('data/1分钟K线(2016年12月).gz')


In [None]:

print('数据归一化')
# Express every price as a percentage change relative to the last value of
# its own row. With K_HEADER ending in 'close', that is the close of the
# day's final bar. Selecting the last column (instead of the literal index
# 959) keeps this correct if K_DIM or the bar count changes.
# NOTE(review): a row whose last value is 0 would produce inf/NaN here.
train_x = train_x_raw / train_x_raw[:, -1:] * 100 - 100
test_x = test_x_raw / test_x_raw[:, -1:] * 100 - 100
print(train_x.shape)
print(test_x.shape)


In [None]:
# Display the normalized test features.
test_x

In [None]:
def loadOutput(table):
    """Build two-column one-hot labels for the given (symbol, date) pairs.

    For every pair the next-day rise is read from the module-level
    ``dayKline``. Column 0 is 1 when the rise is strictly positive;
    column 1 is 1 when the rise is zero or negative.

    Args:
        table: Sequence of ``(symbol, date)`` tuples, as returned by
            ``loadInput``.

    Returns:
        A float array of shape ``(len(table), 2)``.
    """
    labels = np.zeros(shape=(len(table), 2))
    for row, (symbol, date) in enumerate(table):
        rise = dayKline[symbol][date]['rise']
        if rise > 0.00:
            labels[row][0] = 1  # up
        if rise <= 0.00:
            labels[row][1] = 1  # down (or flat)
    return labels

# Build one-hot labels aligned row-for-row with train_x / test_x, and print
# their shapes plus the sum of all training label entries as a sanity check.
train_y = loadOutput(train_table)
print(train_y.shape)
print(np.sum(train_y))
test_y = loadOutput(test_table)
print(test_y.shape)


In [None]:
# Print the row count and the sum of all label entries for each label set.
print('train_y')
print(len(train_y))
print(np.sum(train_y))
print('test_y')
print(len(test_y))
print(np.sum(test_y))

# Display the training labels as a flat 1-D array.
train_y.flatten()

### 创建Model并描述模型拓扑

In [None]:
# Fully connected classifier: one flattened day of minute bars
# (240 * K_DIM values) in, two class scores out.
# NOTE(review): a 2-unit sigmoid layer directly feeds the 2-unit softmax
# output; confirm that this stacking is intentional.
model = Sequential([
    Dense(240 * K_DIM, input_dim=240 * K_DIM, activation='relu'),
    Dense(8 * K_DIM, activation='relu'),
    Dense(K_DIM, activation='relu'),
    Dense(2, activation='sigmoid'),
    Dense(2, activation='softmax'),
])

model.summary()

### compile and train

In [None]:
from keras import optimizers
from keras.optimizers import RMSprop

# The targets are one-hot rows over two classes and the network ends in a
# softmax layer, so train with categorical cross-entropy, the loss that
# matches that output, instead of mean squared error.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(train_x, train_y, epochs=3, batch_size=300)


In [None]:
# Evaluate on the held-out test set; with metrics=['accuracy'] compiled in,
# Keras returns the loss followed by the accuracy.
score = model.evaluate(test_x, test_y, batch_size=300)
print(score)

In [None]:
# Per-sample outputs of the final 2-unit softmax layer for the test set.
pred = model.predict(test_x, batch_size=100, verbose=1)

In [None]:
# Display a 1000-value slice of the flattened predictions; flattening
# interleaves the two output columns of each sample.
pred.flatten()[20000:21000]


In [None]:
# Display the trained weight arrays of every layer.
model.get_weights()