In [28]:
import keras
import pandas as pd
import numpy as np       
import tensorflow as tf
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from keras.datasets import mnist
from keras.utils import np_utils
from keras import initializers
from keras.utils.vis_utils import plot_model
from keras.optimizers import rmsprop
from keras import regularizers
from keras.optimizers import SGD, Adam, RMSprop


import matplotlib.pyplot as plt
%matplotlib inline

In [29]:
class LossHistory(keras.callbacks.Callback):
    """Keras callback that records loss and accuracy per batch and per epoch.

    After training, the recorded values are available in four dictionaries
    (``losses``, ``accuracy``, ``val_loss``, ``val_acc``), each with the keys
    ``'batch'`` and ``'epoch'``. ``loss_plot`` draws them with matplotlib.
    """

    def on_train_begin(self, logs=None):
        """Reset every recorded curve when training starts."""
        self.losses = {'batch': [], 'epoch': []}
        self.accuracy = {'batch': [], 'epoch': []}
        self.val_loss = {'batch': [], 'epoch': []}
        self.val_acc = {'batch': [], 'epoch': []}

    @staticmethod
    def _metric(logs, short_name):
        """Read an accuracy metric from ``logs`` under either naming scheme.

        Looks up ``short_name`` (e.g. ``'acc'`` / ``'val_acc'``) first and
        falls back to the long spelling (``'accuracy'`` / ``'val_accuracy'``).
        Returns ``None`` when neither key is present.
        """
        value = logs.get(short_name)
        if value is None:
            value = logs.get(short_name.replace('acc', 'accuracy'))
        return value

    def _record(self, granularity, logs):
        """Append the metrics found in ``logs`` to the ``granularity`` series."""
        logs = logs or {}  # Keras may pass None; never share a mutable default
        self.losses[granularity].append(logs.get('loss'))
        self.accuracy[granularity].append(self._metric(logs, 'acc'))
        self.val_loss[granularity].append(logs.get('val_loss'))
        self.val_acc[granularity].append(self._metric(logs, 'val_acc'))

    def on_batch_end(self, batch, logs=None):
        """Record the metrics reported at the end of a batch.

        Validation keys that are absent from ``logs`` are stored as ``None``.
        """
        self._record('batch', logs)

    def on_epoch_end(self, batch, logs=None):
        """Record the metrics reported at the end of an epoch.

        The first positional argument keeps its original name ``batch``.
        """
        self._record('epoch', logs)

    def loss_plot(self, loss_type):
        """Plot the recorded curves for ``loss_type`` (``'batch'`` or ``'epoch'``).

        Training accuracy and loss are always drawn; the validation curves are
        added only for ``loss_type == 'epoch'``.
        """
        iters = range(len(self.losses[loss_type]))
        plt.figure()
        # training accuracy
        plt.plot(iters, self.accuracy[loss_type], 'r', label='train acc')
        # training loss
        plt.plot(iters, self.losses[loss_type], 'g', label='train loss')
        if loss_type == 'epoch':
            # validation accuracy
            plt.plot(iters, self.val_acc[loss_type], 'b', label='val acc')
            # validation loss
            plt.plot(iters, self.val_loss[loss_type], 'k', label='val loss')
        plt.grid(True)
        plt.xlabel(loss_type)
        plt.ylabel('acc-loss')
        plt.legend(loc="upper right")
        plt.show()

In [30]:
# --- Run configuration -------------------------------------------------------
# Model/data dimensions.
nb_classes = 2          # number of output classes of the softmax layer
dim_input_vector = 7    # features per time step
time_steps = 20         # time steps per input sequence

# Training settings.
batch_size = 90
nb_epochs = 150

# Directory name reserved for logs.
log_filepath = './logs'

In [31]:
# Location of the HS300 CSV. A raw string keeps the Windows backslashes literal;
# the previous spelling relied on unrecognised escape sequences (\D, \s, \l, \H),
# which newer Python versions warn about. The resulting path is unchanged.
# NOTE(review): this is an absolute, machine-specific path — consider making it configurable.
file = r"C:\Users\DELL\Desktop\shaoguang123\lstm_classify\HS300.csv"

# Load the raw table; later cells select and transform its columns.
data_raw = pd.read_csv(file, encoding="utf-8")

In [32]:
# Rename the CHGPct column to "label" and stringify the index.
data_raw = data_raw.rename(index=str, columns={"CHGPct": "label"})

# Keep columns 3..9 of the raw table, then discard the two turnover columns.
data_raw = data_raw.iloc[:, 3:10]
# `axis` is passed by keyword: the positional form `drop(name, 1)` is no longer
# accepted by recent pandas releases.
data_raw = data_raw.drop(['turnoverValue', 'turnoverVol'], axis=1)

# Column 1 of the remaining frame (presumably the closing price — confirm
# against the CSV header).
cls = data_raw.iloc[:, 1].values

# `close[i]` holds the value of row i+1, i.e. the series shifted by one row
# with the first entry dropped.
close = list(cls[1:])

In [33]:
# Derive three per-row features from columns 0..3 of `data_raw`.
# NOTE(review): the arithmetic suggests col 0 = open, col 1 = close,
# col 2 = low, col 3 = high — confirm against the CSV header.
_c0 = data_raw.iloc[:, 0].values
_c1 = data_raw.iloc[:, 1].values
_c2 = data_raw.iloc[:, 2].values
_c3 = data_raw.iloc[:, 3].values

# Same branch condition as the original row loop: rows where col 1 >= col 0
# take col 1 as the upper body edge, all other rows take col 0.
_col1_on_top = _c1 >= _c0
_body_top = np.where(_col1_on_top, _c1, _c0)
_body_bottom = np.where(_col1_on_top, _c0, _c1)

# All three features are scaled by 1/100.
up = ((_c3 - _body_top) / 100).tolist()             # col 3 minus upper body edge
down = ((_body_bottom - _c2) / 100).tolist()        # lower body edge minus col 2
wide = ((_body_top - _body_bottom) / 100).tolist()  # body height

# (An arctan squashing of `up`/`down` was tried previously and is disabled.)

# Insert the features at positions 4..6, which moves the "label" column to 7.
# `insert` modifies `data_raw` in place, so this cell is meant to run once per
# freshly loaded frame.
data_raw.insert(4, 'up', up)
data_raw.insert(5, 'down', down)
data_raw.insert(6, 'wide', wide)

In [34]:
# Work on an independent float copy so `data_raw` is left untouched and this
# cell can be re-run without normalising already-normalised values.
data_tmp = data_raw.values.astype(float)

# --- Label (column 7) --------------------------------------------------------
# Row i is labelled 1 when row i+1's original column-7 value is > 0 and 0 when
# it is <= 0. Rows whose successor is NaN keep their original value. The last
# row has no successor and is dropped below.
_next_change = data_tmp[1:, 7].copy()   # copy: the slices below overlap it
_label = data_tmp[:-1, 7]               # view into data_tmp
_label[_next_change <= 0] = 0
_label[_next_change > 0] = 1

# --- Normalise columns 0..3 --------------------------------------------------
# Each value becomes its relative change versus the previous row's raw
# column-1 value: (value - prev_col1) / prev_col1. Row 0 has no predecessor
# and is dropped below.
_prev_col1 = data_tmp[:-1, 1].copy()[:, np.newaxis]
data_tmp[1:, :4] = (data_tmp[1:, :4] - _prev_col1) / _prev_col1

# (log10 scaling and a fixed (x - 3000) / 3000 scaling were tried previously
# and are disabled.)

# Drop the first row (not normalised) and the last row (not labelled).
data_tmp = data_tmp[1:-1].copy()
data = data_tmp

# Preview the first rows only.
pd.DataFrame(data).head(10)

Unnamed: 0,0,1,2,3,4,5,6,7
0,0.002782,0.032150,-0.001642,0.032746,0.012700,0.094300,0.626000,1.0
1,0.004850,0.025399,-0.002418,0.025399,0.000000,0.159900,0.452100,0.0
2,0.000461,-0.010789,-0.014007,0.015062,0.329400,0.072600,0.253800,0.0
3,-0.007067,-0.025936,-0.032201,0.008026,0.336800,0.139800,0.421100,1.0
4,-0.001293,0.052255,-0.001293,0.052412,0.003400,0.000000,1.164000,1.0
5,0.010326,0.029086,0.004328,0.029331,0.005600,0.137200,0.429100,0.0
6,0.002778,-0.019092,-0.037186,0.016717,0.328100,0.425900,0.514800,1.0
7,-0.007259,0.003538,-0.029763,0.007189,0.084300,0.519600,0.249300,1.0
8,0.001627,0.034090,0.001627,0.034090,0.000000,0.000000,0.752200,1.0
9,0.011990,0.039740,0.011990,0.039907,0.004000,0.000000,0.664900,1.0


In [35]:
def get_train_data(data,begin=0,end=1800):
    """Slice the training rows out of ``data`` and split features from labels.

    Args:
        data: 2-D array-like supporting ``data[a:b]`` and ``[:, k]`` indexing,
            with at least 8 columns.
        begin: index of the first row to include.
        end: index one past the last row to include.

    Returns:
        ``(x, y)`` as plain Python lists: ``x`` holds columns 0..6 of every
        selected row, ``y`` holds column 7.
    """
    # Use the arguments; the previous version ignored them and read the
    # module-level `train_begin` / `train_end` instead.
    data_train = data[begin:end]
    # Standardisation (currently disabled):
    # data_train[:,:-1]=(data_train[:,:-1]-np.mean(data_train[:,:-1],axis=0))/np.std(data_train[:,:-1],axis=0)

    x = data_train[:, :7]
    y = data_train[:, 7]

    return x.tolist(), y.tolist()

In [36]:
def get_test_data(data,begin=1800,end=2400):
    """Slice the test rows out of ``data`` and split features from labels.

    NOTE(review): ``begin`` and ``end`` are currently ignored; the slice is
    taken with the module-level ``test_begin`` / ``test_end``. Audit every
    caller before switching this function over to its parameters.

    Returns:
        ``(x, y)`` as plain Python lists: ``x`` holds columns 0..6 of every
        selected row, ``y`` holds column 7.
    """
    window = data[test_begin:test_end]
    # Standardisation with the training mean/std (currently disabled):
    # window[:,:-1]=(window[:,:-1]-mean)/std

    features, labels = window[:, :7], window[:, 7]
    return features.tolist(), labels.tolist()

In [37]:
# Two stacked LSTM layers that both return the full sequence, followed by
# dropout and a softmax layer, so the network emits one class distribution
# per time step (see the (None, 20, 2) output shape in the summary).
model = Sequential([
    LSTM(16,
         return_sequences=True,
         input_shape=(time_steps, dim_input_vector)),
    LSTM(16, return_sequences=True),
    Dropout(0.5),
    Dense(nb_classes, activation='softmax'),
])

# RMSprop with 0.005 as its first positional argument.
model.compile(
    optimizer=RMSprop(0.005),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_3 (LSTM)                (None, 20, 16)            1536      
_________________________________________________________________
lstm_4 (LSTM)                (None, 20, 16)            2112      
_________________________________________________________________
dropout_2 (Dropout)          (None, 20, 16)            0         
_________________________________________________________________
dense_2 (Dense)              (None, 20, 2)             34        
Total params: 3,682
Trainable params: 3,682
Non-trainable params: 0
_________________________________________________________________


In [38]:
# Row ranges (half-open) of the training and test windows inside `data`.
train_begin = 0
train_end = 1800
test_begin = 1800
test_end = 2200

# Independent copies of the dataset for the two splits (replaces the
# row-by-row list rebuild, which produced the same arrays).
a = np.array(data)
b = np.array(data)

X_train, Y_train = get_train_data(a, begin=train_begin, end=train_end)
# Pass `test_begin` as the start of the window; the previous call passed
# `test_end` for both bounds, which describes an empty range.
X_test, Y_test = get_test_data(b, begin=test_begin, end=test_end)

In [39]:
def _to_sequences(features, labels):
    """Reshape flat rows into (n, time_steps, ...) sequences with one-hot labels."""
    seq_features = np.reshape(features, (-1, time_steps, dim_input_vector))
    one_hot = np_utils.to_categorical(labels, nb_classes)
    seq_labels = np.reshape(one_hot, (-1, time_steps, nb_classes))
    return seq_features, seq_labels


# The inputs are overwritten with their sequence form, so run this cell once
# per freshly built X/Y split.
X_train, Y_train = _to_sequences(X_train, Y_train)
X_test, Y_test = _to_sequences(X_test, Y_test)

In [None]:
# Report the shape of each tensor that goes into training/evaluation.
for caption, tensor in (
    ('X_train shape:', X_train),
    ('Y_train shape:', Y_train),
    ('X_test shape:', X_test),
    ('Y_test shape:', Y_test),
):
    print(caption, np.shape(tensor))

# Make sure all four are NumPy arrays.
X_train, Y_train, X_test, Y_test = map(
    np.array, (X_train, Y_train, X_test, Y_test)
)


X_train shape: (90, 20, 7)
Y_train shape: (90, 20, 2)
X_test shape: (20, 20, 7)
Y_test shape: (20, 20, 2)


In [None]:
# Callback instance that collects the per-batch / per-epoch curves.
history = LossHistory()

# The test split doubles as the validation set during training.
fit_options = dict(
    batch_size=batch_size,
    epochs=nb_epochs,
    verbose=1,
    validation_data=(X_test, Y_test),
    callbacks=[history],
)
model.fit(X_train, Y_train, **fit_options)

In [None]:
# Evaluate on the test split; entry 0 and entry 1 of the result are printed
# as score and accuracy respectively.
score = model.evaluate(X_test, Y_test, verbose=0)
for caption, position in (('Test score:', 0), ('Test accuracy:', 1)):
    print(caption, score[position])

In [None]:
# Plot the per-epoch curves recorded by the callback: training accuracy/loss
# plus validation accuracy/loss.
history.loss_plot('epoch')

In [None]:
# Run the trained model on the test inputs and keep its raw outputs in `pred`.
pred=model.predict(X_test)

In [None]:
# Display the shape of the prediction array.
pred.shape

In [None]:
# Flatten the predictions to one row per time step, with one column per class
# (uses `nb_classes` from the configuration cell instead of a hard-coded 2).
pred = np.reshape(pred, (-1, nb_classes))

In [None]:
# Predicted class per row: the index of the largest entry of each row of `pred`.
p = [np.argmax(row) for row in pred]

In [None]:
# Flatten the one-hot test labels to one row per time step, with one column per
# class (uses `nb_classes` from the configuration cell instead of a hard-coded 2).
Y_test = np.reshape(Y_test, (-1, nb_classes))

# True class per row: the index of the largest entry of each one-hot row.
q = [np.argmax(row) for row in Y_test]

In [None]:
# Test-window `close` series with each point coloured by its TRUE class in `q`:
# red = class 1, green = class 0.
plt.figure(figsize=(15, 10))

_offsets = np.arange(test_end - test_begin)
_window_close = np.asarray(close[test_begin:test_end], dtype=float)
_classes = np.asarray(q[:len(_offsets)])

# One plot call per class instead of one per point. The bare `plt.hold`
# references were dropped: they never called anything, and the attribute no
# longer exists in current matplotlib.
_is_one = _classes == 1
_is_zero = _classes == 0
plt.plot(_offsets[_is_one], _window_close[_is_one], 'or', label='class 1')
plt.plot(_offsets[_is_zero], _window_close[_is_zero], 'og', label='class 0')
plt.plot(_window_close, '-y', label='close')

plt.title('True labels on the test window')
plt.xlabel('offset from test_begin')
plt.ylabel('close')
plt.legend()
plt.show()  # actually call show(); the bare name `plt.show` did nothing

In [None]:
# Test-window `close` series with each point coloured by its PREDICTED class in
# `p`: red = class 1, green = class 0.
plt.figure(figsize=(15, 10))

_offsets = np.arange(test_end - test_begin)
_window_close = np.asarray(close[test_begin:test_end], dtype=float)
_classes = np.asarray(p[:len(_offsets)])

# One plot call per class instead of one per point. The bare `plt.hold`
# references were dropped: they never called anything, and the attribute no
# longer exists in current matplotlib.
_is_one = _classes == 1
_is_zero = _classes == 0
plt.plot(_offsets[_is_one], _window_close[_is_one], 'or', label='class 1')
plt.plot(_offsets[_is_zero], _window_close[_is_zero], 'og', label='class 0')
plt.plot(_window_close, '-y', label='close')

plt.title('Predicted labels on the test window')
plt.xlabel('offset from test_begin')
plt.ylabel('close')
plt.legend()
plt.show()  # actually call show(); the bare name `plt.show` did nothing