In [None]:
# Use the parsed 'Date' column as the index (the frame is modified in place),
# then remove the now-redundant column.
GSPC.index = pd.to_datetime(GSPC['Date'])
GSPC.drop(columns=['Date'], inplace=True)
# Transpose: dates become the columns, each original column becomes a row.
GSPC = GSPC.T
GSPC

In [None]:
#python -m pip install h5py
from __future__ import print_function  
import numpy as np
np.random.seed(1337)  # for reproducibility  用于指定随机数生成时所用算法开始的整数值，如果使用相同的seed()值，则每次生成的随即数都相同
  
from PIL import Image  
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.optimizers import SGD  
from keras.utils import np_utils
from keras import backend as K

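'''
Olivetti Faces is a fairly small face dataset from New York University: 400 images of 40 people, i.e. 10 images per person. Every image is 8-bit grayscale, so each pixel value lies between 0 and 255. The whole sheet is 1140 x 942 pixels and holds 20 x 20 faces, so a single face measures (1140 / 20) x (942 / 20) = 57 x 47 pixels (rounded down).
'''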

# --- training configuration -------------------------------------------------
nb_classes = 40   # number of distinct classes (one per person)
epochs = 40       # number of training epochs
batch_size = 40   # samples used per training iteration

# --- input image dimensions (rows, columns) ---------------------------------
img_rows = 57
img_cols = 47

# --- network hyper-parameters -----------------------------------------------
# Number of convolution filters (i.e. output channels) for the two conv layers.
nb_filters1 = 5
nb_filters2 = 10
# Side length of the max-pooling window.
nb_pool = 2
# Convolution kernel size: a single integer means the same length in every
# spatial dimension (a list/tuple of two integers would give width and height).
nb_conv = 3
  
def load_data(dataset_path):
    """Load the face sheet image and split it into train/validation/test sets.

    The image is read as a float64 array divided by 255 and cut into a
    20 x 20 grid of 57 x 47 tiles, taken row by row. Each tile is flattened
    to 2679 values, giving 400 faces. Every run of 10 consecutive faces gets
    one class label (0..39). Per class, the first 8 faces go to the training
    set, the 9th to the validation set and the 10th to the test set.

    Args:
        dataset_path: path of the sheet image, handed to ``Image.open``.

    Returns:
        ``[(train_data, train_label), (valid_data, valid_label),
        (test_data, test_label)]``. Data arrays are float32 with shapes
        (320, 2679), (40, 2679) and (40, 2679); label arrays are float64
        vectors of length 320, 40 and 40.

    Raises:
        ValueError: if the image cannot be cut into the 20 x 20 grid of
            57 x 47 single-channel tiles.
    """
    grid, tile_rows, tile_cols = 20, 57, 47
    n_people, faces_per_person, n_train = 40, 10, 8
    n_pixels = tile_rows * tile_cols  # 2679

    # The context manager releases the file handle once the pixels are copied.
    with Image.open(dataset_path) as img:
        img_ndarray = np.asarray(img, dtype='float64') / 255

    # Keep only the area covered by the grid, then regroup the pixels so that
    # each tile becomes one flattened row (same order as slicing tile by tile).
    sheet = img_ndarray[:grid * tile_rows, :grid * tile_cols]
    faces = (sheet.reshape(grid, tile_rows, grid, tile_cols)
                  .swapaxes(1, 2)
                  .reshape(grid * grid, n_pixels))

    # Labels are kept as float64 because that is what callers received before
    # (they were copied into float buffers); np.int no longer exists in NumPy.
    label = np.repeat(np.arange(n_people), faces_per_person).astype('float64')

    faces_by_person = faces.reshape(n_people, faces_per_person, n_pixels)
    label_by_person = label.reshape(n_people, faces_per_person)

    # train: 320, valid: 40, test: 40
    train_data = faces_by_person[:, :n_train].reshape(-1, n_pixels).astype('float32')
    train_label = label_by_person[:, :n_train].reshape(-1)
    valid_data = faces_by_person[:, n_train].astype('float32')
    valid_label = label_by_person[:, n_train].copy()
    test_data = faces_by_person[:, n_train + 1].astype('float32')
    test_label = label_by_person[:, n_train + 1].copy()

    return [(train_data, train_label), (valid_data, valid_label), (test_data, test_label)]
  
def set_model(lr=0.005,decay=1e-6,momentum=0.9):
    """Build and compile the small CNN used for face classification.

    Architecture: Conv(5, 3x3) -> ReLU -> MaxPool(2x2) -> Conv(10, 3x3) ->
    ReLU -> MaxPool(2x2) -> Dropout(0.25) -> Flatten -> Dense(128) -> tanh ->
    Dropout(0.5) -> Dense(nb_classes) -> softmax.

    Args:
        lr: learning rate passed to the SGD optimizer.
        decay: learning-rate decay passed to the SGD optimizer.
        momentum: momentum passed to the SGD optimizer (Nesterov enabled).

    Returns:
        The compiled ``Sequential`` model (categorical cross-entropy loss).
    """
    model = Sequential()

    # The single grayscale channel goes first or last depending on the backend.
    if K.image_data_format() == 'channels_first':
        first_input_shape = (1, img_rows, img_cols)
    else:
        first_input_shape = (img_rows, img_cols, 1)

    # NOTE(review): the filter counts, kernel size and pool size are literals
    # here; the module-level nb_filters1/nb_filters2/nb_conv/nb_pool hold the
    # same values but are not read by this function.
    layer_stack = [
        Conv2D(5, kernel_size=(3, 3), input_shape=first_input_shape),
        Activation('relu'),  # alternatives: sigmoid, relu, tanh, elu
        MaxPooling2D(pool_size=(2, 2)),
        Conv2D(10, kernel_size=(3, 3)),
        Activation('relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),
        Flatten(),
        Dense(128),  # fully connected layer
        Activation('tanh'),
        Dropout(0.5),
        Dense(nb_classes),
        Activation('softmax'),
    ]
    for layer in layer_stack:
        model.add(layer)

    optimizer = SGD(lr=lr, decay=decay, momentum=momentum, nesterov=True)
    model.compile(loss='categorical_crossentropy', optimizer=optimizer)
    return model
  
def train_model(model,X_train, Y_train, X_val, Y_val):
    """Fit ``model`` and save its weights to 'model_weights.h5'.

    Training uses the module-level ``batch_size`` and ``epochs`` and reports
    validation results on ``(X_val, Y_val)``.

    Returns:
        The same ``model`` object that was passed in.
    """
    fit_options = {
        'batch_size': batch_size,
        'epochs': epochs,
        'verbose': 1,
        'validation_data': (X_val, Y_val),
    }
    model.fit(X_train, Y_train, **fit_options)
    # Overwrite any weights left behind by an earlier run.
    model.save_weights('model_weights.h5', overwrite=True)
    return model
  
def test_model(model,X,Y):  
    model.load_weights('model_weights.h5')  
    score = model.evaluate(X, Y, verbose=0)
    return score  
  
if __name__ == '__main__':
    # Load the face sheet, already split into train / validation / test sets.
    (X_train, y_train), (X_val, y_val),(X_test, y_test) = load_data('olivettifaces.gif')

    # Keras expects 4-D input; the position of the single grayscale channel
    # (pixel depth 1) depends on the backend's image data format.
    if K.image_data_format() == 'channels_first':
        input_shape = (1, img_rows, img_cols)
    else:
        input_shape = (img_rows, img_cols, 1)
    X_train = X_train.reshape((X_train.shape[0],) + input_shape)
    X_val = X_val.reshape((X_val.shape[0],) + input_shape)
    X_test = X_test.reshape((X_test.shape[0],) + input_shape)

    print('X_train shape:', X_train.shape)
    print(X_train.shape[0], 'train samples')
    print(X_val.shape[0], 'validate samples')
    print(X_test.shape[0], 'test samples')

    # Convert class vectors to one-hot (binary) class matrices.
    Y_train = np_utils.to_categorical(y_train, nb_classes)
    Y_val = np_utils.to_categorical(y_val, nb_classes)
    Y_test = np_utils.to_categorical(y_test, nb_classes)

    model = set_model()
    train_model(model, X_train, Y_train, X_val, Y_val)

    # test_model() reloads the saved weights into `model`, so no further
    # load_weights call is needed before predicting below.
    score = test_model(model, X_test, Y_test)
    # The model is compiled without metrics, so the score is the test loss.
    print('test loss:', score)

    # Sequential.predict_classes is not available in current Keras releases;
    # taking the argmax of the softmax output gives the same class indices.
    classes = np.argmax(model.predict(X_test, verbose=0), axis=-1)
    test_accuracy = np.mean(np.equal(y_test, classes))
    print("accuarcy:", test_accuracy)

    # List every misclassified test sample as: <true label> <message> <prediction>.
    for true_label, predicted in zip(y_test, classes):
        if true_label != predicted:
            print(true_label, '被错误分成', predicted)
    

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Fit the min-max scaler on GSPC_test, then scale that same data with it.
mm = MinMaxScaler().fit(GSPC_test)
test_mm = mm.transform(GSPC_test)
# Map Y_test back through the inverse of the fitted scaling.
# NOTE(review): presumably Y_test holds scaled values with the same columns as
# GSPC_test — confirm which Y_test is in scope when this cell runs.
Y_test_inv = mm.inverse_transform(Y_test)

In [None]:
# Evaluated but not displayed (it is not the last expression of the cell);
# looks like a leftover shape check.
Y_val[0].shape
plt.figure(figsize=(20,6))
# Red line: per-column sum of Y_val_df divided by 15.
# NOTE(review): 15 is presumably the number of rows in Y_val_df (making this a
# column mean) — confirm.
plt.plot(Y_val_df.apply(lambda x: x.sum()/15,axis=0),"r")
# Second line on the same axes: the first entry of X_val.
plt.plot(X_val[0])

In [None]:
# Stray fragment of a keyword argument; on its own it is a SyntaxError, so it
# is kept here only as a note.
# NOTE(review): looks like an alternative `scoring` option for the
# cross_val_score calls in this notebook — confirm or delete this cell.
# ,scoring='f1_weighted'

In [None]:
# Model selector. None (the default) and the strings 'GRU', 'DNN', 'BPNN' pick
# a built-in architecture; otherwise define a Keras model yourself with
# model = Sequential() and input shape [DATE_BACK, the number of features].
LSTM_MODEL = None


# Change the Keras model
def declare_LSTM_MODEL(model=LSTM_MODEL):
    """Set the module-level ``LSTM_MODEL`` selector and announce the change.

    Args:
        model: new selector value. The default is the value ``LSTM_MODEL``
            had when this function was defined (None).
    """
    global LSTM_MODEL
    announcement = "LSTM_MODEL has changed to be %s and start your forecast."%model
    print(announcement)
    LSTM_MODEL = model
            

# Build the forecasting model selected by LSTM_MODEL
def LSTM_model(shape):
    """Return the compiled network chosen by the module-level ``LSTM_MODEL``.

    * ``None``   -> three stacked LSTM layers (CELLS*4, CELLS*2, CELLS units)
    * ``'GRU'``  -> the same stack built from GRU layers
    * ``'DNN'``  -> Dense(CELLS*4) -> Dense(CELLS*2) -> Flatten -> Dense(CELLS)
    * ``'BPNN'`` -> Dense(CELLS*4) -> Flatten
    * anything else -> ``LSTM_MODEL`` itself is returned unchanged

    Every built-in variant puts Dropout(DROPOUT) after each hidden layer, ends
    in ``Dense(1, activation='tanh')`` and is compiled with loss
    ``OPTIMIZER_LOSS`` and the 'adam' optimizer.

    Args:
        shape: indexable; ``(shape[1], shape[2])`` is used as the input shape
            of the first layer (presumably (samples, timesteps, features) —
            confirm against the caller).
    """
    first_layer_input = (shape[1], shape[2])

    def _finish(model):
        # Shared output head and compilation step.
        model.add(Dense(1,activation='tanh'))
        model.compile(loss=OPTIMIZER_LOSS, optimizer='adam')
        return model

    def _recurrent_stack(layer_cls):
        # Three recurrent layers; only the last one collapses the sequence.
        model = Sequential()
        model.add(layer_cls(CELLS*4, input_shape=first_layer_input, activation='tanh', return_sequences=True))
        model.add(Dropout(DROPOUT))
        model.add(layer_cls(CELLS*2,activation='tanh',return_sequences=True))
        model.add(Dropout(DROPOUT))
        model.add(layer_cls(CELLS,activation='tanh',return_sequences=False))
        model.add(Dropout(DROPOUT))
        return _finish(model)

    if LSTM_MODEL is None:
        return _recurrent_stack(LSTM)

    if LSTM_MODEL == 'GRU':
        return _recurrent_stack(GRU)

    if LSTM_MODEL == 'DNN':
        dnn = Sequential()
        dnn.add(Dense(CELLS*4, input_shape=first_layer_input, activation='tanh'))
        dnn.add(Dropout(DROPOUT))
        dnn.add(Dense(CELLS*2,activation='tanh'))
        dnn.add(Dropout(DROPOUT))
        dnn.add(Flatten())
        dnn.add(Dense(CELLS,activation='tanh'))
        dnn.add(Dropout(DROPOUT))
        return _finish(dnn)

    if LSTM_MODEL == 'BPNN':
        bpnn = Sequential()
        bpnn.add(Dense(CELLS*4, input_shape=first_layer_input, activation='tanh'))
        bpnn.add(Dropout(DROPOUT))
        bpnn.add(Flatten())
        return _finish(bpnn)

    # Not a known keyword: hand back whatever was declared.
    return LSTM_MODEL

# Other variables
# -------------------------------
# Selector of the unified-normalization method; only 0, 1, 2 and 3 are accepted
METHOD = 0

# Declare the method used for unified normalization
def declare_uni_method(method=None):
    """Set the module-level ``METHOD`` selector and announce the change.

    Args:
        method: one of 0, 1, 2, 3.

    Raises:
        TypeError: if ``method`` is not one of the accepted values (this
            includes the default, None).
    """
    global METHOD
    accepted = (0, 1, 2, 3)
    if method not in accepted:
        raise TypeError('METHOD should be 0,1,2,3.')
    METHOD = method
    print('Unified normalization method (%d) is start using.'%method)

In [None]:
def svmf1_real(dataset1, dataset2,long_predict=10):
    """Cross-validated micro-F1 of RBF-SVM trend classifiers over a threshold sweep.

    Log returns are computed for both price series. For each threshold
    ``t = 0, 0.00005, ..., 0.01495`` the returns of ``dataset1`` are labelled
    -1 (return < -t), 0 (|return| <= t) or +1 (otherwise). Two SVMs are then
    scored on predicting the label that follows a window of ``long_predict``
    returns: one fed with windows of ``dataset1`` returns, the other with
    windows of ``dataset2`` returns. The labels always come from ``dataset1``.
    Scoring is 5-fold cross-validation on the 80% training part of a fixed
    (random_state=0) train/test split.

    Args:
        dataset1: price series the labels are derived from; must work with
            ``np.log`` and ``len`` (presumably an (N, 1) frame or 1-D array of
            positive prices — confirm against callers).
        dataset2: second price series, used for features only; it must yield
            at least as many usable returns as ``dataset1``.
        long_predict: number of consecutive returns in each feature window.

    Returns:
        ``(f11, f12)``: two float arrays with one mean F1 score per threshold,
        for the ``dataset1``-based and ``dataset2``-based models respectively.

    Raises:
        ValueError: if the series are too short to build a single window.
    """
    # Log returns as flat 1-D arrays (np.diff works along the last axis).
    daily_log_rets1 = np.diff(np.log(dataset1).T).reshape(len(dataset1)-1)
    daily_log_rets2 = np.diff(np.log(dataset2).T).reshape(len(dataset2)-1)

    n_windows = len(daily_log_rets1) - long_predict
    if n_windows <= 0:
        raise ValueError('dataset1 is too short for long_predict=%d' % long_predict)
    if len(daily_log_rets2) < len(daily_log_rets1) - 1:
        raise ValueError('dataset2 is too short to be windowed alongside dataset1')

    # Row i holds positions i .. i+long_predict-1. The windows do not depend
    # on the threshold, so they are built once with a single fancy-index.
    window_idx = np.arange(n_windows)[:, None] + np.arange(long_predict)
    X1 = daily_log_rets1[window_idx]
    X2 = daily_log_rets2[window_idx]

    f11 = []
    f12 = []
    for step in range(0, 1500, 5):  # sweep over multiple thresholds
        # The float divisor keeps this a true division under Python 2 as well.
        threshold = step / 100000.0

        # Label every return: -1 below -threshold, 0 inside the band, +1 otherwise.
        daily_label = np.where(
            daily_log_rets1 < -threshold, -1.0,
            np.where(np.abs(daily_log_rets1) <= threshold, 0.0, 1.0))
        # The target of window i is the label right after that window.
        Y = daily_label[long_predict:]

        # One call splits all three arrays with the same shuffled indices.
        X_train1, _, X_train2, _, Y_train, _ = train_test_split(
            X1, X2, Y, test_size=0.2, random_state=0)

        svc_rbf1 = SVC(kernel='rbf',decision_function_shape='ovo')
        f11.append(cross_val_score(svc_rbf1, X_train1, Y_train, scoring='f1_micro',cv=5).mean())

        svc_rbf2 = SVC(kernel='rbf',decision_function_shape='ovo')
        f12.append(cross_val_score(svc_rbf2, X_train2, Y_train, scoring='f1_micro',cv=5).mean())
    return np.array(f11, dtype=float), np.array(f12, dtype=float)

In [None]:
def svmf1_real_label(dataset1, dataset2,long_predict=10):
    """Cross-validated micro-F1 of RBF-SVM classifiers fed with trend labels.

    Log returns are computed for both price series. For each threshold in
    ``np.arange(0.0, 0.04, 0.0005)`` the returns of each series are labelled
    -1 (return < -t), 0 (|return| <= t) or +1 (otherwise). Two SVMs are then
    scored on predicting the ``dataset1`` label that follows a window of
    ``long_predict`` labels: one fed with windows of ``dataset1`` labels, the
    other with windows of ``dataset2`` labels. Scoring is 2-fold
    cross-validation over all windows; no hold-out split is used here.

    Args:
        dataset1: price series the targets are derived from; must work with
            ``np.log`` and ``len`` (presumably an (N, 1) frame or 1-D array of
            positive prices — confirm against callers).
        dataset2: second price series, used for features only; it must yield
            at least as many usable returns as ``dataset1``.
        long_predict: number of consecutive labels in each feature window.

    Returns:
        ``(f11, f12)``: two float arrays with one mean F1 score per threshold,
        for the ``dataset1``-based and ``dataset2``-based models respectively.

    Raises:
        ValueError: if the series are too short to build a single window.
    """
    # Log returns as flat 1-D arrays (np.diff works along the last axis).
    daily_log_rets1 = np.diff(np.log(dataset1).T).reshape(len(dataset1)-1)
    daily_log_rets2 = np.diff(np.log(dataset2).T).reshape(len(dataset2)-1)

    n_windows = len(daily_log_rets1) - long_predict
    if n_windows <= 0:
        raise ValueError('dataset1 is too short for long_predict=%d' % long_predict)
    if len(daily_log_rets2) < len(daily_log_rets1) - 1:
        raise ValueError('dataset2 is too short to be windowed alongside dataset1')

    # Row i holds positions i .. i+long_predict-1 of a series.
    window_idx = np.arange(n_windows)[:, None] + np.arange(long_predict)

    def _to_labels(returns, threshold):
        # -1 below -threshold, 0 inside the band, +1 otherwise.
        return np.where(
            returns < -threshold, -1.0,
            np.where(np.abs(returns) <= threshold, 0.0, 1.0))

    f11 = []
    f12 = []
    for threshold in np.arange(0.0, 0.04, 0.0005):  # sweep over multiple thresholds
        daily_label = _to_labels(daily_log_rets1, threshold)
        daily_label2 = _to_labels(daily_log_rets2, threshold)

        X1 = daily_label[window_idx]    # windows of dataset1 labels
        X2 = daily_label2[window_idx]   # windows of dataset2 labels
        # The target of window i is the dataset1 label right after that window.
        Y = daily_label[long_predict:]

        svc_rbf1 = SVC(kernel='rbf',decision_function_shape='ovo')
        f11.append(cross_val_score(svc_rbf1, X1, Y, scoring='f1_micro',cv=2).mean())

        svc_rbf2 = SVC(kernel='rbf',decision_function_shape='ovo')
        f12.append(cross_val_score(svc_rbf2, X2, Y, scoring='f1_micro',cv=2).mean())
    return np.array(f11, dtype=float), np.array(f12, dtype=float)

In [None]:
# Overlay the first four columns of pure_mm_lstm_signal as scatter series
# against the frame's index.
for column_pos in range(4):
    plt.scatter(pure_mm_lstm_signal.index, pure_mm_lstm_signal.iloc[:, column_pos])

In [None]:
# First row of GSPC as a plain array; named `close` in the original cell.
close = np.array(GSPC)[0]

# For every window length 2..51 build two rows: a simple moving average (SMA)
# and an exponentially weighted moving average (EMA). Rows are collected in a
# list and turned into a frame once, instead of growing a frame per iteration.
moving_average_rows = []
for window in range(2, 52):
    # Placeholder values for the first window-1 positions, where no full
    # window is available yet (those columns are dropped below).
    padding = np.ones(window - 1)

    # SMA: equal weights.
    weights_sma = np.ones(window) / window
    sma = np.convolve(weights_sma, close, mode='valid')
    moving_average_rows.append(np.hstack((padding, sma)))

    # EMA: exponentially growing weights, normalised to sum to 1.
    # NOTE(review): np.convolve applies the kernel reversed, so the largest
    # weight lands on the oldest price of each window — confirm whether the
    # weights should be reversed for a recency-weighted average.
    weights_ema = np.exp(np.linspace(0, 1, window))
    weights_ema = weights_ema / np.sum(weights_ema)
    ema = np.convolve(weights_ema, close, mode='valid')
    # Stack the EMA itself; the SMA was previously stacked here by mistake.
    moving_average_rows.append(np.hstack((padding, ema)))

# Drop the first 52 columns so that no padded placeholder value remains.
GSPC_pure = pd.DataFrame(moving_average_rows).iloc[:, 52:]

In [None]:
# Volatility: rolling standard deviation of the log returns, shifted by one
# row so that each row only uses earlier rows.
window_stdev = 20
pure_mm_lstm2['volatility'] = (
    pure_mm_lstm2['log_ret'].rolling(window=window_stdev).std().shift(1)
)

# Rolling lag-N autocorrelation of the log returns for N = 1..6, also
# shifted by one row.
window_autocorr = 20
for lag in range(1, 7):
    pure_mm_lstm2['autocorr_%d' % lag] = (
        pure_mm_lstm2['log_ret']
        .rolling(window=window_autocorr)
        .apply(lambda x, lag=lag: x.autocorr(lag=lag), raw=False)
        .shift(1)
    )

# Lagged log returns: log_t1 .. log_t5 are the returns 1..5 rows earlier.
for lag in range(1, 6):
    pure_mm_lstm2['log_t%d' % lag] = pure_mm_lstm2['log_ret'].shift(lag)

# Fast and slow moving averages of the close, shifted by one row.
fast_window = 12
slow_window = 26
for column_name, window in (('fast_mavg', fast_window), ('slow_mavg', slow_window)):
    pure_mm_lstm2[column_name] = (
        pure_mm_lstm2['Close'].rolling(window=window).mean().shift(1)
    )

In [None]:
# Trend signal from the moving-average crossover: +1 where the fast average is
# at or above the slow one, -1 where it is below, NaN where neither comparison
# holds (i.e. while an average is still NaN).
long_signals = pure_mm_lstm2['fast_mavg'] >= pure_mm_lstm2['slow_mavg']
short_signals = pure_mm_lstm2['fast_mavg'] < pure_mm_lstm2['slow_mavg']
pure_mm_lstm2['sma'] = np.where(
    long_signals.to_numpy(), 1.0,
    np.where(short_signals.to_numpy(), -1.0, np.nan))

In [None]:
# Short and long moving-average windows
short_window = 12
long_window = 26

# Initialise the `signals` dataframe and add the `signal` column
signals = pd.DataFrame(index=pure_mm_lstm2.index)
signals['signal'] = 0.0

# Short-term simple moving average of the close
signals['short_mavg'] = pure_mm_lstm2['Close'].rolling(window=short_window, min_periods=1, center=False).mean()

# Long-term simple moving average of the close
signals['long_mavg'] = pure_mm_lstm2['Close'].rolling(window=long_window, min_periods=1, center=False).mean()

# Generate the signal: 1.0 where the short average is above the long one,
# leaving the first `short_window` rows at 0.0. This is a single positional
# write; the chained form signals['signal'][...] = ... raises
# SettingWithCopyWarning and does nothing under pandas Copy-on-Write.
signal_col = signals.columns.get_loc('signal')
signals.iloc[short_window:, signal_col] = np.where(
    signals['short_mavg'].iloc[short_window:] > signals['long_mavg'].iloc[short_window:],
    1.0, 0.0)

# Generate trading orders: +1.0 on the row where the signal switches on,
# -1.0 where it switches off
signals['positions'] = signals['signal'].diff()

# Display `signals`
signals

In [None]:
fig = plt.figure(figsize=(20,10))
ax1 = fig.add_subplot(111,  ylabel='Price in $')

# Close price
ax1.plot(pure_mm_lstm2['Close'], color='r', lw=2,label='Close')
# Short and long moving-average lines
ax1.plot(signals[['short_mavg', 'long_mavg']], lw=2,label=['short_mavg', 'long_mavg'])

# Buy signals: rows where the position switches on, marked on the short average
buy_rows = signals[signals.positions == 1.0]
ax1.plot(buy_rows.index, buy_rows.short_mavg,
         '^', markersize=10, color='m',label='long')

# Sell signals: rows where the position switches off
sell_rows = signals[signals.positions == -1.0]
ax1.plot(sell_rows.index, sell_rows.short_mavg,
         'v', markersize=10, color='k',label='short')

ax1.legend()
# Show the figure
plt.show()