In [1]:
import pandas as pd
import numpy as np
import pickle

from sklearn.preprocessing import QuantileTransformer, MinMaxScaler, StandardScaler, RobustScaler, PowerTransformer, MaxAbsScaler, Normalizer
from sklearn.model_selection import train_test_split
from sklearn import metrics
from keras.layers import Dense, LSTM
from keras.models import Sequential
from keras.callbacks.callbacks import Callback
from keras import optimizers

Using TensorFlow backend.


In [2]:
def getDf(n_in,n_out,steps):
    """Build one supervised-learning table per sampling phase of the price series.

    Reads the ``<OPEN>`` column of ``Datos/EURUSD_MT5_min.csv`` (tab separated),
    forward-fills missing values, drops exact zeros, and then, for every offset
    ``i`` in ``range(steps)``, takes every ``steps``-th sample starting at ``i``
    and frames it with ``series_to_supervised``.

    Parameters
    ----------
    n_in : int
        Number of lagged observations used as inputs.
    n_out : int
        Number of observations (current one included) used as targets.
    steps : int
        Sub-sampling stride; also the number of tables returned.

    Returns
    -------
    list of pandas.DataFrame
        ``steps`` frames, one per phase offset.
    """
    prices = pd.read_csv('Datos/EURUSD_MT5_min.csv', sep='\t')['<OPEN>']
    # ``Series.ffill()`` is the supported spelling of the deprecated
    # ``fillna(method='pad')`` and yields the same values.
    prices = prices.ffill()
    prices = prices.values.astype('float64')
    # Zeros are discarded before framing.
    prices = prices[np.nonzero(prices)]
    prices = prices.reshape(len(prices),1)

    return [series_to_supervised(prices[i::steps],n_in,n_out) for i in range(steps)]

In [3]:
def series_to_supervised(df, n_in=1, n_out=1, dropnan=True):
    """Frame a series as a supervised-learning table of lagged and lead columns.

    The result has ``n_in`` blocks of lagged columns (``t-n_in`` ... ``t-1``)
    followed by ``n_out`` blocks of lead columns (``t`` ... ``t+n_out-1``),
    concatenated side by side.

    Parameters
    ----------
    df : array-like
        Anything ``pandas.DataFrame`` accepts: a list, a 1-D or 2-D array,
        a Series or a DataFrame.
    n_in : int, default 1
        Number of lagged steps to include.
    n_out : int, default 1
        Number of lead steps to include (step 0 is the current observation).
    dropnan : bool, default True
        Drop the rows made incomplete by shifting.

    Returns
    -------
    pandas.DataFrame
        The framed table; the original row index is preserved.
    """
    # No shape inspection here: the frame constructor already normalises every
    # supported input, whereas probing ``df.shape[1]`` crashes on 1-D inputs.
    frame = pd.DataFrame(df)

    lagged = [frame.shift(lag) for lag in range(n_in, 0, -1)]
    leads = [frame.shift(-lead) for lead in range(n_out)]

    agg = pd.concat(lagged + leads, axis=1)

    if dropnan:
        agg = agg.dropna()
    return agg

In [4]:
def getTVT_split(n_in,n_out,steps,split):
    """Return train / validation / test arrays for inputs and targets.

    All tables produced by ``getDf`` are stacked; the last ``n_out`` columns
    become the targets and the remaining columns the inputs. A fraction
    ``split`` of the rows is drawn (shuffled, ``random_state=5``) for training;
    the remainder is cut in half, in order, into validation and test.

    Parameters
    ----------
    n_in, n_out, steps
        Forwarded to ``getDf``.
    split : float
        Fraction of rows assigned to the training set.

    Returns
    -------
    tuple of numpy.ndarray
        ``(x_train, x_val, x_test, y_train, y_val, y_test)``.
    """
    data = pd.concat(getDf(n_in,n_out,steps),axis=0).values
    x, y = data[:,:-n_out], data[:,-n_out:]

    # Split inputs and targets in ONE call so their rows are guaranteed to stay
    # paired, instead of relying on two separate shuffles drawing the same
    # permutation. The resulting partitions are the same as before.
    # NOTE(review): rows are overlapping windows of one time series, so a
    # shuffled split may place near-identical windows in both train and
    # validation -- confirm this is intended before trusting validation scores.
    x_train, x_rest, y_train, y_rest = train_test_split(x,y,train_size=split,random_state=5)
    x_val, x_test, y_val, y_test = train_test_split(x_rest,y_rest,train_size=0.5,shuffle=False)

    return x_train, x_val, x_test, y_train, y_val, y_test

In [5]:
class CustomSaver(Callback):
    """Keras callback that pickles the model each time validation loss improves.

    The model is written to ``model_MinAhead.p`` in the working directory,
    overwriting the previous best.
    """

    # Lowest validation loss seen so far; starts above any realistic loss.
    mini = 10**100

    def on_epoch_end(self, epoch, logs=None):
        """Save the model if this epoch's ``val_loss`` beats the best so far.

        Epochs that report no ``val_loss`` (e.g. training without validation
        data) are skipped instead of raising ``KeyError``.
        """
        val_loss = (logs or {}).get('val_loss')
        if val_loss is None:
            return
        if val_loss < self.mini:
            self.mini = val_loss
            # Context manager so the file is flushed and closed immediately.
            with open('model_MinAhead.p','wb') as fh:
                pickle.dump(self.model,fh)

In [6]:
def _bits_to_int(bits):
    """Interpret a sequence of 0/1 values as a big-endian unsigned integer."""
    value = 0
    for bit in bits:
        value = (value << 1) | int(bit)
    return value


def _make_scalers(sc):
    """Return the ``(scalerX, scalerY)`` pair selected by scaler code ``sc`` (0-15).

    Code 11 selects ``Normalizer`` for the inputs and has no target scaler, so
    ``scalerY`` is ``None`` in that case.
    """
    if sc == 11:
        return Normalizer(), None
    factories = {
        0: lambda: QuantileTransformer(output_distribution='normal',random_state=5),
        1: lambda: QuantileTransformer(random_state=5),
        2: lambda: MinMaxScaler(),
        3: lambda: StandardScaler(),
        4: lambda: StandardScaler(with_mean=True,with_std=False),
        5: lambda: StandardScaler(with_mean=False,with_std=True),
        6: lambda: StandardScaler(with_mean=False,with_std=False),
        7: lambda: RobustScaler(),
        8: lambda: RobustScaler(with_centering=False,with_scaling=True),
        9: lambda: RobustScaler(with_centering=True,with_scaling=False),
        10: lambda: RobustScaler(with_centering=False,with_scaling=False),
        12: lambda: PowerTransformer(),
        13: lambda: PowerTransformer(method='box-cox'),
        14: lambda: PowerTransformer(standardize=False),
        15: lambda: PowerTransformer(method='box-cox', standardize=False),
    }
    make = factories[sc]
    # Two independent instances: one is fitted on inputs, the other on targets.
    return make(), make()


def _dense(units, activation, **kwargs):
    """Build a ``Dense`` layer; the pseudo-activation ``'0'`` passes no activation."""
    if activation != '0':
        kwargs['activation'] = activation
    return Dense(units, **kwargs)


def fitter(gen,memb,gene):
    """Decode a 380-bit genome, train the network it describes and score it.

    Genome layout (bit ranges are half-open, values are big-endian):

    ========  ==========================================================
    bits      meaning
    ========  ==========================================================
    0-9       ``n_in = value // 3 + 4`` lagged inputs
    9-21      ``steps = value + 21`` sub-sampling stride
    21-36     ``split = 0.5 + value / 1e5`` training fraction
    36-43     input-layer activation, ``act[value // 11]``
    43-50     input-layer width, ``value + 1``
    50-57     output-layer activation, ``act[value // 11]``
    57-204    21 hidden-layer widths, 7 bits each (0 skips the layer)
    204-351   21 hidden-layer activations, ``act[value // 11]``
    351-355   scaler code (see ``_make_scalers``)
    355-362   loss, ``losses[value // 10]``
    362-365   optimizer, ``opts[value]``
    365-370   epochs, ``value + 1``
    370-380   batch size, ``value + 1``
    ========  ==========================================================

    Reads the module-level ``act``, ``losses``, ``opts`` and ``mn``.

    Parameters
    ----------
    gen : sequence of 0/1
        The genome; at least 380 entries.
    memb, gene : number
        Member and generation identifiers, used only in the name of the
        saved model file.

    Returns
    -------
    (fitup, fitdw)
        ``fitup`` is the share of predicted up-moves whose real move was at
        least ``mn``; ``fitdw`` is the analogue for down-moves. Each is zeroed
        when fewer than 0.5% of the validation rows received that prediction.
        ``(0, 0)`` is returned when the model produced NaN or infinite output.

    Side effects
    ------------
    Prints the two scores and, when either exceeds 0.7, pickles the model
    under ``Models/``.

    Raises
    ------
    ValueError
        If ``gen`` has fewer than 380 entries.
    """
    # Local import: only the backend's session reset is needed in this function.
    from keras import backend as K

    if len(gen) < 380:
        raise ValueError('genome must contain at least 380 bits, got %d' % len(gen))

    # Release the graph/session left by the previous candidate; otherwise every
    # call keeps adding layers to the same backend state and memory grows for
    # the whole search. Must run before any optimizer or layer is created.
    K.clear_session()

    # ---- decode the genome -------------------------------------------------
    n_in = _bits_to_int(gen[:9])//3 + 4
    n_out = 1
    n_var = 1
    steps = _bits_to_int(gen[9:21]) + 21
    split = round(_bits_to_int(gen[21:36])/10**5 + 0.5,5)
    act_in = act[_bits_to_int(gen[36:43])//11]
    nn_in = _bits_to_int(gen[43:50]) + 1
    act_out = act[_bits_to_int(gen[50:57])//11]

    nn_hid = [_bits_to_int(gen[i:i+7]) for i in range(57,204,7)]
    act_hid = [act[_bits_to_int(gen[i:i+7])//11] for i in range(204,351,7)]

    sc = _bits_to_int(gen[351:355])
    scalerX, scalerY = _make_scalers(sc)

    ls = losses[_bits_to_int(gen[355:362])//10]
    opt = opts[_bits_to_int(gen[362:365])]
    if opt == 'adamT':
        # 'adamT' is this notebook's label for Adam with the AMSGrad variant.
        opt = optimizers.Adam(amsgrad=True)

    eps = _bits_to_int(gen[365:370]) + 1
    batch = _bits_to_int(gen[370:380]) + 1

    # ---- data --------------------------------------------------------------
    # The held-out test split is returned by the helper but not used for the
    # fitness score, so it is not scaled here.
    x_train, x_val, _x_test, y_train, y_val, _y_test = getTVT_split(n_in,n_out,steps,split)

    if sc != 11:
        x_train = scalerX.fit_transform(x_train)
        x_val = scalerX.transform(x_val)

        y_train = scalerY.fit_transform(y_train)
        y_val = scalerY.transform(y_val)
    else:
        # Normalizer rescales each input row to unit norm; the targets are
        # divided by the same per-row norm so they stay on the inputs' scale.
        norm_train = np.linalg.norm(x_train,axis=1).reshape(len(x_train),1)
        norm_val = np.linalg.norm(x_val,axis=1).reshape(len(x_val),1)

        x_train = scalerX.transform(x_train)
        x_val = scalerX.transform(x_val)

        y_train = y_train/norm_train
        y_val = y_val/norm_val

    # ---- model -------------------------------------------------------------
    model = Sequential()
    model.add(_dense(nn_in,act_in,input_dim=(n_in*n_var)))
    for units, activation in zip(nn_hid,act_hid):
        if units == 0:
            continue
        model.add(_dense(units,activation))
    model.add(_dense(y_train.shape[1],act_out))

    model.compile(loss=ls,optimizer=opt)
    model.fit(x_train,y_train,epochs=eps,validation_data=(x_val,y_val),batch_size=batch,verbose=0)

    # ---- back to price units -----------------------------------------------
    if sc != 11:
        results = scalerY.inverse_transform(model.predict(x_val))
        oy_val = scalerY.inverse_transform(y_val)
        last = scalerX.inverse_transform(x_val)[:,-1:]
    else:
        results = model.predict(x_val)*norm_val
        oy_val = y_val*norm_val
        last = (x_val*norm_val)[:,-1:]

    if np.isnan(results).any() or np.isinf(results).any():
        return 0,0

    # Work with moves relative to the most recent input value.
    results = results - last
    oy_val = oy_val - last

    rmse = metrics.mean_squared_error(results, oy_val)**(1/2)

    # Shrink predictions larger than one RMSE towards zero by golden-ratio * RMSE,
    # then zero everything that remains inside the +/- mn dead band.
    shrink = (1+5**(1/2))/2*rmse
    results[results > rmse] -= shrink
    results[results < -rmse] += shrink
    results[(results < mn) & (results > -mn)] = 0

    # ---- fitness -----------------------------------------------------------
    n_up = np.count_nonzero(results > 0)
    n_dw = np.count_nonzero(results < 0)

    if n_up != 0:
        up = np.count_nonzero(oy_val[results > 0] >= mn)/n_up
    else:
        up = 0
    if n_dw != 0:
        dw = np.count_nonzero(oy_val[results < 0] <= -mn)/n_dw
    else:
        dw = 0

    pup = n_up/len(results)
    pdw = n_dw/len(results)

    # A direction only counts if it is predicted on at least 0.5% of the rows.
    ut = pup >= 5e-3
    dt = pdw >= 5e-3

    fitup = ut*up
    fitdw = dt*dw

    print(fitup,fitdw)
    if ((fitup > 0.7) or (fitdw > 0.7)):
        # Context manager so the file handle is closed after every save.
        with open("Models/model_%f_%f_%f_%f.p" %(fitup,fitdw,memb,gene),'wb') as fh:
            pickle.dump(model,fh)

    return fitup, fitdw

In [7]:
# Activation names a gene can select; '0' stands for "pass no activation argument".
act = [
    '0',
    'linear',
    'elu',
    'selu',
    'relu',
    'sigmoid',
    'hard_sigmoid',
    'tanh',
    'softmax',
    'softplus',
    'softsign',
    'exponential',
]

# Loss names a gene can select.
losses = [
    'mean_squared_error',
    'mean_absolute_error',
    'mean_absolute_percentage_error',
    'mean_squared_logarithmic_error',
    'squared_hinge',
    'hinge',
    'categorical_hinge',
    'logcosh',
    'huber_loss',
    'binary_crossentropy',
    'kullback_leibler_divergence',
    'poisson',
    'cosine_proximity',
]

# Optimizer names a gene can select; 'adamT' is replaced by Adam(amsgrad=True) in fitter.
opts = [
    'sgd',
    'rmsprop',
    'adagrad',
    'adadelta',
    'adam',
    'adamT',
    'adamax',
    'nadam',
]

mn = 0.001      # dead band / minimum move used when scoring predictions
n_gen = 50000   # index of the last generation to run
adn = 380       # genome length in bits

In [8]:
# Seed the global NumPy generator so the initial population is reproducible.
rd = np.random
rd.seed(5)

# 20 random genomes of `adn` bits each, drawn one genome per call.
pop = np.array([rd.randint(0,2,adn,'int') for _ in range(20)])

# Placeholder elite pool for the first generation: one all-zero genome
# with an all-zero fitness pair.
mez_pop = np.zeros((1,adn),'int')
mez_fit = np.zeros((1,2),'int')

In [9]:
# Resume from the checkpoint written for generation 29.
# NOTE(review): the checkpoint files are written from `tot_pop` (the evaluated
# population plus the carried-over `mez_pop` rows), so the array loaded here
# may hold more rows than the population it replaces -- confirm this is intended.
# pickle can execute arbitrary code on load: only open checkpoints produced by
# this notebook.
with open('Generations/pop_'+str(29)+'.p','rb') as checkpoint:
    pop = pickle.load(checkpoint)

In [10]:
# Main evolutionary loop. The start index 30 pairs with the generation-29
# checkpoint loaded before this cell; change both together when resuming
# from a different generation.
for j in range(30,n_gen+1):
    print('*****************************GENERATION '+str(j)+'*****************************')

    # ---- evaluate every member --------------------------------------------
    fit = list()
    for i in range(len(pop)):
        print('________MEMBER '+str(i+1)+'________')
        fit.append(fitter(pop[i],i,j))
    fit = np.array(fit)

    # Pool the fresh members with the elite carried over from the previous generation.
    tot_fit = np.append(fit,mez_fit,axis=0)
    tot_pop = np.append(pop,mez_pop,axis=0)

    # Checkpoint; context managers close the files every generation.
    with open('Generations/pop_'+str(j)+'.p','wb') as fh:
        pickle.dump(tot_pop,fh)
    with open('Generations/fit_'+str(j)+'.p','wb') as fh:
        pickle.dump(tot_fit,fh)

    # ---- selection ----------------------------------------------------------
    # Keep the best half by "up" fitness and the best half by "down" fitness.
    half = int(len(pop)/2)
    by_up = tot_fit[:,0].argsort()
    by_dw = tot_fit[:,1].argsort()

    mez_pop = np.append(tot_pop[by_up][-half:],tot_pop[by_dw][-half:],axis=0)
    mez_fit = np.append(tot_fit[by_up][-half:],tot_fit[by_dw][-half:],axis=0)
    print('Best buying result:'+str(tot_fit[by_up][-1:]))
    print('Best selling result:'+str(tot_fit[by_dw][-1:]))

    # ---- breeding -----------------------------------------------------------
    n_pop = list()
    for i in range(len(pop)):
        # Parents are rows of the elite pool, so draw indices from ITS length;
        # using len(pop) overruns the pool when the population size is odd.
        p1 = rd.randint(0,len(mez_pop))
        p2 = rd.randint(0,len(mez_pop))

        # Uniform crossover: each bit comes from p1 where the mask is set, else from p2.
        h = rd.randint(0,2,adn,'int').astype(bool)
        child = mez_pop[p1]*h + mez_pop[p2]*np.invert(h)

        # Flip each bit independently with probability 0.02.
        mut = rd.rand(adn) < 0.02
        child = (mut + child)%2

        n_pop.append(child)
    pop = np.array(n_pop)

*****************************GENERATION 30*****************************
________MEMBER 1________
0.30893494458706405 0.42009335407868414
________MEMBER 2________
0.7467567206098703 0.8591930679664229
________MEMBER 3________
0.9979895167659941 0.9992467043314501
________MEMBER 4________
0.9992039945003256 0.998553913863565
________MEMBER 5________
0.9983344899375434 0.9986898991222324
________MEMBER 6________
0.9990646809122958 0.9971746091542664
________MEMBER 7________
0.9982632607279832 0.9990398463754201
________MEMBER 8________
0.998904366880576 0.9963254057364499
________MEMBER 9________
0.9985563041385948 0.9981364144614238
________MEMBER 10________
0.998355005483315 0.9982584232031616
________MEMBER 11________
0.9981348438795248 0.9988959605143525
________MEMBER 12________
0.9982937482937483 0.9999307527179558
________MEMBER 13________
0.9987024221453287 0.9982850609756098
________MEMBER 14________
0.9980082417582418 0.9999288863604039
________MEMBER 15________
0.99811122770199

KeyboardInterrupt: 