In [8]:
import os
os.environ['KERAS_BACKEND' ] = 'tensorflow'
os.environ['MKL_THREADING_LAYER'] = 'GNU'
import keras as ks
from keras.models import Sequential
from keras.layers import Dense
from keras.callbacks import TensorBoard
import keras
import pandas as pd
import numpy as np
from keras import optimizers
from keras.utils import to_categorical
from keras.models import Model
from keras.layers import Input, Dense
from keras.layers.normalization import BatchNormalization
from keras.layers.core import Dropout, Activation
from sklearn.preprocessing import MinMaxScaler
import time 

from keras.layers.merge import concatenate
def main():

    # Set seed for reproducibility
    NAME = "Shared_input_multi_output_single_merge"
    np.random.seed(0)
    print("Loading data...")
    # Load the data from the CSV files
    training_data = pd.read_csv('numerai_training_data.csv', header=0)
    print('original train data shape: {},\t{} \n\n \t:'.format(training_data.shape[0],training_data.shape[1]))

    prediction_data = pd.read_csv('numerai_tournament_data.csv', header=0)
    print('original prediction data shape: {},\t{} \n\n \t:'.format(prediction_data.shape[0],prediction_data.shape[1]))
    
    complete_training_data = pd.concat([training_data, prediction_data])
    print('total training / valdation shape {}'.format(complete_training_data))
    
    # Transform the loaded CSV data into numpy arrays

    features = [f for f in list(training_data) if "feature" in f]
    print(features)

    X = training_data[features]

    mini= MinMaxScaler(feature_range=(0,1)) 
    X = mini.fit_transform(X)

    Y = training_data["target_bernie"]

    Y= keras.utils.to_categorical(Y,2) 

    x_prediction = prediction_data[features]
    x_prediction = mini.fit_transform(x_prediction)


    ids = prediction_data["id"]  

    batch_size = 710

    dropout = 0.2

    dropout = 0.2
    visible = Input(shape=(50,))
    m1 = Dense(5, activation='relu')(visible)
    m1 = Dense(5, activation='relu')(m1)
    m1 = Dropout(dropout)(m1)
    
    m2 = Dense(5, activation='relu')(visible)
    m2 = Dense(5, activation='relu')(m2)
    
    m3 = Dense(5, activation='relu')(visible)
    m3 = Dense(5, activation='relu')(m3)
    m3 = Dropout(dropout)(m3)
    
    
    m4 = Dense(10, activation='relu')(m3)
    m4 = Dense(10, activation='relu')(m4)
    output1 = Dense(2, activation='sigmoid')(m4)
    
    #second shared input multi merge/output
    m5 = Dense(10, activation='relu')(output1)
    m5 = Dense(10, activation='relu')(m5)
    m6 = Dense(10, activation='relu')(m5)
    m6 = Dense(10,activation='relu')(m6)
    
    m7 = Dense(50, activation='relu')(m6)
    output = Dense(2, activation='sigmoid')(m7)
    
    sig1 = Dense(20, activation='relu')(output)
    sig1 = Dense(20, activation='relu')(sig1)
    sig2 = Dense(20, activation='relu')(sig1)
    
    sig2 = Dense(20, activation='relu')(visible) 
    sig2 = Dense(20, activation='relu')(sig2)
    sig3 = Dense(20, activation='relu')(sig2)
    sig3 = Dense(10, activation='relu')(sig3)
    
    sig4 = Dense(10, activation='relu')(sig3)
    sig4 = Dense(10, activation='relu')(sig4)
    sig_output1 = Dense(2, activation='sigmoid')(sig4)
    
    #second shared input multi sig_merge/sig_output
    sig5 = Dense(5, activation='relu')(sig_output1)
    merge1 = concatenate([m7,sig5],axis=1)
    sig_output = Dense(2, activation='sigmoid')(merge1)

    model = Model(inputs=visible, outputs=sig_output)   
    model.compile(loss='binary_crossentropy',optimizer='rmsprop')
    model.summary()
    tensorboard = TensorBoard(log_dir="logs/{}".format(NAME))
    model.fit(X,Y,batch_size=batch_size,epochs=30,validation_split=0.33,callbacks=[tensorboard])
    

    y_prediction = model.predict(x_prediction)
    evaluate = model.evaluate(x_prediction,y_prediction)
    
    eras = prediction_data.era.unique()
    count = 0
    count_consistent = 0
        
    for era in eras:
        count += 1
        current_valid_data = prediction_data[prediction_data.era==era]
        features = [f for f in list(complete_training_data) if "feature" in f]
        X_valid = current_valid_data[features]
        Y_valid = current_valid_data["target_bernie"]
        loss = evaluate
        if (loss < -np.log(.5)):
            consistent = True
            count_consistent += 1
        else:
            consistent = False
        print("{}: loss - {} consistent: {}".format(era, loss, consistent))
    print ("Consistency: {}".format(count_consistent/count))
        
    
    probabilities = y_prediction[:, 1]
    print("- probabilities:", probabilities[1:6])

    # We can see the probability does seem to be good at predicting the
    # true target correctly.
    print("- target:", prediction_data['target_bernie'][1:6])
    print("- rounded probability:", [np.round(p) for p in probabilities][1:6])

    # But overall the accuracy is very low.
    correct = [
        np.round(x) == y
        for (x, y) in zip(probabilities, prediction_data['target_bernie'])
    ]
    print("- accuracy: ", sum(correct) / float(prediction_data.shape[0]))

    tournament_corr = np.corrcoef(prediction_data['target_bernie'],
                                  prediction_data['target_elizabeth'])
    print("- bernie vs elizabeth corr:", tournament_corr)
    # You can see that target_elizabeth is accurate using the bernie model as well.
    correct = [
        np.round(x) == y
        for (x, y) in zip(probabilities, prediction_data['target_elizabeth'])
    ]
    print("- elizabeth using bernie:",
          sum(correct) / float(prediction_data.shape[0]))

    # Numerai measures models on logloss instead of accuracy. The lower the logloss the better.
    # Numerai only pays models with logloss < 0.693 on the live portion of the tournament data.)

    print("- validation logloss:",
          model.evaluate(x_prediction,y_prediction))
    
    results = y_prediction[:, 1]
    results_df = pd.DataFrame(data={'probability_bernie':results})

    joined = pd.DataFrame(ids).join(results_df)
    pd.DataFrame(joined[:5])


    print("Writing predictions to predictions.csv")
    path = 'predictions_{:}_{}_1'.format(time.strftime("%Y-%m-%d_%Hh%Mm%Ss", time.gmtime()),NAME) + '.csv'
    print()
    print("Writing predictions to " + path.strip())
    joined.to_csv(path,float_format='%.15f', index=False)

if __name__ == '__main__':

    main()

Loading data...
original train data shape: 502732,	60 

 	:
original prediction data shape: 333925,	60 

 	:
total training / valdation shape                       id   era data_type  feature1  feature2  feature3  \
0       n0003126ff2349f6  era1     train   0.54836   0.31077   0.37524   
1       n003d773d29b57ec  era1     train   0.34712   0.40275   0.42747   
2       n0074df2dc6810b6  era1     train   0.50871   0.48639   0.47544   
3       n0090630f530903e  era1     train   0.61363   0.40268   0.53779   
4       n00af19089546fe9  era1     train   0.30704   0.47273   0.54495   
5       n011d2da12b1e735  era1     train   0.52336   0.59136   0.60506   
6       n014149cadeee55d  era1     train   0.30875   0.62510   0.35229   
7       n0148a4dcf539aba  era1     train   0.40632   0.30590   0.43227   
8       n015855690d31908  era1     train   0.48193   0.27060   0.50228   
9       n0169447f4d6a10e  era1     train   0.51191   0.53663   0.42109   
10      n01703ba4eff8fe7  era1     train   0

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_7 (InputLayer)            (None, 50)           0                                            
__________________________________________________________________________________________________
dense_170 (Dense)               (None, 5)            255         input_7[0][0]                    
__________________________________________________________________________________________________
dense_171 (Dense)               (None, 5)            30          dense_170[0][0]                  
__________________________________________________________________________________________________
dropout_14 (Dropout)            (None, 5)            0           dense_171[0][0]                  
__________________________________________________________________________________________________
dense_172 