In [1]:
import math
import time
import random
random.seed(11)
import numpy as np
np.random.seed(11)
import pandas as pd
import tensorflow as tf
tf.set_random_seed(11)
from sklearn.utils import shuffle
from tqdm import tqdm
from model import Model
from sklearn.model_selection import GroupKFold
from sklearn.model_selection import KFold

# Hyper-parameters are registered as TensorFlow flags so the same names work
# from the notebook and from the command line.
# NOTE: re-running this cell in a live kernel will try to register the same
# flag names a second time.
tf.app.flags.DEFINE_integer('numepochs', 30, "")   # passes over the training array
tf.app.flags.DEFINE_integer('batchsize', 256, "")  # rows per training step
tf.app.flags.DEFINE_boolean('denoise', True, "")   # forwarded to Model(denoise=...)

# Handle through which the flag values above are read (FLAGS.numepochs, ...).
FLAGS = tf.app.flags.FLAGS

# Directory holding the Numerai CSV files.
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
datadir = '/home/lacin/data/'

In [2]:
# Pre-computed feature matrices stored as .npy files (100 columns each according
# to the shapes displayed further down).
# `to` is the tournament matrix, `tr` the training matrix.
# NOTE(review): how these files were produced is not shown in this notebook.
to, tr = (
    np.load(npy_path)
    for npy_path in (
        '../../../tourwithcnn50w_100col.npy',
        '../../../trainwithcnn50w_100col.npy',
    )
)

In [3]:
# Cut the tournament matrix into three consecutive row ranges:
#   va -> first 73865 rows, li -> last 6798 rows, te -> everything in between.
# These counts equal the validation / live row counts of the tournament CSV shown
# in the recorded output below.
# NOTE(review): assumes `to` is ordered validation, test, live -- confirm.
va, te, li = to[:73865], to[73865:-6798], to[-6798:]

In [4]:
# Sanity check: shapes of the live / validation / test / train feature matrices.
tuple(matrix.shape for matrix in (li, va, te, tr))

((6798, 100), (73865, 100), (268163, 100), (535663, 100))

In [4]:
# Raw Numerai CSV files, read from `datadir`.
df_train = pd.read_csv(datadir + 'numerai_training_data.csv')
tour = pd.read_csv(datadir + 'numerai_tournament_data.csv')

# Partition the tournament rows by the value of their `data_type` column.
df_valid = tour.loc[tour['data_type'] == 'validation']
df_test = tour.loc[tour['data_type'] == 'test']
df_live = tour.loc[tour['data_type'] == 'live']

In [5]:
# Drop the first 50 rows of the training frame (the "window size").
# NOTE(review): presumably this aligns df_train row-for-row with `tr`; the
# recorded shapes show both with 535663 rows -- confirm.
# WARNING: this rebinds df_train to a slice of itself, so running the cell a
# second time removes another 50 rows. Reload the CSV before re-running.
df_train = df_train.iloc[50:]

In [7]:
# Sanity check: shapes of the train / validation / test / live data frames.
tuple(frame.shape for frame in (df_train, df_valid, df_test, df_live))

((535663, 54), (73865, 54), (268163, 54), (6798, 54))

In [6]:
# ---------------------------------------------------------------------------
# Train the (denoising) autoencoder defined in model.py on `tr`, monitor its
# loss on `va`, then encode the train / validation / test / live matrices with
# the trained network and store the codes in a single .npz archive.
# ---------------------------------------------------------------------------

# Input placeholder: one row per sample, one column per feature of `tr`.
num_features = tr.shape[1]
features = tf.placeholder(tf.float32, shape=[None, num_features], name='features')

# Two Model instances built under the same variable scope; the second one is
# created with reuse=True so it shares the variables of the first and differs
# only in the `is_training` argument.
with tf.variable_scope('model'):
    train_model = Model(features, denoise=FLAGS.denoise, is_training=True)

with tf.variable_scope('model', reuse=True):
    test_model = Model(features, denoise=FLAGS.denoise, is_training=False)


def _encode_in_chunks(sess, data, splits, code_width=48):
    """Run `test_model.z` over `data` one index chunk at a time.

    Args:
        sess: session used to evaluate the graph.
        data: 2-D array whose rows are fed to the `features` placeholder.
        splits: iterable of (train_index, test_index) pairs as yielded by a
            scikit-learn splitter. Only the second element is used; it selects
            the rows encoded in that step.
        code_width: number of columns of the result. 48 is presumably the width
            of `test_model.z` (defined in model.py, not shown) -- confirm.

    Returns:
        A float64 array of shape (len(data), code_width). Rows never selected
        by any chunk stay zero; with KFold/GroupKFold every row is selected
        exactly once.
    """
    codes = np.zeros([len(data), code_width])
    for _ignored_index, chunk_index in splits:
        codes[chunk_index] = sess.run(test_model.z, feed_dict={features: data[chunk_index]})
    return codes


# Best validation loss seen so far and number of epochs since it last improved.
# They are only reported in the progress bar; nothing stops training early.
best = None
wait = 0

summary_op = tf.summary.merge_all()
logdir = 'logs/{}'.format(int(time.time()))
# summary_op=None: the supervisor does not compute summaries on its own; they
# are evaluated and written explicitly once per epoch below.
supervisor = tf.train.Supervisor(logdir=logdir, summary_op=None)

config = tf.ConfigProto()
config.gpu_options.allocator_type = 'BFC'

# FIX: `config` used to be built and then dropped because managed_session() was
# called without arguments; it is now handed to the session.
with supervisor.managed_session(config=config) as sess:
    summary_writer = tf.summary.FileWriter(logdir, graph=sess.graph)

    print('Training model with {} parameters...'.format(train_model.num_parameters))
    with tqdm(total=FLAGS.numepochs) as pbar:
        for epoch in range(FLAGS.numepochs):
            # Fresh row order every epoch.
            X_train_epoch = shuffle(tr)
            # Integer division: a trailing partial batch is skipped.
            num_batches = len(X_train_epoch) // FLAGS.batchsize
            losses = []
            for batch_index in range(num_batches):
                batch_start = batch_index * FLAGS.batchsize
                batch_end = batch_start + FLAGS.batchsize
                X_train_batch = X_train_epoch[batch_start:batch_end]

                _, loss = sess.run(
                    [train_model.train_step, train_model.loss],
                    feed_dict={features: X_train_batch},
                )
                losses.append(loss)
            loss_train = np.mean(losses)

            # Validation loss and summaries are computed on the whole of `va`
            # in a single run.
            loss_valid, summary_str = sess.run(
                [test_model.loss, summary_op],
                feed_dict={features: va},
            )
            if best is None or loss_valid < best:
                best = loss_valid
                wait = 0
            else:
                wait += 1

            summary_writer.add_summary(summary_str, epoch)
            summary_writer.flush()
            pbar.set_description(
                '[{}] loss (train): {:.8f}, loss (valid): {:.8f} [best: {:.8f}, wait: {}]'
                .format(epoch, loss_train, loss_valid, best, wait)
            )
            pbar.update()

    summary_writer.close()

    loss_valid = sess.run(test_model.loss, feed_dict={features: va})
    print('Validation loss: {}'.format(loss_valid))

    # --- Encode every data set with the trained network --------------------
    # The splitters below are used purely to cut each matrix into index chunks
    # (their "test" folds); the "train" side of every split is ignored.

    # Group key per training row: the `era` string with its first three
    # characters removed, parsed as an int (presumably "eraN" -> N -- confirm).
    t_Group_eras = df_train["era"].map(lambda x: int(x[3:])).values
    group_kfold = GroupKFold(n_splits=10)
    z_train = _encode_in_chunks(sess, tr, group_kfold.split(tr, None, t_Group_eras))

    v_Group_eras = df_valid["era"].map(lambda x: int(x[3:])).values
    group_kfold2 = GroupKFold(n_splits=10)
    z_valid = _encode_in_chunks(sess, va, group_kfold2.split(va, None, v_Group_eras))

    # The test rows are chunked with a plain KFold, which takes no group key.
    kf = KFold(n_splits=5)
    z_test = _encode_in_chunks(sess, te, kf.split(te, None, None))

    # The live set is encoded in a single run.
    z_live = sess.run(test_model.z, feed_dict={features: li})

    # Archive name and confirmation message depend on the denoise flag.
    if FLAGS.denoise:
        archive_path, saved_message = './denoisingconcnn50w7f15nf.npz', "SavedDEnc"
    else:
        archive_path, saved_message = './autoencoderoncnn50w7f15nf.npz', "SavedEnc"
    np.savez(archive_path, z_train=z_train, z_valid=z_valid, z_test=z_test, z_live=z_live)
    print(saved_message)

Instructions for updating:
Use tf.losses.add_loss instead.
Instructions for updating:
Use tf.losses.get_total_loss instead.
Instructions for updating:
Use tf.losses.get_losses instead.
Instructions for updating:
Use tf.losses.get_regularization_losses instead.
Instructions for updating:
Use tf.losses.add_loss instead.
Instructions for updating:
Use tf.losses.get_total_loss instead.
Instructions for updating:
Use tf.losses.get_losses instead.
Instructions for updating:
Use tf.losses.get_regularization_losses instead.
INFO:tensorflow:Starting standard services.
INFO:tensorflow:Saving checkpoint to path logs/1511525534/model.ckpt
INFO:tensorflow:Starting queue runners.
INFO:tensorflow:model/global_step/sec: 0


  0%|          | 0/30 [00:00<?, ?it/s]

Training model with 59988 parameters...


[5] loss (train): 0.00035465, loss (valid): 0.00056429 [best: 0.00056429, wait: 0]:  20%|██        | 6/30 [01:50<07:17, 18.24s/it]

INFO:tensorflow:model/global_step/sec: 109.676


[11] loss (train): 0.00035395, loss (valid): 0.00046685 [best: 0.00045175, wait: 1]:  40%|████      | 12/30 [03:39<05:25, 18.07s/it]

INFO:tensorflow:model/global_step/sec: 116.308


[18] loss (train): 0.00035359, loss (valid): 0.00042092 [best: 0.00041897, wait: 1]:  63%|██████▎   | 19/30 [05:44<03:17, 17.91s/it]

INFO:tensorflow:model/global_step/sec: 116.1


[25] loss (train): 0.00035347, loss (valid): 0.00039464 [best: 0.00039464, wait: 0]:  87%|████████▋ | 26/30 [07:49<01:11, 17.86s/it]

INFO:tensorflow:model/global_step/sec: 116.916


[29] loss (train): 0.00035370, loss (valid): 0.00037800 [best: 0.00037800, wait: 0]: 100%|██████████| 30/30 [09:01<00:00, 17.98s/it]


Validation loss: 0.0003779316321015358
SavedDEnc
