In [1]:
# Index of the GPU this notebook should run on.
ID_GPU = 0

import os
import numpy as np
import pandas as pd

# Restrict CUDA to the selected GPU. Keep this assignment above the
# tensorflow/keras imports below: the variable is meant to be in place
# before the deep-learning backend is loaded.
os.environ["CUDA_VISIBLE_DEVICES"] = str(ID_GPU)

import tensorflow as tf
from keras import backend as K

from keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Project-local helpers (data loading / augmentation / post-processing and
# model construction).
from modules.data_processing import make_Xy, make_DA, post_process
from modules.keras_processing import make_model

  return f(*args, **kwds)
  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# --- model / training hyper-parameters -------------------------------------
NB = 12          # size of cube
NUM_EPOCHS = 2000
BATCH_SIZE = 128

NLAYER = 3       # number of consecutive Dense layers
NUNIT = 150      # number of units in Dense layers
PDROPOUT = 0.5   # dropout probability between Dense layers

# --- TensorFlow session ------------------------------------------------------
# allow_growth lets the session grow its GPU memory usage on demand; the
# session is then registered as the one Keras uses.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
K.set_session(sess)

# --- data --------------------------------------------------------------------
# train features / targets, submission features and submission ids
# (read from csv by make_Xy)
X, y, Xsub, id_sub = make_Xy()

# --- keras callbacks ---------------------------------------------------------
# Halve the learning rate after 10 epochs without improvement of the training
# loss, never going below 1e-5.
learning_rate_reduction = ReduceLROnPlateau(
    monitor='loss',
    factor=0.5,
    patience=10,
    min_lr=1e-5,
    verbose=1,
)

# Stop training after 100 epochs without improvement of the training loss.
early_stopping = EarlyStopping(monitor='loss', patience=100)

In [3]:
### Train model

N = y.shape[0]  # number of samples in train
Nsub = Xsub.shape[0]  # number of samples in submission
dimX = X.shape[1]  # number of features in X

ind = np.tile(True, N)  # all indices in train used for training
# learn in log space
# NOTE: this overwrites y, so re-running this cell without re-running the
# data-loading cell first would apply the log a second time.
y = np.log(1+y)

model = make_model(dimX, NB, NLAYER, NUNIT, PDROPOUT)   # create keras model

# The data is re-augmented every epoch, so fit() is called once per epoch.
# Passing the callbacks to fit() does not work in that setup: Keras calls
# on_train_begin() at the start of every fit(), which resets the patience
# counters of ReduceLROnPlateau / EarlyStopping, so with one epoch per call
# they can never trigger. Instead, initialise them once here and feed them the
# per-epoch loss manually so their state persists across fit() calls.
plateau_callbacks = [learning_rate_reduction, early_stopping]
for callback in plateau_callbacks:
    callback.set_model(model)
    callback.on_train_begin()

for iepoch in range(NUM_EPOCHS):
    X_DA, y_DA, cube_DA = make_DA(X, y=y, ind=ind)  # random data-augmentation at each epoch

    history = model.fit([cube_DA, X_DA], y_DA, shuffle=True, verbose=True, batch_size=BATCH_SIZE,
                        epochs=iepoch+1, initial_epoch=iepoch)

    # Each fit() call runs exactly one epoch, so the last recorded loss is
    # this epoch's training loss (the quantity both callbacks monitor).
    epoch_logs = {'loss': history.history['loss'][-1]}
    for callback in plateau_callbacks:
        callback.on_epoch_end(iepoch, epoch_logs)

    # EarlyStopping signals through model.stop_training; fit() resets that
    # flag on its next call, so it has to be honoured here.
    if getattr(model, 'stop_training', False):
        print('Early stopping at epoch %d' % (iepoch + 1))
        break

Epoch 1/1
  640/14400 [>.............................] - ETA: 2:14:01 - loss: 1.8340

KeyboardInterrupt: 

In [5]:
### Compute submission

# Test-time data augmentation: predict on N_TEST_AUG independently augmented
# copies of the submission set and store each pass in its own slice.
N_TEST_AUG = 100
pred_DA = np.zeros((N_TEST_AUG, 6*Nsub, 2))
for iDA in range(N_TEST_AUG):
    X_DA, y_DA, cube_DA = make_DA(Xsub, sub=True)  # fresh augmentation for this pass
    pred_DA[iDA] = model.predict([cube_DA, X_DA])

# post process the raw network outputs
pred_DA = post_process(pred_DA)

# Average over the augmentation passes, then split the 6*Nsub rows into
# 6 groups of Nsub (the axis permutations, per the original note) and average
# those too, leaving one row of 2 targets per submission sample.
pred_DA = pred_DA.mean(0).reshape((6, -1, 2)).mean(0)

# set as submission
submission = pred_DA

In [12]:
# Build the submission table (id + the two predicted targets) and write it out.
# Guarded so the cell can be re-run: once `submission` has already been turned
# into the 3-column DataFrame, concatenating the ids again would produce a
# 4-column frame and the column assignment below would raise.
if not isinstance(submission, pd.DataFrame):
    submission = pd.concat([pd.DataFrame(id_sub), pd.DataFrame(submission)], axis=1)
    submission.columns = ['id','formation_energy_ev_natom', 'bandgap_energy_ev']

# to_csv does not create missing directories; make sure the target exists.
os.makedirs('output', exist_ok=True)
submission.to_csv('output/submission.csv', index=False)