In [72]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import make_scorer
import NUTILS as nutils

In [73]:
# Load the prepared training table and the table used for the final submission
df_t = nutils.load_data('./train_prepared.csv')
df_s = nutils.load_data('./test_prepared.csv')

# Keep 20% of the labelled rows aside for evaluation; the fixed seed makes the split repeatable
train_set, test_set = train_test_split(df_t, test_size=0.2, random_state=42)


def _to_features(frame):
    """Run `frame` through nutils.drop_features and then nutils.encode."""
    return nutils.encode(nutils.drop_features(frame))


# Features and both targets for the complete labelled table
X_full = _to_features(df_t)
y_form_full = df_t["formation_energy_ev_natom"]
y_band_full = df_t["bandgap_energy_ev"]

# Feature matrices for the training split, the held-out split and the submission table
X_train = _to_features(train_set)
X_test = _to_features(test_set)
X_submt = _to_features(df_s)

# Targets for the training split
y_form = train_set["formation_energy_ev_natom"]
y_band = train_set["bandgap_energy_ev"]

# Targets for the held-out split
y_form_test = test_set["formation_energy_ev_natom"]
y_band_test = test_set["bandgap_energy_ev"]

In [74]:
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, BatchNormalization
from keras.optimizers import SGD
import keras.losses as losses

# For custom metrics
import keras.backend as K

def rmsle(y_actual, y_pred):
    """Square root of Keras' mean squared logarithmic error.

    Passed to ``compile`` in this notebook as both the loss and the metric.

    Args:
        y_actual: ground-truth target values.
        y_pred: values predicted by the model.

    Returns:
        ``K.sqrt`` applied to the tensor returned by
        ``losses.mean_squared_logarithmic_error(y_actual, y_pred)``.

    NOTE(review): the root is taken on whatever the Keras loss returns
    (per-sample values according to the Keras docs -- confirm for the
    installed version), i.e. before any averaging over the batch. The number
    Keras reports is then a mean of roots, not the root of the overall mean.
    """
    squared_log_error = losses.mean_squared_logarithmic_error(y_actual, y_pred)
    return K.sqrt(squared_log_error)

def _rmsle_numpy(y_actual, y_pred):
    """Root mean squared logarithmic error computed with NumPy.

    scikit-learn calls a scorer's function with array-likes and needs a plain
    number back. The Keras-backend ``rmsle`` returns a backend tensor, so it
    cannot be wrapped by ``make_scorer`` directly.

    Args:
        y_actual: array-like of ground-truth target values.
        y_pred: array-like of predicted values, same length as ``y_actual``.

    Returns:
        float: sqrt(mean((log(1 + y_pred) - log(1 + y_actual)) ** 2)).
    """
    y_actual = np.asarray(y_actual, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    # Floor the inputs at the Keras epsilon before taking logs, so that
    # negative predictions cannot produce NaN
    floor = K.epsilon()
    log_diff = np.log1p(np.maximum(y_pred, floor)) - np.log1p(np.maximum(y_actual, floor))
    return float(np.sqrt(np.mean(np.square(log_diff))))


# greater_is_better=False: lower RMSLE is better, so the scorer negates the value
objective = make_scorer(_rmsle_numpy, greater_is_better=False)

In [75]:
# Network for the formation-energy target: eight Dense -> BatchNormalization -> Dropout
# stages (widths 30, 30, 20, 20, 10, 10, 5, 5) followed by a single-unit Dense output.
model_f = Sequential()
for depth, width in enumerate((30, 30, 20, 20, 10, 10, 5, 5)):
    # Only the very first layer is given the number of input features
    first_layer_args = {'input_dim': X_train.shape[1]} if depth == 0 else {}
    model_f.add(Dense(width, activation='relu', kernel_initializer='he_uniform', **first_layer_args))
    model_f.add(BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001, center=True, scale=True))
    model_f.add(Dropout(0.5))
model_f.add(Dense(1))

# NOTE(review): this SGD instance is created but not passed to compile();
# the model is compiled with the 'rmsprop' optimizer string below.
sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model_f.compile(
    optimizer='rmsprop',
    loss=rmsle,
    metrics=[rmsle],
)

# Train on the training split
history_f = model_f.fit(
    X_train.values,
    y_form.values,
    epochs=10,
    batch_size=10,
)

# evaluate() returns [loss, metric]; index 1 is the rmsle metric on the held-out split
score_f = model_f.evaluate(X_test.values, y_form_test.values, batch_size=10)
print("Test ", model_f.metrics_names[1], ": ", score_f[1])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test  rmsle :  0.168216431669


In [76]:
# Network for the band-gap target: eight Dense -> BatchNormalization -> Dropout
# stages (widths 30, 30, 20, 20, 10, 10, 5, 5) followed by a single-unit Dense output.
model_b = Sequential()
for depth, width in enumerate((30, 30, 20, 20, 10, 10, 5, 5)):
    # Only the very first layer is given the number of input features
    first_layer_args = {'input_dim': X_train.shape[1]} if depth == 0 else {}
    model_b.add(Dense(width, activation='relu', kernel_initializer='he_uniform', **first_layer_args))
    model_b.add(BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001, center=True, scale=True))
    model_b.add(Dropout(0.5))
model_b.add(Dense(1))

# NOTE(review): this SGD instance is created but not passed to compile();
# the model is compiled with the 'rmsprop' optimizer string below.
sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model_b.compile(optimizer='rmsprop',
                loss=rmsle,
                metrics=[rmsle])

# Train on the training split
history_b = model_b.fit(X_train.values, y_band.values,
                        epochs=10,
                        batch_size=10)

# Evaluate with the same batch size as the formation-energy model. The original
# batch_size=1 ran one forward pass per row. Keras is expected to average
# per-sample values weighted by batch size, so the reported score should not
# change -- only the run time.
score_b = model_b.evaluate(X_test.values, y_band_test.values, batch_size=10)
# evaluate() returns [loss, metric]; index 1 is the rmsle metric
print("Test ", model_b.metrics_names[1], ": ", score_b[1])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test  rmsle :  0.277089487016


In [77]:
# Unweighted average of the two held-out metric values (formation energy and band gap)
mean_score = (score_f[1] + score_b[1]) / 2
print(mean_score)

0.222652959342


In [78]:
# Predict both targets for the submission feature matrix
submit_pred_form = model_f.predict(X_submt.values)
submit_pred_band = model_b.predict(X_submt.values)

# Place the two prediction vectors side by side as an (n_rows, 2) array.
# reshape(-1, 1) infers the row count from the data instead of hard-coding 600,
# so the cell keeps working if the submission table changes size.
submission = np.concatenate(
    (submit_pred_form.reshape(-1, 1), submit_pred_band.reshape(-1, 1)),
    axis=1,
)
submit_df = pd.DataFrame(submission, columns=['formation_energy_ev_natom', "bandgap_energy_ev"])

# Floor negative predictions at zero
submit_df = submit_df.clip(lower=0)

# Ids are assigned sequentially from 1 in row order, one per predicted row
submit_df.insert(0, 'id', range(1, len(submit_df) + 1))

# Save to file
submit_df.to_csv("./k_dnn_1.csv", index=False)