In [26]:
import warnings
from importlib import reload

with warnings.catch_warnings():
    ## ignore future warnings from too-advanced numpy version
    warnings.simplefilter("ignore")    
    from keras.models import Sequential
    from keras.layers import Dense, Activation, Dropout
    from keras.models import load_model
    import tensorflow as tf
import pandas as pd, numpy as np

In [46]:
# `energy` provides the data loaders, config types and helpers used by the
# cells below (presumably a project-local module -- confirm).
import energy
# Re-execute the module so edits made to it since the first import are picked
# up without restarting the kernel. Binding the result to `_` keeps the module
# repr out of the cell output.
_ = reload(energy)
# Import these names only after the reload so they refer to the freshly
# loaded definitions.
from energy import MissingStrategy, ModelingConfigA

In [None]:
# Passed to the `energy.Data` loaders (Merged, Weather, Meter) below;
# presumably a cap on the number of rows read -- confirm in `energy`.
MAX_ROWS = 8_000_000

# Predictor columns grouped by origin; `predictor_cols` is their
# concatenation in this order.
time_cols = ['second', 'minute', 'day', 'week']
weather_cols = [
    'cloud_coverage',
    'dew_temperature',
    'precip_depth_1_hr',
    'sea_level_pressure',
    'wind_direction',
    'wind_speed',
]
other_cols = ['square_feet', 'meter', 'year_built', 'primary_use']
predictor_cols = [*time_cols, *weather_cols, *other_cols]

# Configuration object handed to `energy.prepare_data` and used for the
# trained-model path further down.
modeling_config = ModelingConfigA(
    paths=energy.ModelingPaths(),
    missing_strategy=MissingStrategy.USE_COLUMN_MEAN,
)

In [4]:

# Prefer the pre-merged training frame; if reading it raises
# FileNotFoundError, rebuild it from the three separate sources instead.
# NOTE(review): the rebuilt frame is not written back in this cell, so the
# fallback runs again on every fresh kernel unless the frame is saved elsewhere.
try:
    combined_df = energy.Data.Merged(MAX_ROWS).read_train()
except FileNotFoundError:
    # Fallback: construct each loader, then merge their frames into one.
    weather = energy.Data.Weather(MAX_ROWS)
    meter = energy.Data.Meter(MAX_ROWS)
    buildings = energy.Data.Buildings()
    combined_df = energy.merge_data(
        weather_df=weather.train, 
        meter_df=meter.train,
        buildings_df=buildings.data)
    

In [5]:
# Only derive the time columns when the frame does not already have a
# 'minute' column, which makes this cell safe to re-run.
# The helper's return value is discarded, so it presumably adds the columns
# to `combined_df` in place -- confirm in `energy`.
if 'minute' not in combined_df:
    energy.extract_time_columns_from_timestamps(combined_df)

In [9]:
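# Optional, run manually: persist the merged frame so later sessions can
# presumably load it through `energy.Data.Merged(...).read_train()` instead of
# re-merging (assumes `train_file` is the path `read_train` reads -- confirm).
# combined_df.to_csv(energy.Data.Merged.train_file, index=False)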

In [10]:
# Show the first five rows of the merged frame as a quick sanity check of
# its columns and values.
combined_df.head(n=5)

Unnamed: 0,building_id,meter,timestamp,meter_reading,site_id,primary_use,square_feet,year_built,floor_count,air_temperature,cloud_coverage,dew_temperature,precip_depth_1_hr,sea_level_pressure,wind_direction,wind_speed,minute,second,week,day
0,building_id,meter,timestamp,meter_reading,site_id,primary_use,square_feet,year_built,floor_count,air_temperature,cloud_coverage,dew_temperature,precip_depth_1_hr,sea_level_pressure,wind_direction,wind_speed,minute,second,week,day
1,0,0,2016-01-01 00:00:00,0.0,0,Education,7432,2008.0,,25.0,6.0,20.0,,1019.7,0.0,0.0,0,0,53,1
2,1,0,2016-01-01 00:00:00,0.0,0,Education,2720,2004.0,,25.0,6.0,20.0,,1019.7,0.0,0.0,0,0,53,1
3,2,0,2016-01-01 00:00:00,0.0,0,Education,5376,1991.0,,25.0,6.0,20.0,,1019.7,0.0,0.0,0,0,53,1
4,3,0,2016-01-01 00:00:00,0.0,0,Education,23685,2002.0,,25.0,6.0,20.0,,1019.7,0.0,0.0,0,0,53,1


In [7]:
# Turn the merged training frame into model inputs and targets.
X_train, y_train = energy.prepare_data(
    df=combined_df,
    predictor_cols=predictor_cols,
    response_col='meter_reading',
    config=modeling_config,
    is_train=True,
)

# Fail fast if any NaN survived preparation.
assert not np.any(np.isnan(X_train))
assert not np.any(np.isnan(y_train))

In [9]:
model = Sequential()


def add_relu_with_scaled_dropout(dim):
    """Append a ReLU Dense layer of width `dim` to the global `model`,
    followed by a Dropout layer whose rate is 1/dim."""
    for layer in (Dense(dim, activation='relu'), Dropout(1 / dim)):
        model.add(layer)


# Input block: written out by hand because only the first layer is given
# `input_dim` (the number of columns in X_train).
model.add(Dense(12, activation='relu', input_dim=X_train.shape[1]))
model.add(Dropout(1/12))

# Narrowing hidden blocks, each followed by dropout at rate 1/width.
for dim in (10, 8, 6):
    add_relu_with_scaled_dropout(dim)

# Tail without dropout, narrowing to the single ReLU output unit.
for tail_dim in (4, 2, 1):
    model.add(Dense(tail_dim, activation='relu'))

In [58]:
# Mini-batch size, reused by the fit call.
batch_size = 10_000

# Metric built for batches of `batch_size` rows and reported during training.
train_rmsle = energy.define_rmsle(batch_size)

# The optimised loss is MSLE; `train_rmsle` is only tracked as a metric.
model.compile(
    optimizer='Nadam',
    loss='mean_squared_logarithmic_error',
    metrics=[train_rmsle],
)

In [59]:
# Train for six epochs; the returned History object is the cell's output.
model.fit(x=X_train, y=y_train, batch_size=batch_size, epochs=6)

Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


<keras.callbacks.callbacks.History at 0x7f2f424bf828>

In [16]:
# Persist the trained network at the path held by the modeling config; the
# evaluation section loads it back from the same path.
model.save(filepath=modeling_config.paths.trained_model)

### Evaluate model.

In [18]:
# Load the validation split of the merged data, with the same MAX_ROWS
# argument used for the training split.
# NOTE(review): unlike the training frame, no time-column extraction is applied
# here -- presumably the stored validation data already has them; confirm.
validation_df = energy.Data.Merged(MAX_ROWS).read_val()

In [19]:
# Prepare the validation frame with the same predictors, response column and
# config as training, but with `is_train=False`.
X_val, y_val = energy.prepare_data(
    df=validation_df,
    predictor_cols=predictor_cols,
    response_col='meter_reading',
    config=modeling_config,
    is_train=False,
)

# Fail fast if any NaN survived preparation.
assert not np.any(np.isnan(X_val))
assert not np.any(np.isnan(y_val))

In [21]:
# Metric built for the full validation set (one row count per evaluation).
val_rmsle = energy.define_rmsle(len(X_val))

# Reload the saved network from disk, replacing the in-memory `model`. The
# custom metric name 'rmsle' stored with the model is resolved to `val_rmsle`.
model = load_model(
    filepath=modeling_config.paths.trained_model,
    custom_objects=dict(rmsle=val_rmsle),
)

In [23]:
# Keep the first entry of every prediction row, as a plain list of scalars.
y_pred_val = list(model.predict(X_val)[:, 0])

In [49]:
# Same flattening as for validation: the first entry of every prediction row.
y_pred_train = list(model.predict(X_train)[:, 0])

In [55]:
# `train_rmsle` was built for batches of `batch_size` rows, but here the whole
# training set is scored at once. Build a metric sized to the full training
# set, mirroring how `val_rmsle` is sized with `X_val.shape[0]`.
# NOTE(review): assumes the argument of `energy.define_rmsle` is the number of
# rows being scored, as the validation usage suggests -- confirm in `energy`.
# A separate name is used so the metric compiled into the model is not rebound.
full_train_rmsle = energy.define_rmsle(X_train.shape[0])

# The metric functions return graph tensors; evaluate them in a short-lived
# session, passed explicitly rather than relying on the implicit default.
with tf.Session() as sess:
    val_rmsle_score = val_rmsle(y_true=y_val, y_pred=y_pred_val).eval(session=sess)
    train_rmsle_score = full_train_rmsle(
        y_true=y_train, y_pred=y_pred_train).eval(session=sess)

In [57]:
# Report both scores to two decimals, training first.
print(f"train RMSLE: {train_rmsle_score:.2f}")
print(f"validation RMSLE: {val_rmsle_score:.2f}")

train RMSLE: 1.74
validation RMSLE: 2.07
