In [67]:
import numpy as np
import pandas as pd
from sklearn import model_selection
from sklearn import metrics
import tensorflow as tf

In [102]:
train_features = pd.read_csv('dengue_features_train.csv',
                             index_col=[0,1,2])
train_labels = pd.read_csv('dengue_labels_train.csv',
                          index_col=[0,1,2])

In [72]:

def preprocess_data(data, labels):
    
    df = data
    
    # select features we want
    features = ['reanalysis_specific_humidity_g_per_kg', 
                 'reanalysis_dew_point_temp_k', 
                 'station_avg_temp_c', 
                 'station_min_temp_c']
    df = df[features]
    
    # fill missing values
    df.fillna(method='ffill', inplace=True)


    dfl = labels
    
    
    # separate san juan and iquitos
    sjfeats = df.loc['sj']
    iqfeats = df.loc['iq']
    
    sjlabs = dfl.loc['sj']
    iqlabs = dfl.loc['iq']
    
    return sjfeats, iqfeats, sjlabs, iqlabs


In [73]:
sj_train, iq_train, sj_target, iq_target = preprocess_data(train_features, train_labels)



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  downcast=downcast, **kwargs)


In [74]:
sjx_train, sjx_test, sjy_train, sjy_test = model_selection.train_test_split(sj_train, sj_target, 
                                                                    test_size=0.2, random_state=42)

iqx_train, iqx_test, iqy_train, iqy_test = model_selection.train_test_split(iq_train, iq_target, 
                                                                    test_size=0.2, random_state=42)

In [80]:
sj_feature_columns = tf.contrib.learn.infer_real_valued_columns_from_input(sjx_train)

sj_classifier = tf.contrib.learn.DNNRegressor(feature_columns=sj_feature_columns, 
                                            hidden_units=[25, 50, 20, 10],
                                           optimizer=tf.train.ProximalAdagradOptimizer(
                                           learning_rate=0.1,
                                           l1_regularization_strength=0.001)
)


INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_task_type': None, '_task_id': 0, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f8b59f5e2e8>, '_master': '', '_num_ps_replicas': 0, '_num_worker_replicas': 0, '_environment': 'local', '_is_chief': True, '_evaluation_master': '', '_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1.0
}
, '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_secs': 600, '_save_checkpoints_steps': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_model_dir': None}


In [79]:
iq_feature_columns = tf.contrib.learn.infer_real_valued_columns_from_input(iqx_train)

iq_classifier = tf.contrib.learn.DNNRegressor(feature_columns=iq_feature_columns, 
                                            hidden_units=[25, 50, 20, 10],
                                           optimizer=tf.train.ProximalAdagradOptimizer(
                                           learning_rate=0.1,
                                           l1_regularization_strength=0.001)
)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_task_type': None, '_task_id': 0, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f8b59bab048>, '_master': '', '_num_ps_replicas': 0, '_num_worker_replicas': 0, '_environment': 'local', '_is_chief': True, '_evaluation_master': '', '_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1.0
}
, '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_secs': 600, '_save_checkpoints_steps': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_model_dir': None}


In [81]:
sj_classifier.fit(sjx_train, sjy_train, steps=200)


Instructions for updating:
Estimator is decoupled from Scikit Learn interface by moving into
separate class SKCompat. Arguments x, y and batch_size are only
available in the SKCompat class, Estimator will only accept input_fn.
Example conversion:
  est = Estimator(...) -> est = SKCompat(Estimator(...))
Instructions for updating:
Estimator is decoupled from Scikit Learn interface by moving into
separate class SKCompat. Arguments x, y and batch_size are only
available in the SKCompat class, Estimator will only accept input_fn.
Example conversion:
  est = Estimator(...) -> est = SKCompat(Estimator(...))
Instructions for updating:
Please switch to tf.summary.scalar. Note that tf.summary.scalar uses the node name instead of the tag. This means that TensorFlow will automatically de-duplicate summary names based on the scope they are created in. Also, passing a tensor or list of tags to a scalar summary op is no longer supported.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Sav

DNNRegressor(params={'head': <tensorflow.contrib.learn.python.learn.estimators.head._RegressionHead object at 0x7f8b59f5e240>, 'hidden_units': [25, 50, 20, 10], 'feature_columns': (_RealValuedColumn(column_name='', dimension=4, default_value=None, dtype=tf.float64, normalizer=None),), 'optimizer': <tensorflow.python.training.proximal_adagrad.ProximalAdagradOptimizer object at 0x7f8b59bab128>, 'activation_fn': <function relu at 0x7f8b7cc1e840>, 'dropout': None, 'gradient_clip_norm': None, 'embedding_lr_multipliers': None, 'input_layer_min_slice_size': None})

In [82]:
iq_classifier.fit(iqx_train, iqy_train, steps=200)

Instructions for updating:
Estimator is decoupled from Scikit Learn interface by moving into
separate class SKCompat. Arguments x, y and batch_size are only
available in the SKCompat class, Estimator will only accept input_fn.
Example conversion:
  est = Estimator(...) -> est = SKCompat(Estimator(...))
Instructions for updating:
Estimator is decoupled from Scikit Learn interface by moving into
separate class SKCompat. Arguments x, y and batch_size are only
available in the SKCompat class, Estimator will only accept input_fn.
Example conversion:
  est = Estimator(...) -> est = SKCompat(Estimator(...))
Instructions for updating:
Please switch to tf.summary.scalar. Note that tf.summary.scalar uses the node name instead of the tag. This means that TensorFlow will automatically de-duplicate summary names based on the scope they are created in. Also, passing a tensor or list of tags to a scalar summary op is no longer supported.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Sav

DNNRegressor(params={'head': <tensorflow.contrib.learn.python.learn.estimators.head._RegressionHead object at 0x7f8b59f409b0>, 'hidden_units': [25, 50, 20, 10], 'feature_columns': (_RealValuedColumn(column_name='', dimension=4, default_value=None, dtype=tf.float64, normalizer=None),), 'optimizer': <tensorflow.python.training.proximal_adagrad.ProximalAdagradOptimizer object at 0x7f8b5a3615f8>, 'activation_fn': <function relu at 0x7f8b7cc1e840>, 'dropout': None, 'gradient_clip_norm': None, 'embedding_lr_multipliers': None, 'input_layer_min_slice_size': None})

In [83]:
sj_predictions = list(sj_classifier.predict(sjx_test, as_iterable=True))
sj_score = metrics.mean_absolute_error(sjy_test, sj_predictions)
print('SJ Accuracy: {0:f}'.format(sj_score))

Instructions for updating:
Please switch to predict_scores, or set `outputs` argument.
Instructions for updating:
Estimator is decoupled from Scikit Learn interface by moving into
separate class SKCompat. Arguments x, y and batch_size are only
available in the SKCompat class, Estimator will only accept input_fn.
Example conversion:
  est = Estimator(...) -> est = SKCompat(Estimator(...))
INFO:tensorflow:Restoring parameters from /tmp/tmpesml9bwb/model.ckpt-200
SJ Accuracy: 29.168834


In [84]:
iq_predictions = list(iq_classifier.predict(iqx_test, as_iterable=True))
iq_score = metrics.mean_absolute_error(iqy_test, iq_predictions)
print('IQ Accuracy: {0:f}'.format(iq_score))

Instructions for updating:
Please switch to predict_scores, or set `outputs` argument.
Instructions for updating:
Estimator is decoupled from Scikit Learn interface by moving into
separate class SKCompat. Arguments x, y and batch_size are only
available in the SKCompat class, Estimator will only accept input_fn.
Example conversion:
  est = Estimator(...) -> est = SKCompat(Estimator(...))
INFO:tensorflow:Restoring parameters from /tmp/tmpt485j0eg/model.ckpt-200
IQ Accuracy: 6.345013


In [85]:
def preprocess_data_test(data):
    
    df = data
    
    # select features we want
    features = ['reanalysis_specific_humidity_g_per_kg', 
                 'reanalysis_dew_point_temp_k', 
                 'station_avg_temp_c', 
                 'station_min_temp_c']
    df = df[features]
    
    # fill missing values
    df.fillna(method='ffill', inplace=True)

    
    
    # separate san juan and iquitos
    sjfeats = df.loc['sj']
    iqfeats = df.loc['iq']

    return sjfeats, iqfeats

In [86]:
test_features = pd.read_csv('dengue_features_test.csv',
                          index_col=[0,1,2])

In [87]:
sj_test, iq_test = preprocess_data_test(test_features)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  downcast=downcast, **kwargs)


In [101]:
sj_test_pred = list(sj_classifier.predict(sj_test, as_iterable=True))
iq_test_pred = list(iq_classifier.predict(iq_test, as_iterable=True))

sj_test_pred = list(map(int, sj_test_pred))
iq_test_pred = list(map(int, iq_test_pred))

Instructions for updating:
Please switch to predict_scores, or set `outputs` argument.
Instructions for updating:
Estimator is decoupled from Scikit Learn interface by moving into
separate class SKCompat. Arguments x, y and batch_size are only
available in the SKCompat class, Estimator will only accept input_fn.
Example conversion:
  est = Estimator(...) -> est = SKCompat(Estimator(...))
INFO:tensorflow:Restoring parameters from /tmp/tmpesml9bwb/model.ckpt-200
Instructions for updating:
Please switch to predict_scores, or set `outputs` argument.
Instructions for updating:
Estimator is decoupled from Scikit Learn interface by moving into
separate class SKCompat. Arguments x, y and batch_size are only
available in the SKCompat class, Estimator will only accept input_fn.
Example conversion:
  est = Estimator(...) -> est = SKCompat(Estimator(...))
INFO:tensorflow:Restoring parameters from /tmp/tmpt485j0eg/model.ckpt-200


156

In [99]:
submission = pd.read_csv("submission_format.csv",
                         index_col=[0, 1, 2])

submission.total_cases = np.concatenate([sj_test_pred, iq_test_pred])
submission.to_csv("submission1.csv")