In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

train_path = './data/train.csv'
test_path = './data/test.csv'

In [10]:
class get_data:
    def __init__(self, path, is_train=True):
        self.df = pd.read_csv(path)
        if is_train:
            self.get_ts_vs()
        
    def get_ts_vs(self):
        # split df: feature & label
        df_y = self.df['label']
        df_x = self.df.drop('label', axis=1)
        # split df: training set & validation set
        self.ts_f, self.vs_f, self.ts_l, self.vs_l = \
        train_test_split(df_x, df_y, test_size=0.1, random_state=0)
        # normalize
#         self.ts_f, self.ts_l = self.normalize_inputs(self.ts_f, self.ts_l)
#         self.vs_f, self.vs_l = self.normalize_inputs(self.vs_f, self.vs_l)
        
    def normalize_inputs(self, x, y):
        x = x.values / 255
        return x, y

In [30]:
class create_model:
    def __init__(
        self,
        save_model=True,
        save_model_path='./models/estimator/default'
    ):
        self.save_model = save_model
        self.save_model_path = save_model_path
        
        self.create_checkpoint_config()
        self.create_feature_columns()
        
        
        model_arg = dict(
            feature_columns=self.feature_columns,
            hidden_units=[800],
            optimizer=tf.train.AdamOptimizer(1e-4),
            n_classes=10,
            dropout=0.1,
            config=self.checkpoint_config
        )
        if self.save_model:
            model_arg['model_dir'] = self.save_model_path
        self.model = tf.estimator.DNNClassifier(**model_arg)
        print('create model!')
        
    def create_checkpoint_config(self):
        self.checkpoint_config = tf.estimator.RunConfig(
            save_checkpoints_secs = 5 * 60,
            keep_checkpoint_max = 10
        )
        
    def create_feature_columns(self):
        self.feature_columns = [tf.feature_column.numeric_column(key="image", shape=(784, ))]
        
    def train(
        self,
        ts_f,
        ts_l,
        vs_f,
        vs_l
    ):
        self.ts_f = ts_f
        self.ts_l = ts_l
        self.vs_f = vs_f
        self.vs_l = vs_l
        
        self.create_train_spec()
        self.create_eval_spec()
        
        tf.estimator.train_and_evaluate(
            estimator=self.model,
            train_spec=self.train_spec,
            eval_spec=self.eval_spec
        )
        
    def create_train_spec(self):
        self.train_spec = tf.estimator.TrainSpec(
            input_fn=lambda: self.my_input_fn(self.ts_f, self.ts_l, 64, True),
            max_steps=150,
            hooks=None
        )
        
    def create_eval_spec(self):
        self.eval_spec = tf.estimator.EvalSpec(
            input_fn=lambda: self.my_input_fn(self.vs_f, self.vs_l, 64)
        )
        
    def my_input_fn(self, train_X, train_y, batch_size=64, shuffle=False):
        dataset = tf.data.Dataset.from_tensor_slices((train_X, train_y))

        dataset = dataset.map(lambda x,y: self.preprocess_data(x, y))

        if shuffle:
            dataset = dataset.shuffle(buffer_size=128)

        dataset = dataset.batch(batch_size)

        itr = dataset.make_one_shot_iterator()
        features, target = itr.get_next()

        return features, target
    
    def preprocess_data(self, x, y):
        labels = tf.cast(y, tf.int32)
        input_data = tf.cast(x, tf.float32)
        return (dict({'image': input_data}), labels)
    
    def predict(self, test_df):
        test_x = test_df.values
        test_x = test_x / 255
        test_input_fn = tf.estimator.inputs.numpy_input_fn(
            x={"image": test_x},
            num_epochs=1,
            shuffle=False,
        )
        
        pred = self.model.predict(input_fn=test_input_fn)
        predicted_classes = [int(p["classes"]) for p in pred]
                
        output = pd.DataFrame(predicted_classes)
        output.index.name = 'ImageId'
        output = output.rename(columns = {0: 'Label'}).reset_index()
        output['ImageId'] = output['ImageId'] + 1
        self.result = output
    
    def save_result(self, path='./result/estimator/submission.csv'):
        self.result.to_csv(path, index=False)

In [17]:
train = get_data(train_path)
train.df.head()

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [31]:
model = create_model(
    save_model=False
)

INFO:tensorflow:Using config: {'_model_dir': '/var/folders/6k/5788fpjx36bfyc7y16wqyn7c0000gn/T/tmpkj10bglf', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 300, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 10, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x10cb73b70>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
create model!


In [32]:
model.train(
    train.ts_f,
    train.ts_l,
    train.vs_f,
    train.vs_l
)

INFO:tensorflow:Not using Distribute Coordinator.
INFO:tensorflow:Running training and evaluation locally (non-distributed).
INFO:tensorflow:Start train and evaluate loop. The evaluate will happen after every checkpoint. Checkpoint frequency is determined based on RunConfig arguments: save_checkpoints_steps None or save_checkpoints_secs 300.
INFO:tensorflow:Calling model_fn.
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into /var/folders/6k/5788fpjx36bfyc7y16wqyn7c0000gn/T/tmpkj10bglf/model.ckpt.
INFO:tensorflow:loss = 9785.049, step = 1
INFO:tensorflow:global_step/sec: 6.01527
INFO:tensorflow:loss = 525.4099, step = 101 (16.600 se

In [34]:
test = get_data(test_path, is_train=False)
test.df.head()

Unnamed: 0,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [35]:
model.predict(test.df)

Instructions for updating:
To construct input pipelines, use the `tf.data` module.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /var/folders/6k/5788fpjx36bfyc7y16wqyn7c0000gn/T/tmpkj10bglf/model.ckpt-150
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.


In [37]:
model.save_result()