In [21]:
import tensorflow as tf
import numpy as np
import time

class CharNet(object):
    """docstring for CharNet."""
    def __init__(self, conv_layers,
                        fc_layers,
                        l0,
                        alphabet_size,
                        encoder,
                        **args
    ):
        super(CharNet, self).__init__()
        tf.set_random_seed(time.time())
        self.l0 = l0
        self.conv_layers = conv_layers
        self.fc_layers = fc_layers
        self.alphabet_size = alphabet_size

        initializer = tf.contrib.layers.xavier_initializer()

        with tf.name_scope('Input'):
            self.input_num = tf.placeholder(tf.float32, shape=[None, 6],
                                           name='input_num')
            self.input_x = tf.placeholder(tf.int64, shape=[None, self.l0],
                                          name='input_x')
            self.input_y = tf.placeholder(tf.float32, shape=[None, 1],
                                          name='input_y')
            self.dropout_keep_prob = tf.placeholder(tf.float32,
                                                    name='dropout_keep_prob')

        with tf.name_scope('Embedding'), tf.device('/cpu:0'):
            x = tf.nn.embedding_lookup(encoder, self.input_x)
            x = tf.expand_dims(x, -1)

        # Configure conv layers
        for i, layer_params in enumerate(conv_layers):
            with tf.name_scope("Convolution"):
                filter_param = [
                    layer_params[1],
                    x.get_shape()[2].value, # l0
                    x.get_shape()[3].value, # channels
                    layer_params[0]
                ]
                W = tf.Variable(initializer(filter_param), dtype='float32', name='filter')

                conv_layer = tf.nn.conv2d(x, W, [1, 1, 1, 1], 'VALID', name='conv')
                conv_layer = tf.nn.relu(conv_layer, name='act_relu')

            if not layer_params[-1] is None:
                with tf.name_scope("MaxPooling"):
                    pool_layer = tf.nn.max_pool(conv_layer,
                                            ksize=[1, layer_params[-1], 1, 1],
                                            strides=[1, layer_params[-1], 1, 1],
                                            padding='VALID')
                    x = tf.transpose(pool_layer, [0, 1, 3, 2])
            else:
                x = tf.transpose(conv_layer, [0, 1, 3, 2])

        # flatten conv output for fc
        with tf.name_scope("Flatten"):
            x = tf.contrib.layers.flatten(x)

        with tf.name_scope("Concat"):
            x = tf.concat([x, self.input_num], axis=1)

        # Configure fc layers
        for i, layer_units in enumerate(fc_layers):
            with tf.name_scope("FullyConnected"):
                W = tf.Variable(initializer([x.get_shape()[-1].value, layer_units]),
                                dtype='float32', name='W')
                b = tf.Variable(initializer([layer_units]),
                                dtype='float32', name='W')
                x = tf.nn.xw_plus_b(x, W, b, name='fully-connected')
                x = tf.nn.relu(x)

            with tf.name_scope("Dropout"):
                x = tf.nn.dropout(x, self.dropout_keep_prob)

        with tf.name_scope("Output"):
            W = tf.Variable(initializer([x.get_shape()[-1].value, 1]),
                            dtype='float32', name='W')
            b = tf.Variable(initializer([1]),
                            dtype='float32', name='W')
            self.yhat = tf.nn.xw_plus_b(x, W, b, name='output')

        with tf.name_scope("Loss"):
#             yhat = tf.log(self.yhat + 1)
#             y = tf.log(self.input_y + 1)
#             diff = yhat - y
#             self.loss = tf.sqrt(tf.reduce_mean(tf.square(diff)))
            yhat = tf.reshape(self.yhat, [-1])
            y = tf.reshape(self.input_y, [-1])
            self.loss = tf.keras.metrics.mean_squared_logarithmic_error(yhat, y)
#       

In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder

class Data(object):
    def __init__(self, file_path, alstr, is_dev=False, batch_size=128, **args):
        self.alstr = alstr
        self.is_dev = is_dev
        self.batch_size = batch_size
        self.raw_data = pd.read_csv(file_path, sep='\t')

        self.alphabet = self.make_alphabet(self.alstr)
        self.encoder, self.e_dict = self.one_hot_encoder(self.alphabet)
        self.alphabet_size = len(self.alphabet)

        self.x, self.y = self.format_data(self.raw_data)
        self.input_x = self.x['desc_vecs'].values
        self.input_num = self.x.drop(['desc_vecs'], axis=1).values
            
    def shuffling(self):
        shuffle_indices = np.random.permutation(np.arange(len(self.input_x)))
        self.input_x = self.input_x[shuffle_indices]
        self.input_num = self.input_num[shuffle_indices]
        self.y = self.y[shuffle_indices]

    def next_batch(self, batch_num):
        data_size = len(self.input_x)
        start = batch_num * self.batch_size
        end = min((batch_num + 1) * self.batch_size, data_size)
        batch_x = self.input_x[start:end]
        batch_num = self.input_num[start:end]
        batch_y = self.y[start:end]
        return batch_x, batch_num, batch_y

    def process_full_description(self, df):
        df['desc_vecs'] = df['item_description'].apply(
                lambda x: self.doc_process(x, self.e_dict)
        )
        return df

    def categorinizer(self, df,
                      col_lists=[
                          'brand_name',
                          'general_cat',
                          'subcat_1',
                          'subcat_2'
                      ]):
        for col in col_lists:
            df[col] = df[col].apply(lambda x: str(x))
            encoder = LabelEncoder()
            encoder.fit(df[col])
            df[col] = encoder.transform(df[col])
            del encoder

        return df

    def split_cat(self, text):
        try: return text.split("/")
        except: return ("No Label", "No Label", "No Label")

    def format_data(self, df):
        df['general_cat'], df['subcat_1'], df['subcat_2'] = \
            zip(*df['category_name'].apply(lambda x: self.split_cat(x)))

        df = self.categorinizer(df)
        # remove missing values in item description
        df = df[pd.notnull(df['item_description'])]
        df = self.process_full_description(df)
        df['item_description'] = df['name'] + ' ' + df['item_description']
        df = df.drop(columns=['name', 'category_name', 'item_description'])
        if self.is_dev:
            df = df.drop(columns=['test_id'])
            price = None
            features = df
        else:
            df = df.drop(columns=['train_id'])
            price = df['price'].values
            features = df.drop(columns=['price'])

        return features, price

    def one_hot_encoder(self, alphabet):
        encoder_dict = {}
        encoder = []

        encoder_dict['UNK'] = 0
        encoder.append(np.zeros(len(alphabet), dtype='float32'))

        for i, alpha in enumerate(alphabet):
            onehot = np.zeros(len(alphabet), dtype='float32')
            encoder_dict[alpha] = i + 1
            onehot[i] = 1
            encoder.append(onehot)

        encoder = np.array(encoder, dtype='float32')
        return encoder, encoder_dict

    def doc_process(self, desc, e_dict, l=200):
        desc = desc.strip().lower()
        min_len = min(l, len(desc))
        doc_vec = np.zeros(l, dtype='int64')
        for j in range(min_len):
            if desc[j] in e_dict:
                doc_vec[j] = e_dict[desc[j]]
            else:
                doc_vec[j] = e_dict['UNK']
        return doc_vec

    def make_alphabet(self, alstr):
        return [char for char in alstr]

# train = Data(train_file, config.alstr)


In [3]:
class CharNetConfig(object):
    def __init__(self, params=None):
        if params is None:
            self.conv_layers = [
                    [256, 7, 3],
                    [256, 7, 3],
                    [256, 3, None],
                    [256, 3, None],
                    [256, 3, None],
                    [256, 3, 3]
                ]
            self.fc_layers = [1024, 1024]
            self.l0 = 200
            self.alstr = 'abcdefghijklmnopqrstuvwxyz0123456789-,;.!?:’’’/\|_@#$%ˆ&* ̃‘+-=<>()[]{}]'
            self.alphabet_size = len(self.alstr)
        else:
            self.conv_layers = params['conv_layers']
            self.fc_layers = params['fc_layers']
            self.l0 = params['l0']
            slef.alstr = params['alstr']

In [4]:
import tensorflow as tf
import os, time

def main(train_file, dev_file, config):
    with tf.Session() as sess:
        charnet = CharNet(config.conv_layers, config.fc_layers, config.l0, config.alphabet_size, train_data.encoder)
        optimizer = tf.train.AdamOptimizer(1e-3)
        grads = optimizer.compute_gradients(charnet.loss)
        train_op = optimizer.apply_gradients(grads)

        global_step = tf.Variable(0, trainable=False)

        # Summaries for grads
    #     grad_summaries = []
    #     for g, v in grads:
    #         if g is not None:
    #             grad_hist_summary = tf.summary.histogram("{}/grad/hist".format(v.name), g)
    #             sparsity_summary = tf.summary.histogram("{}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g))
    #             grad_summaries.append(grad_hist_summary)
    #             grad_summaries.append(sparsity_summary)

    #     grad_summaries_merged = tf.summary.merge(grad_summaries)

        timestamp = str(int(time.time()))
        out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
        print("Writing to {}\n".format(out_dir))

        # Summaries for loss and accuracy
        loss_summary = tf.summary.scalar("loss", charnet.loss)

        # Train Summaries
        train_summary_op = tf.summary.merge([loss_summary])
        train_summary_dir = os.path.join(out_dir, "summaries", "train")
        train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

        # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
        checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
        checkpoint_prefix = os.path.join(checkpoint_dir, "model")
        if not os.path.exists(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        saver = tf.train.Saver(tf.global_variables())

        sess.run(tf.global_variables_initializer())

        def train_step(num_batch, x_batch, y_batch):
            """
            A single training step
            """
            y_batch = np.reshape(y_batch, (-1, 1))
            mm = x_batch.tolist()
            x_batch = np.array([k.tolist() for k in mm])
            print(x_batch.shape)
            feed_dict = {
                charnet.input_num: num_batch,
                charnet.input_x: x_batch,
                charnet.input_y: y_batch,
                charnet.dropout_keep_prob: .5
            }

            _, step, summaries, loss, accuracy = sess.run(
                [train_op,
                 global_step,
                 train_summary_op,
                 charnet.loss],
                feed_dict
            )

            time_str = datetime.datetime.now().isoformat()
            print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
            train_summary_writer.add_summary(summaries, step)

        def predict_on_test(num_batch, x_batch, results):
            feed_dict = {
                charnet.input_num: num_batch,
                charnet.input_x: x_batch,
                charnet.dropout_keep_prob: 1.0
            }

            results.append(sess.run([charnet.yhat], feed_dict))

        for epoch in range(100):
            train_data.shuffling()
            for i in range(int(len(train_data.y)/train_data.batch_size) + 1):
                input_x, input_num, y = train_data.next_batch(i)
                train_step(input_num, input_x, y)
                current_step = tf.train.global_step(sess, global_step)

                if current_step % 100 == 0:
                    path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                    print("Saved model checkpoint to {}\n".format(path))

        results = []
        predict_on_test(dev_data.input_num, dev_data.input_x)
        submission = pd.DataFrame([[i for i in range(len(results))], results], columns=['id', 'price'])
        submission.to_csv('./submission.csv')

In [5]:
train_file = './train.tsv'
dev_file = './test.tsv'
config = CharNetConfig()

In [6]:
train_data = Data(train_file, config.alstr, is_dev=False)
dev_data = Data(dev_file, config.alstr, is_dev=True)
config = CharNetConfig()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [22]:
import os, time, datetime
with tf.Session() as sess:
    charnet = CharNet(config.conv_layers,
                      config.fc_layers,
                      config.l0,
                      config.alphabet_size,
                      train_data.encoder)
    optimizer = tf.train.AdamOptimizer(1e-3)
    grads = optimizer.compute_gradients(charnet.loss)
    train_op = optimizer.apply_gradients(grads)
    
    train_op = optimizer.minimize(charnet.loss)

    global_step = tf.Variable(0, trainable=False)

    timestamp = str(int(time.time()))
    out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
    print("Writing to {}\n".format(out_dir))

    # Summaries for loss and accuracy
    loss_summary = tf.summary.scalar("loss", charnet.loss)

    # Train Summaries
    train_summary_op = tf.summary.merge([loss_summary])
    train_summary_dir = os.path.join(out_dir, "summaries", "train")
    train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

    # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
    checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
    checkpoint_prefix = os.path.join(checkpoint_dir, "model")
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    saver = tf.train.Saver(tf.global_variables())

    sess.run(tf.global_variables_initializer())

    def train_step(num_batch, x_batch, y_batch, step):
        """
        A single training step
        """
        y_batch = np.reshape(y_batch, (-1, 1))
        mm = x_batch.tolist()
        x_batch = np.array([k.tolist() for k in mm])
        
        feed_dict = {
            charnet.input_num: num_batch,
            charnet.input_x: x_batch,
            charnet.input_y: y_batch,
            charnet.dropout_keep_prob: .5
        }
        
        _, summaries, loss = sess.run(
            [train_op,
             train_summary_op,
             charnet.loss],
            feed_dict=feed_dict
        )
#         yhat, y = sess.run([charnet.yhat, charnet.input_y], feed_dict=feed_dict)
#         print(yhat.shape)
#         print(y.shape)
#         time_str = datetime.datetime.now().isoformat()
        print("step {}, loss {:g}".format(step, loss))
        train_summary_writer.add_summary(summaries, step)

    def predict_on_test(num_batch, x_batch, results):
        mm = x_batch.tolist()
        x_batch = np.array([k.tolist() for k in mm])
        feed_dict = {
            charnet.input_num: num_batch,
            charnet.input_x: x_batch,
            charnet.dropout_keep_prob: 1.0
        }
        
        results.append(sess.run([charnet.yhat], feed_dict=feed_dict))
    
#     input_x, input_num, y = train_data.next_batch(i)
#     train_step(input_num, input_x, y, 0)

    for epoch in range(100):
        train_data.shuffling()
        for i in range(int(len(train_data.y)/train_data.batch_size) + 1):
            input_x, input_num, y = train_data.next_batch(i)
            train_step(input_num, input_x, y, i)
            current_step = tf.train.global_step(sess, global_step)

            if i % 1000 == 0:
                path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                print("Saved model checkpoint to {}\n".format(path))

#     results = []
#     predict_on_test(dev_data.input_num, dev_data.input_x, results)
#     submission = pd.DataFrame([[i for i in range(len(results))], results], columns=['id', 'price'])
#     submission.to_csv('./submission.csv')

Writing to /home/arac/aramac/CharNet/runs/1514534076

step 0, loss nan
Saved model checkpoint to /home/arac/aramac/CharNet/runs/1514534076/checkpoints/model-0

step 1, loss nan
step 2, loss nan
step 3, loss nan
step 4, loss nan
step 5, loss nan
step 6, loss nan
step 7, loss nan
step 8, loss nan
step 9, loss nan
step 10, loss nan
step 11, loss nan
step 12, loss nan
step 13, loss nan
step 14, loss nan
step 15, loss nan
step 16, loss nan
step 17, loss nan
step 18, loss nan
step 19, loss nan
step 20, loss nan
step 21, loss nan
step 22, loss nan
step 23, loss nan
step 24, loss nan
step 25, loss nan
step 26, loss nan
step 27, loss nan
step 28, loss nan
step 29, loss nan
step 30, loss nan
step 31, loss nan
step 32, loss nan
step 33, loss nan
step 34, loss nan
step 35, loss nan
step 36, loss nan
step 37, loss nan
step 38, loss nan
step 39, loss nan
step 40, loss nan
step 41, loss nan
step 42, loss nan
step 43, loss nan
step 44, loss nan
step 45, loss nan
step 46, loss nan
step 47, loss nan
ste

KeyboardInterrupt: 

In [None]:
results