In [17]:
#  Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.

import pandas
import sklearn

from sklearn.cross_validation import train_test_split
from sklearn.linear_model import LogisticRegression

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import sys

import numpy as np
import pandas
from sklearn import metrics
import tensorflow as tf
import tensorflow.contrib as contrib

FLAGS = None

MAX_DOCUMENT_LENGTH = 10
EMBEDDING_SIZE = 50
n_words = 0

def bag_of_words_model(features, target):
  """A bag-of-words model. Note it disregards the word order in the text."""
  target = tf.one_hot(target, 15, 1, 0)
  features = tf.contrib.layers.bow_encoder(
      features, vocab_size=n_words, embed_dim=EMBEDDING_SIZE)
  logits = tf.contrib.layers.fully_connected(features, 15, activation_fn=None)
  loss = tf.contrib.losses.softmax_cross_entropy(logits, target)
  train_op = tf.contrib.layers.optimize_loss(
      loss,
      tf.contrib.framework.get_global_step(),
      optimizer='Adam',
      learning_rate=0.01)
  return ({
      'class': tf.argmax(logits, 1),
      'prob': tf.nn.softmax(logits)
  }, loss, train_op)


def sequence_model(features, target):
  # Convert indexes of words into embeddings.  
  # This creates embeddings matrix of [n_words, EMBEDDING_SIZE] and
  # then maps word indexes of the sequence into [batch_size, 
  # sequence_length, EMBEDDING_SIZE].  
  word_vectors = tf.contrib.layers.embed_sequence(      
    features, vocab_size=n_words, embed_dim=EMBEDDING_SIZE, scope='words') 

  # Split into list of embedding per word, while removing doc length
  # dim. word_list results to be a list of tensors [batch_size, 
  # EMBEDDING_SIZE].  
  word_list = tf.unstack(word_vectors, axis=1)

  prediction = tf.contrib.layers.fully_connected(word_list, 15, activation_fn=None)
  loss = tf.contrib.losses.mean_squared_error(prediction, target)

  # Create a training op.
  train_op = tf.contrib.layers.optimize_loss(
      loss,
      tf.contrib.framework.get_global_step(),
      optimizer='Adam',
      learning_rate=0.01)

  return ({
      'class': tf.argmax(logits, 1),
      'prob': tf.nn.softmax(logits)
  }, loss, train_op)


def main(unused_argv):
  global n_words

  print(tf.__version__)
  # Prepare training and testing data
  data = pandas.read_csv('namefind_listings_20161109.csv')
  print(data.shape) 
  print(data.columns)
  y, X = data['price'], data[['domain','words','tld']]
  X_train, X_test, y_train, y_test = train_test_split(
             X, y, test_size=0.33, random_state=42)

  # Process vocabulary
  vocab_processor = learn.preprocessing.VocabularyProcessor(MAX_DOCUMENT_LENGTH)
  x_train = np.array(list(vocab_processor.fit_transform(X_train)))
  x_test = np.array(list(vocab_processor.transform(X_test)))
  n_words = len(vocab_processor.vocabulary_)
  print('Total words: %d' % n_words)

  # Build model
  # Switch between rnn_model and bag_of_words_model to test different models.
  model_fn = sequence_model

  if FLAGS.bow_model:
    model_fn = bag_of_words_model
  
  classifier = learn.Estimator(model_fn=model_fn)

  # Train and predict
  classifier.fit(x_train, y_train, steps=100)
  y_predicted = [
      p['class'] for p in classifier.predict(
          x_test, as_iterable=True)
  ]
  score = metrics.accuracy_score(y_test, y_predicted)
  print('Accuracy: {0:f}'.format(score))


if __name__ == '__main__':
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--test_with_fake_data',
      default=False,
      help='Test the example code with fake data.',
      action='store_true')
  parser.add_argument(
      '--bow_model',
      default=False,
      help='Run with BOW model instead of RNN.',
      action='store_true')
  FLAGS, unparsed = parser.parse_known_args()
  tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)

0.12.1
(367102, 4)
Index(['domain', 'words', 'tld', 'price'], dtype='object')
Total words: 4
INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_task_id': 0, '_is_chief': True, '_environment': 'local', 'save_checkpoints_steps': None, '_master': '', 'tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1
}
, '_evaluation_master': '', '_task_type': None, 'tf_random_seed': None, 'save_summary_steps': 100, 'keep_checkpoint_every_n_hours': 10000, 'keep_checkpoint_max': 5, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x139110be0>, '_num_ps_replicas': 0, 'save_checkpoints_secs': 600}
Instructions for updating:
Estimator is decoupled from Scikit Learn interface by moving into
separate class SKCompat. Arguments x, y and batch_size are only
available in the SKCompat class, Estimator will only accept input_fn.
Example conversion:
  est = Estimator(...) -> est = SKCompat(Estimator(...))
Instructions for updating:
Estimator is decoupled fr

TypeError: DataType float64 for attr 'TI' not in list of allowed values: uint8, int32, int64

In [9]:
!pip install tensorflow

Collecting tensorflow


  Could not find a version that satisfies the requirement tensorflow (from versions: )
No matching distribution found for tensorflow


In [8]:
!pip install --upgrade pip

Requirement already up-to-date: pip in c:\python27\lib\site-packages
