## Here is walk-through to help getting started with tensorflow

1) Simple Linear Regression with low-level TensorFlow  
2) Simple Linear Regression with a canned estimator  
3) Playing with real data: linear regressor and DNN  
4) Building a custom estimator to classify handwritten digits (MNIST)

### What's next? https://goo.gl/hZaLPA

## Dependencies

In [None]:
import collections

# tensorflow
import tensorflow as tf
print('Expected TensorFlow version is v1.3.0 or higher')
print('Your TensorFlow version:', tf.__version__)
import numpy as np
import pandas as pd

# visualization
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
matplotlib.rcParams['figure.figsize'] = [12,8]

## 1) Simple Linear Regression with low-level TensorFlow

### Generating data

This function creates a noisy dataset that's roughly linear, according to the equation y = mx + b + noise.

Notice that the expected value for m is 0.1 and for b is 0.3. This is the values we expect the model to predict.

In [None]:
def make_noisy_data(m=0.1, b=0.3, n=100):
    x = np.random.randn(n)
    noise = np.random.normal(scale=0.01, size=len(x))
    y = m * x + b + noise
    return x, y

Create training and testing data

In [None]:
x_train, y_train = make_noisy_data()
x_test, y_test = make_noisy_data()

Plot the training and testing data

In [None]:
plt.plot(x_train, y_train, 'b.')
plt.plot(x_test, y_test, 'g.')

### The Model

In [None]:
# input and output
x = tf.placeholder(shape=[None], dtype=tf.float32, name='x')
y_label = tf.placeholder(shape=[None], dtype=tf.float32, name='y_label')

# variables
W = tf.Variable(tf.random_normal([1], name="W")) # weight
b = tf.Variable(tf.random_normal([1], name="b")) # bias

# actual model
y = W * x + b

### The Loss and Optimizer

Define a loss function (here, squared error) and an optimizer (here, gradient descent).

In [None]:
loss = tf.reduce_mean(tf.square(y - y_label))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1)
train = optimizer.minimize(loss)

### The Training Loop and generating predictions

In [None]:
init = tf.global_variables_initializer()

with tf.Session() as sess:
  sess.run(init) # initialize variables
  for i in range(100): # train for 100 steps
    sess.run(train, feed_dict={x: x_train, y_label:y_train})

  fake_data = np.linspace(-3, 3, 101)
  # using the trained model to predict values for the training data
  pred_train = sess.run(y, feed_dict={x: fake_data})

### Visualizing predictions

In [None]:
# plot training
plt.scatter(x_train, y_train)
plt.plot(fake_data, pred_train, 'g')

### What is the final weight and bias?

In [None]:
print('W:', final_W, 'expected: 0.1')
print('b:', final_b, 'expected: 0.3')

## 2) Simple Linear Regression with a canned estimator  

### Input Pipeline

In [None]:
x_dict = {'x': x_train}
train_input = tf.estimator.inputs.numpy_input_fn(x_dict, y_train,
                                                 shuffle=True,
                                                 num_epochs=None) # repeat forever

### Describe input feature usage

In [None]:
features = [tf.feature_column.numeric_column('x')] # because x is a real number

### Build and train the model

In [None]:
estimator = tf.estimator.LinearRegressor(features)
estimator.train(train_input, steps = 1000)

### Make some predictions with the model

In [None]:
x_test_dict = {'x': np.linspace(-5, 5, 11)}
data_source = tf.estimator.inputs.numpy_input_fn(x_test_dict, shuffle=False)

for y in estimator.predict(data_source):
    print(y['predictions'])

## 3) Playing with real data: linear regressor and DNN  

### Get the data

In [None]:
census_train_url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data'
census_train_path = tf.contrib.keras.utils.get_file('census.train', census_train_url)

census_test_url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.test'
census_test_path = tf.contrib.keras.utils.get_file('census.test', census_test_url)

### Load the data

In [None]:
column_names = [
  'age', 'workclass', 'fnlwgt', 'education', 'education-num',
  'marital-status', 'occupation', 'relationship', 'race', 'sex',
  'capital-gain', 'capital-loss', 'hours-per-week', 'native-country',
  'income'
]

census_train = pd.read_csv(census_train_path, index_col=False, names=column_names) 
census_test = pd.read_csv(census_train_path, index_col=False, names=column_names) 

census_train_label = census_train.pop('income') == " >50K" 
census_test_label = census_test.pop('income') == " >50K"

In [None]:
census_train.head(10)

### Input pipeline

In [None]:
train_input = tf.estimator.inputs.pandas_input_fn(
    census_train, 
    census_train_label,
    shuffle=True, 
    num_epochs=None)

In [None]:
features, labels = train_input()
features

### Feature description

In [None]:
features = [
    tf.feature_column.numeric_column('education-num'),
    tf.feature_column.numeric_column('hours-per-week'),
    tf.feature_column.bucketized_column(tf.feature_column.numeric_column('age'), list(range(0,100,10))),
    tf.feature_column.categorical_column_with_vocabulary_list('sex',['male','female']),
    tf.feature_column.categorical_column_with_hash_bucket('native-country', 1000),
]

In [None]:
estimator = tf.estimator.LinearClassifier(features, n_classes=2, model_dir='census/linear')

In [None]:
estimator.train(train_input, steps=5000)

### Evaluate the model

In [None]:
test_input = tf.estimator.inputs.pandas_input_fn(
    census_test, 
    census_test_label, 
    shuffle=True, 
    num_epochs=1)

estimator.evaluate(test_input)

### Examine the results with [TensorBoard](http://0.0.0.0:6006)

In [None]:
%%bash
tensorboard --logdir census

## DNN model

### Update input pre-processing

In [134]:
features = [
    tf.feature_column.numeric_column('education-num'),
    tf.feature_column.numeric_column('hours-per-week'),
    tf.feature_column.bucketized_column(tf.feature_column.numeric_column('age'), list(range(0,100,10))),
    tf.feature_column.categorical_column_with_vocabulary_list('sex',['male','female']),
    tf.feature_column.embedding_column(
        tf.feature_column.categorical_column_with_hash_bucket('native-country', 1000), 0)
]

ValueError: Invalid dimension 0.

In [135]:
estimator = tf.estimator.DNNClassifier(features, n_classes=2, model_dir='census/linear')

AttributeError: 'module' object has no attribute 'DNNClassifier'

## Linear Regression - Custom Input Pipeline using Datasets API

### Get the data

In [None]:
imports85_url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/autos/imports-85.data'
imports85_path = tf.contrib.keras.utils.get_file('imports85.data', imports85_url)

### Read the data

In [None]:
def imports_85():
    def remove_question_marks(line):
        return tf.py_func(lambda x: "?" not in x, [line], tf.bool)
    
    dataset = (
        tf.contrib.data.TextLineDataset(imports85_path)
            .filter(remove_question_marks)
            .map(csv_decoder)
            .shuffle(buffer_size=100)
            .batch(32)
            .repeat())
    
    columns = dataset.make_one_shot_iterator().get_next()
    
    price = columns.pop('price')
    
    return columns, price/1000

In [None]:
csv_defaults = collections.OrderedDict([
    ('symboling', [0]),
    ('normalized-losses', [0.]),
    ('make', [""]),
    ('fuel-type', [""]),
    ('aspiration', [""]),
    ('num-of-doors', [""]),
    ('body-style', [""]),
    ('drive-wheels', [""]),
    ('engine-location', [""]),
    ('wheel-base', [0.]),
    ('length', [0.]),
    ('width', [0.]),
    ('height', [0.]),
    ('curb-weight', [0.]),
    ('engine-type', [""]),
    ('num-of-cylynders', [""]),
    ('engine-size', [0.]),
    ('fuel-system', [""]),
    ('bore', [0.]),
    ('stroke', [0.]),
    ('compression-ratio', [0.]), 
    ('horsepower', [0.]),
    ('peak-rpm', [0.]),
    ('city-mpg', [0.]),
    ('highway-mpg', [0.]),
    ('price', [0.])])

In [None]:
def csv_decoder(line):
  parsed = tf.decode_csv(line, csv_defaults.values())

  return dict(zip(csv_defaults.keys(), parsed))
  

### Try the input function

In [None]:
tf.reset_default_graph()
training_batch = imports_85()

In [None]:
with tf.Session() as sess:
    features, price = sess.run(training_batch)

In [None]:
print(features['highway-mpg'])

In [None]:
print(features['body-style'])

In [None]:
print(price)

### Input Processing

In [None]:
features = [
    tf.feature_column.numeric_column('curb-weight'),
    tf.feature_column.numeric_column('highway-mpg'),
    tf.feature_column.categorical_column_with_vocabulary_list(
        'body-style', ['sedan', 'hatchback', 'wagon','hardtop','convertible']),
    tf.feature_column.categorical_column_with_hash_bucket('make', 256)
]

### Build the model

In [None]:
estimator = tf.estimator.LinearRegressor(features)
estimator.train(imports_85, steps = 10000)

In [None]:
2.8236e+08**0.5

## 4) Building a custom estimator to classify handwritten digits (MNIST)