[Tutorial](https://www.tensorflow.org/tutorials/wide), [source code](https://github.com/tensorflow/models/blob/master/official/wide_deep/wide_deep.py) and [my blog article](https://wordpress.com/post/datafireball.com/4097). 

This notebook mostly revolves around how to read in CSV data, as demonstrated in the TensorFlow wide_deep example. 

In [1]:
import tensorflow as tf
import pandas as pd
import collections
import sys
# An InteractiveSession installs itself as the default session, which is what
# lets later cells call Tensor.eval() without passing a session explicitly.
sess = tf.InteractiveSession()

  return f(*args, **kwds)


## OrderedDict

In [2]:
# An OrderedDict remembers the order in which keys were inserted.
d1 = collections.OrderedDict((n, n) for n in range(1, 5))
print(d1)

# A brand-new key is appended at the end of the ordering.
d1[5] = 5
print(d1)

# The string '5' is a different key from the integer 5, so this adds a
# sixth entry instead of overwriting the previous one.
d1['5'] = 5
print(d1)

OrderedDict([(1, 1), (2, 2), (3, 3), (4, 4)])
OrderedDict([(1, 1), (2, 2), (3, 3), (4, 4), (5, 5)])
OrderedDict([(1, 1), (2, 2), (3, 3), (4, 4), (5, 5), ('5', 5)])


## decode_csv

In [6]:
_CSV_COLUMNS = ['y','x1','x2','x3']

# Three CSV records. The first one leaves x1, x2 and x3 empty so that the
# per-column defaults below are substituted for the missing values.
input_text = '\n'.join([
    '1,,,""',
    '2,5,2.0,"2"',
    '3,6,3.0,"3"',
])
records = input_text.splitlines()

# One default per column, in column order. The Python type of each default
# also fixes the dtype of the decoded column (int32, int32, float32, string).
defaults = [[0],[0],[0.0],[""]]

# decode_csv returns one tensor per column; pair each with its column name.
columns = tf.decode_csv(records, record_defaults=defaults)
features = {name: column for name, column in zip(_CSV_COLUMNS, columns)}
features

{'x1': <tf.Tensor 'DecodeCSV:1' shape=(3,) dtype=int32>,
 'x2': <tf.Tensor 'DecodeCSV:2' shape=(3,) dtype=float32>,
 'x3': <tf.Tensor 'DecodeCSV:3' shape=(3,) dtype=string>,
 'y': <tf.Tensor 'DecodeCSV:0' shape=(3,) dtype=int32>}

In [7]:
# Evaluate every column tensor in the default session and show its values.
for name in features:
    print(name, features[name].eval())

y [1 2 3]
x1 [0 5 6]
x2 [ 0.  2.  3.]
x3 [b'' b'2' b'3']


In [4]:
# Split the target column off from the inputs: afterwards `labels` is the
# 'y' tensor and `features` holds only the remaining columns.
labels = features['y']
del features['y']
features

{'x1': <tf.Tensor 'DecodeCSV:1' shape=(3,) dtype=int32>,
 'x2': <tf.Tensor 'DecodeCSV:2' shape=(3,) dtype=float32>,
 'x3': <tf.Tensor 'DecodeCSV:3' shape=(3,) dtype=string>}

In [5]:
# Shown one per line: the symbolic tensor, its evaluated values, and the
# element-wise comparison of those values against 2.
print(labels, labels.eval(), tf.equal(labels, 2).eval(), sep='\n')

Tensor("DecodeCSV:0", shape=(3,), dtype=int32)
[1 2 3]
[False  True False]


## dict zip

In [6]:
keys, values = [1, 'k1', 'k2'], [1, 'v1', 'v2']

# In Python 3, zip() hands back a lazy, single-pass iterator: the first
# list() drains it, so the second list() has nothing left to yield.
z = zip(keys, values)
print(list(z))
print(list(z))

# Printing a fresh zip object only shows its repr; the pairs are produced
# on demand once something such as dict() iterates over it.
#https://stackoverflow.com/questions/19777612/python-range-and-zip-object-type
z = zip(keys, values)
print(z)
print(dict(z))
print(dict(zip(keys, values)))

[(1, 1), ('k1', 'v1'), ('k2', 'v2')]
[]
<zip object at 0x1818aa9888>
{1: 1, 'k1': 'v1', 'k2': 'v2'}
{1: 1, 'k1': 'v1', 'k2': 'v2'}


## TextLineDataset

In [2]:
# The CSV below is written without a header, so parse_csv can only rely on
# column position (x1, x2, x3, y). Pin that order explicitly: when `columns`
# is omitted, the order derived from a dict depends on the pandas/Python
# version (sorted keys in older releases, insertion order in newer ones),
# and insertion order here would put 'y' first and break the positional decode.
data = pd.DataFrame({
    'y': [1,2,3],
    'x1':[4,5,6],
    'x2':[7.0,8.0,9.0],
    'x3':['ten','eleven','twelve']
}, columns=['x1', 'x2', 'x3', 'y'])
file_path = 'tmp.csv'
# No index and no header: every line is just "x1,x2,x3,y".
data.to_csv(file_path, index=False, header=False)
data

Unnamed: 0,x1,x2,x3,y
0,4,7.0,ten,1
1,5,8.0,eleven,2
2,6,9.0,twelve,3


In [20]:
def parse_csv(line):
    """Decode one CSV line into a feature dict and a boolean label.

    The line is expected to hold four fields in the order x1, x2, x3, y.
    Returns a tuple ``(features, label)`` where ``features`` maps the names
    'x1', 'x2' and 'x3' to their decoded tensors and ``label`` is the
    tensor produced by comparing the 'y' field against 3.
    """
    column_names = ['x1','x2','x3','y']
    # One default per field, in field order; each default's type also
    # selects the dtype the field is decoded to.
    record_defaults = [[0],[0.0],[''],[0]]
    parsed = tf.decode_csv(line, record_defaults=record_defaults)
    features = {name: tensor for name, tensor in zip(column_names, parsed)}
    target = features.pop('y')
    return features, tf.equal(target, 3)

# Stream the CSV file line by line and decode every line with parse_csv.
dataset = tf.data.TextLineDataset(file_path)
dataset = dataset.map(parse_csv)

iterator = dataset.make_one_shot_iterator()
# get_next() only builds the symbolic "next element"; nothing is read from
# the file until that element is evaluated with sess.run().
#https://stackoverflow.com/questions/48029704/tensorflow-parse-csv-iterator-shift-by-one-row
record = iterator.get_next()

while True:
    try:
        # Every sess.run(record) advances the iterator by one element.
        # Fetch features and labels together in a single run: unpacking
        # `record` first and calling sess.run() on each part separately
        # would advance the iterator on each call, so the features and
        # labels would come from different rows.
        features, labels = sess.run(record)
        for k, v in features.items():
            print(k, v)
        print(labels)
    except tf.errors.OutOfRangeError:
        # Raised once the dataset is exhausted; this is the expected way out
        # of the loop. Any other exception (including KeyboardInterrupt) now
        # propagates instead of being silently treated as end-of-data.
        print(sys.exc_info())
        break
    finally:
        # Separator after every attempt, including the final failed one.
        print('-'*50)

x1 4
x2 7.0
x3 b'ten'
False
--------------------------------------------------
x1 5
x2 8.0
x3 b'eleven'
False
--------------------------------------------------
x1 6
x2 9.0
x3 b'twelve'
True
--------------------------------------------------
(<class 'tensorflow.python.framework.errors_impl.OutOfRangeError'>, OutOfRangeError(), <traceback object at 0x7f1b202d5188>)
--------------------------------------------------


[source](https://www.tensorflow.org/programmers_guide/datasets#consuming_values_from_an_iterator)

"The Iterator.get_next() method returns one or more tf.Tensor objects that correspond to the symbolic next element of an iterator. Each time these tensors are evaluated, they take the value of the next element in the underlying dataset. (Note that, like other stateful objects in TensorFlow, calling Iterator.get_next() does not immediately advance the iterator. Instead you must use the returned tf.Tensor objects in a TensorFlow expression, and pass the result of that expression to *tf.Session.run()* to get the next elements and advance the iterator.)

If the iterator reaches the end of the dataset, executing the Iterator.get_next() operation will raise a tf.errors.OutOfRangeError. After this point the iterator will be in an unusable state, and you must initialize it again if you want to use it further."
