In [1]:
import tensorflow
import tensorflow as tf
print("You have version %s" % tf.__version__)

You have version 1.4.0


In [2]:
# For fast (data) visualization 
%matplotlib inline
import matplotlib.pyplot as plt

In [3]:
# as a sanity check, let's use Pandas and numpy 
# as the previous method of data pipelining  
import pandas
import pandas as pd  
import numpy 
import numpy as np 

In [4]:
# Using Python (native, built-in) libraries os, sys 
# is a great way to check if we have the files we need in our 
# local file directories
import os, sys
print(os.getcwd()) # get the current, local file directory
print(os.listdir(os.getcwd() )) # list the current directory

/home/mobicfd/ReacCFD/advanced-tensorflow/LinearRegression
['LinReg_w_Estimator.ipynb', 'LinearRegression.ipynb', '.ipynb_checkpoints', 'graphs', 'LinReg_ex1data1']


In [5]:
print(os.sep)
PATH = '.'

/


In [6]:
# The CSV features in our training & test data
feature_names = ['X']

In [43]:
def from_txt_to_ds_input_fn(file_path, m_i, feature_names, 
                            repeat_count=None, perform_shuffle=False, 
                               shuffle_buffer_size=4096):
    """
    @fn from_txt_to_ds_input_fn
    @param file_path
    @param m_i, a positive integer, number of examples in a batch
    @param feature_names, list of strings to name your d features 
    @param repeat_count, a positive integer or None, number of times to repeat, 
                None is for indefinitely
    @param perform_shuffle = False , performs shuffling of data or not 
    @param shuffle_buffer_size = 4096, a positive integer
    """
        
    def decode_txt(line):
        parsed_line = tf.decode_csv(line, record_defaults=[[0.],[0.,]],
                                        field_delim=',')
        label = parsed_line[-1:] # Last element is the label
        del parsed_line[-1] # Delete last element
        features = parsed_line # Everything but last elements are the features 

        # X_i, y_i, only the last value in a line is the output value, y
        # prior values, associated with a "feature_name", are input values  
        d = dict(zip(feature_names, features)), label  
        return d  
    
    dataset = (tf.data.TextLineDataset(file_path) # Read text file 
                .map(decode_txt, num_parallel_calls=m_i) # Transform each elem by applying decode_csv fn
                .batch(m_i) # Batch size to use
              )
    if repeat_count is None:
        dataset = dataset.repeat() # repeat indefinitely
    else:
        dataset = dataset.repeat(repeat_count) # Repeats dataset this # times 

    if perform_shuffle:
        # Randomizes input using a window of shuffle_buffer_size elements (read into memory)
        dataset = dataset.shuffle(buffer_size=shuffle_buffer_size)
    
    # create iterator
    iterator = dataset.make_one_shot_iterator()
#    iterator = dataset.make_initializable_iterator()

    # Separate the input X data from the output y data
    batch_features, batch_labels= iterator.get_next() 

    return batch_features, batch_labels

In [8]:
LinRegClassPATH = PATH + os.sep + 'LinReg_ex1data1'
if not os.path.exists(LinRegClassPATH):
    os.makedirs(LinRegClassPATH)

In [9]:
PATH_DATASET = "../sample_datasets/"
os.listdir( PATH_DATASET )

['ex1data2.txt', 'ex1data1.txt']

In [10]:
FILE_ex1data1 = PATH_DATASET + os.sep + "ex1data1.txt"

In [11]:
# Create the feature_columns, which specifies the input to our model
# All our input features are numeric, so use numeric_column for each one 
feature_columns = [tf.feature_column.numeric_column(k) for k in feature_names]

# `tf.estimator.LinearRegressor`  

`__init__`  

```  
__init__(
    feature_columns,
    model_dir=None,
    label_dimension=1,
    weight_column=None,
    optimizer='Ftrl',
    config=None,
    partitioner=None
    )
```  

In [12]:
LinReg = tf.estimator.LinearRegressor(
    feature_columns=feature_columns, # The input features to our model
    model_dir=LinRegClassPATH) # Path to where checkpoints etc. are stored  

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_task_type': 'worker', '_is_chief': True, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f7bbfa12450>, '_save_checkpoints_steps': None, '_keep_checkpoint_every_n_hours': 10000, '_service': None, '_num_ps_replicas': 0, '_tf_random_seed': None, '_master': '', '_num_worker_replicas': 1, '_task_id': 0, '_log_step_count_steps': 100, '_model_dir': './LinReg_ex1data1', '_save_summary_steps': 100}


In [26]:
LinReg.train(
    input_fn=lambda: from_txt_to_ds_input_fn(FILE_ex1data1,
                                                128, 
                                                feature_names,
                                                 5500, 
                                             True))

INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Restoring parameters from ./LinReg_ex1data1/model.ckpt-3000
INFO:tensorflow:Saving checkpoints for 3001 into ./LinReg_ex1data1/model.ckpt.
INFO:tensorflow:loss = 868.532, step = 3001
INFO:tensorflow:global_step/sec: 516.539
INFO:tensorflow:loss = 868.532, step = 3101 (0.194 sec)
INFO:tensorflow:global_step/sec: 576.126
INFO:tensorflow:loss = 868.532, step = 3201 (0.173 sec)
INFO:tensorflow:global_step/sec: 553.217
INFO:tensorflow:loss = 868.532, step = 3301 (0.181 sec)
INFO:tensorflow:global_step/sec: 543.564
INFO:tensorflow:loss = 868.532, step = 3401 (0.187 sec)
INFO:tensorflow:global_step/sec: 565.566
INFO:tensorflow:loss = 868.532, step = 3501 (0.175 sec)
INFO:tensorflow:global_step/sec: 586.868
INFO:tensorflow:loss = 868.532, step = 3601 (0.170 sec)
INFO:tensorflow:global_step/sec: 556.592
INFO:tensorflow:loss = 868.532, step = 3701 (0.179 sec)
INFO:tensorflow:global_step/sec: 532.393
INFO:tensorflow:loss = 868.532, step 

<tensorflow.python.estimator.canned.linear.LinearRegressor at 0x7f7bbfa12990>

In [27]:
LinReg.get_variable_names()

['global_step',
 'linear/linear_model/X/weights',
 'linear/linear_model/X/weights/part_0/Ftrl',
 'linear/linear_model/X/weights/part_0/Ftrl_1',
 'linear/linear_model/bias_weights',
 'linear/linear_model/bias_weights/part_0/Ftrl',
 'linear/linear_model/bias_weights/part_0/Ftrl_1']

In [28]:
LinReg.get_variable_value('global_step' )

8500

In [29]:
LinReg.get_variable_value('linear/linear_model/X/weights' )

array([[ 1.19302714]], dtype=float32)

In [30]:
LinReg.get_variable_value( 'linear/linear_model/bias_weights' )

array([-3.8957181], dtype=float32)

## Case of multi-variate linear regression with a `.txt` file with no header, `ex1data2.txt`  

In [38]:
# The CSV features in our training & test data
feature_names = ['X1','X2']

In [33]:
FILE_ex1data2 = PATH_DATASET + os.sep + "ex1data2.txt"

In [39]:
LinRegClassPATH = PATH + os.sep + 'LinReg_ex1data2'
if not os.path.exists(LinRegClassPATH):
    os.makedirs(LinRegClassPATH)

In [40]:
feature_columns = [tf.feature_column.numeric_column(k) for k in feature_names]

In [47]:
LinReg_multi = tf.estimator.LinearRegressor(
    feature_columns=feature_columns, # The input features to our model
    model_dir=LinRegClassPATH) # Path to where checkpoints etc. are stored  

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_task_type': 'worker', '_is_chief': True, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f7baa459090>, '_save_checkpoints_steps': None, '_keep_checkpoint_every_n_hours': 10000, '_service': None, '_num_ps_replicas': 0, '_tf_random_seed': None, '_master': '', '_num_worker_replicas': 1, '_task_id': 0, '_log_step_count_steps': 100, '_model_dir': './LinReg_ex1data2', '_save_summary_steps': 100}


In [48]:
LinReg_multi.train(
    input_fn=lambda: from_txt_to_ds_input_fn(FILE_ex1data2,
                                                128, 
                                                feature_names,
                                                 500, 
                                             True))

ValueError: Feature X2 is not in features dictionary.

In [46]:
feature_columns

[_NumericColumn(key='X1', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 _NumericColumn(key='X2', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None)]