In [None]:
pip install tensorflow




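TensorFlow 1.x is built around Graphs and Sessions. A Graph is a description of the computation, including all variables and operations. A Session is the execution environment, allowing part or all of the graph to be evaluated. TensorFlow 2.x (used in this notebook) executes operations eagerly by default, so an explicit Session is not required.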

Using a CUDA-enabled GPU helps speed up the matrix operations.


In [None]:
pip install tensorflow-gpu



In [None]:
%tensorflow_version 2.x
import tensorflow as tf


In [None]:
# Show which TensorFlow release the kernel actually imported.
print(f"{tf.__version__}")


2.4.0


Creating Variables in TensorFlow. In TensorFlow these values must be created a little differently from plain Python: create a tf.Variable object and specify its data type (a tf.DType such as tf.string, tf.int16 or tf.float64) through the dtype argument.

In [None]:
# The dtype has to be passed by keyword: the second positional parameter of
# tf.Variable is `trainable`, so a positional dtype is silently ignored and the
# dtype is inferred from the initial value instead.
string = tf.Variable("This is a string", dtype=tf.string)
integer = tf.Variable(24, dtype=tf.int16)
floating_point = tf.Variable(1.1, dtype=tf.float64)


Tensors
A tensor is essentially an n-dimensional vector/array, and it is the main object in TensorFlow. Every tensor has a rank, which is its number of dimensions. A scalar has rank 0, a 1-D array has rank 1, and a 2-D array has rank 2.

In [None]:
# Pass dtype by keyword: the second positional parameter of tf.Variable is
# `trainable`, so a positional dtype would be ignored and the values would be
# stored with the inferred integer dtype instead of int64.
rank1 = tf.Variable([1, 2, 3], dtype=tf.int64)  # 1-D array (rank 1)
rank2 = tf.Variable([[1, 2, 3], [4, 5, 6]], dtype=tf.int64)  # 2-D array (rank 2)

In [None]:
# Report the rank of the scalar, the 1-D and the 2-D variable, in that order.
for variable in (string, rank1, rank2):
  print(tf.rank(variable))


tf.Tensor(0, shape=(), dtype=int32)
tf.Tensor(1, shape=(), dtype=int32)
tf.Tensor(2, shape=(), dtype=int32)


Tensors also have a shape, which gives the size of each dimension.

In [None]:
# tf.shape returns the size of every dimension as a 1-D tensor.
print(tf.shape(input=rank2))

tf.Tensor([2 3], shape=(2,), dtype=int32)


tf.reshape(tensor, shape)
The tf.reshape function returns a tensor with the same values arranged in a new shape.


In [None]:
# Collapse the 2-D tensor into a single dimension holding all six values.
flattened = tf.reshape(rank2, shape=[6])

# Show the values first, then the rank and the shape of the flattened result.
for view in (flattened, tf.rank(flattened), tf.shape(flattened)):
  print(view)

tf.Tensor([1 2 3 4 5 6], shape=(6,), dtype=int32)
tf.Tensor(1, shape=(), dtype=int32)
tf.Tensor([6], shape=(1,), dtype=int32)


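Evaluating tensors (Sessions)
In TensorFlow 1.x, tensors are evaluated inside a Session, and it is best to open the Session with a context manager so that it is closed automatically. In TensorFlow 2.x, `tf.Session` is only available as `tf.compat.v1.Session`, because operations run eagerly by default.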

In [None]:
# TensorFlow 1.x pattern, kept for reference only (not executed in this notebook):
# with tf.Session() as sess:
  # tensor.eval()

In [None]:
# A 5x5x5 tensor filled with ones, and the same data flattened to one dimension
# (-1 asks tf.reshape to infer the length).
t1 = tf.ones(shape=[5, 5, 5])
t2 = tf.reshape(t1, shape=[-1])

# Print rank then shape, first for the original and then for the flattened tensor.
for tensor in (t1, t2):
  print(tf.rank(tensor))
  print(tf.shape(tensor))

tf.Tensor(3, shape=(), dtype=int32)
tf.Tensor([5 5 5], shape=(3,), dtype=int32)
tf.Tensor(1, shape=(), dtype=int32)
tf.Tensor([125], shape=(1,), dtype=int32)


Data manipulation:<br>
tf.data.Dataset objects<br>
Estimator models require their input as specially formatted objects.



In [None]:
import pandas as pd

# Three numeric feature columns with three rows each.
data = dict(One=[1, 2, 3], Two=[4, 5, 6], Three=[7, 8, 9])
df1 = pd.DataFrame(data)
df1

Unnamed: 0,One,Two,Three
0,1,4,7
1,2,5,8
2,3,6,9


Models require feature_column objects that describe each feature's data type (numerical vs. categorical) and, for categorical features, all possible values. This requires a for loop that converts each column into a tf.feature_column object.

In [None]:
column_list = list(df1.columns)

# Every column in this example is numeric, so each one becomes a numeric_column
# keyed by its name (passed as a plain Python str). Categorical columns would
# instead need their full vocabulary, e.g. obtained with .unique().
feature_cols = [
  tf.feature_column.numeric_column(str(name), dtype=tf.int64)
  for name in column_list
]

feature_cols


[NumericColumn(key='One', shape=(1,), default_value=None, dtype=tf.int64, normalizer_fn=None),
 NumericColumn(key='Two', shape=(1,), default_value=None, dtype=tf.int64, normalizer_fn=None),
 NumericColumn(key='Three', shape=(1,), default_value=None, dtype=tf.int64, normalizer_fn=None)]

The feature cols are used in instantiating a model

In [None]:
# Instantiate a linear classifier over the feature columns built above.
# NOTE(review): LinearClassifier's documented default is n_classes=2 (labels 0/1),
# while the target defined later in this notebook holds the values 1, 2, 3 --
# confirm whether n_classes needs to be set explicitly.
linear = tf.estimator.LinearClassifier(feature_columns=feature_cols)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmpb1h00cr5', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_checkpoint_save_graph_def': True, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


We will create a target dataset to train the model with.



In [None]:
# Labels for the three rows of the feature frame, stored under a single column name.
target = dict(Value=[1, 2, 3])

The dataframe must be converted to tensors for the model to train on each column; this is done with tf.data.Dataset.from_tensor_slices((feature_data, target_data)). Note: for this function the data must be in a form that can be parsed as a tensor (list, array, dict). Also, estimators have to be fed a function that returns the dataset to iterate over.

In [None]:
# Slice the frame along its first axis: each dataset element is one row.
tensors = tf.data.Dataset.from_tensor_slices(tensors=df1)
tensors

<TensorSliceDataset shapes: (3,), types: tf.int64>

In [None]:
# Materialise the dataset as NumPy arrays so the individual elements can be inspected.
tens_list = [row for row in tensors.as_numpy_iterator()]
print(tens_list)


[array([1, 4, 7]), array([2, 5, 8]), array([3, 6, 9])]


In [None]:
def input_function(feature_df, target_tensor, batch_size=3):
  """Build the zero-argument input function that an estimator's train() expects.

  Args:
    feature_df: Mapping of column name to column values (for example a pandas
      DataFrame). It is converted with dict() so that each column becomes one
      named feature.
    target_tensor: Labels, either as an array-like of values or as a mapping
      of column name to values. A mapping with exactly one column is unwrapped
      to that column's values, because single-head canned estimators expect
      the labels as one tensor rather than a dict; a mapping with several
      columns is passed through as a dict.
    batch_size: Number of rows per batch. Defaults to 3, the value that was
      previously hard-coded.

  Returns:
    A callable taking no arguments that returns a batched tf.data.Dataset of
    (features, labels) pairs.
  """
  if hasattr(target_tensor, "keys"):
    label_columns = dict(target_tensor)
    if len(label_columns) == 1:
      # Hand the estimator a plain label tensor instead of a one-entry dict.
      labels = next(iter(label_columns.values()))
    else:
      labels = label_columns
  else:
    labels = target_tensor

  def process_data():
    # The dataset is built lazily, when the estimator calls this function.
    data = tf.data.Dataset.from_tensor_slices((dict(feature_df), labels))
    data = data.batch(batch_size)
    return data
  return process_data



In [None]:
# Bind the feature frame and the labels into the input function handed to the estimator.
model_data = input_function(feature_df=df1, target_tensor=target)

In [None]:
# Train the classifier; the estimator calls the input function itself to obtain the dataset.
linear.train(input_fn=model_data)

INFO:tensorflow:Calling model_fn.




TypeError: ignored