Tensorflow 
===
A language in a language

TensorFlow is a great framework capable of providing an incredible abstraction for complex algebra and deep learning.

Tensorflow is a a part of keras


In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from keras.utils import np_utils

Using TensorFlow backend.


Symbolic variables
==========

## Placeholders/ Constants 

TensorFlow has it's own variables and functions

In [2]:
x1 = tf.constant(3.0, dtype=tf.float32)

In [3]:
x2 = tf.constant(4.0)

In [4]:
print(x1, x2)

Tensor("Const:0", shape=(), dtype=float32) Tensor("Const_1:0", shape=(), dtype=float32)


Print just shows two nodes to display the content of the nodes we need to evaluate them 

We need to start a session 

In [5]:
sess = tf.Session()
print(sess.run([x1, x2]))

[3.0, 4.0]


In [6]:
x3 = tf.add(x1, x2)
print("x3: ", x3)
print("sess.run(x3): ",sess.run(x3))


x3:  Tensor("Add:0", shape=(), dtype=float32)
sess.run(x3):  7.0


## Placeholders

In [7]:
a = tf.placeholder(tf.float32)
b = tf.placeholder(tf.float32)
adder_node = a + b 

In [8]:
print(sess.run(adder_node, {a: 3, b:4.5}))
print(sess.run(adder_node, {a: [1,3], b: [2, 4]}))


7.5
[ 3.  7.]


In [9]:
add_and_triple = adder_node * 3.
print(sess.run(add_and_triple, {a: 3, b:4.5}))


22.5


## Variables

Tensorflow can change variables in contrast to constants.

In [10]:
W = tf.Variable([.3], dtype=tf.float32)
b = tf.Variable([-.3], dtype=tf.float32)

# data is put into X
# Tensorflow is expected to learn W and b

x = tf.placeholder(tf.float32)
linear_model = W * x + b

In [11]:
# Function initializes all variables from above. 
# If you do not define a value for the variables, Tensorflow assigns smal random numbers to the function.

init = tf.global_variables_initializer()
sess.run(init)


In [12]:
print(sess.run(linear_model, {x:[1,2,3,4]}))

[ 0.          0.30000001  0.60000002  0.90000004]


In [13]:
# New placeholder y for true outputs
y = tf.placeholder(tf.float32)

# Defintion of loss function: square (tf.square) of the linear model (linear_model) and the true value (y)

squared_deltas = tf.square(linear_model - y)

# Now we sum of squared deltas (of the squared residuals)

loss = tf.reduce_sum(squared_deltas)

# Now we compute the loss for x and y
print(sess.run(loss, {x:[1,2,3,4], y:[0,-1,-2,-3]}))

23.66


In [14]:
# Change manually the values of W and b:

fixW = tf.assign(W, [-1.])
fixb = tf.assign(b, [1.])

# initialize W and b
sess.run([fixW, fixb])

# run the loss function again
print(sess.run(loss, {x:[1,2,3,4], y:[0,-1,-2,-3]}))


0.0


## Training

In [15]:
# Optimizer is a function of tensorflow (Stochastic gradient Descent Optimizer, see slides)

optimizer = tf.train.GradientDescentOptimizer(0.01)

# Apply the optimizer to the loss function
train = optimizer.minimize(loss)


In [16]:
# Run the train-function 1000 times

sess.run(init) # reset values to incorrect defaults.
for i in range(1000):
  sess.run(train, {x:[1,2,3,4], y:[0,-1,-2,-3]})

In [17]:
# Now print the optimal weights according to the optimizer

print(sess.run([W, b]))

[array([-0.9999969], dtype=float32), array([ 0.99999082], dtype=float32)]


#### About the data

The Otto Group is one of the world’s biggest e-commerce companies, A consistent analysis of the performance of products is crucial. However, due to diverse global infrastructure, many identical products get classified differently.
For this competition, we have provided a dataset with 93 features for more than 200,000 products. The objective is to build a predictive model which is able to distinguish between our main product categories. 
Each row corresponds to a single product. There are a total of 93 numerical features, which represent counts of different events. All features have been obfuscated and will not be defined any further.

https://www.kaggle.com/c/otto-group-product-classification-challenge/data

In [20]:
def load_data(path, train=True):
#def load_data(~bozhinvi/Dropbox/my_python_scripts/Part_2/, train=True):

    """Load data from a CSV File
    
    Parameters
    ----------
    path: str
        The path to the CSV file
        
    train: bool (default True)
        Decide whether or not data are *training data*.
        If True, some random shuffling is applied.
        
    Return
    ------
    X: numpy.ndarray 
        The data as a multi dimensional array of floats
    ids: numpy.ndarray
        A vector of ids for each sample
    """
    df = pd.read_csv(path)
    X = df.values.copy()
    if train:
        np.random.shuffle(X)  # https://youtu.be/uyUXoap67N8
        X, labels = X[:, 1:-1].astype(np.float32), X[:, -1]
        return X, labels
    else:
        X, ids = X[:, 1:].astype(np.float32), X[:, 0].astype(str)
        return X, ids

In [21]:
# Scaling data to mean 0, Std. Dev. 1

def preprocess_data(X, scaler=None):
    """Preprocess input data by standardise features 
    by removing the mean and scaling to unit variance"""
    if not scaler:
        scaler = StandardScaler()
        scaler.fit(X)
    X = scaler.transform(X)
    return X, scaler

# Recodes labels to numbes

def preprocess_labels(labels, encoder=None, categorical=True):
    """Encode labels with values among 0 and `n-classes-1`"""
    if not encoder:
        encoder = LabelEncoder()
        encoder.fit(labels)
    y = encoder.transform(labels).astype(np.int32)
    if categorical:
        y = np_utils.to_categorical(y)
    return y, encoder

In [23]:
# Load data
X_full, labels = load_data('data/otto/train.csv', train=True)

# Preprocess data
X_full, scaler = preprocess_data(X_full)

# Preprocess the labels
Y_full, encoder = preprocess_labels(labels)

X_full, _ = preprocess_data(X_full, scaler)

# Split data to train and test data and 
X_train, X_test, Y_train, Y_test = train_test_split(X_full, Y_full, test_size=0.33, random_state=42)

# Gives back the dimensionality of Y_train (col
nb_classes = Y_train.shape[1]
print(nb_classes, 'classes')

# Gives back the dimensionality of X_train -> Varable number
dims = X_train.shape[1]
print(dims, 'dims')

9 classes
93 dims


Now lets create and train a logistic regression model.

#### Hands On - Logistic Regression

In [27]:
#from __future__ import print_function

import tensorflow as tf


# Parameters
learning_rate = 0.01 # Step we do everytime we do a stochastic gradient descent
training_epochs = 25 # Number of times, the gradients are passed / iteration number
batch_size = 100     # Mini batch, 100 points for which the gradient is computed and averaged
display_step = 1     # Gives information of the progress


# tf Graph Input
x = tf.placeholder(tf.float32, [None, 93]) #93 dim
y = tf.placeholder(tf.float32, [None, 9]) # 9 classes

# Set model weights: due to the randomization, the result might differ a bit on different machines. Moreover,
# different machines yield slightly different results due to CPU/GPU characteristics.
W = tf.Variable(tf.zeros([93, 9])) # Setting model parameters: weights, initialized with small random numbers
b = tf.Variable(tf.zeros([9]))     # Setting model parameters: betas, initialized with small random numbers

# y = [0, 0, 0, 0, 1, 0, 0, 0, 0]
#
# Inbetween we have a weight matrix with 93x9 dimension mapping from x to y -> y = Wx
#
# x = [1, 4, 5, ... , 8] (length = 93)

# Construct model
pred = tf.nn.softmax(tf.matmul(x, W) + b) # Softmax, see Slide 33

# Minimize error using cross entropy: mean of the sum of the logistic regression
cost = tf.reduce_mean(-tf.reduce_sum(y*tf.log(pred), reduction_indices=1))

# Gradient Descent minimizes cost function (cost)
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

# Initializing the variables
init = tf.global_variables_initializer()

# Launch the graph
with tf.Session() as sess:
    sess.run(init)

    # Training cycle
    for epoch in range(training_epochs):
        
        # Monitors the decrease of the loss function
        avg_cost = 0.
        
        # Loop over all batches: batch of dataset is passed
        for start, end in zip(range(0, len(X_train), batch_size), range(batch_size, len(X_train), batch_size)):
            _,c = sess.run([optimizer, cost], feed_dict={x: X_train[start:end], y: Y_train[start:end]})

        # Compute average loss
        avg_cost += c / (X_train.shape[0]/batch_size)
        # Display logs per epoch step
        if (epoch+1) % display_step == 0:
            print("Epoch:", '%04d' % (epoch+1), "cost=", "{:.9f}".format(avg_cost))

    print("Optimization Finished!")

    # Test model
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    # Calculate accuracy
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    print("Accuracy:", accuracy.eval({x: X_test, y: Y_test}))

Epoch: 0001 cost= 0.003332357
Epoch: 0002 cost= 0.002992140
Epoch: 0003 cost= 0.002793532
Epoch: 0004 cost= 0.002655072
Epoch: 0005 cost= 0.002550682
Epoch: 0006 cost= 0.002468359
Epoch: 0007 cost= 0.002401410
Epoch: 0008 cost= 0.002345693
Epoch: 0009 cost= 0.002298490
Epoch: 0010 cost= 0.002257920
Epoch: 0011 cost= 0.002222643
Epoch: 0012 cost= 0.002191660
Epoch: 0013 cost= 0.002164216
Epoch: 0014 cost= 0.002139720
Epoch: 0015 cost= 0.002117706
Epoch: 0016 cost= 0.002097795
Epoch: 0017 cost= 0.002079684
Epoch: 0018 cost= 0.002063121
Epoch: 0019 cost= 0.002047899
Epoch: 0020 cost= 0.002033845
Epoch: 0021 cost= 0.002020817
Epoch: 0022 cost= 0.002008693
Epoch: 0023 cost= 0.001997371
Epoch: 0024 cost= 0.001986764
Epoch: 0025 cost= 0.001976797
Optimization Finished!
Accuracy: 0.725367
