In [3]:
import numpy as np
import os

%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt

import mpl_toolkits
#from mpl_toolkits.mpot3d import Axes3D
import time



import tensorflow as  tf

In [4]:
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

Linear model 


$y(x) = a + b_1 * X_1 + b_2*X_2 + b_3*X_3 + \sigma \epsilon$

$\epsilon \approx N(0,1)$

$b_1, b_2, b_3 = (0.5, 0.2, 0.1) $

$\sigma=0.1$


$X_1, X_2, X_3$ uniformally distributed in [-1,1]

In [5]:
#Generate Data

n_points = 5000
n_features = 3

bias = np.ones(n_points).reshape((-1,1))
low  = -1 * np.ones((n_points, n_features),dtype='float')
high  = np.ones((n_points, n_features),dtype='float')

#similated Features
X =  np.random.uniform(low=low, high=high)


#simulated Noise
noise = np.random.normal(size=(n_points,1))

#outputr
weights = np.array([1.0, 0.5, 0.2, 0.1])
noise_std = 0.1

Y = weights[0] * bias + np.dot(X, weights[1:] .reshape(-1,1)) + noise_std * noise


In [6]:
#View X
print(X.shape)
X

(5000, 3)


array([[ 0.76760326,  0.64923997, -0.19078515],
       [-0.4022349 ,  0.33327195, -0.54046968],
       [-0.04021788, -0.75049729, -0.87978977],
       ..., 
       [-0.17505061, -0.62128382, -0.77944038],
       [ 0.05090891, -0.45479944,  0.54582029],
       [ 0.67501371,  0.39949065, -0.01286789]])

In [7]:
#View Y
print(Y.shape)
Y

(5000, 1)


array([[ 1.47804987],
       [ 0.88610159],
       [ 0.65398938],
       ..., 
       [ 0.57014164],
       [ 1.06970906],
       [ 1.38890441]])

In [8]:
#split the data into train and test sets
train_test_split = 4

n_test = int(n_points/train_test_split)
n_train = n_points - n_test

X_train = X[:n_train, :]
Y_train = Y[:n_train].reshape((-1,1))

X_test = X[n_train:,:]
Y_test = Y[n_train:].reshape((-1,1))

In [9]:
X_test.shape, Y_test.shape, X_train.shape, Y_train.shape

((1250, 3), (1250, 1), (3750, 3), (3750, 1))

In [10]:
#tf.placeholder(dtype, shape=None, name=None)

In [11]:
#We can do directly - but let us use Minibatch GRadient Descent
#tf.placeholder(dtype, shape=None, name=None)

reset_graph()

n_epochs = 500
learning_rate = 0.01


X = tf.placeholder(tf.float32, shape=(None, n_features + 1), name = "X")
y = tf.placeholder(tf.float32, shape=(None, 1) , name = "y")
theta = tf.Variable(tf.random_uniform([n_features + 1, 1], -1.0, 1.0), name = "theta")
y_pred = tf.matmul(X, theta, name = "predictions")
error = y_pred - y

mse = tf.reduce_mean(tf.square(error), name = "mse")
gradients = 2.0/n_train * tf.matmul(tf.transpose(X), error)

training_op = tf.assign(theta, theta - learning_rate* gradients)

#Then define the batch size and compute the number of batches
#batch_size = 100
#n_batches  = np.ceil(n_test, batch_size)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        sess.run(training_op, feed_dict={X: np.hstack([np.ones(X_train.shape[0]).reshape(-1,1),X_train]), 
                                         y: Y_train})
        
    best_theta = theta.eval()
    
    
    

In [12]:
best_theta

array([[ 0.99993044],
       [ 0.47880352],
       [ 0.17912023],
       [ 0.11758925]], dtype=float32)

In [13]:
#Diff from actual 
np.round(weights.reshape(-1,1) - best_theta,3)

array([[ 0.   ],
       [ 0.021],
       [ 0.021],
       [-0.018]])

In [14]:
#Visualizing Using the tensorboard

from datetime import datetime
now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
root_logdir = "tf_logs"
logdir = "{}/run-{}/".format(root_logdir, now)
#'tf_logs/run-20180805063540/'


reset_graph()

n_epochs = 500
learning_rate = 0.01

X = tf.placeholder(tf.float32, shape=(None, n_features + 1), name = "X")
y = tf.placeholder(tf.float32, shape=(None, 1) , name = "y")
theta = tf.Variable(tf.random_uniform([n_features + 1, 1], -1.0, 1.0), name = "theta")
y_pred = tf.matmul(X, theta, name = "predictions")
error = y_pred - y

mse = tf.reduce_mean(tf.square(error), name = "mse")
gradients = 2.0/n_train * tf.matmul(tf.transpose(X), error)
  
training_op = tf.assign(theta, theta - learning_rate* gradients)

#Then define the batch size and compute the number of batches
#batch_size = 100
#n_batches  = np.ceil(n_test, batch_size)

mse_summary = tf.summary.scalar("MSE", mse)
file_witer  = tf.summary.FileWriter(logdir, tf.get_default_graph())

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        sess.run(training_op, feed_dict={X: np.hstack([np.ones(X_train.shape[0]).reshape(-1,1),X_train]), 
                                         y: Y_train})
        if(epoch %10 == 0):
            summary_str = mse_summary.eval(feed_dict={X: np.hstack([np.ones(X_train.shape[0]).reshape(-1,1),X_train]), 
                                         y: Y_train})
            file_witer.add_summary(summary_str, epoch)
        
    best_theta = theta.eval()
    
file_witer.close()

## Tidy Version

In [15]:
from datetime import datetime
now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
root_logdir = "tf_logs2"
logdir = "{}/run-{}/".format(root_logdir, now)

reset_graph()

n_epochs = 500
learning_rate = 0.01

X = tf.placeholder(tf.float32, shape=(None, n_features + 1), name = "X")
y = tf.placeholder(tf.float32, shape=(None, 1) , name = "y")
theta = tf.Variable(tf.random_uniform([n_features + 1, 1], -1.0, 1.0), name = "theta")

y_pred = tf.matmul(X, theta, name = "predictions")

#============= Tidy ==================#
# TensorBoard will give better grouped flow display
#=====================================#
with tf.name_scope("loss") as scope:
    error = y_pred - y
    mse   = tf.reduce_mean(tf.square(error), name = "mse")

    
gradients = 2.0/n_train * tf.matmul(tf.transpose(X), error)  
training_op = tf.assign(theta, theta - learning_rate* gradients)

mse_summary = tf.summary.scalar("MSE", mse)
file_witer  = tf.summary.FileWriter(logdir, tf.get_default_graph())

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        sess.run(training_op, feed_dict={X: np.hstack([np.ones(X_train.shape[0]).reshape(-1,1),X_train]), 
                                         y: Y_train})
        if(epoch %10 == 0):
            summary_str = mse_summary.eval(feed_dict={X: np.hstack([np.ones(X_train.shape[0]).reshape(-1,1),X_train]), 
                                         y: Y_train})
            file_witer.add_summary(summary_str, epoch)
        
    best_theta = theta.eval()
    
file_witer.close()

In [16]:
best_theta
#Should be 1 and (0.5, 0.2, 0.1)

array([[ 0.99993044],
       [ 0.47880352],
       [ 0.17912023],
       [ 0.11758925]], dtype=float32)