In [40]:
import numpy as np
import os

%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt

import mpl_toolkits
from mpl_toolkits.mpot3d import Axes3D
import time



import tensorflow as  tf

ModuleNotFoundError: No module named 'mpl_toolkits.mpot3d'

In [7]:
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

Linear model 


$y(x) = a + b_1 * X_1 + b_2*X_2 + b_3*X_3 + \sigma \epsilon$

$\epsilon \approx N(0,1)$

$b_1, b_2, b_3 = (0.5, 0.2, 0.1) $

$\sigma=0.1$


$X_1, X_2, X_3$ uniformally distributed in [-1,1]

In [21]:
#Generate Data

n_points = 5000
n_features = 3

bias = np.ones(n_points).reshape((-1,1))
low  = -1 * np.ones((n_points, n_features),dtype='float')
high  = np.ones((n_points, n_features),dtype='float')

#similated Features
X =  np.random.uniform(low=low, high=high)


#simulated Noise
noise = np.random.normal(size=(n_points,1))

#outputr
weights = np.array([1.0, 0.5, 0.2, 0.1])
noise_std = 0.1

Y = weights[0] * bias + np.dot(X, weights[1:] .reshape(-1,1)) + noise_std * noise


In [22]:
Y

array([[ 0.46769372],
       [ 1.24090559],
       [ 1.41068818],
       ..., 
       [ 1.11978003],
       [ 1.07924581],
       [ 0.75105445]])

In [23]:
#split the data into train and test sets
train_test_split = 4

n_test = int(n_points/train_test_split)
n_train = n_points - n_test

X_train = X[:n_train, :]
Y_train = Y[:n_train].reshape((-1,1))

X_test = X[n_train:,:]
Y_test = Y[n_train:].reshape((-1,1))

In [24]:
X_test.shape, Y_test.shape, X_train.shape, Y_train.shape

((1250, 3), (1250, 1), (3750, 3), (3750, 1))

### Linear Regression with numpy

In [29]:


#add colum of ones
X = np.hstack((np.ones(n_train).reshape((-1,1)), X_train))

theta_numpy = np.linalg.inv(X.T.dot(X)).dot(X.T).dot(Y_train)

print(theta_numpy)

[[ 1.00030061]
 [ 0.5046615 ]
 [ 0.20047932]
 [ 0.0978062 ]]


### Linear Regression with sklearn

In [31]:
from sklearn.linear_model import LinearRegression

lin_reg = LinearRegression()
lin_reg.fit(X_train, Y_train)

np.r_[lin_reg.intercept_.reshape(-1,1), lin_reg.coef_.T]

array([[ 1.00030061],
       [ 0.5046615 ],
       [ 0.20047932],
       [ 0.0978062 ]])

### Linear Regression with Tensorflow

In [39]:
X_np = np.hstack((np.ones(n_train).reshape((-1,1)), X_train))

X = tf.constant(X_np, dtype=tf.float32, name = "X")
y = tf.constant(Y_train, dtype=tf.float32, name = "y")

XT = tf.transpose(X)

theta = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(XT,X)),XT),y)

with tf.Session() as sess:
    theta_value = theta.eval()
    
theta_value

array([[ 1.00030077],
       [ 0.5046615 ],
       [ 0.20047921],
       [ 0.0978062 ]], dtype=float32)

## A simple class for Linear Regression

Implement the Normal Equation and MLE solutions

In [67]:
import tensorflow as tf

class Model:
    
    def __init__(self, n_features, learning_rate=0.05, L=0):
        
        #input placeholders
        self.X = tf.placeholder(tf.float32, [None, n_features], name="X")
        self.Y = tf.placeholder(tf.float32, [None, 1], name = "Y")
        
        #regression paramaters for analytical Solution using Normal Equan
        self.theta_in = tf.placeholder(tf.float32, [n_features+1,None])
        
        #Augmented data matrix is obtained by adding a column of ones to
        #the data matrix
        data_plus_bias = tf.concat([tf.ones([tf.shape(self.X,)[0],1]), self.X], axis=1)
        
        XT = tf.transpose(data_plus_bias)
        
        ###########################################
        # The normal equation for Linear Regression
        ###########################################
        self.theta = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(XT,data_plus_bias)),XT),self.Y)
        
        
        #mean square error in terms of theta = theta_in
        self.lr_mse = tf.reduce_mean(tf.square(tf.matmul(data_plus_bias, self.theta_in)) - self.Y)
        
        ##############################################
        #Estimate Model using MLE
        
        #Regression paramaters for the maximum likelihood method
        # Note that there are n_features+2 parameters, as one is added for the intercept
        # and another one for the std dev for the noise
        self.weights = tf.Variable(tf.random_normal([n_features+2, 1]))
        
        #prediction from the model
        self.output = tf.matmul(data_plus_bias, self.weights[:-1,:])
        
        gauss = tf.distributions.Normal(loc=0.0, scale = 1.0)
        
        #STadard Deviation of the Gaussian noise is modelled as a square of the last
        #model weight
        sigma = 0.0001 + tf.square(self.weights[-1])
        
        log_LL = tf.log(0.00001 + (1/sigma) * gauss.prob((self.Y - self.output)/sigma))
        
        #Loss is the negative log-likelihood
        self.loss = -tf.reduce_mean(log_LL)
        
        #TF node defining an Op for on training set
        self.train.step = (tf.train.AdamOptimizer(learning_rate).minimize(self.loss), self.loss)
        
    def generate_date(n_points = 10000,
                     n_features = 3,
                     weights = np.array([1.0, 0.5, 0.2, 0.1]),
                     noise_std = 0.1):
        
        bias = np.ones(n_points).reshape((-1,1))        
        low  = -1 * np.ones((n_points, n_features),dtype='float')
        high  = np.ones((n_points, n_features),dtype='float')
        
        #simulated Features
        X =  np.random.uniform(low=low, high=high)
        
        #simulated Noise
        noise = np.random.normal(size=(n_points,1))
        
        #output
        weights = np.array([1.0, 0.5, 0.2, 0.1])
        noise_std = 0.1
        
        Y = weights[0] * bias + np.dot(X, weights[1:] .reshape(-1,1)) + noise_std * noise
        
        return(X,Y)
        

In [68]:
def main():
    n_points = 5000
    n_features = 3
    
    #n_features + 1 weights (one for constant feature)
    weights = np.array([1.0, 0.5, 0.2, 0.1])
    noise_std = 0.1
    
    learning_rate = 0.01
    
    #Make data
    (X,Y) = Model.generate_date(n_points = n_points,
                                 n_features = n_features,
                                 weights = weights,
                                 noise_std = noise_std)
    
    #split the data into train and test sets
    train_test_split = 4 #1/4 to be used for test
    
    n_test = int(n_points/train_test_split)
    n_train = n_points - n_test
    
    X_train = X[:n_train, :]
    Y_train = Y[:n_train].reshape((-1,1))
    
    X_test = X[n_train:,:]
    Y_test = Y[n_train:].reshape((-1,1))
    
    #create an instance of the Linear Regression model class
    model = Model(n_features=n_features, learning_rate=learning_rate)
    
    #train the model
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        
        #Normal Equations for Linear Regression
        theta_value = sess.run(model.theta, feed_dict={
            model.X: X_train,
            model.Y: Y_train
        })
        
        lr_mse_train = sess.run(model.lr_mse, feed_dict={
            model.X: X_train,
            model.Y: Y_train,
            model.theta_in: theta_value
        })
        
        lr_mse_test = sess.run(model.lr_mse, feed_dict={
            model.X: X_test,
            model.Y: Y_test,
            model.theta_in: theta_value
        })
        
        print("====== Linear Regression with the Normal Equation =======")
        print(theta_value)
        print("Linear regression train error:", lr_mse_train)
        print("Linear regression test error :", lr_mse_test)
        
        #Now train the MLE parameters
        for i in range(0,1000):
            (_,loss), weights = sess.run((model.train_step, model.weights), 
                                         feed_dict={
                                                    model.X: X_train,
                                                    model.Y: Y_train
                                                })
            
        #make test predictions
        Y_test_predicted = sess.run(model.output, feed_dict = {
                                                        model.X: X_test
                                                    })
        
        #output std sigma is a square of the last weight
        std_model = weights[-1]**2
        print("===== Linear Regression with MLE =========")
        print("Negative Log-Likelihood", loss)
        print("MLE fitted parameters: ")
        print(weights[0:-1])
        print("Fitted std of noise: ", std_model)
        
        fig.plt.figure()
        ax = fig.add_subplot(111, projection='3d')
        ax.scatter(X_test[:,1], X_test[:,2], Y_test, s=1, c="#000000")
        ax.scatter(X_test[:,1], X_test[:,2], Y_test_predicted, s=1, c="#FF0000")
        plt.show()
        
        sess.close()
        


In [71]:

if __name__ == "__main__":
    main()

AttributeError: module 'tensorflow' has no attribute 'distributions'