In [86]:
% matplotlib notebook
import numpy as np
import unittest

In [87]:
def load_data(file_name):
    """Load a tab-separated numeric data file into features and targets.

    Every non-blank line is split on tabs; all columns but the last become
    one feature row and the last column becomes the target.

    Args:
        file_name: Path of the tab-separated text file to read.

    Returns:
        A tuple ``(X, y)`` where ``X`` is a list of feature rows (lists of
        floats) and ``y`` is a list of single-element lists ``[target]``,
        one per data line.

    Raises:
        ValueError: If a non-blank line contains a field that cannot be
            parsed as a float.
    """
    X = []
    y = []
    # The with-statement closes the file; no explicit close() is needed.
    with open(file_name) as f:
        for line in f:
            stripped = line.strip()
            if not stripped:
                # Skip blank lines (e.g. a trailing newline at end of file)
                # instead of failing on float('').
                continue
            cols = stripped.split('\t')
            X.append([float(value) for value in cols[:-1]])
            y.append([float(cols[-1])])
    return X, y

In [88]:
def standard_regression(X, y):
    """Fit ordinary least squares via the normal equations.

    Computes ``ws = (X^T X)^-1 X^T y``.

    Args:
        X: Feature rows, shape (m, n) once converted to a matrix.
        y: Targets, either column-shaped (m, 1) / (m, k) or a flat
            sequence of m values.

    Returns:
        The regression weights as an ``np.matrix`` with n rows, or ``None``
        (after printing a message) when ``X^T X`` has a determinant of
        exactly 0.0 and therefore cannot be inverted.
    """
    # np.asmatrix replaces np.mat, which is no longer available in NumPy 2.x.
    X_mat = np.asmatrix(X)
    y_mat = np.asmatrix(y)
    if y_mat.shape[0] != X_mat.shape[0]:
        # A flat y arrives as a 1 x m row; turn it into an m x 1 column.
        y_mat = y_mat.T

    X_tx = X_mat.T * X_mat
    if np.linalg.det(X_tx) == 0.0:
        print('This matrix is singular, cannot do inverser')
        return None
    ws = X_tx.I * (X_mat.T * y_mat)
    return ws

In [145]:
def plot_figure(X, y, ws):
    """Scatter the data, draw the fitted line and print the correlation.

    The x-axis uses column 1 of ``X``; the fitted line is ``X * ws``
    evaluated at the data points and drawn in increasing order of that
    column.

    Args:
        X: Feature rows with at least two columns.
        y: Targets, one per row of ``X``.
        ws: Weight column as returned by ``standard_regression``.

    Returns:
        None. A new matplotlib figure is created and the 2x2 correlation
        matrix between predictions and targets is printed.
    """
    import matplotlib.pyplot as plt

    # np.asmatrix replaces np.mat, which is no longer available in NumPy 2.x.
    X_mat = np.asmatrix(X)
    y_mat = np.asmatrix(y)

    fig = plt.figure()
    ax = fig.add_subplot(111)

    x_values = X_mat[:, 1].flatten().A[0]
    y_values = y_mat.T.flatten().A[0]
    ax.scatter(x_values, y_values)

    # Prediction line: order whole rows by the plotted feature. Sorting the
    # matrix in place along axis 0 would sort every column independently and
    # only keeps rows intact when the other columns are constant.
    y_hat = X_mat * ws
    order = x_values.argsort()
    ax.plot(x_values[order], np.asarray(y_hat).ravel()[order])

    # Correlation between the predictions and the observed targets.
    print('correlation coefficient: {}'.format(np.corrcoef(y_hat.T, y_mat.flatten())))

In [228]:
def locally_weighted_linear_regression(X_test, X, y, k = 1.0):
    """Predict the target at one query point with locally weighted regression.

    Each training row ``x_j`` gets the Gaussian kernel weight
    ``exp(-||X_test - x_j||^2 / (2 k^2))`` and the weighted normal equations
    ``ws = (X^T W X)^-1 X^T W y`` are solved for that query point.

    Args:
        X_test: A single query point (one feature row of length n).
        X: Training feature rows, shape (m, n).
        y: Training targets, column-shaped (m, 1) or a flat sequence of m
            values.
        k: Kernel bandwidth; smaller values weight nearby points more.

    Returns:
        ``X_test * ws`` as an ``np.matrix`` (1 x 1 for a single target
        column), or ``None`` (after printing a message) when the weighted
        ``X^T W X`` has a determinant of exactly 0.0.

    Raises:
        ValueError: If ``X_test`` holds more than one row.
    """
    # np.asmatrix replaces np.mat, which is no longer available in NumPy 2.x.
    X_mat = np.asmatrix(X, dtype=float)
    y_mat = np.asmatrix(y, dtype=float)
    x_query = np.asmatrix(X_test, dtype=float)

    if x_query.shape[0] != 1:
        raise ValueError('X_test must be a single query point (one row)')
    if y_mat.shape[0] != X_mat.shape[0]:
        # A flat y arrives as a 1 x m row; turn it into an m x 1 column.
        y_mat = y_mat.T

    # Squared Euclidean distance of every training row to the query (m x 1).
    sq_dist = np.square(X_mat - x_query).sum(axis=1)
    # Diagonal of the weight matrix W, kept as a column so that scaling rows
    # replaces the dense m x m product W * X.
    w = np.exp(sq_dist / (-2.0 * k ** 2))

    X_tx = X_mat.T * np.multiply(w, X_mat)
    if np.linalg.det(X_tx) == 0.0:
        print('The matrix is singular, cannot do inverse')
        return None
    ws = X_tx.I * (X_mat.T * np.multiply(w, y_mat))
    return x_query * ws

In [250]:
def locally_weighted_linear_regression_test(X_test, X, y, k = 1.0):
    """Run locally weighted regression for every row of ``X_test``.

    Args:
        X_test: Query points, one per row.
        X: Training feature rows.
        y: Training targets.
        k: Kernel bandwidth forwarded to
            ``locally_weighted_linear_regression``.

    Returns:
        A 1-D float array with one prediction per query row. An entry is
        NaN when the per-point fit returned ``None`` (singular system).
    """
    m = np.shape(X_test)[0]
    y_hat = np.zeros(m)
    for i in range(m):
        prediction = locally_weighted_linear_regression(X_test[i], X, y, k)
        if prediction is None:
            # The fit already printed its message; mark the point as missing
            # instead of failing on the assignment of None.
            y_hat[i] = np.nan
        else:
            # Extract the scalar explicitly rather than relying on implicit
            # conversion of a 1 x 1 matrix to a float.
            y_hat[i] = np.asarray(prediction).item()
    return y_hat

In [278]:
class LinearRegressionTestCase(unittest.TestCase):
    """Smoke tests for the regression helpers, run against ``ex0.txt``.

    The plotting tests create matplotlib figures as a side effect so the
    fits can be inspected visually in the notebook.
    """

    # Tab-separated sample file expected in the working directory.
    DATA_FILE = 'ex0.txt'

    def setUp(self):
        # Load the data once per test instead of repeating it in each test.
        self.X, self.y = load_data(self.DATA_FILE)

    def test_load_data(self):
        self.assertEqual(len(self.X), len(self.y))

    def test_standard_regression(self):
        ws = standard_regression(self.X, self.y)
        self.assertIsNotNone(ws)
        self.assertEqual(len(ws), 2)

    def test_plot_figure(self):
        ws = standard_regression(self.X, self.y)
        self.assertIsNotNone(ws)
        plot_figure(self.X, self.y, ws)

    def test_locally_weighted_linear_regression(self):
        import matplotlib.pyplot as plt

        y_hat = locally_weighted_linear_regression_test(self.X, self.X, self.y, 0.003)
        self.assertEqual(len(y_hat), len(self.X))

        # Plain arrays and a 1-D argsort keep the indexing two-dimensional.
        X_arr = np.asarray(self.X, dtype=float)
        y_arr = np.asarray(self.y, dtype=float).ravel()
        order = X_arr[:, 1].argsort()

        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.plot(X_arr[order, 1], y_hat[order])
        ax.scatter(X_arr[:, 1],
                y_arr,
                s = 2,
                c = 'red')
        
        

# Run the test suite in place. A placeholder argv keeps unittest from parsing
# the host process's own command-line arguments, and exit = False stops
# unittest.main from calling sys.exit() once the tests finish.
if __name__ == '__main__':
    unittest.main(argv = ['ignore-first-arg'], exit = False)

.

<IPython.core.display.Javascript object>

.

<IPython.core.display.Javascript object>

..

correlation coefficient: [[ 1.          0.98647356]
 [ 0.98647356  1.        ]]



----------------------------------------------------------------------
Ran 4 tests in 1.224s

OK
