# Linear Regression with Multiple Variables

For the original packaged homework instructions, refer to the file `machine-learning-ex1.zip`.

## Problem Description

You will implement linear regression with multiple variables to predict the prices of houses. Suppose you are selling your house and you want to know what a good market price would be. One way to do this is to first collect information on recent houses sold and make a model of housing prices.

The dataset contains 3 columns based on housing prices in Oregon. The first column is the size of the house (in square feet), the second column is the number of bedrooms, and the third column is the price of the house.

## Preparation

In [2]:
# Import the libraries

import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

In [3]:
# Define the function for reading & parsing data from file

def read_data_from(file, seperator=',', dtype=float, *args, **kwargs):
    """
    Read x and y (as column vectors) respectively from the file object given.

    Every non-blank line is split on `seperator`; all fields except the last
    form one row of x and the last field forms a single-element row of y.
    Blank lines (for example a trailing empty line at the end of the file)
    are skipped so they cannot produce ragged rows.

    Parameters
    ----------
    file : iterable of str
        An open text file, or any other iterable that yields lines.
    seperator : str, optional
        Field delimiter passed to `str.split` (parameter spelling kept for
        backward compatibility).
    dtype : data-type, optional
        Element type requested for both returned arrays.
    *args, **kwargs
        Forwarded unchanged to both `np.array` calls.

    Returns
    -------
    tuple of (ndarray, ndarray)
        `x` with one row per data line and `y` with one single-element row
        per data line.
    """
    x = []
    y = []
    for line in file:
        # Drop only the line terminator so that leading/trailing separator
        # characters inside the record are left untouched.
        record = line.rstrip('\r\n')
        if not record.strip():
            continue
        data = record.split(seperator)
        x.append(data[:-1])
        y.append(data[-1:])
    return (
        np.array(x, *args, dtype=dtype, **kwargs),
        np.array(y, *args, dtype=dtype, **kwargs),
    )

In [4]:
# Define the function to print a particular expression

def nprint(str, seperator=' =\n', *args, **kwargs):
    """Print str as well as the expression specified by str.

    The output is `str`, then `seperator`, then the value obtained by
    evaluating `str` as a Python expression. The expression is evaluated
    with `eval` inside this function, so it can see the module-level names
    of the module that defines `nprint`, but not the local variables of the
    caller.

    Parameters
    ----------
    str : str
        Source text of the expression to display (parameter name kept for
        backward compatibility even though it shadows the builtin).
    seperator : str, optional
        Text placed between the expression text and its value.
    *args, **kwargs
        Forwarded to `print` (for example `end=''` or `file=...`).
    """
    # NOTE(review): eval() executes arbitrary code -- only pass trusted,
    # hard-coded expressions here, never text from an external source.
    print('{}{}{}'.format(str, seperator, eval(str)), *args, **kwargs)

In [5]:
# Define the cost function

def _mse(predicted, actual):
    """Calculate the MSE between predicted data and actual data (both column vectors) given."""
    err = np.array(predicted) - np.array(actual)
    return err.transpose().dot(err)[0][0]

def mse(x, y, f):
    """Score the predictions of f against y using `_mse`.

    f is called with the first entry of every row of x. The results are
    stacked as single-element rows and passed, together with y converted
    to an array, to `_mse`, whose result is returned unchanged.
    """
    predictions = []
    for row in x:
        predictions.append([f(row[0])])
    return _mse(np.array(predictions), np.array(y))

# `cost` is the name under which the notebook refers to its cost function.
cost = mse

In [6]:
# Define the model representation

def _linear(thetas):
    """Return a linear function based on the thetas given (as a column vector)."""
    thetas = np.array(thetas)
    def f(x):
        """Linear function with input x (as a column vector, 1 included)."""
        return thetas.transpose().dot(np.array(x))[0]
    return f

def linear(thetas):
    """Build a linear function from `thetas` whose input leaves out the leading 1."""
    with_bias = _linear(thetas)

    def f(x):
        """Evaluate the linear function at x (a column vector without the leading 1)."""
        # Put the constant-term entry in front of the supplied entries.
        augmented = [[1]]
        augmented.extend(x)
        return with_bias(np.array(augmented))

    return f

# `model` is the name under which the notebook refers to its hypothesis builder.
model = linear

## Solution

### Data Loading & Visualization

In [7]:
# Load data from the file

# Parse ex1data2.txt with read_data_from: x receives every column except the
# last, y receives the last column. The `with` block closes the file afterwards.
with open('ex1data2.txt') as file:
    x, y = read_data_from(file)

In [8]:
# Display loaded x and y

# nprint evaluates each name and prints it under a "<name> =" header line.
nprint('x')
nprint('y')

# Visualization - pass

x =
[[2.104e+03 3.000e+00]
 [1.600e+03 3.000e+00]
 [2.400e+03 3.000e+00]
 [1.416e+03 2.000e+00]
 [3.000e+03 4.000e+00]
 [1.985e+03 4.000e+00]
 [1.534e+03 3.000e+00]
 [1.427e+03 3.000e+00]
 [1.380e+03 3.000e+00]
 [1.494e+03 3.000e+00]
 [1.940e+03 4.000e+00]
 [2.000e+03 3.000e+00]
 [1.890e+03 3.000e+00]
 [4.478e+03 5.000e+00]
 [1.268e+03 3.000e+00]
 [2.300e+03 4.000e+00]
 [1.320e+03 2.000e+00]
 [1.236e+03 3.000e+00]
 [2.609e+03 4.000e+00]
 [3.031e+03 4.000e+00]
 [1.767e+03 3.000e+00]
 [1.888e+03 2.000e+00]
 [1.604e+03 3.000e+00]
 [1.962e+03 4.000e+00]
 [3.890e+03 3.000e+00]
 [1.100e+03 3.000e+00]
 [1.458e+03 3.000e+00]
 [2.526e+03 3.000e+00]
 [2.200e+03 3.000e+00]
 [2.637e+03 3.000e+00]
 [1.839e+03 2.000e+00]
 [1.000e+03 1.000e+00]
 [2.040e+03 4.000e+00]
 [3.137e+03 3.000e+00]
 [1.811e+03 4.000e+00]
 [1.437e+03 3.000e+00]
 [1.239e+03 3.000e+00]
 [2.132e+03 4.000e+00]
 [4.215e+03 4.000e+00]
 [2.162e+03 4.000e+00]
 [1.664e+03 2.000e+00]
 [2.238e+03 3.000e+00]
 [2.567e+03 4.000e+00]
 [1.200

### Gradient Descent

In [9]:
# Epsilon + Vectorization

# f = model((1, 2, 3))
# x = (1, 2)
# print('f({}) = {}'.format(x, f(x)))

In [10]:
# Somewhat enhanced gradient descent with epsilon

def init_parameters():
    """Return the starting values of the three parameters, all zero."""
    initial_values = (0,) * 3
    return initial_values

# NOTE(review): this definition is unfinished -- the inner `def f(x)` has no
# colon and no body, so the cell does not parse as written. It also reuses the
# name `linear`, which the model-representation cell above already defines
# with a different signature.
def linear(b, k1, k2):
    def f(x)

# Step size applied to each averaged update in the loop below.
alpha = 0.001

b, k1, k2 = init_parameters()

e = [] # recorded (epoch, error) pairs

eps = 1e-10 # stop once two consecutive errors differ by less than eps in absolute value
# Progress is printed every `interval` epochs.
interval = 1

# Epoch counter.
i = 0
# NOTE(review): `k` is never assigned in this cell (init_parameters is unpacked
# into b, k1, k2), and mse is declared above as mse(x, y, f) whereas the calls
# here pass the function as the second argument -- confirm the intended call.
e.append((i, mse(x, linear(b, k), y)))

while True:
    if i % interval == 0:
        print('Epoch {}: b = {}, k = {}, error = {}'.format(i, b, k, e[-1][1]))
    f = linear(b, k)
    # Accumulate the update terms over all samples before applying them.
    b_offset, k_offset = 0, 0
    # NOTE(review): `m` is not defined in the visible cells -- presumably the
    # number of training examples; confirm.
    for j in range(m):
        term = f(x[j]) - y[j]
        b_offset += term
        k_offset += term * x[j]
    # Average the accumulated terms and scale them by the step size.
    b_offset = b_offset * alpha / m
    k_offset = k_offset * alpha / m
    b -= b_offset
    k -= k_offset
    e.append((i, mse(x, linear(b, k), y)))
    i += 1
    # Convergence test: compare the two most recent recorded errors.
    if abs(e[-1][1] - e[-2][1]) < eps:
        break;

print('Epoch {}: b = {}, k = {}, error = {}'.format(i, b, k, e[-1][1]))

# NOTE(review): plot_errors and visualize are not defined in the visible part
# of this notebook -- confirm where they come from.
plot_errors()
visualize(linear(b, k))

### Normal Equation

In [11]:
# Core part of normal equation

pass

In [12]:
np.repeat?

Signature: np.repeat(a, repeats, axis=None)
Docstring:
Repeat elements of an array.

Parameters
----------
a : array_like
    Input array.
repeats : int or array of ints
    The number of repetitions for each element.  `repeats` is broadcasted
    to fit the shape of the given axis.
axis : int, optional
    The axis along which to repeat values.  By default, use the
    flattened input array, and return a flat output array.

Returns
-------
repeated_array : ndarray
    Output array which has the same shape as `a`, except along
    the given axis.

See Also
--------
tile : Tile an array.

Examples
--------
>>> np.repeat(3, 4)
array([3, 3, 3, 3])
>>> x = np.array([[1,2],[3,4]])
>>> np.repeat(x, 2)
array([1, 1, 2, 2, 3, 3, 4, 4])
>>> np.repeat(x, 3, axis=1)
array([[1, 1, 1, 2, 2, 2],
       [3, 3, 3, 4, 4, 4]])
>>> np.repeat(x