# Simple Linear Regression. Minimal example

### Import the relevant libraries

In [1]:
# We must always import the relevant libraries for our problem at hand. NumPy is a must for this example.
import numpy as np

# matplotlib and mpl_toolkits are not needed for the regression itself. We use them solely to visualize the results.
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

### Generate random input data to train on

In [6]:
# Size of the training set we are about to generate.
observations = 1000

# Every input variable is stored as a column vector: observations x 1 (here 1000 x 1).
sample_shape = (observations, 1)

# Two input variables, called x and z (think x1 and x2) so they are easy to tell apart.
# Both are drawn from a uniform distribution; np.random.uniform takes (low, high, size).
xs = np.random.uniform(-10, 10, sample_shape)
zs = np.random.uniform(low=-10, high=10, size=sample_shape)

# Put the two columns side by side to form the input matrix,
# i.e. the X matrix of the linear model y = x*w + b.
# hstack joins the two n x 1 columns along the second axis; column_stack would do the same here.
inputs = np.hstack([xs, zs])

# Sanity check: the inputs should be n x k (observations x variables), so 1000 x 2.
print(inputs.shape)
inputs

(1000, 2)


array([[ 6.17547261, -2.51080877],
       [ 3.62806318, -1.76534951],
       [ 2.41236113, -1.66507848],
       ..., 
       [ 5.19511434, -0.76954957],
       [ 3.64526286, -7.71404405],
       [ 5.49262908, -1.82198243]])

### Generate the targets we will aim at

In [5]:
# We invent a function ourselves, apply the ML methodology, and later check whether the algorithm learned it.
# The function is f(x,z) = 2x - 3z + 5 + <small noise>, so start by drawing the noise term:
# one uniform value in [-1, 1) per observation.
noise = np.random.uniform(low=-1, high=1, size=(observations, 1))

# Build the targets from the definition f(x,z) = 2x - 3z + 5 + noise.
# Put differently: the weights to recover are 2 and -3, and the bias is 5.
linear_part = 2 * xs - 3 * zs
targets = linear_part + 5 + noise

# Sanity check: the targets should be n x m, where m is the number of output variables, so 1000 x 1.
print(targets.shape)
targets

(1000, 1)


array([[-22.25357091],
       [ 23.73791545],
       [ 38.77977599],
       [ 11.49277114],
       [ 22.782774  ],
       [ 27.77776074],
       [ 25.19590397],
       [ 22.86896419],
       [-12.4579918 ],
       [ -3.31957483],
       [ 13.63091897],
       [-14.96904747],
       [ -4.06093969],
       [-12.61933114],
       [  3.98530065],
       [-15.24294519],
       [  9.29515487],
       [  4.65132999],
       [ 28.03189694],
       [ -6.11439726],
       [-12.92803481],
       [ 22.92884142],
       [  2.6578507 ],
       [-12.65125659],
       [-15.60023223],
       [-14.37691609],
       [ -9.26500032],
       [ -5.46430329],
       [ 22.17713421],
       [ 39.86467287],
       [-36.23944074],
       [ 23.07129611],
       [ -0.2422759 ],
       [-14.19654282],
       [ 41.64821571],
       [  1.85065164],
       [ -6.56982326],
       [ 25.25581933],
       [-11.13351959],
       [-35.5662967 ],
       [  5.85645734],
       [-32.44204805],
       [-27.61295271],
       [ -0