In [1]:
import numpy as np

## Initialization

Initialize a NumPy array from explicit integer values

In [2]:
a = np.array([1,2,3])
a

array([1, 2, 3])

In [3]:
a.shape # 1-D array with 3 elements

(3,)

Initialize a NumPy array filled with zeros

In [16]:
a = np.zeros(3)
a

array([0., 0., 0.])

Initialize a NumPy array with random numbers drawn uniformly from [0, 1)

In [18]:
a = np.random.rand(4)
a

array([0.79903439, 0.68808781, 0.60346174, 0.76108166])

Initialize a NumPy array from a range of consecutive integers

In [20]:
a = np.arange(5)
a

array([0, 1, 2, 3, 4])

#### 1-D array

In [53]:
w = np.array([2])
print(w)
w.shape # 1-D array with 1 element

[2]


(1,)

#### 2-D array

In [54]:
X = np.array([[1],[2],[3],[4]])  # 2-D array with 4 rows & 1 column
print(X)
X.shape # (4, 1) --> # An array of 4 arrays each holding just 1 element inside.

[[1]
 [2]
 [3]
 [4]]


(4, 1)

In [56]:
a = np.zeros((2, 5))    
print(a)
print(a.shape)

[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
(2, 5)


In [62]:
X = np.array([[1],[2],[3],[4]])
X.shape

(4, 1)

In [63]:
X = np.array([1,2,3,4])
X.shape

(4,)

# Indexing

In [22]:
# Indexing an 1-D array
a = np.arange(5)
print(a)
print(a[0]) # first element
print(a[-1]) # last element

[0 1 2 3 4]
0
4


In [69]:
# Indexing a 2-D array
a = np.array([[1,2],[5,6],[3,4],[7,8]]) 
print(a)
print(a[1])
print(a[1,1])
print(a[-1,0]) # last element's first item

[[1 2]
 [5 6]
 [3 4]
 [7 8]]
[5 6]
6
7


## Slicing

Extract a subset of an array

In [29]:
a = np.arange(10)
a

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [30]:
a[2:10:1]

array([2, 3, 4, 5, 6, 7, 8, 9])

In [31]:
a[2::2]

array([2, 4, 6, 8])

## Vector Operations

In [32]:
a = np.array([1,2,3,4])
b = -a
print(b)

# sum all elements of a, returns a scalar
b = np.sum(a) 
print(b) # scalar

# mean
b = np.mean(a)
print(b) # scalar

# square each item in 'a' array
b = a**2
print(b) # vector

[-1 -2 -3 -4]
10
2.5
[ 1  4  9 16]


## Vector Vector element-wise operations

In [35]:
a = np.array([1,2,3,4])
b = np.array([2,4,6,8])

In [36]:
c = a+b
print(c)

[ 3  6  9 12]


## Scalar Vector element-wise operations

In [37]:
a = np.array([1,2,3,4])
c = 5*a
c

array([ 5, 10, 15, 20])

## Dot product using a *for* loop

$$ \mathbf{a} \cdot \mathbf{b} = \sum_{i=0}^{n-1} a_i b_i $$

In [50]:
# Define the operands in this cell so the result does not depend on which
# earlier cell happened to assign `a` and `b` last.
a = np.array([1, 2, 3, 4])
b = np.array([1, 1, 1, 2])

# Accumulate a_i * b_i one pair at a time. The accumulator is called `total`
# so that the built-in sum() is not shadowed for the rest of the notebook.
total = 0
for a_i, b_i in zip(a, b):
    total += a_i * b_i
print(total)

14


## Dot product using `np.dot` (vectorized)

In [51]:
a = np.array([1,2,3,4])
b = np.array([1,1,1,2])

c = np.dot(a, b)
print(c)
print(c.shape) # scalar value

14
()


## Try-Except block

In [61]:
# error if dimensions are not same
a = np.array([1,2,3,4])
b = np.array([[1],[1],[1]])
try:
    c = np.dot(a, b)
    print(c)
except Exception as e:
    print("The error message you'll see is:")
    print(e)

The error message you'll see is:
shapes (4,) and (3,1) not aligned: 4 (dim 0) != 3 (dim 0)


## Convert 1-D array to 2-D Matrices using ***reshape*** function

In [71]:
a = np.arange(6).reshape(3, 2) # 3 denotes no. of rows and 2 represents no. of columns
print(a)

[[0 1]
 [2 3]
 [4 5]]


NOTE: In reshape, you can also use -1 to denote a to-be-inferred value.

In [72]:
# The -1 argument tells the routine to compute the number of rows given the size of the array and the number of columns.
np.reshape(a, (-1, 2))
# -1 is an unspecified value inferred to be 3 here (for 6 elements, infer the no. of rows required when no. of columns = 2)

array([[0, 1],
       [2, 3],
       [4, 5]])

## Slicing a 2-D array

In [80]:
# vector 2-D slicing operations
a = np.arange(20).reshape(-1, 10)
print(f"a = \n{a}")

# access 5 consecutive elements (start:stop:step)
print(a[0, :5])

# access 5 consecutive elements (start:stop:step) in two rows
print(a[:, :5])

# access all elements
print(a[:, :])

# access all elements in one row (very common usage)
print(a[1, :])

# same as
print(a[1])

a = 
[[ 0  1  2  3  4  5  6  7  8  9]
 [10 11 12 13 14 15 16 17 18 19]]
[0 1 2 3 4]
[[ 0  1  2  3  4]
 [10 11 12 13 14]]
[[ 0  1  2  3  4  5  6  7  8  9]
 [10 11 12 13 14 15 16 17 18 19]]
[10 11 12 13 14 15 16 17 18 19]
[10 11 12 13 14 15 16 17 18 19]


# Multiple Linear Regression

| Size (sqft) | Number of Bedrooms  | Number of floors | Age of  Home | Price (1000s dollars)  |   
| ----------------| ------------------- |----------------- |--------------|-------------- |  
| 2104            | 5                   | 1                | 45           | 460           |  
| 1416            | 3                   | 2                | 40           | 232           |  
| 852             | 2                   | 1                | 35           | 178           | 

$$\mathbf{X} = 
\begin{pmatrix}
 x^{(0)}_0 & x^{(0)}_1 & \cdots & x^{(0)}_{n-1} \\ 
 x^{(1)}_0 & x^{(1)}_1 & \cdots & x^{(1)}_{n-1} \\
 \cdots \\
 x^{(m-1)}_0 & x^{(m-1)}_1 & \cdots & x^{(m-1)}_{n-1} 
\end{pmatrix}
$$

In [6]:
X_train = np.array([[2104, 5, 1, 45], 
                    [1416, 3, 2, 40], 
                    [852, 2, 1, 35]])
y_train = np.array([460, 232, 178])

$$\mathbf{w} = \begin{pmatrix}
w_0 \\ 
w_1 \\
\cdots\\
w_{n-1}
\end{pmatrix}
$$
And $b$ is a scalar parameter. 

## Gradient Descent for Multiple Linear Regression

The model's prediction with multiple variables is given by the linear model:

$$ f_{\mathbf{w},b}(\mathbf{x}) =  w_0x_0 + w_1x_1 +... + w_{n-1}x_{n-1} + b $$
or in vector notation:
$$ f_{\mathbf{w},b}(\mathbf{x}) = \mathbf{w} \cdot \mathbf{x} + b $$ 

Gradient descent for multiple variables:

$$\begin{align*} \text{repeat}&\text{ until convergence:} \; \lbrace \newline\;
& w_j = w_j -  \alpha \frac{\partial J(\mathbf{w},b)}{\partial w_j}  \; & \text{for j = 0 to n-1}\newline
&b\ \ = b -  \alpha \frac{\partial J(\mathbf{w},b)}{\partial b}  \newline \rbrace
\end{align*}$$

where $n$ is the number of features, the parameters $w_j$ and $b$ are updated simultaneously on every iteration, and the partial derivatives are:

$$
\begin{align}
\frac{\partial J(\mathbf{w},b)}{\partial w_j}  &= \frac{1}{m} \sum\limits_{i = 0}^{m-1} (f_{\mathbf{w},b}(\mathbf{x}^{(i)}) - y^{(i)})x_{j}^{(i)}  \\
\frac{\partial J(\mathbf{w},b)}{\partial b}  &= \frac{1}{m} \sum\limits_{i = 0}^{m-1} (f_{\mathbf{w},b}(\mathbf{x}^{(i)}) - y^{(i)})
\end{align}
$$

NOTE: The gradient $\frac{\partial J(\mathbf{w},b)}{\partial \mathbf{w}}$ (`dj_dw` in the code) is a 1-D vector with the same shape as the $\mathbf{w}$ vector.

In [7]:
def compute_gradient(X, y, w, b):
    """
    Compute the gradient of the linear-regression cost with respect to w and b.

    Args:
      X (ndarray (m,n)): training examples, one row per record, one column per feature
      y (ndarray (m,)) : target values
      w (ndarray (n,)) : model weights
      b (scalar)       : model bias

    Returns:
      dj_db (scalar)       : derivative of the cost with respect to b
      dj_dw (ndarray (n,)) : derivative of the cost with respect to each w_j

    Raises:
      ValueError: if X has no rows, because the 1/m average is undefined.
    """
    # Unpacking also rejects input that is not 2-D.
    m, n = X.shape  # (number of records, number of features)
    if m == 0:
        raise ValueError("X must contain at least one training example")

    # Prediction error (w . x^(i) + b) - y^(i) for all m rows at once.
    err = np.dot(X, w) + b - y

    # dj/dw_j = (1/m) * sum_i err_i * x_j^(i), computed for every j as X^T . err
    dj_dw = np.dot(X.T, err) / m
    # dj/db = (1/m) * sum_i err_i
    dj_db = np.sum(err) / m

    return dj_db, dj_dw

In [11]:
def gradient_descent(X, y, w_in, b_in, compute_gradient, alpha, num_iters):
    """
    Run batch gradient descent for num_iters steps with learning rate alpha.

    Args:
      X, y             : training data, passed through unchanged to compute_gradient
      w_in (ndarray)   : initial weights (not modified)
      b_in (scalar)    : initial bias
      compute_gradient : callable (X, y, w, b) -> (dj_db, dj_dw)
      alpha (float)    : learning rate
      num_iters (int)  : number of update steps to perform

    Returns:
      w (ndarray) : weights after the final update
      b (scalar)  : bias after the final update
    """
    # Work on a private copy. Plain assignment would only alias w_in, so with
    # num_iters == 0 the caller would receive its own array object back.
    w = np.copy(w_in)
    b = b_in

    for _ in range(num_iters):
        # Gradient at the current parameters
        dj_db, dj_dw = compute_gradient(X, y, w, b)

        # Simultaneous update: both gradients were evaluated before either
        # parameter changes. The w update is a whole-vector operation.
        w = w - alpha * dj_dw
        b = b - alpha * dj_db

    return w, b

In [22]:
# initialize parameters
# In this cell w_init only supplies the shape and dtype for initial_w below;
# b_init is not used here.
b_init = 785.1811367994083
w_init = np.array([0.39133535, 18.75376741, -53.36032453, -26.42131618])
# start from all-zero weights with the same shape and type as w_init
initial_w = np.zeros_like(w_init)
initial_b = 0.
# some gradient descent settings
iterations = 50000
alpha = 5.0e-7
# run gradient descent
w_final, b_final = gradient_descent(X_train, y_train, initial_w, initial_b,
                                            compute_gradient,
                                            alpha, iterations)
print(f"w and b found by gradient descent: {w_final}, {b_final:0.2f}")

# Take the row count directly instead of unpacking into `_`: at notebook top
# level `_` is IPython's last-output variable, and rebinding it stops IPython
# from updating it.
m = X_train.shape[0]
for i in range(m):
    print(f"prediction: {np.dot(X_train[i], w_final) + b_final:0.2f}, target value: {y_train[i]}")

w and b found by gradient descent: [ 0.23874923  0.14936437 -0.46194258 -1.43812571], -0.04
prediction: 437.85, target value: 460
prediction: 280.02, target value: 232
prediction: 152.87, target value: 178
