In [64]:
import numpy as np 

# Training data

| Size (sqft) | Number of Bedrooms  | Number of floors | Age of  Home | Price (1000s dollars)  |   
| ----------------| ------------------- |----------------- |--------------|-------------- |  
| 2104            | 5                   | 1                | 45           | 460           |  
| 1416            | 3                   | 2                | 40           | 232           |  
| 852             | 2                   | 1                | 35           | 178           |  



In [65]:
# Training set: one row per house, one column per feature.
#   column 0 -> size in square feet
#   column 1 -> number of bedrooms
#   column 2 -> number of floors
#   column 3 -> age of the home in years
x_train = np.array([
    [2104, 5, 1, 45],
    [1416, 3, 2, 40],
    [852, 2, 1, 35],
])

# Targets: the price of each house, in $1000's
y_train = np.array([460, 232, 178])

# Show each array followed by its shape:
# (m examples, n features) for the inputs and (m,) for the targets.
print(x_train, x_train.shape, y_train, y_train.shape, sep="\n")


[[2104    5    1   45]
 [1416    3    2   40]
 [ 852    2    1   35]]
(3, 4)
[460 232 178]
(3,)


In [66]:
# Starting values for the model parameters

# Bias (intercept) term
b_init = 785.1811367994083

# One weight per feature, in column order:
# size (sqft), bedrooms, floors, age (years)
w_init = np.array([0.39133535, 18.75376741, -53.36032453, -26.42131618])

print(w_init.shape)


# m = number of training examples (rows of x_train)
m = len(x_train)


(4,)


# Model prediction

For a single example $\mathbf{x}$ with $n$ features, the linear model predicts:
$$ f_{\mathbf{w},b}(\mathbf{x}) =  w_0x_0 + w_1x_1 +... + w_{n-1}x_{n-1} + b \tag{1}$$
or in vector notation:
$$ f_{\mathbf{w},b}(\mathbf{x}) = \mathbf{w} \cdot \mathbf{x} + b  \tag{2} $$ 


In [67]:
# Model prediction from scratch using linear regression

def linear_regression_function(w, x, b):
    """
    Predict the target for one example by summing the feature/weight
    products one at a time and then adding the bias.

    Args:
        w (ndarray): Shape (n,) model weights (parameters)
        x (ndarray): Shape (n,) example with multiple features
        b (scalar): Bias term (intercept)

    Returns:
        (scalar): Predicted price
    """
    weighted_sum = 0
    # x.shape[0] is the number of features in the example
    for idx in range(x.shape[0]):
        weighted_sum += x[idx] * w[idx]
    return weighted_sum + b


In [68]:
# Example: take the first training example (all 4 features)
x_sample = x_train[0, 0:4]

# Predict its price with the initial weights and bias
price = linear_regression_function(w_init, x_sample, b_init)

# Report the features of the sample that was actually passed to the model, so the
# message cannot drift from the prediction if a different row is selected above.
print(f"The price for a house with {x_sample[0]} sqft, {x_sample[1]} bedrooms, "
      f"{x_sample[2]} floors, and {x_sample[3]} years old is: ${round(price)}")

The price for a house with 2104 sqft, 5 bedrooms, 1 floors, and 45 years old is: $460


In [69]:
# model prediction using np.dot() :
def linear_regression_function2(w, x, b):
    """
    Predict the target for one example using a single np.dot call.

    Args:
      w (ndarray): Shape (n,) model weights
      x (ndarray): Shape (n,) example with multiple features
      b (scalar):  Bias term

    Returns:
      (scalar): predicted price
    """
    weighted_sum = np.dot(x, w)
    return weighted_sum + b

In [70]:
# Example: take the first training example (all 4 features)
x_sample = x_train[0, 0:4]

# Predict its price with the np.dot-based model, using the initial weights and bias
price = linear_regression_function2(w_init, x_sample, b_init)

# Report the features of the sample that was actually passed to the model, so the
# message cannot drift from the prediction if a different row is selected above.
print(f"The price for a house with {x_sample[0]} sqft, {x_sample[1]} bedrooms, "
      f"{x_sample[2]} floors, and {x_sample[3]} years old is: ${round(price)}")

The price for a house with 2104 sqft, 5 bedrooms, 1 floors, and 45 years old is: $460


# Cost function

The squared-error cost over all $m$ training examples is:

$$J(\mathbf{w},b) = \frac{1}{2m} \sum\limits_{i = 0}^{m-1} (f_{\mathbf{w},b}(\mathbf{x}^{(i)}) - y^{(i)})^2 \tag{3}$$ 
where:
$$ f_{\mathbf{w},b}(\mathbf{x}^{(i)}) = \mathbf{w} \cdot \mathbf{x}^{(i)} + b  \tag{4} $$ 


In [71]:
# cost function 
def cost_function(w, x, y, b):
    """
    Compute the squared-error cost J(w, b) of the linear model over a data set.

    Args:
      w (ndarray (n,)) : model weights
      x (ndarray (m,n)): data, m examples with n features
      y (ndarray (m,)) : target values
      b (scalar)       : model bias

    Returns:
      final_cost (scalar): sum of squared prediction errors divided by 2m

    Raises:
      ValueError: if x contains no examples
    """
    # Take the example count from the data that was passed in rather than from a
    # notebook-level global, so the result is correct for any data set.
    m = x.shape[0]
    if m == 0:
        raise ValueError("x must contain at least one training example")

    cost = 0
    for i in range(m):
        y_hat = np.dot(x[i], w) + b      # prediction for example i
        cost += (y_hat - y[i]) ** 2      # accumulate its squared error
    final_cost = (1 / (2 * m)) * cost
    return final_cost

In [72]:
# Evaluate the cost at the initial parameters
cost = cost_function(w=w_init, x=x_train, y=y_train, b=b_init)
print(cost)

1.5578904330213735e-12



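# Gradient of the cost

The partial derivatives of the cost $J(\mathbf{w},b)$ with respect to each weight $w_j$ and the bias $b$ are:

$$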
\begin{align}
\frac{\partial J(\mathbf{w},b)}{\partial w_j}  &= \frac{1}{m} \sum\limits_{i = 0}^{m-1} (f_{\mathbf{w},b}(\mathbf{x}^{(i)}) - y^{(i)})x_{j}^{(i)} \tag{6}  \\
\frac{\partial J(\mathbf{w},b)}{\partial b}  &= \frac{1}{m} \sum\limits_{i = 0}^{m-1} (f_{\mathbf{w},b}(\mathbf{x}^{(i)}) - y^{(i)}) \tag{7}
\end{align}
$$


In [81]:
def compute_gradient(X, y, w, b):
    """
    Compute the gradient of the squared-error cost for linear regression.

    Args:
      X (ndarray (m,n)): data, m examples with n features
      y (ndarray (m,)) : target values
      w (ndarray (n,)) : model weights
      b (scalar)       : model bias

    Returns:
      dj_db (scalar)      : gradient of the cost with respect to b
      dj_dw (ndarray (n,)): gradient of the cost with respect to w

    Note the return order: the bias gradient comes first.

    Raises:
      ValueError: if X contains no examples
    """
    m, n = X.shape  # X is of shape (m examples, n features)
    if m == 0:
        raise ValueError("X must contain at least one training example")

    # Accumulators for the weight and bias gradients
    dj_dw = np.zeros((n,))
    dj_db = 0.

    for i in range(m):
        # Prediction error (residual) for example i
        err = (np.dot(X[i], w) + b) - y[i]

        # Weight j accumulates err * X[i, j]; update all n weights at once
        dj_dw += err * X[i]

        # The bias accumulates the raw error
        dj_db += err

    # Average over the m examples
    dj_dw = dj_dw / m
    dj_db = dj_db / m

    return dj_db, dj_dw


In [82]:
# Test the gradient computation at the initial parameters
dj_db, dj_dw = compute_gradient(X=x_train, y=y_train, w=w_init, b=b_init)
print(dj_db, dj_dw)

-1.6739251122999121e-06 [-2.72623577e-03 -6.27197263e-06 -2.21745571e-06 -6.92403379e-05]
