In [1]:
import numpy as np
import pandas as pd

In [2]:
data = pd.read_csv('Housing.csv')

In [3]:
data

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes,semi-furnished
3,12215000,7500,4,2,2,yes,no,yes,no,yes,3,yes,furnished
4,11410000,7420,4,1,2,yes,yes,yes,no,yes,2,no,furnished
...,...,...,...,...,...,...,...,...,...,...,...,...,...
540,1820000,3000,2,1,1,yes,no,yes,no,no,2,no,unfurnished
541,1767150,2400,3,1,1,no,no,no,no,no,0,no,semi-furnished
542,1750000,3620,2,1,1,yes,no,no,no,no,0,no,unfurnished
543,1750000,2910,3,1,1,no,no,no,no,no,0,no,furnished


In [11]:
def mapp_furnisher_status(x):
    if x == "unfurnished":
        return 1
    elif x == "semi-furnished":
        return 2
    elif x == "furnished":
        return 3
    else:
        return x
def prepare_data(data):
    data[['mainroad','guestroom','basement','hotwaterheating','airconditioning','prefarea']] = data[['mainroad','guestroom','basement','hotwaterheating','airconditioning','prefarea']].applymap(lambda x:1 if x == "yes" else 0)
    data['furnishingstatus'] = data['furnishingstatus'].apply(mapp_furnisher_status)
    X = data.iloc[:,1:]
    # print(data.to_markdown())
    X['area'] = X['area']/np.linalg.norm(X['area'])
    m,n_x = X.shape
    X = X.T.to_numpy().reshape(n_x,m)
    Y = data.iloc[:,0]
    Y = Y/np.linalg.norm(Y)
    Y = Y.T.to_numpy().reshape(1,m)
    return X,Y

In [12]:
X,Y = prepare_data(data)

In [8]:
# X = data['area'].T.to_numpy().reshape(1,len(data))
# X = X/np.linalg.norm(X)

In [120]:
# Y = data['price'].to_numpy().reshape(1,len(data))
# Y = Y/np.linalg.norm(Y)

In [13]:
X.shape,Y.shape

((12, 545), (1, 545))

In [34]:
np.random.seed(1)
np.random.randn(1)

array([1.62434536])

In [46]:
def initialize_params(n_x,m):
    W = np.random.randn(n_x,1)
    b = np.zeros(1).reshape(1,1)
    parameters = {"W":W,"b":b}
    return parameters

In [30]:
def forward_propagation(X,parameters):
    W = parameters["W"]
    b = parameters["b"]
    Y_pred = np.dot(W.T,X) + b
    return Y_pred

In [20]:
def compute_cost(Y_pred,Y):
    m = Y_pred.shape[1]
    cost = (1/2*m) * np.sum(np.square(Y_pred-Y))
    cost = np.squeeze(cost)
    return cost

In [47]:
def backward_propagation(parameters,Y_pred,X,Y):
    m = X.shape[1]
    W = parameters["W"]
    b = parameters["b"]
    dW = (1/m) * (np.dot(X,(Y_pred-Y).T))
    db = (1/m) * np.sum((Y_pred-Y),axis=1,keepdims=True)
    # db = db.reshape(1,m)
    grads = {"dW":dW,"db":db}
    return grads

In [53]:
def update_parameters(parameters,grads, learning_rate=0.1):
    W = parameters["W"]
    b = parameters["b"]
    dW = grads["dW"]
    db = grads["db"]
    W = W - (learning_rate)*dW
    b = b - (learning_rate)*db
    parameters = {"W":W, "b":b}
    return parameters

In [22]:
n_x = X.shape[0]
m = X.shape[1]
print(n_x,m)

12 545


In [38]:
def linear_regression(X,Y,num_iterations=10000):
    np.random.seed(1111)
    n_x = X.shape[0]
    m = X.shape[1]
    parameters = initialize_params(n_x,m)
    for iter in range(num_iterations):
        Y_pred = forward_propagation(X,parameters)
        cost = compute_cost(Y_pred,Y)
        grads = backward_propagation(parameters,Y_pred,X,Y)
        parameters = update_parameters(parameters,grads)
        if iter % 1000 == 0:
            print(f"Cost after iteration {iter}: {cost}")
    return parameters

In [55]:
linear_regressor_params = linear_regression(X,Y,num_iterations=80000)

Cost after iteration 0: 3585941.1573026725
Cost after iteration 1000: 105.00528414272627
Cost after iteration 2000: 100.88026242589945
Cost after iteration 3000: 96.9755418911394
Cost after iteration 4000: 93.24891940043572
Cost after iteration 5000: 89.6922650396361
Cost after iteration 6000: 86.2978267032638
Cost after iteration 7000: 83.05820585440358
Cost after iteration 8000: 79.96634139720913
Cost after iteration 9000: 77.0154942864965
Cost after iteration 10000: 74.1992328392847
Cost after iteration 11000: 71.5114187162629
Cost after iteration 12000: 68.94619354263148
Cost after iteration 13000: 66.49796613915541
Cost after iteration 14000: 64.161400335598
Cost after iteration 15000: 61.93140333997517
Cost after iteration 16000: 59.803114638276
Cost after iteration 17000: 57.771895400460515
Cost after iteration 18000: 55.83331836963869
Cost after iteration 19000: 53.98315821239653
Cost after iteration 20000: 52.21738230923572
Cost after iteration 21000: 50.532141965053086
Cost a

In [41]:
linear_regressor_params['W'].shape

(12, 1)

In [56]:
linear_regressor_params['b'].shape

(1, 1)

In [57]:
linear_regressor_params

{'W': array([[ 0.09341583],
        [ 0.00159681],
        [ 0.01005289],
        [ 0.00447511],
        [ 1.50182182],
        [-0.02392945],
        [ 0.27594888],
        [-0.96639308],
        [-1.58031468],
        [ 0.00438206],
        [-1.54766708],
        [ 0.00335146]]),
 'b': array([[0.00093993]])}

In [17]:
x = np.array([0.1])

In [18]:
x

array([0.1])

In [19]:
np.squeeze(x)

array(0.1)