In [1]:
import numpy as np

In [2]:
import pandas as pd

In [3]:
import matplotlib as plt

In [4]:
from sklearn.linear_model import LinearRegression

In [5]:
## hypothesis function
def price_prediction(A, x):
    """Evaluate the linear hypothesis ``h = A . x``.

    Parameters
    ----------
    A : array_like
        Feature values. The other cells of this notebook pass the design
        matrix here (one row per sample).
    x : array_like
        Model weights.

    Returns
    -------
    The value of ``np.dot(A, x)``; for a 2-D ``A`` and a 1-D ``x`` this is
    one prediction per row of ``A``.
    """
    predicted = np.dot(A, x)
    return predicted

In [6]:
### cost = (1 / (2m)) * Sum_i (y_hypo_i - y_i)**2
def mean_squared_error(actual_y, assumed_y):
    """Half of the mean squared error between targets and predictions.

    Parameters
    ----------
    actual_y : observed target values; its length is used as ``m``.
    assumed_y : values predicted by the hypothesis.

    Returns
    -------
    ``sum((actual_y - assumed_y) ** 2) / (2 * m)``. Note the extra factor
    1/2 compared with the plain mean squared error.
    """
    n_samples = len(actual_y)
    residuals = actual_y - assumed_y
    squared_total = np.sum(residuals ** 2)
    return squared_total / (2 * n_samples)

In [7]:
#### make one step of gradient descent
## w = w_prev - alpha * (1/m) * Sum_{i=1..m} (h(x_i) - y_i) * x_i
def gradient_step(X, y, w, learning_rate):
    """Return the weights after a single batch gradient-descent update.

    Parameters
    ----------
    X : design matrix (must support ``.T`` and ``@``), one row per sample.
    y : target values; ``len(y)`` is used as the sample count ``m``.
    w : current weights. This array is NOT modified.
    learning_rate : step size ``alpha`` in the update rule above.

    Returns
    -------
    A new array holding ``w - learning_rate * grad``, where
    ``grad = X.T @ (price_prediction(X, w) - y) / m``.
    """
    m = len(y)
    residuals = price_prediction(X, w) - y
    grad = (X.T @ residuals) / m
    # Build a new array instead of ``w -= ...``: the in-place form silently
    # overwrote the caller's weights and failed for integer-dtype ``w``.
    return w - learning_rate * grad

In [8]:
def gradient_descent(X0, y, learning_rate, num_iter, eps):
    """Fit linear-regression weights with batch gradient descent.

    A column of ones is prepended to ``X0`` so that the first weight plays
    the role of the intercept. Weights start at zero and are updated with
    ``gradient_step`` until either ``num_iter`` steps have been taken or the
    absolute change in loss between two consecutive steps drops below
    ``eps``.

    Parameters
    ----------
    X0 : 2-D feature array without the bias column.
    y : target values.
    learning_rate : step size forwarded to ``gradient_step``.
    num_iter : maximum number of update steps.
    eps : absolute threshold on the loss change used as stopping criterion.

    Returns
    -------
    (w, loss_history) : the final weights and the list of loss values --
    the initial loss followed by one entry per performed step.
    """
    bias_column = np.ones((X0.shape[0], 1))
    X = np.hstack((bias_column, X0))
    w = np.zeros(X.shape[1])  # start from all-zero weights

    # loss of the initial (zero) weights
    loss_history = [mean_squared_error(y, price_prediction(X, w))]

    for _ in range(num_iter):
        w = gradient_step(X, y, w, learning_rate)
        current_loss = mean_squared_error(y, price_prediction(X, w))
        previous_loss = loss_history[-1]
        # record first, then decide whether the run has converged
        loss_history.append(current_loss)
        if abs(current_loss - previous_loss) < eps:
            break

    return w, loss_history

In [9]:
# read data and normalize it
# Only the feature columns are standardized (zero mean, unit sample std);
# the target 'price' (columns[0]) keeps its original units.
df = pd.read_csv("Housing.csv")
norm = df.copy()
columns = ['price', 'area', 'bedrooms', 'bathrooms']
features = columns[1:]
norm[features] = (df[features] - df[features].mean()) / df[features].std()


# feature matrix X (no bias column yet) and target values y
X = norm[features].values
y = norm['price'].values

# hyper-parameters for gradient descent
learning_rate = 0.001
num_iter = 100000
eps = 1e-12  # absolute threshold on the loss change between two steps

# gradient descent (gradient_descent prepends the bias column itself)
w_gradient, loss_history = gradient_descent(X, y, learning_rate, num_iter, eps)
print('           Gradient Descent - w:', w_gradient)

# analytical calculation: from here on X carries a leading column of ones
ones = np.ones((X.shape[0], 1))
X = np.hstack((ones, X))

# least-squares solution via the pseudo-inverse of the design matrix
# (equivalent normal-equation form: pinv(X.T @ X) @ X.T @ y)
w_analytical = np.linalg.pinv(X) @ y
print('Analytical Solution - Optimal w:', w_analytical)

## using the scikit-learn solver (LinearRegression is from sklearn, not pandas)
# X already contains the bias column, so the estimator's own intercept is
# switched off. coef_ then has the same layout as the two solutions above
# ([bias, area, bedrooms, bathrooms]) and no longer has to be patched by
# hand with y.mean(), which left the fitted model inconsistent.
# NOTE: the printed label still reads 'Pandas' so that it matches the
# recorded cell output.
regressor = LinearRegression(fit_intercept=False).fit(X, y)
print('    Pandas Solution -         w:', regressor.coef_)


           Gradient Descent - w: [4766729.23694497  821968.67120246  300259.79711489  696447.09123892]
Analytical Solution - Optimal w: [4766729.24770642  821968.58935343  300259.16468032  696447.75898579]
    Pandas Solution -         w: [4766729.24770642  821968.58935343  300259.16468032  696447.75898579]
