# Import Libraries

In [173]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [172]:
# Presumably the seed for random number generators; no visible cell
# references it — TODO confirm it is used somewhere or remove it.
SEED = 42

# Linear Regression

Функція гіпотези лінійної регресії у векторному вигляді

In [156]:
def h(x, w):
    """Linear-regression hypothesis in vectorized form.

    Args:
        x: Feature vector or design matrix (anything ``np.dot`` accepts).
        w: Weight vector.

    Returns:
        ``np.dot(x, w)`` — the dot product of the two inputs.
    """
    predictions = np.dot(x, w)
    return predictions

Функція для обчислення функції втрат у векторному вигляді

In [157]:
def loss_function(w, x, y_true):
    """Half mean squared error of the linear hypothesis, in vectorized form.

    Args:
        w: Weight vector.
        x: Design matrix; ``len(x)`` is used as the number of samples.
        y_true: Target values.

    Returns:
        Sum of squared residuals divided by ``2 * len(x)``.
    """
    residuals = h(x, w) - y_true
    squared_error_total = np.sum(residuals ** 2)
    return squared_error_total / (2 * len(x))

Крок градієнтного спуску

In [158]:
def gradient_descent_step(w, x, y_true, learning_rate=0.00000001):
    """Perform one batch gradient-descent update of the weights.

    Args:
        w: Current weight vector.
        x: Design matrix; ``len(x)`` is used as the number of samples.
        y_true: Target values.
        learning_rate: Step size applied to the gradient.

    Returns:
        A new array holding the updated weights; ``w`` itself is not modified.
    """
    m = len(x)
    y_pred = h(x, w)
    gradient = np.dot(x.T, (y_pred - y_true)) / m
    # Build a new array instead of using ``w -= ...``: the in-place form
    # silently modifies the caller's array and raises a casting error when
    # ``w`` has an integer dtype.
    return w - learning_rate * gradient

Пошук коефіцієнтів w

In [159]:
# Forward slashes are accepted on every OS (Windows included), whereas the
# backslash-separated path only resolved on Windows.
df = pd.read_csv('Data/Housing.csv')

In [160]:
# Preview the first rows of the loaded frame.
df.head()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes,semi-furnished
3,12215000,7500,4,2,2,yes,no,yes,no,yes,3,yes,furnished
4,11410000,7420,4,1,2,yes,yes,yes,no,yes,2,no,furnished


In [161]:
# Count missing values per column.
df.isnull().sum()

price               0
area                0
bedrooms            0
bathrooms           0
stories             0
mainroad            0
guestroom           0
basement            0
hotwaterheating     0
airconditioning     0
parking             0
prefarea            0
furnishingstatus    0
dtype: int64

In [162]:
# Feature matrix (without the bias column) and the regression target.
x = df[['area', 'bathrooms', 'bedrooms']].values
# Bias column of ones, one row per sample. Its length is taken from ``x``:
# ``X`` is only created in a later cell, so referencing it here raises a
# NameError on a fresh kernel.
ones_column = np.ones((x.shape[0], 1), dtype=int)
Y = df['price'].values

In [163]:
max_iterations = 200
stopping_thershold = 0.000001
# Design matrix: bias column of ones followed by the feature columns.
X = np.hstack((ones_column, x))
weight = np.zeros(X.shape[1])
previuos_loss = None
history_loss = []

# NOTE(review): the coefficients printed by this cell differ greatly from the
# closed-form ones printed further down; presumably the default learning rate
# and 200 iterations are not enough on unscaled features — confirm.
for i in range(max_iterations):
    weight = gradient_descent_step(weight, X, Y)
    loss = loss_function(weight, X, Y)
    # Record every evaluated loss, including the one that triggers the stop.
    history_loss.append(loss)

    # Compare against None explicitly: a previous loss of exactly 0.0 is falsy
    # and would otherwise disable the convergence check.
    if previuos_loss is not None and abs(previuos_loss - loss) < stopping_thershold:
        break

    previuos_loss = loss


print("coef:", weight[1:])
print("intercept:", weight[0])

coef: [855.70789481   1.72174462   3.15074722]
intercept: 0.8598483198127763


Ті самі коефіцієнти, знайдені за допомогою аналітичного розв'язку (нормальне рівняння): $w = (X^T X)^{-1} X^T y$

In [164]:
# Closed-form least-squares weights from the normal equation
# (X^T X) w = X^T Y. The linear system is solved directly rather than forming
# the explicit inverse of X^T X, which is the numerically safer way to obtain
# the same weights.
weight_analitic = np.linalg.solve(np.dot(X.T, X), np.dot(X.T, Y))
print("coef:", weight_analitic[1:])
print("intercept:", weight_analitic[0])

coef: [3.78762754e+02 1.38604950e+06 4.06820034e+05]
intercept: -173171.60763262442


## sklearn LinearRegression, Ridge, Lasso

In [165]:
from sklearn.linear_model import LinearRegression, Ridge, Lasso

In [169]:
# Models to compare, keyed by the label used in the printed reports.
# All three are created with their default hyperparameters.
dict_model = dict(
    regressor=LinearRegression(),
    ridge=Ridge(),
    lasso=Lasso(),
)

In [170]:
# Fit each model on the feature matrix (no bias column) and report the
# learned coefficients and intercept.
for name in dict_model:
    model = dict_model[name]
    model.fit(x, Y)
    print(
        f'Model: {name}',
        f"coef: {model.coef_}",
        f"intercept: {model.intercept_}",
        sep="\n",
    )

Model: regressor
coef: [3.78762754e+02 1.38604950e+06 4.06820034e+05]
intercept: -173171.60763263796
Model: ridge
coef: [3.79194636e+02 1.37499319e+06 4.08064766e+05]
intercept: -164865.78649543412
Model: lasso
coef: [3.78762967e+02 1.38604591e+06 4.06819014e+05]
intercept: -173165.06378573086


## Result

In [171]:
# Compare the prediction for the first sample across all fitted approaches.
print(f'y_true = {Y[0]}')
# h is declared as h(x, w): pass the sample first and the weights second.
# The swapped order only worked because the dot product of two 1-D vectors
# is symmetric.
print(f'y_gradient = {h(X[0], weight)}')
print(f'y_analitic = {h(X[0], weight_analitic)}')
for name, model in dict_model.items():
    # x[:1] keeps the first sample as a 2-D (1, n_features) array, the same
    # shape the original reshape(1, -1) produced for predict().
    print(f"y_{name}: {model.predict(x[:1])}")

y_true = 13300000
y_gradient = 6349369.48583733
y_analitic = 7036627.15462758
y_regressor: [7036627.15462756]
y_ridge: [7031003.85257466]
y_lasso: [7036624.02039613]
