### Напишіть функцію гіпотези лінійної регресії у векторному вигляді; створіть функцію для обчислення функції втрат у векторному вигляді; реалізуйте один крок градієнтного спуску;

In [5]:
import numpy as np
import pandas as pd

# Functions for linear regression
def hypothesis(X, theta):
    return np.dot(X, theta)

# Loss functions in vector form
def compute_cost(X, y, theta):
    m = len(y)  # number of training examples
    predictions = X.dot(theta)
    errors = predictions - y
    squared_errors = errors.T.dot(errors)
    cost = (1 / (2 * m)) * squared_errors
    return cost

# Perform one step of gradient descent
def gradient_descent_step(X, y, theta, alpha):
    m = len(y)
    errors = hypothesis(X, theta) - y
    gradient = (1 / m) * np.dot(X.T, errors)
    return theta - alpha * gradient

### Знайти найкращі параметри w⃗ для датасету використовуючи написані вами функції, прогнозуючу ціну на будинок залежно від площі, кількості ванних кімнат та кількості спалень;

In [6]:
# Loading data
housing_data = pd.read_csv('Housing.csv')
X = housing_data[['area', 'bedrooms', 'bathrooms']]
y = housing_data['price']

# Data normalization
X_normalized = (X - X.mean()) / X.std()
y_normalized = (y - y.mean()) / y.std()
X_normalized.insert(0, 'intercept', 1)

# Parameter initialization
theta = np.zeros(X_normalized.shape[1])
alpha = 0.01
iterations = 1000

# Gradient descent
for i in range(iterations, 0 ,-1):
    theta = gradient_descent_step(X_normalized.to_numpy(), y_normalized.to_numpy(), theta, alpha)
    if i % 100 == 0:
        cost = compute_cost(X_normalized.to_numpy(), y_normalized.to_numpy(), theta)
        print(f"Iteration {i}: Cost = {cost:.4f}, Theta = {theta}")

# Final output of values
print("Final theta values and their descriptions:")
print(f"Theta_0 (bias): {theta[0]:.2e}")
print(f"Theta_1 (coefficient for area): {theta[1]:.3f}")
print(f"Theta_2 (coefficient for number of bedrooms): {theta[2]:.3f}")
print(f"Theta_3 (coefficient for number of bathrooms): {theta[3]:.3f}")

Iteration 1000: Cost = 0.4923, Theta = [7.29284115e-19 5.35013864e-03 3.65821560e-03 5.16595715e-03]
Iteration 900: Cost = 0.2717, Theta = [1.58413547e-16 3.06131682e-01 1.68069716e-01 2.75000398e-01]
Iteration 800: Cost = 0.2576, Theta = [2.33448327e-16 3.95057516e-01 1.82787216e-01 3.40950977e-01]
Iteration 700: Cost = 0.2562, Theta = [2.63943811e-16 4.23728032e-01 1.75618349e-01 3.59588219e-01]
Iteration 600: Cost = 0.2560, Theta = [2.78513196e-16 4.33666698e-01 1.68950672e-01 3.66185057e-01]
Iteration 500: Cost = 0.2560, Theta = [2.77682057e-16 4.37288935e-01 1.64964762e-01 3.69100049e-01]
Iteration 400: Cost = 0.2560, Theta = [2.81797012e-16 4.38645790e-01 1.62826047e-01 3.70579497e-01]
Iteration 300: Cost = 0.2560, Theta = [2.81055505e-16 4.39158162e-01 1.61716131e-01 3.71376741e-01]
Iteration 200: Cost = 0.2560, Theta = [2.83744486e-16 4.39349948e-01 1.61144593e-01 3.71814374e-01]
Iteration 100: Cost = 0.2560, Theta = [2.89973957e-16 4.39419845e-01 1.60849765e-01 3.72055095e-01]

In [7]:
def predict_price(area, bedrooms, bathrooms, theta, X_mean, X_std, y_mean, y_std):
    
    # Normalizing the input data
    area_norm = (area - X_mean['area']) / X_std['area']
    bedrooms_norm = (bedrooms - X_mean['bedrooms']) / X_std['bedrooms']
    bathrooms_norm = (bathrooms - X_mean['bathrooms']) / X_std['bathrooms']

    # Creating the feature vector
    X_vector = np.array([1, area_norm, bedrooms_norm, bathrooms_norm])  # 1 for intercept

    # Predicting the normalized price
    predicted_norm_price = np.dot(X_vector, theta)

    # Denormalizing the predicted price (converted back to the actual price scale)
    predicted_price = (predicted_norm_price * y_std) + y_mean

    return predicted_price

# Example model parameters, means, and standard deviations
theta_example = np.array([0, 0.5, 0.2, 0.1])
X_mean = {'area': 2000, 'bedrooms': 3, 'bathrooms': 2}
X_std = {'area': 500, 'bedrooms': 1, 'bathrooms': 1}
y_mean = 500000
y_std = 150000

# Using the function to make a prediction
predicted_price = predict_price(7420, 4, 2, theta_example, X_mean, X_std, y_mean, y_std)
print(f"Predicted House Price: ${predicted_price:.2f}")

Predicted House Price: $1343000.00


### знайдіть ці ж параметри за допомогою аналітичного рішення;

In [8]:
# Calculation of parameters using normal equations
X_transpose = X_normalized.to_numpy().T
theta_analytical = np.linalg.inv(X_transpose.dot(X_normalized.to_numpy())).dot(X_transpose).dot(y_normalized.to_numpy())

# Display of final parameter values with analytical solution descriptions
print("Final theta values and their descriptions from the analytical solution:")
print(f"Theta_0 (bias): {theta_analytical[0]:.2e}")
print(f"Theta_1 (coefficient for area): {theta_analytical[1]:.3f}")
print(f"Theta_2 (coefficient for number of bedrooms): {theta_analytical[2]:.3f}")
print(f"Theta_3 (coefficient for number of bathrooms): {theta_analytical[3]:.3f}")


Final theta values and their descriptions from the analytical solution:
Theta_0 (bias): 2.40e-16
Theta_1 (coefficient for area): 0.439
Theta_2 (coefficient for number of bedrooms): 0.161
Theta_3 (coefficient for number of bathrooms): 0.372


### для перевірки спрогнозованих значень, використайте LinearRegression з бібліотеки scikit-learn та порівняйте результати

In [9]:
from sklearn.linear_model import LinearRegression

# Creating and training the LinearRegression model
model = LinearRegression()
model.fit(X_normalized, y_normalized)

# Output of coefficients
sklearn_theta = [model.intercept_] + list(model.coef_[1:])  # intercept + slopes for area, bedrooms, bathrooms

# Display of parameter values obtained using scikit-learn
print("Theta values from scikit-learn LinearRegression:")
print(f"Theta_0 (bias): {sklearn_theta[0]:.2e}")
print(f"Theta_1 (coefficient for area): {sklearn_theta[1]:.3f}")
print(f"Theta_2 (coefficient for number of bedrooms): {sklearn_theta[2]:.3f}")
print(f"Theta_3 (coefficient for number of bathrooms): {sklearn_theta[3]:.3f}")

Theta values from scikit-learn LinearRegression:
Theta_0 (bias): 3.11e-16
Theta_1 (coefficient for area): 0.439
Theta_2 (coefficient for number of bedrooms): 0.161
Theta_3 (coefficient for number of bathrooms): 0.372
