## Normal Equation + Gradient Descent w/ Boston Housing Prediction Dataset

### Normal Equation 

In [1]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("fedesoriano/the-boston-houseprice-data")

import os
import pandas as pd
file_path = os.path.join(path, "boston.csv")

df = pd.read_csv(file_path)
print(df.head())
print('---')
print(df.columns)

  from .autonotebook import tqdm as notebook_tqdm


      CRIM    ZN  INDUS  CHAS    NOX     RM   AGE     DIS  RAD    TAX  \
0  0.00632  18.0   2.31     0  0.538  6.575  65.2  4.0900    1  296.0   
1  0.02731   0.0   7.07     0  0.469  6.421  78.9  4.9671    2  242.0   
2  0.02729   0.0   7.07     0  0.469  7.185  61.1  4.9671    2  242.0   
3  0.03237   0.0   2.18     0  0.458  6.998  45.8  6.0622    3  222.0   
4  0.06905   0.0   2.18     0  0.458  7.147  54.2  6.0622    3  222.0   

   PTRATIO       B  LSTAT  MEDV  
0     15.3  396.90   4.98  24.0  
1     17.8  396.90   9.14  21.6  
2     17.8  392.83   4.03  34.7  
3     18.7  394.63   2.94  33.4  
4     18.7  396.90   5.33  36.2  
---
Index(['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX',
       'PTRATIO', 'B', 'LSTAT', 'MEDV'],
      dtype='object')


### Import Packages

In [24]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

# Stochastic Gradient Descent Regressor
from sklearn.linear_model import SGDRegressor

from sklearn.metrics import mean_squared_error, root_mean_squared_error, mean_absolute_error
from sklearn.linear_model import Ridge, RidgeCV

In [11]:
data = df.iloc[:,0:13]
target = df.iloc[:,13:14]
print(data.shape)
print(target.shape)

(506, 13)
(506, 1)


### Model Preprocessing

In [15]:
x_train, x_test, y_train, y_test = train_test_split(data, target, test_size = 0.2, random_state = 42)
transfer = StandardScaler()
x_train = transfer.fit_transform(x_train)
x_test = transfer.transform(x_test)

### Model Training

In [None]:
estimator = LinearRegression(fit_intercept = True)
estimator.fit(x_train, y_train)
print(f'weights: {estimator.coef_}')
print('---')
print(f'intercept: {estimator.intercept_}')

weights: [[-1.00213533  0.69626862  0.27806485  0.7187384  -2.0223194   3.14523956
  -0.17604788 -3.0819076   2.25140666 -1.76701378 -2.03775151  1.12956831
  -3.61165842]]
---
intercept: [22.79653465]


In [25]:
y_pred = estimator.predict(x_test)
print(f'MSE = {mean_squared_error(y_test, y_pred)}')
print(f'RMSE = {root_mean_squared_error(y_test, y_pred)}')
print(f'MAE = {mean_absolute_error(y_test, y_pred)}')

MSE = 24.291119474973527
RMSE = 4.928602182665338
MAE = 3.1890919658878496


### Gradient Descent 

In [27]:
estimator = SGDRegressor(fit_intercept = True, learning_rate = 'constant', eta0 = 0.01)
estimator.fit(x_train, y_train)
print(f'weights = {estimator.coef_}')
print('---')
print(f'intercept = {estimator.intercept_}')

weights = [-0.89009338  0.14838013  0.63288643  0.03047774 -1.8098546   2.60250162
  0.0712525  -3.400862    1.99180031 -1.67310565 -2.09901502  1.10381436
 -3.39767522]
---
intercept = [23.03318383]


  y = column_or_1d(y, warn=True)
