In [None]:
# Report the version of the interpreter that is actually running this kernel.
# `!python --version` asks whichever `python` comes first on the shell PATH,
# which can be a different interpreter than the one backing the notebook.
import platform
print('Python {}'.format(platform.python_version()))

In [None]:
# Import fundamental packages for scientific computing
import math

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import sklearn

In [None]:
# Import load_boston() function
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this import (and therefore the notebook) only runs on scikit-learn < 1.2.
from sklearn.datasets import load_boston

In [None]:
# Load the required classes and functions from the scikit-learn library
from sklearn import linear_model
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_squared_error, r2_score

In [None]:
# Print the versions of the libraries this analysis depends on, so the run can
# be reproduced. scikit-learn is included because the dataset loader and every
# model used below come from it.
print('numpy version: {}'.format(np.__version__))
print('pandas version: {}'.format(pd.__version__))
print('seaborn version: {}'.format(sns.__version__))
print('scikit-learn version: {}'.format(sklearn.__version__))

In [None]:
# load_boston() hands back a Bunch object holding the Boston housing data
# (return_X_y is left at its default of False, so a Bunch is returned, not a tuple)
boston = load_boston()

In [None]:
# Show the type of the object returned by load_boston()
type(boston)

In [None]:
# List the keys available on the loaded object
boston.keys()

In [None]:
# Print the description text that ships with the Boston housing dataset
print(boston.DESCR)

In [None]:
# Print the feature (column) names of the Boston dataset
print(boston.feature_names)

In [None]:
# Column labels for the dataframe: every feature name followed by the target, MEDV
columns_list = [*boston.feature_names, 'MEDV']

In [None]:
# Build a pandas DataFrame from the 'boston' Bunch: the feature columns come
# first and the target (MEDV) is stacked on as the last column
boston_df = pd.DataFrame(
    np.column_stack((boston.data, boston.target)),
    columns=columns_list,
)

In [None]:
# Report the dimensions of the Boston housing dataframe
print('Number of rows: {}'.format(len(boston_df)))
print('Number of columns: {}'.format(len(boston_df.columns)))

In [None]:
# Show the first 5 records of the dataframe
boston_df.head(n=5)

In [None]:
# Summary statistics for every column
boston_df.describe(include='all')

In [None]:
# Column dtypes and non-null counts (used below to check for missing values)
boston_df.info()

As per the output above, there are no missing values in any of the attributes. Good!

In [None]:
# Compute the pairwise correlation matrix of all dataframe columns (features and MEDV);
# DataFrame.corr() returns it as a dataframe
corr = boston_df.corr()

In [None]:
# Draw the correlation matrix as an annotated heatmap on a 16x16 inch figure
fig, ax = plt.subplots(figsize=(16, 16))
sns.heatmap(corr, annot=True, ax=ax)
plt.show()

In [None]:
#sns.set_theme(style='ticks')
#sns.pairplot(data=boston_df)
#plt.show()

In [None]:
# Plot the boxplot of target variable MEDV.
# The box colour is passed through `color` instead of `palette`: seaborn >= 0.13
# deprecates `palette` when no `hue` variable is given, and with a single box
# only the first colour of the palette is used anyway.
fig = plt.figure(figsize=(15,7))
sns.boxplot(x = boston_df['MEDV'], width=0.5, color=sns.color_palette('Set3')[0])
plt.show()

In [None]:
# Feature matrix (X) and target vector (Y), taken straight from the Bunch
X, Y = boston.data, boston.target

In [None]:
# Split the data into train and test sets, holding out 20% for testing (test_size=0.2).
# random_state is fixed so the same split is produced on every run.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=4)

In [None]:
# Print number of observations in training dataset
print('Rows in Training dataset: {}'.format(X_train.shape[0]))

In [None]:
# Print number of observations in test dataset
print('Rows in Test dataset: {}'.format(X_test.shape[0]))

In [None]:
# Instantiate a LinearRegression estimator that also fits an intercept term
model = LinearRegression(fit_intercept=True)  

In [None]:
# Fit the least squares regression model to the training data
model.fit(X_train, Y_train)

In [None]:
# Fitted intercept term of the model
print(model.intercept_)

In [None]:
# Fitted coefficients, in the same order as the feature columns of X_train
print(model.coef_)

**Estimated linear model equation**
#### Y ~ 35.55 - 0.115 * CRIM + 0.0471 * ZN + 0.008 * INDUS + 3.234 * CHAS - 16.686 * NOX + 3.884 * RM - 0.010 * AGE - 1.541 * DIS + 0.293 * RAD - 0.013 * TAX - 0.0906 * PTRATIO + 0.0088 * B - 0.457 * LSTAT

In [None]:
# Predict the median housing values for test data
Y_pred = model.predict(X_test)

In [None]:
# Residuals: observed minus predicted target value for each test observation
residuals = Y_test - Y_pred
print(residuals)

In [None]:
# Score of the fitted model on the training data (R² for scikit-learn regressors)
model.score(X_train, Y_train)

In [None]:
# Score of the fitted model on the held-out test data (R² for scikit-learn regressors)
model.score(X_test, Y_test)

In [None]:
# Calculate & print Mean Squared Error (MSE) & Root Mean Squared Error (RMSE)
# of the test-set predictions. RMSE is the square root of MSE, so it is in the
# same units as the target.
MSE = mean_squared_error(Y_test, Y_pred)
RMSE = math.sqrt(MSE)
print('Mean Squared Error (MSE): {0:.2f}'.format(MSE))
# The label previously read "(MSE)" on this line even though the value is the RMSE
print('Root Mean Squared Error (RMSE): {0:.2f}'.format(RMSE))

In [None]:
# Coefficient of determination (R²) of the predictions on the test set
r2_test = r2_score(y_true=Y_test, y_pred=Y_pred)
print('Coefficient of determination: {0:.2f}'.format(r2_test))

In [None]:
# Fresh, unfitted linear regression estimator to be evaluated by cross-validation
model = LinearRegression(fit_intercept=True)

In [None]:
# 10-fold cross-validation on the training data.
# With scoring='neg_mean_squared_error' scikit-learn returns the *negated* MSE
# of each fold, so the values stored in `mse` are <= 0.
mse = cross_val_score(model, X_train, Y_train, scoring='neg_mean_squared_error', cv = 10)

In [None]:
# `mse` comes from cross_val_score(..., scoring='neg_mean_squared_error'), which
# yields negated per-fold MSE values; flip the sign so the mean cross-validated
# MSE is reported as a positive error.
print(-np.mean(mse))

## Fitting Lasso regularized regression model

In [None]:
# Lasso regression estimator with regularisation strength alpha=0.5
lasso_rig = Lasso(fit_intercept=True, alpha=0.5, max_iter = 1000, tol=0.001)

In [None]:
# Fit on the training data and rebind `model` to the result of fit()
# NOTE(review): the features have not been standardised at this point, and a
# penalised fit is sensitive to feature scale — confirm this is intended.
model = lasso_rig.fit(X_train, Y_train)

In [None]:
# Score of the Lasso model on the training data
model.score(X_train, Y_train)

In [None]:
# Score of the Lasso model on the held-out test data
model.score(X_test, Y_test)

## Fitting Ridge regularized regression model

In [None]:
# Ridge regression estimator with regularisation strength alpha=0.5
ridge_rig = Ridge(fit_intercept=True, alpha=0.5, max_iter = 1000, tol=0.001)

In [None]:
# Fit on the training data and rebind `model` to the result of fit()
# NOTE(review): the features have not been standardised at this point, and a
# penalised fit is sensitive to feature scale — confirm this is intended.
model = ridge_rig.fit(X_train, Y_train)

In [None]:
# Score of the Ridge model on the training data
model.score(X_train, Y_train)

In [None]:
# Score of the Ridge model on the held-out test data
model.score(X_test, Y_test)

## Fitting Linear Regression model after standardizing features

In [None]:
# Scaler used to standardise the feature columns
sc_X = StandardScaler()

In [None]:
# Fit the scaler on the training data only, then apply the same transform to
# the test data.
# NOTE(review): this overwrites X_train / X_test with their scaled versions, so
# any earlier cell that is re-run after this one will silently see scaled data.
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)

In [None]:
# Fresh linear regression estimator for the standardised features
model = LinearRegression(fit_intercept=True)

In [None]:
# Fit on the standardised training data
model.fit(X_train, Y_train)

In [None]:
# Score on the standardised training data
# NOTE(review): a least squares fit with an intercept is expected to give the
# same R² on standardised features as on the raw ones — confirm this comparison
# is what was intended.
model.score(X_train, Y_train)

In [None]:
# Score on the standardised test data
model.score(X_test, Y_test)