In [None]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
%matplotlib inline

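Let's load the California Housing dataset (the code below uses scikit-learn's `fetch_california_housing`, even though the variable is named `boston_df`).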

In [None]:
from sklearn.datasets import fetch_california_housing


In [None]:
# Fetch the California housing data through scikit-learn.
# Note: despite the variable name, this is not the Boston dataset.
boston_df=fetch_california_housing()

In [None]:
# List the top-level fields exposed by the loaded dataset object.
boston_df.keys()

In [None]:
## Let's check the description of the dataset
print(boston_df.DESCR)

In [None]:
# Raw feature values (these become the DataFrame columns further down).
print(boston_df.data)

In [None]:
# Regression target values (stored later as the 'price' column).
print(boston_df.target)

In [None]:
# Names of the feature columns, in the same order as the columns of `data`.
print(boston_df.feature_names)

# Preparing The Dataset

In [None]:
# Wrap the feature values in a DataFrame, labelling columns with the feature names.
dataset=pd.DataFrame(boston_df.data,columns=boston_df.feature_names)

In [None]:
# Preview the first rows of the feature table.
dataset.head()

In [None]:
# Append the regression target as a new 'price' column (it becomes the last column).
dataset['price']=boston_df.target

In [None]:
# Preview again to confirm the 'price' column was added.
dataset.head()

In [None]:
# Summary of the columns: dtypes and non-null counts.
dataset.info()

Summarizing the stats of the data

In [None]:
# Descriptive statistics for every numeric column.
dataset.describe()

In [None]:
## Check the missing values
# Count of null entries per column.
dataset.isnull().sum()

In [None]:
### Exploratory Data Analysis
## Correlation
# Pairwise correlation between all columns, including the 'price' target.

dataset.corr()

In [None]:
import seaborn as sns
# Grid of pairwise plots covering every pair of columns in the dataset.
sns.pairplot(dataset)

In [None]:
# Median income vs. price. The x-axis label now names the column that is
# actually plotted (it previously read "Crime Rate").
plt.scatter(dataset['MedInc'], dataset['price'])
plt.xlabel("Median Income (MedInc)")
plt.ylabel("Price")

In [None]:
# House age vs. price. The x-axis label now names the column that is
# actually plotted (it previously read "RM").
plt.scatter(dataset['HouseAge'], dataset['price'])
plt.xlabel("House Age (HouseAge)")
plt.ylabel("Price")

In [None]:
# seaborn was already imported in the pairplot cell; this re-import is redundant but harmless.
import seaborn as sns
# Scatter of HouseAge against price with a fitted regression line.
sns.regplot(x="HouseAge", y="price", data=dataset)

In [None]:
# Scatter of HouseAge against Population with a fitted regression line
# (feature vs. feature; 'price' is not involved here).
sns.regplot(x="HouseAge", y="Population", data=dataset)

In [None]:
# Scatter of AveOccup against price with a fitted regression line.
sns.regplot(x="AveOccup", y="price", data=dataset)

In [None]:
## Independent and dependent features
# 'price' was appended last, so every column before it is a feature and the
# final column is the target.
x, y = dataset.iloc[:, :-1], dataset.iloc[:, -1]

In [None]:
# Preview the feature matrix (should no longer contain 'price').
x.head()

In [None]:
# Display the target series.
y

In [None]:
## Train/test split

from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=42,
)

In [None]:
# Display the training features.
x_train

In [None]:
# Display the test features.
x_test

In [None]:
## Standardize the dataset
# Create the scaler only; it is fitted in the following cell.

from sklearn.preprocessing import StandardScaler
scaler=StandardScaler()

In [None]:
# Fit the scaler on the training features and replace them with their scaled values.
x_train=scaler.fit_transform(x_train)

In [None]:
# Scale the test features with the statistics learned from the training split
# (transform only, never refit on test data) and store them in x_test.
# The result was previously assigned to x_train, which overwrote the training
# features with test data and left x_test unscaled.
x_test=scaler.transform(x_test)

In [None]:
# Display the current contents of x_train.
x_train

Model Training

In [None]:
from sklearn.model_selection import train_test_split

# Final split used for modelling: hold out 20% of the rows for testing.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

# Re-splitting from the raw x/y discards any earlier scaling, so standardize
# here: fit the scaler on the training split only, then apply it to the test
# split. The model is then trained in the same scaled space that the later
# `scaler.transform(...)` prediction cells feed it.
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)


In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
# Create an ordinary least-squares linear model and fit it on the training split.
regression = LinearRegression()
regression.fit(x_train,y_train)

In [None]:
## Print the coefficients (the intercept is printed in a later cell)
print(regression.coef_)

In [None]:
# Sanity check: the feature and target training sets should have the same number of rows.
print(x_train.shape)
print(y_train.shape)


In [None]:
# Intercept term of the fitted model.
print(regression.intercept_)

In [None]:
## Parameters the estimator was configured with
regression.get_params()

In [None]:
### Prediction with test data
reg_pred = regression.predict(x_test)

In [None]:
# Display the predicted values for the test split.
reg_pred

In [None]:
## Scatter plot of actual vs. predicted values
# Axis labels added so the figure can be read on its own.
plt.scatter(y_test,reg_pred)
plt.xlabel("Actual price")
plt.ylabel("Predicted price")

In [None]:
## Residuals
# Actual minus predicted: positive values mean the model under-predicted.
residuals = y_test-reg_pred

In [None]:
# Display the residual values.
residuals

In [None]:
## Plot the residuals
# Kernel density estimate of the residual distribution.

sns.displot(residuals,kind="kde")


In [None]:
## Scatter plot of residuals against predictions
## Expectation: residuals spread uniformly, with no visible pattern

# Axis labels added so the figure can be read on its own.
plt.scatter(reg_pred,residuals)
plt.xlabel("Predicted price")
plt.ylabel("Residual (actual - predicted)")

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Compute each error metric once, then report MAE, MSE and RMSE in that order.
mae = mean_absolute_error(y_test, reg_pred)
mse = mean_squared_error(y_test, reg_pred)
rmse = np.sqrt(mse)

for metric_value in (mae, mse, rmse):
    print(metric_value)

R Square and adjusted R square

$R^2 = 1 - \frac{SSR}{SST}$

where $R^2$ is the coefficient of determination, SSR is the sum of squares of residuals, and SST is the total sum of squares.

In [None]:
from sklearn.metrics import r2_score

# Coefficient of determination of the predictions on the test split.
score = r2_score(y_true=y_test, y_pred=reg_pred)
print(score)

In [None]:
# Display adjusted R-squared
n_obs = len(y_test)            # number of test observations
n_features = x_test.shape[1]   # number of predictor columns
1 - (1 - score) * (n_obs - 1) / (n_obs - n_features - 1)

Adjusted $R^2 = 1 - \frac{(1 - R^2)(n - 1)}{n - k - 1}$

where:

- $R^2$: the R² of the model
- $n$: the number of observations
- $k$: the number of predictor variables

# New Data Prediction

In [None]:
# Take the first record and reshape it into a single-row 2-D array.
boston_df.data[0].reshape(1,-1)

In [None]:
## Transformation of new data
# Apply the already-fitted scaler to the single-row sample.

scaler.transform(boston_df.data[0].reshape(1,-1))

In [None]:
# Scale the sample with the fitted scaler, then predict with the trained model.
regression.predict(scaler.transform(boston_df.data[0].reshape(1,-1)))

# Pickling The Model file For Deployment

In [None]:
import pickle

In [None]:
# Serialize the trained regressor to disk. The context manager guarantees the
# file handle is flushed and closed even if pickling fails.
# NOTE: only the model is saved here; the fitted `scaler` used to prepare
# inputs is not.
with open('regmodel.pkl', 'wb') as model_file:
    pickle.dump(regression, model_file)

In [None]:
# Load the model back from disk, closing the file handle when done.
# pickle can execute arbitrary code while loading, so only unpickle files
# that come from a trusted source.
with open('regmodel.pkl', 'rb') as model_file:
    pickle_model = pickle.load(model_file)

In [None]:
## Prediction
# Same scaled sample as before, predicted with the model reloaded from the pickle file.
pickle_model.predict(scaler.transform(boston_df.data[0].reshape(1,-1)))