# Simple Linear Regression and ML implementation

By Andan M
1. GitHub: https://github.com/andanm 
2. LinkedIn: https://www.linkedin.com/in/andan-m-854bb824a/

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this also hides deprecation warnings raised by the libraries
# used below, so re-enable warnings when debugging an environment issue.
warnings.filterwarnings("ignore")

In [None]:
try:
    from sklearn.datasets import load_boston
except ImportError:
    # `load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2, so
    # the import above fails on current releases. Define a drop-in replacement
    # that rebuilds the same object from the original data source, following
    # the loading recipe given in scikit-learn's deprecation notice.
    from sklearn.utils import Bunch

    def load_boston():
        """Return the Boston housing data with the legacy ``load_boston`` layout.

        The result exposes ``data`` (feature matrix), ``target`` (median home
        value), ``feature_names`` and ``DESCR``, which are the attributes the
        rest of this notebook reads. Requires network access to download the
        raw file.
        """
        source_url = "http://lib.stat.cmu.edu/datasets/boston"
        # The first 22 lines of the file are a free-text header.
        raw_values = pd.read_csv(
            source_url, sep=r"\s+", skiprows=22, header=None
        ).values
        # Every record is wrapped over two physical lines: the even rows hold
        # the first 11 features, the odd rows hold the last 2 features
        # followed by the target.
        data = np.hstack([raw_values[::2, :], raw_values[1::2, :2]])
        target = raw_values[1::2, 2]
        feature_names = np.array([
            "CRIM", "ZN", "INDUS", "CHAS", "NOX", "RM", "AGE",
            "DIS", "RAD", "TAX", "PTRATIO", "B", "LSTAT",
        ])
        description = (
            "Boston house prices dataset, loaded from {} because "
            "sklearn.datasets.load_boston is unavailable in this "
            "scikit-learn version.".format(source_url)
        )
        return Bunch(
            data=data,
            target=target,
            feature_names=feature_names,
            DESCR=description,
        )

In [None]:
# Load the dataset object; the cells below inspect its contents.
boston=load_boston()

In [None]:
# Dump the whole returned object.
print(boston)

In [None]:
# List the keys the dataset object exposes.
boston.keys()

In [None]:
# Print the dataset's description text.
print(boston.DESCR)

In [None]:
# Feature values (used below as the DataFrame contents).
print(boston.data)

In [None]:
# Target values (stored below in the "Price" column).
print(boston.target)

In [None]:
# Feature names (used below as the DataFrame column labels).
print(boston.feature_names)

In [None]:
# Build a DataFrame from the feature values, labelled with the feature names.
df=pd.DataFrame(boston.data,columns=boston.feature_names)

In [None]:
# Preview the first rows of the feature table.
df.head()

In [None]:
# Append the target as a "Price" column (it becomes the last column).
df["Price"]=boston.target

In [None]:
# Preview again to confirm the new column is present.
df.head()

In [None]:
# Summarise the columns: dtypes and non-null counts.
df.info()

In [None]:
# Descriptive statistics for each column.
df.describe()

In [None]:
# Count missing values per column.
df.isnull().sum()

Observation: There are no null values in the given dataset

In [None]:
## EDA
# Pairwise correlation between all columns, including Price.
df.corr()

In [None]:
# Scatter-plot matrix for every pair of columns.
sns.pairplot(df)

In [None]:
# Set the default figure size, then draw the correlation matrix as a heatmap
# with the coefficient written in each cell.
sns.set(rc={'figure.figsize':(10,5)})
sns.heatmap(df.corr(), annot=True)

In [None]:
# Feature names iterated over by the plotting loops below.
boston.feature_names

In [None]:
# Scatter each predictor against the target, one figure per column.
price = df['Price']
for column, values in df[list(boston.feature_names)].items():
    plt.scatter(values, price)
    plt.xlabel(column)
    plt.ylabel("Price")
    plt.show()

In [None]:
# Same relationship as a scatter plot with a fitted regression line overlaid,
# one figure per predictor.
for predictor in boston.feature_names:
    axes = sns.regplot(data=df, x=predictor, y='Price')
    axes.set_xlabel(predictor)
    axes.set_ylabel("Price")
    plt.show()

In [None]:
# One box plot per predictor to show its spread and outliers.
for column in boston.feature_names:
    axes = sns.boxplot(data=df, y=column, color='y')
    axes.set_title(column)
    plt.show()

In [None]:
# Re-check the table layout before separating features from the target.
df.head()

In [None]:
## Independent and dependent features
# X: every column except the last. Y: the last column, which is "Price"
# because it was appended after the feature columns.
X=df.iloc[:,:-1]
Y=df.iloc[:,-1]

In [None]:
# (rows, feature columns)
X.shape

In [None]:
# (rows,)
Y.shape

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 33% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
split_options = {"test_size": 0.33, "random_state": 10}
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, **split_options)

In [None]:
# Training features before scaling.
X_train.head()

In [None]:
# Standardize the datasets
from sklearn.preprocessing import StandardScaler
scaler=StandardScaler()

In [None]:
scaler

In [None]:
# Fit the scaler on the training features only and scale them in one step.
X_train=scaler.fit_transform(X_train)

In [None]:
# Scale the test features with the already-fitted scaler (transform only, no
# refit), so nothing about the test set influences the scaling.
X_test=scaler.transform(X_test)

In [None]:
# Scaled training features.
X_train

In [None]:
# Scaled test features.
X_test

In [None]:
import pickle

# Persist the fitted scaler so the same scaling can be re-applied to new data.
# The context manager guarantees the file is flushed and closed, which the
# previous bare open() call did not.
with open('scaling.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

# Model Training

In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
# Create the linear regression estimator with its default settings.
Linear_reg= LinearRegression()

In [None]:
Linear_reg

In [None]:
# Fit on the scaled training features and the training targets.
Linear_reg.fit(X_train, Y_train)

In [None]:
## Print the coefficients and the intercept
# One coefficient per input feature, in column order.
print(Linear_reg.coef_)

In [None]:
print(Linear_reg.intercept_)

In [None]:
# Parameters the estimator was configured with.
Linear_reg.get_params()

In [None]:
# Predictions for the scaled test features.
Linear_reg_pred=Linear_reg.predict(X_test)

In [None]:
Linear_reg_pred

In [None]:
## Assumptions
# Actual vs. predicted test targets: points close to a straight diagonal
# indicate that the predictions track the true values.
plt.scatter(Y_test,Linear_reg_pred)
plt.xlabel("Test Truth Data")
plt.ylabel("Test Predicted Data")

In [None]:
## Residuals
# Residual = actual target minus predicted target, per test row.
Linear_residuals=Y_test-Linear_reg_pred

In [None]:
Linear_residuals

In [None]:
# Kernel density estimate of the residuals.
sns.displot(Linear_residuals, kind='kde')

In [None]:
## Scatter plot of predictions against residuals
# Ideally the residuals are spread evenly around zero with no visible pattern.
plt.scatter(x=Linear_reg_pred, y=Linear_residuals)
plt.xlabel('Prediction')
plt.ylabel('Residuals')

In [None]:
## Performance metrics

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Compute each error metric once on the held-out test set, then report it.
mse = mean_squared_error(Y_test, Linear_reg_pred)
mae = mean_absolute_error(Y_test, Linear_reg_pred)
rmse = np.sqrt(mse)  # RMSE is the square root of the MSE computed above

print('MSE of the linear regression model is {}'.format(mse))
print('MAE of the linear regression model is {}'.format(mae))
print('RMSE of the linear regression model is {}'.format(rmse))

In [None]:
# R-squared and adjusted R-squared

In [None]:
from sklearn.metrics import r2_score

# R-squared on the held-out test set. It measures goodness of fit (share of
# target variance explained), not classification accuracy, so it is reported
# under its own name.
Linear_R2_square = r2_score(Y_test, Linear_reg_pred)
print('Linear Regression model R squared is {}%'.format(round(Linear_R2_square*100,3)))

# Adjusted R-squared penalises R-squared for the number of predictors p given
# n observations: 1 - (1 - R2) * (n - 1) / (n - p - 1).
n_observations = len(Y_test)
n_predictors = X_test.shape[1]
Linear_Adjusted_R2_square = 1 - (1 - Linear_R2_square) * (n_observations - 1) / (
    n_observations - n_predictors - 1
)
print('Linear Regression model adjusted R squared is {}%'.format(round(Linear_Adjusted_R2_square*100,3)))

# New data prediction

In [None]:
# Use the first row of the raw feature matrix as a stand-in for new data.
boston.data[0]

In [None]:
# Reshape the 1-D row into a single-row 2-D array (one sample, all features).
boston.data[0].reshape(1,-1)

In [None]:
## Transformation of the sample
# Apply the scaler that was fitted on the training features.
scaler.transform(boston.data[0].reshape(1,-1))

In [None]:
# Predict from the scaled sample, since the model was trained on scaled inputs.
Linear_reg.predict((scaler.transform(boston.data[0].reshape(1,-1))))

# Pickling the model file for deployment

In [None]:
import pickle

In [None]:
# Serialize the trained model to disk. The context manager closes the file
# even if pickling fails, so the handle is never leaked.
with open("regmodel.pkl", 'wb') as model_file:
    pickle.dump(Linear_reg, model_file)

In [None]:
# Reload the model from disk, closing the file handle once it has been read.
# NOTE: unpickling can execute arbitrary code, so only load pickle files that
# come from a trusted source.
with open('regmodel.pkl', 'rb') as model_file:
    pickled_model = pickle.load(model_file)

In [None]:
## Prediction
# Sanity check: run the reloaded model on the first sample, scaled with the
# fitted scaler, so the output can be compared with the in-memory model's.
pickled_model.predict((scaler.transform(boston.data[0].reshape(1,-1))))