# Elastic Net Regression

# Implementation


In [1]:
import warnings
# Silence every Python warning for the rest of the session.
# Note: this also hides deprecation notices emitted by the libraries used below.
warnings.filterwarnings('ignore')

# Install necessary libraries

In [2]:
# %pip install pandas plotly scikit-learn seaborn xgboost matplotlib

In [3]:
import pandas as pd
import seaborn as sb
import plotly.express as px
from sklearn.datasets import load_boston

from sklearn.model_selection import train_test_split

# Import regressors

In [4]:
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor

In [5]:
def get_regressor(regressor_name='SVR'):
    """Look up a regressor class by its short name.

    Parameters
    ----------
    regressor_name : str, default 'SVR'
        One of the keys of the lookup table below. Both 'xgb_regressor' and
        the older, misspelled 'xbg_regressor' select ``XGBRegressor``.

    Returns
    -------
    type or None
        The regressor *class* (not an instance) for a known name. For any
        other value the list of valid names is printed and ``None`` is
        returned.
    """
    reg_dict = {
        'SVR': SVR,
        'decision_tree_regressor': DecisionTreeRegressor,
        'gradient_boost_regressor': GradientBoostingRegressor,
        'random_forest_regressor': RandomForestRegressor,
        # Misspelled key kept so code that already uses it keeps working.
        'xbg_regressor': XGBRegressor,
        'xgb_regressor': XGBRegressor,
    }
    try:
        return reg_dict[regressor_name]
    except (KeyError, TypeError):
        # KeyError: unknown name. TypeError: unhashable argument (e.g. a list).
        # Anything else (KeyboardInterrupt, ...) is deliberately not swallowed.
        print("Please select from: ",list(reg_dict.keys()))
        return None

In [6]:
import matplotlib.pyplot as plt
%matplotlib inline

# Loading Dataset
Load the Boston housing dataset from scikit-learn's built-in datasets.

In [7]:
# Load the Boston housing data bundled with scikit-learn.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in 1.2 —
# confirm the installed version before relying on this cell.
dataset = load_boston()

`dataset` is a dictionary-like object (a scikit-learn `Bunch`). Let's take a look at what keys are available.

In [8]:
# List the fields available on the loaded dataset object.
dataset.keys()

dict_keys(['data', 'target', 'feature_names', 'DESCR', 'filename', 'data_module'])

Let's extract the features (X) and the target variable (Y) and convert them into pandas DataFrames.

In [9]:
# Features: one column per entry in dataset['feature_names'].
X = pd.DataFrame(data=dataset['data'], columns=dataset['feature_names'])

# Target: kept as a single-column frame whose column is named 'Y'.
Y = pd.DataFrame(data=dataset['target'], columns=['Y'])

In [10]:
# Preview the first rows of the feature frame.
X.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33


In [11]:
# Preview the first rows of the target frame.
Y.head()

Unnamed: 0,Y
0,24.0
1,21.6
2,34.7
3,33.4
4,36.2


Let's wrap the data-loading steps in a function.

In [12]:
def load_dataset()->"tuple[pd.DataFrame, pd.DataFrame]":
    """Load the Boston housing data as a pair of DataFrames.

    Returns
    -------
    (X, Y) : tuple of pd.DataFrame
        ``X`` is built from ``dataset['data']`` with columns taken from
        ``dataset['feature_names']``; ``Y`` is built from
        ``dataset['target']`` as a single column named 'Y'.

    Notes
    -----
    NOTE(review): ``load_boston`` was deprecated in scikit-learn 1.0 and
    removed in 1.2 — confirm the installed version supports it.
    """
    dataset = load_boston()
    X = pd.DataFrame(
        dataset['data'],
        columns=dataset['feature_names']
    )

    Y = pd.DataFrame(
        dataset['target'],
        columns=['Y']
    )
    return X, Y

In [13]:
# Rebuild X and Y through the load_dataset helper.
X, Y = load_dataset()

# Splitting dataset into training and test sets

In [14]:
 # Hold out 33% of the rows as a test set; the fixed random_state keeps the split reproducible.
 X_train, X_test, y_train, y_test = train_test_split(
     X,
     Y, 
     test_size=0.33, 
     random_state=42
 )

# Creating a simple wrapper class for a regressor

In [19]:
class Regressor:
    """Thin wrapper that pairs a regressor with its training data.

    Parameters
    ----------
    regressor : callable
        Zero-argument factory (typically an estimator class). It is called
        once in ``__init__`` to build the underlying model.
    x : pd.DataFrame
        Training features, passed to ``fit`` by :meth:`train`.
    y : pd.DataFrame
        Training targets, passed to ``fit`` by :meth:`train`.
    """

    def __init__(self,regressor, x:pd.DataFrame, y:pd.DataFrame):
        self.x = x
        self.y = y
        self.regressor = regressor()
        # Set by train(); lets predict() tell "never trained" apart from a
        # genuine prediction error on an already-trained model.
        self._is_trained = False

    def train(self):
        """Fit the wrapped model on the stored training data."""
        self.regressor.fit(self.x, self.y)
        self._is_trained = True

    def predict(self, x:pd.DataFrame):
        """Return the wrapped model's predictions for ``x``.

        If the underlying ``predict`` call raises and :meth:`train` has not
        been run on this wrapper yet, a notice is printed, the model is
        trained on the stored data, and the prediction is retried once.
        Once the wrapper has been trained, any exception from ``predict``
        propagates unchanged instead of triggering a retrain.
        """
        try:
            return self.regressor.predict(x)
        except Exception:
            if self._is_trained:
                # Already trained: this is a real error, not a missing fit.
                raise
            print("The regression model has not been fitted. The regressor will be trained on the provided data.")
            self.train()

        return self.regressor.predict(x)

In [25]:
regressor = 'xbg_regressor'

In [26]:
regressor = Regressor(get_regressor(regressor),X_train,y_train)

In [27]:
# Predict on the held-out features; Regressor.predict trains the model first
# if it has not been fitted yet (hence the notice printed below).
prediction = regressor.predict(X_test)

The regression model has not been fitted. The regressor will be trained on the provided data.


# Visualizing the result

Plot using Plotly

In [28]:
# Overlay predictions and ground truth as two line traces.
# reshape(-1) flattens each array to 1-D without hard-coding the test-set
# length, so the cell keeps working if the split size changes.
fig = px.line(
    y=[
        prediction.reshape(-1),
        y_test.values.reshape(-1),
    ],
    title="Testing Set vs Prediction Set",
)


def custom_legend_name(new_names):
    """Rename the traces of the notebook-level ``fig``, in order, to ``new_names``."""
    for i, new_name in enumerate(new_names):
        fig.data[i].name = new_name


fig.update_layout(
    yaxis_title="Housing Price",
    xaxis_visible=False,
    xaxis_showticklabels=False
)
# Trace order matches the order of the arrays passed to px.line above.
custom_legend_name(['Prediction','Ground Truth'])

In [29]:
# Render the figure built in the previous cell.
fig.show()