# Linear Regression

In [1]:
import pandas as pd
import numpy as np

In [6]:
# Read the BMI / life-expectancy table (comma-separated, pandas' default)
# and preview its first five rows.
bmi_life_data = pd.read_csv('bmi_and_life_expectancy.csv')
bmi_life_data.head()

Unnamed: 0,Country,Life expectancy,BMI
0,Afghanistan,52.8,20.62058
1,Albania,76.8,26.44657
2,Algeria,75.5,24.5962
3,Andorra,84.6,27.63048
4,Angola,56.7,22.25083


In [27]:
# Copy the BMI column into a 1-D NumPy array and peek at the first values.
bmi = np.array(bmi_life_data["BMI"])
print(bmi[:5])

# Turn the flat vector into a single-column 2-D array (one row per sample),
# which is the predictor shape handed to fit() further down.
bmi = bmi.reshape(-1, 1)
print(bmi.shape)

[20.62058 26.44657 24.5962  27.63048 22.25083]
(163, 1)


In [28]:
# Outcome variable: the "Life expectancy" column as a 1-D NumPy array.
life_expectancy = np.array(bmi_life_data.loc[:, "Life expectancy"])
life_expectancy[:5]

array([52.8, 76.8, 75.5, 84.6, 56.7])

In [29]:
from sklearn.linear_model import LinearRegression

# Fit a simple linear regression of life expectancy on BMI.
# fit() returns the estimator itself, so the chained call yields the fitted model.
bmi_life_model = LinearRegression().fit(bmi, life_expectancy)
bmi_life_model

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [32]:
# Predict life expectancy for a single BMI value (presumably Laos's BMI,
# going by the variable name). The input is a 1x1 array: one sample, one feature.
laos_life_exp = bmi_life_model.predict(np.array([[21.07931]]))
laos_life_exp

array([60.31564716])

# Multiple linear regression 

In [33]:
from sklearn.linear_model import LinearRegression
from sklearn.datasets import load_boston

In [34]:
# Load the Boston house-prices dataset and split it into predictors and target.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell needs an older scikit-learn - confirm the pinned version.
boston_data = load_boston()
x, y = boston_data.data, boston_data.target

In [35]:
# Build the multiple linear regression model and fit it on all features in x.
# The trailing semicolon keeps the notebook from echoing the estimator repr.
model = LinearRegression()
model.fit(x, y);

In [36]:
# A single house described by 13 feature values (one row, so the outer list
# makes it a 2-D input with a single sample).
sample_house = [[
    0.22969, 0.0, 10.59, 0.0, 0.489,
    6.326, 52.5, 4.3549, 4.0, 277.0,
    18.6, 394.87, 10.97,
]]

# Estimate the target value for sample_house with the fitted model.
prediction = model.predict(sample_house)

In [37]:
# Display the value predicted for sample_house.
prediction

array([23.68284712])

# Polynomial regression

In [56]:
# Load the polynomial-regression training set (columns Var_X and Var_Y)
# and display it.
train_data = pd.read_csv('polynomial_regression.csv')
train_data

Unnamed: 0,Var_X,Var_Y
0,-0.33532,6.66854
1,0.0216,3.86398
2,-1.19438,5.16161
3,-0.65046,8.43823
4,-0.28001,5.57201
5,1.93258,-11.1327
6,1.2262,-5.31226
7,0.74727,-4.63725
8,3.32853,3.8065
9,2.87457,-6.06084


In [42]:
# Predictor: select Var_X as a one-column frame so the resulting array is
# already 2-D (n_samples, 1).
X = np.array(train_data[["Var_X"]])
X.shape

(20, 1)

In [43]:
# Outcome: Var_Y as a 1-D array.
y = np.array(train_data.loc[:, "Var_Y"])
y.shape

(20,)

In [50]:
from sklearn.preprocessing import PolynomialFeatures

In [57]:
# Expand the single predictor into polynomial terms up to degree 4.
poly_feat = PolynomialFeatures(degree=4)
# Learn the feature expansion from X and apply it in one step.
X_poly = poly_feat.fit_transform(X)

In [58]:

# Polynomial regression = ordinary linear regression on the expanded features.
# The trailing semicolon keeps the notebook from echoing the estimator repr.
poly_model = LinearRegression()
poly_model.fit(X_poly, y);

# Regularization

In [65]:
# Load the regularization data set. The file has no header row, so pandas
# labels the columns 0..N-1. Preview the first five rows.
train_data = pd.read_csv('regularization.csv', header=None)
train_data.head()

Unnamed: 0,0,1,2,3,4,5,6
0,1.25664,2.04978,-6.2364,4.71926,-4.26931,0.2059,12.31798
1,-3.89012,-0.37511,6.14979,4.94585,-3.57844,0.0064,23.67628
2,5.09784,0.9812,-0.29939,5.85805,0.28297,-0.20626,-1.53459
3,0.39034,-3.06861,-5.63488,6.43941,0.39256,-0.07084,-24.6867
4,5.84727,-0.15922,11.41246,7.52165,1.69886,0.29022,17.54122


In [68]:
# Predictors: every column except the last one.
X = np.array(train_data[train_data.columns[:-1]])
X[0]

array([ 1.25664,  2.04978, -6.2364 ,  4.71926, -4.26931,  0.2059 ])

In [70]:
# Outcome: the last column.
y = np.array(train_data[train_data.columns[-1]])
y[0]

12.31798

In [72]:
from sklearn import linear_model

# Linear regression with L1 (lasso) regularization.
# alpha=1.0 is scikit-learn's default penalty strength, spelled out here.
lasso_reg = linear_model.Lasso(alpha=1.0)

In [73]:
# Fit the lasso model on the unscaled predictors.
# fit() returns the estimator itself, so lasso_fit is the same object as lasso_reg.
lasso_reg.fit(X, y)
lasso_fit = lasso_reg

In [74]:
# Retrieve and print the fitted lasso coefficients (one per predictor column).
reg_coef = lasso_fit.coef_
print(reg_coef)

[ 0.          2.35793224  2.00441646 -0.05511954 -3.92808318  0.        ]


In [76]:
# Baseline for comparison: coefficients of a plain (unregularized) linear
# regression on the same predictors.
linear_reg = LinearRegression()
linear_reg.fit(X, y)
fit = linear_reg  # fit() returns the estimator itself
coef = fit.coef_
print(coef)

[-6.19918532e-03  2.96325160e+00  1.98199191e+00 -7.86249920e-02
 -3.95818772e+00  9.30786141e+00]


# Feature scaling

In [80]:
from sklearn.preprocessing import StandardScaler
# Create the standardization scaling object (constructed with default settings).
scaler = StandardScaler()

In [81]:
# Learn the scaling parameters from X, then apply them to X.
X_scaled = scaler.fit(X).transform(X)

In [82]:
# Repeat the lasso fit, this time on the standardized predictors.
# alpha=1.0 is scikit-learn's default penalty strength, spelled out here.
lasso_reg = linear_model.Lasso(alpha=1.0)
lasso_reg.fit(X_scaled, y)
lasso_fit = lasso_reg  # fit() returns the estimator itself

# Print the coefficients so they can be compared with the unscaled fit.
reg_coef = lasso_fit.coef_
print(reg_coef)

[  0.           3.90753617   9.02575748  -0.         -11.78303187
   0.45340137]


In [83]:
# When the data has been scaled, the first coefficient is still regularized to 0,
# but now it is the fourth coefficient (and not the sixth) that gets set to 0.
# You might want to explore descriptive statistics for the original data to see how the standardization changed each column.