# Processing Boston Dataset

In [1]:
import numpy as np
import sklearn
from sklearn.datasets import load_boston
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split

# Load the Boston housing bunch that ships with scikit-learn.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell presumably needs an older release -- confirm the pinned version.
boston_data = load_boston()

# X holds the feature matrix (.data), y holds the regression target (.target).
X, y = boston_data.data, boston_data.target

# Make y an explicit column: one row per sample, a single column (rank 2).
y = y.reshape(-1, 1)

# Preprocessing: prepend a column of 1's to X.
one_col = np.ones((len(X), 1))
X = np.concatenate([one_col, X], axis=1)

# Expand X into its degree-2 polynomial features (X_2); y is reused as y_2.
# Note: PolynomialFeatures creates a column of ones as the first feature.
# NOTE(review): X already carries a ones column at this point, so the expansion
# looks like it contains repeated constant and linear columns -- consider
# dropping the manual column if nothing else depends on it.
poly = PolynomialFeatures(degree=2)
X_2 = poly.fit_transform(X)
y_2 = y

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_2,
    y_2,
    test_size=0.3,
    random_state=0,
)

# Linear Regression (Boston)

In [2]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Fit an unregularised linear model on the training split produced by the
# preprocessing cell (X_train / y_train must already exist in the kernel).
lr = LinearRegression().fit(X_train, y_train)

# Predictions on the held-out split; reused below for the MSE.
Linear_predict = lr.predict(X_test)

# Estimator .score() on each split (R^2 for regressors, per the scikit-learn docs).
Linear_train_score = lr.score(X_train, y_train)
Linear_test_score = lr.score(X_test, y_test)

print("linear regression train score:\t", Linear_train_score)
print("linear regression test score:\t", Linear_test_score)
# mean_squared_error is documented as (y_true, y_pred); the original call had the
# two swapped. The printed value is the same because squared error is symmetric,
# but the documented order stays correct if options such as sample_weight are added.
print("linear MSE test:", mean_squared_error(y_test, Linear_predict))

linear regression train score:	 0.9512182010883049
linear regression test score:	 0.65203541351031
linear MSE test: 28.973445848667456


# Lasso Regression (Boston)

In [3]:
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_squared_error

# L1-regularised linear model with penalty strength alpha = 100, fitted on the
# training split produced by the preprocessing cell.
# max_iter is an iteration count and must be an integer: the original literal
# 10e5 is the float 1000000.0, which recent scikit-learn releases reject during
# parameter validation. 1000000 is the same budget expressed as an int.
lasso = Lasso(alpha=100, max_iter=1000000)
lasso.fit(X_train, y_train)

# Predictions on the held-out split; reused below for the MSE.
Lasso_predict = lasso.predict(X_test)

# Estimator .score() on each split (R^2 for regressors, per the scikit-learn docs).
Lasso_train_score = lasso.score(X_train, y_train)
Lasso_test_score = lasso.score(X_test, y_test)

# "\u03BB" is the Greek letter lambda; the label mirrors the alpha used above.
print("\u03BB = 100")
print("lasso regression train score:\t", Lasso_train_score)
print("lasso regression test score:\t", Lasso_test_score)
# mean_squared_error is documented as (y_true, y_pred); the original call had the
# two swapped. The value is unchanged because squared error is symmetric.
print("lasso MSE test:", mean_squared_error(y_test, Lasso_predict))

λ = 100
lasso regression train score:	 0.8103743709618847
lasso regression test score:	 0.695779447011784
lasso MSE test: 25.33107695520305


# Processing Digits Dataset

In [4]:
import numpy as np
import sklearn
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

# Load the digits bunch that ships with scikit-learn.
digits_data = load_digits()

# Feature matrix and target, taken directly from the bunch.
# NOTE(review): this rebinds X, y and the train/test names used by the Boston
# cells above, so the cells must be run top to bottom.
X, y = digits_data.data, digits_data.target

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

# Linear Regression (Digits)

In [5]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Fit an unregularised linear model on the digits training split
# (X_train / y_train as rebound by the digits preprocessing cell).
lr = LinearRegression().fit(X_train, y_train)

# Predictions on the held-out split; reused below for the MSE.
Linear_predict = lr.predict(X_test)

# Estimator .score() on each split (R^2 for regressors, per the scikit-learn docs).
Linear_train_score = lr.score(X_train, y_train)
Linear_test_score = lr.score(X_test, y_test)

print("linear regression train score:\t", Linear_train_score)
print("linear regression test score:\t", Linear_test_score)
# mean_squared_error is documented as (y_true, y_pred); the original call had the
# two swapped. The printed value is the same because squared error is symmetric,
# but the documented order stays correct if options such as sample_weight are added.
print("linear MSE test:", mean_squared_error(y_test, Linear_predict))

linear regression train score:	 0.5979800626243363
linear regression test score:	 0.5740460038277273
linear MSE test: 3.452475459530763


# Lasso Regression (Digits)

In [6]:
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_squared_error

# L1-regularised linear model with a light penalty (alpha = 0.01), fitted on the
# digits training split; the solver is capped at 1000 iterations.
lasso = Lasso(alpha=.01, max_iter=1000)
lasso.fit(X_train, y_train)

# Predictions on the held-out split; reused below for the MSE.
Lasso_predict = lasso.predict(X_test)

# Estimator .score() on each split (R^2 for regressors, per the scikit-learn docs).
Lasso_train_score = lasso.score(X_train, y_train)
Lasso_test_score = lasso.score(X_test, y_test)

# "\u03BB" is the Greek letter lambda; the label mirrors the alpha used above.
print("\u03BB = .01")
print("lasso regression train score:\t", Lasso_train_score)
print("lasso regression test score:\t", Lasso_test_score)
# mean_squared_error is documented as (y_true, y_pred); the original call had the
# two swapped. The value is unchanged because squared error is symmetric.
print("lasso MSE test:", mean_squared_error(y_test, Lasso_predict))

λ = .01
lasso regression train score:	 0.595761068107747
lasso regression test score:	 0.5753167560957702
lasso MSE test: 3.4421756594115345
