# Linear Regression Models with no Preprocessing

In [1]:
from data_unpacker import unpack_data
import pandas as pd

all_data = unpack_data('data/train.csv')

# Use two thirds of data to train
targets = all_data.SalePrice
train_data = all_data[0:1094]
train_data = train_data.drop('SalePrice', axis=1)
train_data = train_data.drop('Id', axis=1)
train_targets = targets[0:1094]
test_data = all_data[1095:1459]
test_data = test_data.drop('SalePrice', axis=1)
test_data = test_data.drop('Id', axis=1)
test_targets = targets[1095:1459]

## Linear Regression

Build and predict on a basic linear model

In [2]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Create model
linear_regr_model = LinearRegression()

# Train model
linear_regr_model.fit(train_data, train_targets)

# Predict with model
linear_regr_pred = linear_regr_model.predict(test_data)

# Calculate and print mean squared error
linear_regr_error = mean_squared_error(test_targets, linear_regr_pred)
print(f"Mean squared error = {linear_regr_error}")


Mean squared error = 1859607927.330847


## Ridge Regression

Build and predict using a ridge regression model

In [3]:
from sklearn.linear_model import Ridge

def train_ridge_model(alphas, train_data, train_targets, test_data, test_targets) -> [Ridge, float]:
    '''Train a ridge model that performs best given an array of alphas'''
    import sys
    model: Ridge
    lowest_error = sys.float_info.max
    for alpha in alphas:
        test_model = Ridge(alpha=alpha)
        test_model.fit(train_data, train_targets)
        predictions = test_model.predict(test_data)
        error = mean_squared_error(test_targets, predictions)
        if (error < lowest_error):
            lowest_error = error
            model = test_model
    return model, lowest_error


alphas = [0.5, 1, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50]
ridge_model, ridge_error = train_ridge_model(alphas, train_data, train_targets, test_data, test_targets)

print(f"Alpha chosen: {ridge_model.alpha}")
print(f"Mean squared error: {ridge_error}")

Alpha chosen: 10
Mean squared error: 1608511231.0750628


## Lasso Regression

Build and predict using a lasso model

In [4]:
from sklearn.linear_model import Lasso

def train_lasso_model(alphas, train_data, train_targets, test_data, test_targets) -> [Lasso, float]:
    '''Train a lasso model that performs best given an array of alphas'''
    import sys
    model: Lasso
    lowest_error = sys.float_info.max
    for alpha in alphas:
        test_model = Lasso(alpha=alpha)
        test_model.fit(train_data, train_targets)
        predictions = test_model.predict(test_data)
        error = mean_squared_error(test_targets, predictions)
        if (error < lowest_error):
            lowest_error = error
            model = test_model
    return model, lowest_error


alphas.extend(range(55, 100, 5))
lasso_model, lasso_error = train_lasso_model(alphas, train_data, train_targets, test_data, test_targets)

print(f"Alpha chosen: {lasso_model.alpha}")
print(f"Mean squared error: {lasso_error}")

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Alpha chosen: 60
Mean squared error: 1582802242.0152326


# Linear Regression Models with Normalization

In [5]:
normalized_data = unpack_data('data/train.csv', preprocess=True)

# Use two thirds of data to train
norm_targets = normalized_data.SalePrice
norm_train_data = normalized_data[0:1094]
norm_train_data = norm_train_data.drop('SalePrice', axis=1)
norm_train_data = norm_train_data.drop('Id', axis=1)
norm_train_targets = norm_targets[0:1094]
norm_test_data = normalized_data[1095:1459]
norm_test_data = norm_test_data.drop('SalePrice', axis=1)
norm_test_data = norm_test_data.drop('Id', axis=1)
norm_test_targets = norm_targets[1095:1459]

## Linear Regression

Build and predict on a basic linear model

In [6]:
# Create model
norm_linear_regr_model = LinearRegression()

# Train model
norm_linear_regr_model.fit(norm_train_data, norm_train_targets)

# Predict with model
norm_linear_regr_pred = norm_linear_regr_model.predict(test_data)

# Calculate and print mean squared error
norm_linear_regr_error = mean_squared_error(norm_test_targets, norm_linear_regr_pred)
print(f"Mean squared error = {norm_linear_regr_error}")


Mean squared error = 2269343.288339153


## Ridge Regression

Build and predict using a ridge regression model

In [7]:
norm_ridge_model, norm_ridge_error = train_ridge_model(alphas, norm_train_data, norm_train_targets, norm_test_data, norm_test_targets)

print(f"Alpha chosen: {norm_ridge_model.alpha}")
print(f"Mean squared error: {norm_ridge_error}")

Alpha chosen: 0.5
Mean squared error: 1.5690554966057596e-05


## Lasso Regression

Build and predict using a lasso model

In [8]:
norm_lasso_model, norm_lasso_error = train_lasso_model(alphas, norm_train_data, norm_train_targets, norm_test_data, norm_test_targets)

print(f"Alpha chosen: {norm_lasso_model.alpha}")
print(f"Mean squared error: {norm_lasso_error}")

Alpha chosen: 0.5
Mean squared error: 3.001891143106693e-05
