# Chapter 08 - Multivariate Linear Regression

* Hypothesis:
    
    
    $y = b_0 + b_1 * x_1 + b_2 * x_2 + ...$

In [14]:
from random import seed
from Codes.ch01_load_and_convert_data import load_csv, str_column_to_float
from Codes.ch02_scale_data_functions import dataset_minmax, normalize_dataset
from Codes.ch03_resampling_methods import cross_validation_split
from Codes.ch06_algorithm_test_harnesses import evaluate_algorithm_kfold_reg

In [1]:
# Make predictions with coefficients
def predict(row, coefficients):
    """Return the linear-model prediction for a single row.

    coefficients[0] is the intercept; coefficients[k] (k >= 1) is the weight
    applied to row[k - 1]. The final element of row is never used as an
    input (the datasets in this notebook keep the target value there).
    """
    total = coefficients[0]
    # Pair each input (everything but the last entry) with its 1-based
    # coefficient index and accumulate in left-to-right order.
    for idx, value in enumerate(row[:-1], start=1):
        total += coefficients[idx] * value
    return total

In [2]:
# Small contrived dataset: each row is [x, y]; predict() ignores the last
# element, which is printed below as the expected value.
dataset = [[1, 1], [2, 3], [4, 3], [3, 2], [5, 5]]
# Hand-picked coefficients: intercept b0 = 0.4, slope b1 = 0.8.
coef = [0.4, 0.8]
for row in dataset:
    yhat = predict(row, coef)
    # Show the stored target next to the model's prediction.
    print("Expected=%.3f, Predicted=%.3f" % (row[-1], yhat))

Expected=1.000, Predicted=1.200
Expected=3.000, Predicted=2.000
Expected=3.000, Predicted=3.600
Expected=2.000, Predicted=2.800
Expected=5.000, Predicted=4.400


### Estimating Coefficients

Stochastic gradient descent requires two parameters:

* Learning Rate: Used to limit the amount that each coefficient is corrected each time it is updated. 
* Epochs: The number of times to run through the training data while updating the coefficients.

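The coefficients are updated after every training row, where $error(t) = prediction(t) - expected(t)$:

* $b_0(t + 1) = b_0(t) - \text{learning rate} \times error(t)$
* $b_n(t + 1) = b_n(t) - \text{learning rate} \times error(t) \times x_n(t)$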

In [9]:
# Estimate linear regression coefficients using stochastic gradient descent
def coefficients_sgd(train, l_rate, n_epoch):
    """Fit linear regression coefficients with stochastic gradient descent.

    Starts from all-zero coefficients, one per column of the first training
    row (intercept plus one weight per input column). For n_epoch passes over
    train, the coefficients are corrected after every row using that row's
    prediction error scaled by l_rate. One progress line with the summed
    squared error is printed per epoch.

    Returns the list of coefficients, intercept first.
    """
    coef = [0.0] * len(train[0])
    for epoch in range(n_epoch):
        sum_error = 0
        for row in train:
            # Signed error: prediction minus the target stored in the last column.
            error = predict(row, coef) - row[-1]
            sum_error += error**2
            # Shared step size for the intercept and every weight update.
            step = l_rate * error
            coef[0] -= step
            for idx, value in enumerate(row[:-1], start=1):
                coef[idx] -= step * value
        print('>epoch=%d, lrate=%.3f, error=%.3f' % (epoch, l_rate, sum_error))
    return coef

In [11]:
# Estimate coefficients for the contrived [x, y] dataset with SGD.
dataset = [[1, 1], [2, 3], [4, 3], [3, 2], [5, 5]]
# Learning rate and number of passes over the data handed to coefficients_sgd.
l_rate = 0.001
n_epoch = 50
# coefficients_sgd prints one progress line per epoch before returning.
coef = coefficients_sgd(dataset, l_rate, n_epoch)
# Final coefficients, intercept first.
print(coef)

>epoch=0, lrate=0.001, error=46.236
>epoch=1, lrate=0.001, error=41.305
>epoch=2, lrate=0.001, error=36.930
>epoch=3, lrate=0.001, error=33.047
>epoch=4, lrate=0.001, error=29.601
>epoch=5, lrate=0.001, error=26.543
>epoch=6, lrate=0.001, error=23.830
>epoch=7, lrate=0.001, error=21.422
>epoch=8, lrate=0.001, error=19.285
>epoch=9, lrate=0.001, error=17.389
>epoch=10, lrate=0.001, error=15.706
>epoch=11, lrate=0.001, error=14.213
>epoch=12, lrate=0.001, error=12.888
>epoch=13, lrate=0.001, error=11.712
>epoch=14, lrate=0.001, error=10.668
>epoch=15, lrate=0.001, error=9.742
>epoch=16, lrate=0.001, error=8.921
>epoch=17, lrate=0.001, error=8.191
>epoch=18, lrate=0.001, error=7.544
>epoch=19, lrate=0.001, error=6.970
>epoch=20, lrate=0.001, error=6.461
>epoch=21, lrate=0.001, error=6.009
>epoch=22, lrate=0.001, error=5.607
>epoch=23, lrate=0.001, error=5.251
>epoch=24, lrate=0.001, error=4.935
>epoch=25, lrate=0.001, error=4.655
>epoch=26, lrate=0.001, error=4.406
>epoch=27, lrate=0.001,

### Wine Quality Case Study

In [13]:
# Linear Regression Algorithm with Stochastic Gradient Descent
def linear_regression_sgd(train, test, l_rate, n_epoch):
    """Train on `train` with SGD, then predict every row of `test`.

    The coefficients are fitted by coefficients_sgd(train, l_rate, n_epoch);
    the return value is a list with one prediction per test row, in order.
    """
    coef = coefficients_sgd(train, l_rate, n_epoch)
    return [predict(row, coef) for row in test]

In [15]:
# Linear regression with SGD on the white wine quality dataset.
# Seed Python's random module — presumably so the fold split is reproducible;
# TODO confirm that the harness draws from `random`.
seed(1)

# Load the CSV and convert every column to float.
# NOTE(review): the return value of str_column_to_float is discarded, so it
# presumably converts `dataset` in place — confirm against the helper.
filename = './data/winequality-white.csv'
dataset = load_csv(filename)
for i in range(len(dataset[0])):
    str_column_to_float(dataset, i)

# Normalize using the per-column statistics returned by dataset_minmax.
# NOTE(review): normalize_dataset's return value is discarded, so it presumably
# rescales `dataset` in place; every column is passed through, so the target
# column is likely rescaled too and the RMSE below would be on that scale — confirm.
minmax = dataset_minmax(dataset)
normalize_dataset(dataset, minmax)

# Evaluate the algorithm with the k-fold regression harness.
# linear_regression_sgd trains via coefficients_sgd, which prints one line per
# epoch, so this cell produces a long training log.
n_folds = 5
l_rate = 0.01
n_epoch = 50
scores = evaluate_algorithm_kfold_reg(dataset, linear_regression_sgd, n_folds, l_rate, n_epoch)
print('Scores: %s' % scores)
# Arithmetic mean of the scores returned by the harness.
print('Mean RMSE: %.3f' % (sum(scores)/float(len(scores))))

>epoch=0, lrate=0.010, error=79.062
>epoch=1, lrate=0.010, error=67.512
>epoch=2, lrate=0.010, error=65.919
>epoch=3, lrate=0.010, error=65.037
>epoch=4, lrate=0.010, error=64.511
>epoch=5, lrate=0.010, error=64.185
>epoch=6, lrate=0.010, error=63.975
>epoch=7, lrate=0.010, error=63.836
>epoch=8, lrate=0.010, error=63.742
>epoch=9, lrate=0.010, error=63.676
>epoch=10, lrate=0.010, error=63.628
>epoch=11, lrate=0.010, error=63.593
>epoch=12, lrate=0.010, error=63.566
>epoch=13, lrate=0.010, error=63.544
>epoch=14, lrate=0.010, error=63.527
>epoch=15, lrate=0.010, error=63.512
>epoch=16, lrate=0.010, error=63.500
>epoch=17, lrate=0.010, error=63.489
>epoch=18, lrate=0.010, error=63.479
>epoch=19, lrate=0.010, error=63.469
>epoch=20, lrate=0.010, error=63.461
>epoch=21, lrate=0.010, error=63.452
>epoch=22, lrate=0.010, error=63.444
>epoch=23, lrate=0.010, error=63.437
>epoch=24, lrate=0.010, error=63.430
>epoch=25, lrate=0.010, error=63.423
>epoch=26, lrate=0.010, error=63.416
>epoch=27, 

## Future Work

* Tune The Example. Tune the learning rate, number of epochs and even the data
preparation method to get an improved score on the Wine Quality dataset.
* Batch Stochastic Gradient Descent. Change the stochastic gradient descent algorithm
to accumulate updates across each epoch and only update the coefficients in a batch at
the end of the epoch.
* Additional Regression Problems. Apply the technique to other regression problems
on the UCI machine learning repository.