# Keras regression

An example of how to use Keras to solve a regression task and to explore the differences between model configurations.

In [None]:
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.models import Model
import numpy as np
import matplotlib.pyplot as plt

## Downloading data

The data are downloaded from the URL below. Pandas can load data from a local file or download it from the internet.

https://archive.ics.uci.edu/ml/datasets/auto+mpg

In [None]:
# The CSV file carries no header row, so the column names are supplied separately.
COLUMN_NAMES = ['MPG', 'Cylinders', 'Displacement', 'Horsepower', 'Weight', 'Acceleration', 'Model Year', 'Origin']
# Location of the raw data file.
URL = 'http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data'

# names            - assign the column names defined above
# na_values        - unknown values are marked by '?'; pandas reads them as NaN
# sep              - values are separated by a space
# skipinitialspace - skip additional spaces that follow a separator
# comment          - ignore everything from a tab onwards (the vehicle names, not needed here)
dataset = pd.read_csv(
    URL,
    names=COLUMN_NAMES,
    na_values='?',
    sep=' ',
    skipinitialspace=True,
    comment='\t')

Display downloaded data.

In [None]:
# a bare expression on the last line of a cell is rendered by the notebook
dataset

## Cleaning the data

Show how many unknown values are in the data.

In [None]:
# count the missing (NaN) values in every column
dataset.isna().sum()

Delete records with unknown data.

In [None]:
# drop every row that contains at least one missing value
dataset = dataset.dropna()
# check the result: the per-column count of missing values after the cleanup
display(dataset.isna().sum())

## Standardization

$$ {X - \mu \over \sigma} $$

Where:
* $ \mu $ is mean 
* $ \sigma $ is standard deviation 

In [None]:
def standardize(data, reference=None):
    """Scale ``data`` to zero mean and unit standard deviation.

    Args:
        data: ``pandas.Series`` or ``pandas.DataFrame`` to scale.
        reference: optional Series/DataFrame whose mean and standard deviation
            are used for the scaling, e.g. the training split when the test
            split is being scaled. When omitted, the statistics of ``data``
            itself are used.

    Returns:
        Object of the same type as ``data`` holding ``(data - mean) / std``.
    """
    # Without an explicit reference the data is scaled by its own statistics,
    # which keeps existing one-argument calls working unchanged.
    if reference is None:
        reference = data
    return (data - reference.mean()) / reference.std()

## Linear regression with one feature

First we will use neural network with only one fully connected unit.

Split the data into train and test sets, assigning 80% of the data to the train set.

In [None]:
# randomly draw 80% of the rows for training; the fixed seed makes the draw repeatable
train_dataset = dataset.sample(random_state=42, frac=0.8)
# the rows that were not drawn form the test set
test_dataset = dataset.drop(index=train_dataset.index)

Extracting labels.

In [None]:
# pop() removes the 'MPG' column from each table and returns it, so the tables
# keep only the input features and the returned Series become the labels
train_labels, test_labels = train_dataset.pop('MPG'), test_dataset.pop('MPG')

Standardize input feature - horsepower.

In [None]:
# standardize the training horsepower with its own mean and standard deviation
train_features = standardize(train_dataset['Horsepower'])
# Scale the test horsepower with the TRAINING mean/std. Using the test split's
# own statistics would put train and test inputs on different scales and let
# information about the test set influence the preprocessing.
test_features = (
    (test_dataset['Horsepower'] - train_dataset['Horsepower'].mean())
    / train_dataset['Horsepower'].std()
)

In [None]:
# show the standardized training feature
train_features

### Setting up the model

I will use a single-layer model with one unit and no activation function. This is basically a linear regression model:
$$ y = wx + b $$

In [None]:
# set up Input layer - input data will have one feature
input_layer = Input(shape=(1,))
# add layer with 1 unit and without activation function
output_layer = Dense(1)(input_layer)
# create model
horsepower_model = Model(inputs = input_layer, outputs = output_layer)

Summary of the model.

In [None]:
# print the layers and parameter counts of the model
horsepower_model.summary()

### Prediction with untrained model

Function for plotting results.

In [None]:
def plot_horsepower(x, y, features=None, labels=None):
    """Plot the data points together with the model's prediction curve.

    Args:
        x: feature values at which the model was evaluated.
        y: model predictions for ``x``.
        features: values for the X axis of the scatter plot. Defaults to the
            notebook-level ``train_features``.
        labels: values for the Y axis of the scatter plot. Defaults to the
            notebook-level ``train_labels``.
    """
    # Fall back to the notebook globals so existing two-argument calls keep
    # working; passing the data explicitly avoids depending on whatever
    # ``train_features`` happens to be bound to when the cell is re-run.
    if features is None:
        features = train_features
    if labels is None:
        labels = train_labels
    # scatter plot with features on X and labels on Y axis
    plt.scatter(features, labels, label='Data')
    # plot the regression line
    plt.plot(x, y, color='k', label='Predictions')
    # setup the labels
    plt.xlabel('Horsepower')
    plt.ylabel('MPG')
    plt.legend()

Plot the results.

In [None]:
# 250 evenly spaced points covering the range of the training feature;
# the regression line is drawn through the predictions at these points
x = np.linspace(start=train_features.min(), stop=train_features.max(), num=250)
# the model has not been trained yet, so it predicts with its initial random weights
y = horsepower_model.predict(x)

plot_horsepower(x, y)

### Model training

Setting up model for training.

In [None]:
horsepower_model.compile(
    # mean absolute error as the loss - less sensitive to outliers than MSE
    loss='mean_absolute_error',
    # Adam optimizer with a learning rate of 0.1
    optimizer=keras.optimizers.Adam(learning_rate=0.1))

In [None]:
%%time
history = horsepower_model.fit(
    # train data
    train_features,
    # labels
    train_labels,
    # epoch count
    epochs=100,
    # log level
    verbose=1,
    # 20% of train data take as validation set 
    validation_split = 0.2)

Plotting loss function

In [None]:
def plot_loss(history):
    """Plot the training and validation loss recorded during ``fit``.

    Args:
        history: object whose ``history`` dict holds the ``'loss'`` and
            ``'val_loss'`` series (as returned by ``Model.fit`` in this
            notebook).
    """
    # one curve per recorded series; the dict key doubles as the legend label
    for series_name in ('loss', 'val_loss'):
        plt.plot(history.history[series_name], label=series_name)
    plt.xlabel('Epoch')
    plt.ylabel('Error [MPG]')
    # limit the Y axis to the 0-10 range
    plt.ylim([0, 10])
    plt.legend()
    plt.grid(True)

In [None]:
# loss curves of the single-feature linear model
plot_loss(history)

### Evaluating results

In [None]:
# 250 evenly spaced points across the range of the training feature
x = np.linspace(start=train_features.min(), stop=train_features.max(), num=250)
# this time the predictions come from the trained model
y = horsepower_model.predict(x)

plot_horsepower(x, y)

Evaluating model on test data.

In [None]:
# compute the loss (mean absolute error) on the held-out test data, without progress output
error_on_test_data = horsepower_model.evaluate(x=test_features, y=test_labels, verbose=0)
print(error_on_test_data)

In [None]:
# collects the test error of every model, keyed by model name
test_results = {'horsepower_model': error_on_test_data}

## Linear regression with multiple features

Now we can check whether the model gets better when it uses more features.

In [None]:
# (rows, columns) of the training feature table
train_dataset.shape

In [None]:
# we are using all features we have, not just horsepower like before
train_features = standardize(train_dataset)
test_features = standardize(test_dataset)

### Setting up the model

The only difference is the number of input features - 7 instead of 1.

In [None]:
# now we have 7 input features instead of one!
input_layer = Input(shape=(7,))
# add layer with 1 unit and without activation function
output_layer = Dense(1)(input_layer)
# create model
linear_model = Model(inputs = input_layer, outputs = output_layer)

In [None]:
# print the layers and parameter counts of the model
linear_model.summary()

### Model training

In [None]:
linear_model.compile(
    # mean absolute error as the loss function
    loss='mean_absolute_error',
    # Adam optimizer with a learning rate of 0.1
    optimizer=keras.optimizers.Adam(learning_rate=0.1))

In [None]:
%%time
history = linear_model.fit(
    train_features,
    train_labels,
    epochs=100,
    verbose=1,
    validation_split = 0.2)

In [None]:
# loss curves of the multi-feature linear model
plot_loss(history)

### Evaluating results

In [None]:
# compute the loss (mean absolute error) on the held-out test data, without progress output
error_on_test_data = linear_model.evaluate(x=test_features, y=test_labels, verbose=0)
print(error_on_test_data)

In [None]:
# remember the test error of the multi-feature linear model for the final comparison
test_results['linear_model'] = error_on_test_data

## Deep neural network with only single input feature

What if we use a deep non-linear model with only a single input feature?

In [None]:
# standardize the training horsepower with its own mean and standard deviation
train_features = standardize(train_dataset['Horsepower'])
# Scale the test horsepower with the TRAINING mean/std. Using the test split's
# own statistics would put train and test inputs on different scales and let
# information about the test set influence the preprocessing.
test_features = (
    (test_dataset['Horsepower'] - train_dataset['Horsepower'].mean())
    / train_dataset['Horsepower'].std()
)

### Setting up the model

Using a more complicated architecture - with hidden layers, multiple units and ReLU activation functions.

In [None]:
# we are using only horse power again
input_layer = Input(shape=(1,))
# adding relu as an activation function for hidden layers
x = Dense(64, activation='relu')(input_layer)
x = Dense(64, activation='relu')(x)
# output layer for the regression is always single unit Dense layer without activation function
output_layer = Dense(1)(x)
dnn_horsepower_model = Model(inputs = input_layer, outputs = output_layer)

In [None]:
# print the layers and parameter counts of the model
dnn_horsepower_model.summary()

### Model training

In [None]:
dnn_horsepower_model.compile(
    # Adam optimizer with its default settings
    optimizer=keras.optimizers.Adam(),
    # mean absolute error as the loss function
    loss='mean_absolute_error')

In [None]:
%%time
history = dnn_horsepower_model.fit(
    train_features,
    train_labels,
    validation_split=0.2,
    verbose=1, 
    epochs=100)

In [None]:
# loss curves of the single-feature deep model
plot_loss(history)

### Evaluating results

In [None]:
# compute the loss (mean absolute error) on the held-out test data, without progress output
error_on_test_data = dnn_horsepower_model.evaluate(x=test_features, y=test_labels, verbose=0)
print(error_on_test_data)

In [None]:
# remember the test error of the single-feature deep model for the final comparison
test_results['dnn_horsepower_model'] = error_on_test_data

Plotting the prediction line.

In [None]:
# 250 evenly spaced points across the range of the training feature
x = np.linspace(start=train_features.min(), stop=train_features.max(), num=250)
# predictions of the trained deep model at those points
y = dnn_horsepower_model.predict(x)

plot_horsepower(x, y)

## Deep neural network with multiple input features

Last but not least - deep neural network with all input features we can use.

In [None]:
# standardize every training column with its own mean and standard deviation
train_features = standardize(train_dataset)
# Scale the test split with the TRAINING mean/std of every column. Using the
# test split's own statistics would put train and test inputs on different
# scales and let information about the test set influence the preprocessing.
test_features = (test_dataset - train_dataset.mean()) / train_dataset.std()

### Setting up the model

In [None]:
# Take the input width from the feature table (7 columns here) instead of
# hard-coding it, so the model keeps matching the data if the columns change.
input_layer = Input(shape=(train_features.shape[1],))
# two hidden layers with 64 units each and ReLU as the activation function
x = Dense(64, activation='relu')(input_layer)
x = Dense(64, activation='relu')(x)
# the regression output is a single Dense unit without an activation function
output_layer = Dense(1)(x)
dnn_model = Model(inputs=input_layer, outputs=output_layer)

In [None]:
# print the layers and parameter counts of the model
dnn_model.summary()

### Model training

In [None]:
dnn_model.compile(
    # Adam optimizer with its default settings
    optimizer=keras.optimizers.Adam(),
    # mean absolute error as the loss function
    loss='mean_absolute_error')

In [None]:
%%time
history = dnn_model.fit(
    train_features,
    train_labels,
    validation_split=0.2,
    verbose=1, 
    epochs=100)

In [None]:
# loss curves of the multi-feature deep model
plot_loss(history)

### Evaluating results

In [None]:
# compute the loss (mean absolute error) on the held-out test data, without progress output
error_on_test_data = dnn_model.evaluate(x=test_features, y=test_labels, verbose=0)
print(error_on_test_data)

In [None]:
# remember the test error of the multi-feature deep model for the final comparison
test_results['dnn_model'] = error_on_test_data

## Total result

Summary of our models' performance.

In [None]:
# build a one-row DataFrame (one column per model) from the result dictionary,
# then transpose it so that every model gets its own row in the table
pd.DataFrame(data=test_results, index=['Mean absolute error [MPG]']).transpose()