In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Dense, Normalization
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from IPython.display import display

In [None]:
# Location and column layout of the Auto MPG data file.
url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data'
column_names = ['MPG', 'Cylinders', 'Displacement', 'Horsepower', 'Weight', 'Acceleration', 'Model Year', 'Origin']

# The file is space separated, uses '?' for missing values, and everything
# after a tab on each line is discarded via `comment='\t'`.
raw_dataset = pd.read_csv(url, names=column_names, na_values='?', comment='\t', sep=' ', skipinitialspace=True)

# Work on a copy so `raw_dataset` stays untouched, and drop incomplete rows.
dataset = raw_dataset.copy()
dataset = dataset.dropna()

# 'Origin' is a numeric category code: give it readable names, then one-hot encode it.
dataset['Origin'] = dataset['Origin'].map({1: 'USA', 2: 'Europe', 3: 'Japan'})
# dtype=float keeps the one-hot columns numeric. Recent pandas versions default
# to bool here, which turns np.array(train_features) into an object array that
# the Normalization layer cannot adapt to.
dataset = pd.get_dummies(dataset, columns=['Origin'], prefix='', prefix_sep='', dtype=float)

# A bare expression in the middle of a cell is not rendered, so display it explicitly.
display(dataset.tail())

# Reproducible 80/20 split: sample the training rows, keep the rest for testing.
train_dataset = dataset.sample(frac=0.8, random_state=0)
test_dataset = dataset.drop(train_dataset.index)

train_features = train_dataset.copy()
test_features = test_dataset.copy()

# Separate the regression target (MPG) from the input features.
train_labels = train_features.pop('MPG')
test_labels = test_features.pop('MPG')
test_results = {}

# Normaliser over every feature column, fitted on the training data only.
normalizer = Normalization(axis=-1)
normalizer.adapt(np.array(train_features))
# Separate scalar normaliser for the single-feature (horsepower-only) model.
horsepower = np.array(train_features['Horsepower'])
horsepower_normalizer = Normalization(input_shape=[1,], axis=None)
horsepower_normalizer.adapt(horsepower)

def plot_loss(history, subplot_rows, subplot_columns, subplot_place):
    """Draw the training and validation loss curves in one subplot slot.

    Args:
        history: Object whose ``history`` attribute maps ``'loss'`` and
            ``'val_loss'`` to per-epoch sequences.
        subplot_rows: Number of rows in the subplot grid.
        subplot_columns: Number of columns in the subplot grid.
        subplot_place: Position of this plot inside the grid.
    """
    ax = plt.subplot(subplot_rows, subplot_columns, subplot_place)
    ax.set_title("Training history")

    curves = history.history
    for series_name in ('loss', 'val_loss'):
        ax.plot(curves[series_name], label=series_name)

    ax.set_xlabel('Epoch')
    ax.set_ylabel('Error [MPG]')
    # Fixed vertical range so that different runs can be compared visually.
    ax.set_ylim([0, 10])
    ax.legend()
    ax.grid(True)

def plot_horsepower_predictions(x, y, subplot_rows, subplot_columns, subplot_place):
    """Overlay a prediction curve on the horsepower/MPG training scatter.

    The scatter points are read from the notebook-level ``train_features``
    and ``train_labels`` variables.

    Args:
        x: Horsepower values at which predictions were made.
        y: Predictions corresponding to ``x``.
        subplot_rows: Number of rows in the subplot grid.
        subplot_columns: Number of columns in the subplot grid.
        subplot_place: Position of this plot inside the grid.
    """
    ax = plt.subplot(subplot_rows, subplot_columns, subplot_place)
    ax.set_title("Predictions over real data")

    observed_horsepower = train_features['Horsepower']
    ax.scatter(observed_horsepower, train_labels, label='Data')
    ax.plot(x, y, color='k', label='Predictions')

    ax.set_xlabel('Horsepower')
    ax.set_ylabel('MPG')
    ax.legend()
    
def plot_predictions(test_predictions, test_labels, subplot_rows, subplot_columns, subplot_place):
    """Scatter predicted values against true values, with a y = x guide line.

    Args:
        test_predictions: Model outputs for the test set.
        test_labels: True values for the test set.
        subplot_rows: Number of rows in the subplot grid.
        subplot_columns: Number of columns in the subplot grid.
        subplot_place: Position of this plot inside the grid.
    """
    ax = plt.subplot(subplot_rows, subplot_columns, subplot_place)
    ax.set_title("Model predictions vs. real test labels")
    ax.scatter(test_labels, test_predictions)
    ax.set_xlabel('True Values [MPG]')
    ax.set_ylabel('Predictions [MPG]')

    # Same range on both axes so a perfect prediction lies on the diagonal.
    axis_range = [0, 50]
    ax.set_xlim(axis_range)
    ax.set_ylim(axis_range)
    ax.plot(axis_range, axis_range, color='lightblue')

def error_distribution(test_predictions, test_labels, subplot_rows, subplot_columns, subplot_place):
    """Plot a 25-bin histogram of the prediction errors (prediction - label).

    Args:
        test_predictions: Model outputs for the test set.
        test_labels: True values for the test set.
        subplot_rows: Number of rows in the subplot grid.
        subplot_columns: Number of columns in the subplot grid.
        subplot_place: Position of this plot inside the grid.
    """
    # Signed error: positive values mean the prediction exceeded the label.
    residuals = test_predictions - test_labels

    ax = plt.subplot(subplot_rows, subplot_columns, subplot_place)
    ax.set_title("Error distribution")
    ax.hist(residuals, bins=25)
    ax.set_xlabel('Prediction Error [MPG]')
    ax.set_ylabel('Count')

# Render the training feature table so the model inputs can be inspected.
display(train_features)

The user should edit this function:

In [1]:
def build_and_compile_model(norm, learning_rate=0.001):
    """Build and compile a small fully connected regression network.

    Args:
        norm: Layer placed first in the stack; the callers in this notebook
            pass an already adapted ``Normalization`` layer.
        learning_rate: Learning rate handed to the Adam optimizer. Defaults
            to 0.001, the value that used to be hard-coded in the body.

    Returns:
        A compiled ``Sequential`` model: ``norm``, two 64-unit ReLU hidden
        layers and a single-unit output, trained on mean absolute error.
    """
    model = Sequential([
        norm,
        Dense(64, activation='relu'),
        Dense(64, activation='relu'),
        Dense(1),
    ])

    model.compile(
        loss='mean_absolute_error',
        optimizer=Adam(learning_rate=learning_rate),
    )
    return model

Training only on horsepower:

In [None]:
# Single-input network: horsepower is the only feature it sees.
dnn_horsepower_model = build_and_compile_model(horsepower_normalizer)
dnn_horsepower_model.summary()

# Train silently for 100 epochs, reserving 20% of the training data for validation.
print("The model is training, please wait...")
history = dnn_horsepower_model.fit(
    train_features['Horsepower'],
    train_labels,
    epochs=100,
    verbose=0,
    validation_split=0.2,
)
print("Finished! Printing results")

# Evenly spaced horsepower grid used to draw the fitted curve.
x = tf.linspace(0.0, 250, 251)
y = dnn_horsepower_model.predict(x)

# Start a fresh results table containing this model's test-set error.
test_results = {
    'dnn_horsepower_model': dnn_horsepower_model.evaluate(
        test_features['Horsepower'], test_labels, verbose=0
    ),
}
display(pd.DataFrame(test_results, index=['Mean absolute error [MPG]']).transpose())

test_predictions = dnn_horsepower_model.predict(test_features['Horsepower']).flatten()

# One wide figure with three panels: loss history, fitted curve, error histogram.
fig = plt.figure(figsize=(15, 4))
plot_loss(history, 1, 3, 1)
plot_horsepower_predictions(x, y, 1, 3, 2)
error_distribution(test_predictions, test_labels, 1, 3, 3)

plt.show()

Training on all features:

In [None]:
# Same architecture, this time fed every feature column.
dnn_model = build_and_compile_model(normalizer)
dnn_model.summary()

# Train silently for 100 epochs, reserving 20% of the training data for validation.
print("The model is training, please wait...")
history = dnn_model.fit(
    train_features,
    train_labels,
    epochs=100,
    verbose=0,
    validation_split=0.2,
)
print("Finished! Printing results")

# Add this model's test-set error to the results collected so far.
test_results.update({
    'dnn_model': dnn_model.evaluate(test_features, test_labels, verbose=0),
})
display(pd.DataFrame(test_results, index=['Mean absolute error [MPG]']).transpose())

test_predictions = dnn_model.predict(test_features).flatten()

# One wide figure with three panels: loss history, predicted vs. true, error histogram.
fig = plt.figure(figsize=(15, 4))
plot_loss(history, 1, 3, 1)
plot_predictions(test_predictions, test_labels, 1, 3, 2)
error_distribution(test_predictions, test_labels, 1, 3, 3)

plt.show()

This is some filler text. It should get removed soon.