# Regression example with a neural network


This example is taken from: https://www.tensorflow.org/tutorials/keras/basic_regression

# Exercises
First run the script to the end.

 * Read through the script and try to understand the code (see the Keras documentation at https://www.tensorflow.org/guide/keras)
 * Do you think that 200 epochs is enough, or too many?
 * Try modifying the layers in the neural network to reduce the loss further. (Or just remove some layers to see what happens)



In [None]:
from __future__ import absolute_import, division, print_function

import pathlib

import matplotlib.pyplot as plt
import pandas as pd

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Report which TensorFlow version this run is using.
print(tf.__version__)


## IO section
Get the data set and clean it up
The data is downloaded as a plain-text table with space-separated columns. It is read using the pandas module:
https://pandas.pydata.org/

The data set came from the UCI machine learning archive: https://archive.ics.uci.edu/ml/datasets/auto+mpg

"The data concerns city-cycle fuel consumption in miles per gallon, to be predicted in terms of 3 multivalued discrete and 5 continuous attributes." (Quinlan, 1993)

In [None]:

# Download the Auto MPG data file from the UCI archive. get_file() stores the
# download in the local Keras cache and returns the path of the cached file.
dataset_path = keras.utils.get_file(
    "auto-mpg.data",
    "https://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data")

column_names = ['MPG', 'Cylinders', 'Displacement', 'Horsepower', 'Weight',
                'Acceleration', 'Model Year', 'Origin']

# Column names are supplied explicitly via names=. Fields are separated by
# spaces (repeated spaces are skipped), "?" marks a missing value, and
# everything after a tab is ignored as a comment (in this file that is
# presumably the trailing car-name field -- verify against the data).
raw_dataset = pd.read_csv(dataset_path, names=column_names,
                          na_values="?", comment='\t',
                          sep=" ", skipinitialspace=True)

# Work on a copy so the raw table stays available unchanged.
dataset = raw_dataset.copy()
print(dataset.tail())



## Clean the data up

In [None]:
# Report how many values are missing in each column, then discard every row
# that has at least one missing value.
missing_counts = dataset.isna().sum()
print(missing_counts)
dataset = dataset.dropna()

In [None]:

# 'Origin' is a categorical code, not a quantity, so replace it with one
# 0.0/1.0 indicator column per code value (one-hot encoding).
origin = dataset.pop('Origin')

for origin_code, region in ((1, 'USA'), (2, 'Europe'), (3, 'Japan')):
  dataset[region] = (origin == origin_code) * 1.0

# Print the result explicitly: a bare expression in the middle of a cell or
# script is silently discarded.
print(dataset.tail())

# Draw a random 80% of the rows for training (fixed seed, so the split is
# repeatable); the rows that were not drawn form the test set.
train_dataset = dataset.sample(frac=0.8, random_state=0)
test_dataset = dataset.drop(train_dataset.index)


Normalize the data

In [None]:

# Summary statistics of the training columns. The label column (MPG) is
# removed so only the input features are described. After transposing, each
# row is a feature and each column is a statistic.
train_stats = train_dataset.describe()
del train_stats["MPG"]
train_stats = train_stats.T
print(train_stats)

# Separate the target (MPG) from the input features in both splits.
test_labels = test_dataset.pop('MPG')
train_labels = train_dataset.pop('MPG')


def norm(x):
  """Standardize the columns of x with the training-set statistics.

  Each column has the training mean subtracted and is divided by the
  training standard deviation, both read from the module-level
  ``train_stats`` table. The same statistics are used for every input.

  Args:
    x: Table whose columns match the feature names in ``train_stats``.

  Returns:
    The standardized table.
  """
  feature_mean = train_stats['mean']
  feature_std = train_stats['std']
  return (x - feature_mean) / feature_std

# Standardize both splits with the statistics of the training split.
normed_train_data, normed_test_data = norm(train_dataset), norm(test_dataset)


# Define the Neural Network

In [None]:

def build_model():
  """Create and compile the regression network.

  Two fully connected ReLU layers of 64 units feed a single output unit
  with no activation. The input width is the number of columns in the
  module-level ``train_dataset``. The model is compiled with mean squared
  error as the loss, an RMSprop optimizer constructed with 0.001, and mean
  absolute error plus mean squared error as reported metrics.

  Returns:
    The compiled ``keras.Sequential`` model.
  """
  n_features = len(train_dataset.keys())

  network = keras.Sequential()
  network.add(layers.Dense(64, activation=tf.nn.relu, input_shape=[n_features]))
  network.add(layers.Dense(64, activation=tf.nn.relu))
  network.add(layers.Dense(1))

  rmsprop = tf.keras.optimizers.RMSprop(0.001)

  network.compile(loss='mean_squared_error',
                  optimizer=rmsprop,
                  metrics=['mean_absolute_error', 'mean_squared_error'])
  return network


# Build the compiled network that the following cells train and evaluate.
model = build_model()

In [None]:
# summary() prints the layer table itself and returns None, so it must not be
# wrapped in print() (that would add a stray "None" line to the output).
model.summary()

##  Train the neural network

In [None]:
# Number of passes over the training data.
EPOCHS = 200

# Alternative: hold out 20% of the training data for validation and silence
# the per-epoch log:
# history = model.fit(normed_train_data, train_labels, epochs=EPOCHS,
#                     validation_split=0.2, verbose=0)

# Fit on the normalized training features. The returned object records the
# per-epoch values of the loss and the metrics in its .history dict.
history = model.fit(x=normed_train_data, y=train_labels, epochs=EPOCHS)



In [None]:
# List the quantities recorded during training; these are the keys that can
# be plotted from history.history.
print(history.history.keys())


# Plot the results of training

In [None]:
# Both curves come from the training data: fit() above is called without a
# validation split, so there is no validation curve. Label each curve with
# the quantity it actually shows.
plt.plot(history.history['loss'], label='train loss (MSE)')
plt.plot(history.history['mean_absolute_error'], label='train mean absolute error')
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
# Clip the y-axis to 0-20 so the later epochs remain readable.
plt.ylim([0, 20])

plt.legend(loc='upper left')
plt.show()

# See the final error

In [None]:
# Score the trained model on the held-out test split. evaluate() returns the
# loss followed by the metrics in the order they were given to compile().
test_scores = model.evaluate(normed_test_data, test_labels, verbose=0)
loss, mae, mse = test_scores

report_template = "Testing set Mean Abs Error: {:5.2f} MPG"
print(report_template.format(mae))


# Another way to check the model

In [None]:
# Predict MPG for the test split; flatten() collapses the prediction array to
# one dimension so it can be plotted against the labels.
test_predictions = model.predict(normed_test_data).flatten()

plt.scatter(test_labels, test_predictions)
plt.xlabel('True Values [MPG]')
plt.ylabel('Predictions [MPG]')
# Use the same scale on both axes so a perfect prediction lies on the diagonal.
plt.axis('equal')
plt.axis('square')
plt.xlim([0, plt.xlim()[1]])
plt.ylim([0, plt.ylim()[1]])
# Reference line y = x: points on it are predicted exactly.
_ = plt.plot([-100, 100], [-100, 100])
# Display the figure; without this nothing appears when run as a script.
plt.show()
