In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns


# Make numpy printouts easier to read.
np.set_printoptions(precision=3, suppress=True)

import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers.experimental import preprocessing

print(tf.__version__)

In [None]:
# Auto MPG data from the UCI repository. The file is read without a header
# row, so the column names are supplied explicitly.
url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data'
column_names = [
    'MPG', 'Cylinders', 'Displacement', 'Horsepower',
    'Weight', 'Acceleration', 'Model Year', 'Origin',
]

# Fields are space-separated; '?' is parsed as NaN, and anything from a tab
# character to the end of a line is treated as a comment and discarded.
raw_dataset = pd.read_csv(
    url,
    names=column_names,
    sep=' ',
    skipinitialspace=True,
    na_values='?',
    comment='\t',
)

In [None]:
# Clean the raw table: drop rows with missing values, then replace the integer
# 'Origin' code with three one-hot country columns.
# The explicit .copy() keeps raw_dataset untouched and makes this cell safe to
# re-run.
dataset = raw_dataset.dropna().copy()
dataset['Origin'] = dataset['Origin'].map({1: 'USA', 2: 'Europe', 3: 'Japan'})

# dtype=float keeps the indicator columns numeric. Recent pandas versions emit
# bool indicators by default, which would turn np.array(<features>) into an
# object array and break the Normalization layer and model.fit further down.
dataset = pd.get_dummies(dataset, columns=['Origin'], prefix='', prefix_sep='',
                         dtype=float)

dataset.head()

In [None]:
# 80/20 split: draw the training rows with a fixed seed, then use every row
# that was not drawn as the test set.
train_dataset = dataset.sample(frac=0.8, random_state=0)
test_dataset = dataset.drop(index=train_dataset.index)

In [None]:
# Pairwise scatter plots of the target and three features, with KDE curves on
# the diagonal.
sns.pairplot(
    train_dataset[['MPG', 'Cylinders', 'Displacement', 'Weight']],
    diag_kind='kde',
)

In [None]:
# Summary statistics of the training set, one row per column.
train_dataset.describe().T

In [None]:
# Separate model inputs from the regression target ('MPG').
# pop() removes the column in place, so each frame is copied first to leave
# train_dataset / test_dataset intact.
train_features = train_dataset.copy()
train_labels = train_features.pop('MPG')

test_features = test_dataset.copy()
test_labels = test_features.pop('MPG')

## Data Preprocessing

### Normalisation

In [None]:
# Create a Normalization layer and fit its statistics to the training features.
normalizer = preprocessing.Normalization()
normalizer.adapt(train_features.to_numpy())

# Show the per-feature mean the layer stored.
print(normalizer.mean.numpy())

In [None]:
# Compare the first training row before and after normalization.
first = np.array(train_features.iloc[:1])

with np.printoptions(precision=2, suppress=True):
    print('First example:', first)
    print()
    print('Normalized:', normalizer(first).numpy())

In [None]:
# Single-feature input: horsepower as a column vector (n_rows, 1).
horsepower = np.array(train_features['Horsepower']).reshape(-1, 1)

# Normalization layer for that one feature, fitted on the training values.
horsepower_normalizer = preprocessing.Normalization(input_shape=[1,])
horsepower_normalizer.adapt(horsepower)

# One-variable linear regression: normalize, then a single Dense unit.
horsepower_model = tf.keras.Sequential()
horsepower_model.add(horsepower_normalizer)
horsepower_model.add(layers.Dense(units=1))

horsepower_model.summary()

In [None]:
# Run the model on the first 10 horsepower values. At this point in the
# notebook the model has not been compiled or fitted yet.
horsepower_model.predict(horsepower[:10])

In [None]:
# Configure training: mean absolute error loss, Adam with learning rate 0.1.
horsepower_model.compile(
    loss='mean_absolute_error',
    optimizer=tf.optimizers.Adam(learning_rate=0.1),
)

In [None]:
%%time
# Train the one-variable model on horsepower alone for 100 epochs.
history = horsepower_model.fit(
    x=train_features['Horsepower'],
    y=train_labels,
    # Hold out 20% of the training data to compute validation results
    validation_split=0.2,
    # Keep the per-epoch log quiet
    verbose=0,
    epochs=100,
)

In [None]:
# Tabulate the metrics recorded by the preceding fit() call, one column per
# entry of history.history.
pd.DataFrame(history.history)

In [None]:
# Multi-input linear regression: the all-feature normalizer feeding one Dense unit.
linear_model = tf.keras.Sequential()
linear_model.add(normalizer)
linear_model.add(layers.Dense(units=1))

# Run the first 10 training rows through the model.
linear_model.predict(np.array(train_features.iloc[:10]))

In [None]:
# Inspect the weight matrix of the Dense layer (layers[0] is the normalizer).
linear_model.layers[1].kernel

In [None]:
# Same training configuration as the single-feature model:
# mean absolute error loss, Adam with learning rate 0.1.
linear_model.compile(
    loss='mean_absolute_error',
    optimizer=tf.optimizers.Adam(learning_rate=0.1),
)

In [None]:
%%time
# Train the multi-input linear model on all features for 100 epochs.
history = linear_model.fit(
    x=train_features,
    y=train_labels,
    # Hold out 20% of the training data to compute validation results
    validation_split=0.2,
    # Keep the per-epoch log quiet
    verbose=0,
    epochs=100,
)

In [None]:
# Collect held-out results per model; start from an empty dict so re-running
# this cell resets the comparison.
test_results = {}

test_results['linear_model'] = linear_model.evaluate(
    x=test_features,
    y=test_labels,
    verbose=0,
)

In [None]:
# Show the test-set results collected so far, keyed by model name.
test_results

In [None]:
def build_and_compile_model(norm, hidden_units=64, learning_rate=0.001):
    """Build and compile a small fully-connected regression network.

    The network is ``norm`` followed by two ReLU hidden layers of equal width
    and a single linear output unit. It is compiled with mean-absolute-error
    loss and the Adam optimizer.

    Args:
        norm: Layer placed first in the model. The callers in this notebook
            pass an already-adapted ``Normalization`` layer.
        hidden_units: Number of units in each of the two hidden layers.
        learning_rate: Learning rate handed to the Adam optimizer.

    Returns:
        The compiled ``keras.Sequential`` model.
    """
    model = keras.Sequential([
        norm,
        layers.Dense(hidden_units, activation='relu'),
        layers.Dense(hidden_units, activation='relu'),
        layers.Dense(1),
    ])

    model.compile(
        loss='mean_absolute_error',
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
    )
    return model

In [None]:
# Single-input DNN: the horsepower normalizer is used as the first layer.
dnn_horsepower_model = build_and_compile_model(norm=horsepower_normalizer)
dnn_horsepower_model.summary()

In [None]:
%%time
# Train the single-input DNN for 100 epochs, validating on 20% of the
# training data, with logging suppressed.
history = dnn_horsepower_model.fit(
    x=train_features['Horsepower'],
    y=train_labels,
    epochs=100,
    verbose=0,
    validation_split=0.2,
)

In [None]:
def plot_horsepower(x, y):
    """Overlay a prediction curve on the horsepower-vs-MPG training data.

    Reads the notebook-level ``train_features`` and ``train_labels`` for the
    scatter points and draws onto the current matplotlib axes.

    Args:
        x: Horsepower values for the prediction curve.
        y: Predicted MPG for each value in ``x``.
    """
    plt.xlabel('Horsepower')
    plt.ylabel('MPG')

    # Training points first, then the model's curve in black on top.
    plt.scatter(train_features['Horsepower'], train_labels, label='Data')
    plt.plot(x, y, color='k', label='Predictions')

    plt.legend()

# Evaluate the single-input DNN at 251 evenly spaced horsepower values from
# 0 to 250, then plot the curve against the training data.
x = tf.linspace(start=0.0, stop=250, num=251)
y = dnn_horsepower_model.predict(x)

plot_horsepower(x, y)

In [None]:
# Build a normalizer for all features with an explicit input shape.
# The width comes from the data instead of a hard-coded 9, so it stays correct
# if the feature columns (e.g. the one-hot 'Origin' columns) change.
# Note: this rebinds `normalizer`; `linear_model` keeps the layer object it was
# constructed with.
normalizer = preprocessing.Normalization(input_shape=[train_features.shape[1],])
normalizer.adapt(np.array(train_features))

# Full DNN on every feature.
dnn_model = build_and_compile_model(normalizer)
dnn_model.summary()

In [None]:
%%time
# Train the all-feature DNN for 100 epochs, validating on 20% of the
# training data, with logging suppressed.
history = dnn_model.fit(
    x=train_features,
    y=train_labels,
    epochs=100,
    verbose=0,
    validation_split=0.2,
)

In [None]:
def plot_loss(history):
    """Plot training and validation loss per epoch on the current axes.

    Args:
        history: Object whose ``history`` attribute is a mapping containing
            the 'loss' and 'val_loss' series (as returned by ``fit`` in this
            notebook).
    """
    # Each curve is labelled with its own key in history.history.
    for metric in ('loss', 'val_loss'):
        plt.plot(history.history[metric], label=metric)

    plt.ylim([0, 10])
    plt.xlabel('Epoch')
    plt.ylabel('Error [MPG]')
    plt.legend()
    plt.grid(True)

# Plot the curves of the most recent fit; `history` is rebound by every
# training cell above.
plot_loss(history)

In [None]:
# Record the all-feature DNN's result on the held-out test set.
test_results['dnn_model'] = dnn_model.evaluate(
    x=test_features,
    y=test_labels,
    verbose=0,
)

In [None]:
# Save the trained model to the relative path 'dnn_model'.
# NOTE(review): no file extension is given, so the on-disk format is whatever
# the installed Keras version uses for such a path — confirm before upgrading.
dnn_model.save('dnn_model')

## Trial with 950s

In [None]:
# Load the input table and keep only the rows for model "950", sales channel
# "Retail", global market "AMN", restricted to the three columns used below.
# pandas is already imported as `pd` in the first cell, so it is not
# re-imported here.
ML_input_data = pd.read_csv('./ML_input_data.csv')

# NOTE(review): the model number is compared against the string "950"; if
# read_csv infers that column as integers the filter matches no rows —
# confirm the column dtype.
retail_950s = (
    ML_input_data
    .loc[
        (ML_input_data['Forecast Sales Model Number'] == "950")
        & (ML_input_data['Sales Channel'] == "Retail")
        & (ML_input_data['Global Market'] == "AMN"),
        ['Final Usage Hours', 'Age of Machine', 'Relative Price Change'],
    ]
    # Renumber rows from 0 without materialising the old index as a column.
    .reset_index(drop=True)
)

In [None]:
# Display the filtered frame.
retail_950s

In [None]:
# Usage hours as a column vector (n_rows, 1) for the single-feature model.
retail_950_usage = np.array(retail_950s['Final Usage Hours']).reshape(-1, 1)

# One-feature Normalization layer fitted on those usage hours.
# retail_950_usage is already an ndarray, so it is passed straight to adapt().
retail_950_usage_normalizer = preprocessing.Normalization(input_shape=[1,])
retail_950_usage_normalizer.adapt(retail_950_usage)

In [None]:
def build_and_compile_model(norm):
    """Build and compile a two-hidden-layer regression network.

    Re-declares ``build_and_compile_model``, which an earlier cell of this
    notebook already defines; this later definition is the one in effect for
    the cells that follow.

    Args:
        norm: Layer placed first in the model.

    Returns:
        A ``keras.Sequential`` model (``norm`` -> Dense(64, relu) ->
        Dense(64, relu) -> Dense(1)) compiled with mean-absolute-error loss
        and Adam(0.001).
    """
    stack = [norm]
    # Two identical hidden layers, each a fresh Dense instance.
    stack += [layers.Dense(64, activation='relu') for _ in range(2)]
    stack.append(layers.Dense(1))

    model = keras.Sequential(stack)
    model.compile(
        optimizer=tf.keras.optimizers.Adam(0.001),
        loss='mean_absolute_error',
    )
    return model

In [None]:
# Single-input DNN on usage hours, with the usage normalizer as first layer.
dnn_950_retail_usage_model = build_and_compile_model(norm=retail_950_usage_normalizer)
dnn_950_retail_usage_model.summary()

In [None]:
%%time
# Fit relative price change against usage hours for 100 epochs, validating on
# 20% of the rows, with logging suppressed.
history = dnn_950_retail_usage_model.fit(
    x=retail_950s['Final Usage Hours'],
    y=retail_950s['Relative Price Change'],
    epochs=100,
    verbose=0,
    validation_split=0.2,
)

In [None]:
def plot_usage(x, y):
    """Overlay a prediction curve on the usage-vs-price-change data.

    Reads the notebook-level ``retail_950s`` frame for the scatter points and
    draws onto the current matplotlib axes.

    Args:
        x: Usage-hour values for the prediction curve.
        y: Predicted relative price change for each value in ``x``.
    """
    plt.xlabel('Usage (Hours)')
    plt.ylabel('Relative Price Change')

    # Observed points first, then the model's curve in black on top.
    plt.scatter(
        retail_950s['Final Usage Hours'],
        retail_950s['Relative Price Change'],
        label='Data',
    )
    plt.plot(x, y, color='k', label='Predictions')

    plt.legend()

# Evaluate the usage-only DNN at 200 evenly spaced points from 0 to 70000
# hours, then plot the curve against the data.
x = tf.linspace(start=0.0, stop=70000, num=200)
y = dnn_950_retail_usage_model.predict(x)

plot_usage(x, y)

In [None]:
# Two-feature input matrix: usage hours and machine age, in that column order.
retail_950_features = np.array(
    retail_950s[['Final Usage Hours', 'Age of Machine']]
)

# Normalization layer sized for those two features and fitted on them.
retail_950_normalizer = preprocessing.Normalization(input_shape=[2])
retail_950_normalizer.adapt(retail_950_features)

# DNN on both features.
retail_950_dnn_model = build_and_compile_model(norm=retail_950_normalizer)
retail_950_dnn_model.summary()

In [None]:
%%time
# Fit relative price change against both features for 100 epochs, validating
# on 20% of the rows, with logging suppressed.
history = retail_950_dnn_model.fit(
    x=retail_950_features,
    y=retail_950s['Relative Price Change'],
    epochs=100,
    verbose=0,
    validation_split=0.2,
)

In [None]:
# Predict on the same rows the model was fitted on (in-sample predictions).
retail_950_dnn_model.predict(
    np.array(
        retail_950s[['Final Usage Hours', 'Age of Machine']]
    )
)

In [None]:
# Hand-made two-value input; this is a 1-D array of shape (2,).
# NOTE(review): presumably meant as one (usage hours, age) sample — if so it
# would need reshaping to (1, 2) before being passed to predict(); confirm.
test_pred = np.array([1000,1])

In [None]:
# Check the shape of the hand-made input.
test_pred.shape

In [None]:
# Show the first row of the two-feature matrix (usage hours, machine age).
np.array(retail_950s[['Final Usage Hours', 'Age of Machine']])[0]

In [None]:
np.arra