In [1]:
import tensorflow as tf
import pandas as pd

In [2]:
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import losses

In [3]:
dataset_path = keras.utils.get_file("auto-mpg.data", "http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data")

In [4]:
column_names = ['MPG','Cylinders','Displacement','Horsepower','Weight', 'Acceleration', 'Model Year', 'Origin']

In [5]:
Raw_Dataset = pd.read_csv(dataset_path, names=column_names,na_values = "?", comment='\t',sep=" ", skipinitialspace=True)

In [6]:
dataset = Raw_Dataset.copy()
dataset.head()

Unnamed: 0,MPG,Cylinders,Displacement,Horsepower,Weight,Acceleration,Model Year,Origin
0,18.0,8,307.0,130.0,3504.0,12.0,70,1
1,15.0,8,350.0,165.0,3693.0,11.5,70,1
2,18.0,8,318.0,150.0,3436.0,11.0,70,1
3,16.0,8,304.0,150.0,3433.0,12.0,70,1
4,17.0,8,302.0,140.0,3449.0,10.5,70,1


In [7]:
dataset.isna().sum() # Calculate the number of missing values
dataset = dataset.dropna() # Drop missing value records
dataset.isna().sum() #

MPG             0
Cylinders       0
Displacement    0
Horsepower      0
Weight          0
Acceleration    0
Model Year      0
Origin          0
dtype: int64

In [8]:
origin = dataset.pop('Origin')
dataset['USA'] = (origin == 1)*1.0
dataset['Europe'] = (origin == 2)*1.0
dataset['Japan'] = (origin == 3)*1.0
dataset.tail()

Unnamed: 0,MPG,Cylinders,Displacement,Horsepower,Weight,Acceleration,Model Year,USA,Europe,Japan
393,27.0,4,140.0,86.0,2790.0,15.6,82,1.0,0.0,0.0
394,44.0,4,97.0,52.0,2130.0,24.6,82,0.0,1.0,0.0
395,32.0,4,135.0,84.0,2295.0,11.6,82,1.0,0.0,0.0
396,28.0,4,120.0,79.0,2625.0,18.6,82,1.0,0.0,0.0
397,31.0,4,119.0,82.0,2720.0,19.4,82,1.0,0.0,0.0


In [9]:
train_dataset = dataset.sample(frac=0.8,random_state=0)
test_dataset = dataset.drop(train_dataset.index)

In [10]:
train_labels = train_dataset.pop('MPG')
test_labels = test_dataset.pop('MPG')

In [11]:
train_stats = train_dataset.describe()

train_stats = train_stats.transpose()

In [12]:
def norm(x): # minus mean and divide by std
    return (x - train_stats['mean']) / train_stats['std']
normed_train_data = norm(train_dataset)
normed_test_data = norm(test_dataset)

In [13]:
print(normed_train_data.shape,train_labels.shape)
print(normed_test_data.shape, test_labels.shape)

(314, 9) (314,)
(78, 9) (78,)


In [14]:
train_db = tf.data.Dataset.from_tensor_slices((normed_train_data.values, train_labels.values))
train_db = train_db.shuffle(100).batch(32)

In [15]:
class Network(keras.Model):
# regression network
 def __init__(self):
    super(Network, self).__init__()
# create 3 fully-connected layers
    self.fc1 = layers.Dense(64, activation='relu')
    self.fc2 = layers.Dense(64, activation='relu')
    self.fc3 = layers.Dense(1)
 def call(self, inputs, training=None, mask=None):
# pass through the 3 layers sequentially
    x = self.fc1(inputs)
    x = self.fc2(x)
    x = self.fc3(x)
    return x

In [16]:
model = Network()
model.build(input_shape=(4, 9))
model.summary()

Model: "network"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               multiple                  640       
                                                                 
 dense_1 (Dense)             multiple                  4160      
                                                                 
 dense_2 (Dense)             multiple                  65        
                                                                 
Total params: 4,865
Trainable params: 4,865
Non-trainable params: 0
_________________________________________________________________


In [17]:
optimizer = tf.keras.optimizers.RMSprop(0.001)

In [18]:
for epoch in range(200): # 200 Epoch
    for step, (x,y) in enumerate(train_db): # Loop through training set once
# Set gradient tape
        with tf.GradientTape() as tape:
            out = model(x) # Get network output
            loss = tf.reduce_mean(losses.MSE(y, out))
# Calculate MSE
            mae_loss = tf.reduce_mean(losses.MAE(y, out))
# Calculate MAE
        if step % 10 == 0: # Print training loss every 10 steps
            print(epoch, step, float(loss))
# Calculate and update gradients
        grads = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))

0 0 607.0946044921875
1 0 547.747314453125
2 0 403.5117492675781
3 0 350.0345458984375
4 0 246.92588806152344
5 0 231.3605194091797
6 0 173.29437255859375
7 0 108.79281616210938
8 0 150.3138885498047
9 0 114.7806396484375
10 0 126.94346618652344
11 0 82.41068267822266
12 0 83.20850372314453
13 0 80.25990295410156
14 0 75.65640258789062
15 0 86.33999633789062
16 0 71.1882095336914
17 0 65.57893371582031
18 0 81.96446990966797
19 0 74.68051147460938
20 0 46.821083068847656
21 0 52.5870361328125
22 0 89.47389221191406
23 0 39.160858154296875
24 0 64.99615478515625
25 0 84.43963623046875
26 0 64.30469512939453
27 0 77.60485076904297
28 0 72.38444519042969
29 0 59.090911865234375
30 0 58.07189178466797
31 0 69.54924011230469
32 0 67.54106140136719
33 0 51.78081512451172
34 0 79.47235107421875
35 0 38.74066925048828
36 0 68.00235748291016
37 0 34.98283386230469
38 0 59.65888977050781
39 0 54.67547607421875
40 0 79.70903015136719
41 0 73.78872680664062
42 0 47.46501159667969
43 0 52.913047790