In [9]:
import tensorflow as tf
import pandas as pd

In [38]:
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import losses

In [7]:
dataset_path = keras.utils.get_file("auto-mpg.data", "http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data")

Downloading data from http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data


In [8]:
column_names = ['MPG','Cylinders','Displacement','Horsepower','Weight', 'Acceleration', 'Model Year', 'Origin']

In [11]:
Raw_Dataset = pd.read_csv(dataset_path, names=column_names,na_values = "?", comment='\t',sep=" ", skipinitialspace=True)

In [13]:
dataset = Raw_Dataset.copy()
dataset.head()

Unnamed: 0,MPG,Cylinders,Displacement,Horsepower,Weight,Acceleration,Model Year,Origin
0,18.0,8,307.0,130.0,3504.0,12.0,70,1
1,15.0,8,350.0,165.0,3693.0,11.5,70,1
2,18.0,8,318.0,150.0,3436.0,11.0,70,1
3,16.0,8,304.0,150.0,3433.0,12.0,70,1
4,17.0,8,302.0,140.0,3449.0,10.5,70,1


In [14]:
dataset.isna().sum() # Calculate the number of missing values
dataset = dataset.dropna() # Drop missing value records
dataset.isna().sum() #

MPG             0
Cylinders       0
Displacement    0
Horsepower      0
Weight          0
Acceleration    0
Model Year      0
Origin          0
dtype: int64

In [15]:
origin = dataset.pop('Origin')
dataset['USA'] = (origin == 1)*1.0
dataset['Europe'] = (origin == 2)*1.0
dataset['Japan'] = (origin == 3)*1.0
dataset.tail()

Unnamed: 0,MPG,Cylinders,Displacement,Horsepower,Weight,Acceleration,Model Year,USA,Europe,Japan
393,27.0,4,140.0,86.0,2790.0,15.6,82,1.0,0.0,0.0
394,44.0,4,97.0,52.0,2130.0,24.6,82,0.0,1.0,0.0
395,32.0,4,135.0,84.0,2295.0,11.6,82,1.0,0.0,0.0
396,28.0,4,120.0,79.0,2625.0,18.6,82,1.0,0.0,0.0
397,31.0,4,119.0,82.0,2720.0,19.4,82,1.0,0.0,0.0


In [16]:
train_dataset = dataset.sample(frac=0.8,random_state=0)
test_dataset = dataset.drop(train_dataset.index)

In [17]:
train_labels = train_dataset.pop('MPG')
test_labels = test_dataset.pop('MPG')

In [19]:
train_stats = train_dataset.describe()

train_stats = train_stats.transpose()

In [21]:
def norm(x): # minus mean and divide by std
    return (x - train_stats['mean']) / train_stats['std']
normed_train_data = norm(train_dataset)
normed_test_data = norm(test_dataset)

In [22]:
print(normed_train_data.shape,train_labels.shape)
print(normed_test_data.shape, test_labels.shape)

(314, 9) (314,)
(78, 9) (78,)


In [23]:
train_db = tf.data.Dataset.from_tensor_slices((normed_train_data.values, train_labels.values))
train_db = train_db.shuffle(100).batch(32)

In [31]:
class Network(keras.Model):
# regression network
 def __init__(self):
    super(Network, self).__init__()
# create 3 fully-connected layers
    self.fc1 = layers.Dense(64, activation='relu')
    self.fc2 = layers.Dense(64, activation='relu')
    self.fc3 = layers.Dense(1)
 def call(self, inputs, training=None, mask=None):
# pass through the 3 layers sequentially
    x = self.fc1(inputs)
    x = self.fc2(x)
    x = self.fc3(x)
    return x

In [34]:
model = Network()
model.build(input_shape=(4, 9))
model.summary()

Model: "network_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               multiple                  640       
                                                                 
 dense_1 (Dense)             multiple                  4160      
                                                                 
 dense_2 (Dense)             multiple                  65        
                                                                 
Total params: 4,865
Trainable params: 4,865
Non-trainable params: 0
_________________________________________________________________


In [35]:
optimizer = tf.keras.optimizers.RMSprop(0.001)

In [39]:
for epoch in range(200): # 200 Epoch
    for step, (x,y) in enumerate(train_db): # Loop through training set once
# Set gradient tape
        with tf.GradientTape() as tape:
            out = model(x) # Get network output
            loss = tf.reduce_mean(losses.MSE(y, out))
# Calculate MSE
            mae_loss = tf.reduce_mean(losses.MAE(y, out))
# Calculate MAE
        if step % 10 == 0: # Print training loss every 10 steps
            print(epoch, step, float(loss))
# Calculate and update gradients
        grads = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))

0 0 601.9248657226562
1 0 506.06622314453125
2 0 441.37786865234375
3 0 418.7763366699219
4 0 374.80120849609375
5 0 240.62869262695312
6 0 177.2410888671875
7 0 174.1385498046875
8 0 149.60809326171875
9 0 102.56219482421875
10 0 115.32402038574219
11 0 82.42401885986328
12 0 76.43943786621094
13 0 96.1513671875
14 0 74.91209411621094
15 0 70.36837768554688
16 0 59.97703552246094
17 0 47.130638122558594
18 0 68.62931823730469
19 0 70.22525024414062
20 0 72.8553466796875
21 0 69.34324645996094
22 0 77.90042114257812
23 0 57.565269470214844
24 0 55.68667984008789
25 0 51.88151550292969
26 0 52.62668228149414
27 0 69.77803039550781
28 0 40.263065338134766
29 0 53.548553466796875
30 0 64.06443786621094
31 0 67.45335388183594
32 0 67.226318359375
33 0 62.112022399902344
34 0 77.23236083984375
35 0 74.9248046875
36 0 49.495880126953125
37 0 71.80935668945312
38 0 58.23086166381836
39 0 50.97840881347656
40 0 62.961063385009766
41 0 77.905029296875
42 0 70.5030288696289
43 0 59.5974388122558