In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, models

In [2]:
# Download the Auto MPG data file from the UCI archive and remember where
# Keras stored it locally.
dataset_path = tf.keras.utils.get_file(
  fname="/content/auto-mpg.data",
  origin="http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data",
)

In [3]:
# Column labels to assign, in the order the fields appear in the file.
column_name = [
  'MPG', 'Cylinders', 'Displacement', 'Horsepower',
  'Weight', 'Acceleration', 'Model Year', 'Origin',
]

# Parse the space-separated file into a DataFrame.
raw_dataset = pd.read_csv(
  dataset_path,
  names=column_name,
  na_values='?',          # '?' entries are read as NaN
  comment='\t',           # ignore the remainder of a line after a tab
  sep=' ',
  skipinitialspace=True,  # skip the extra spaces that follow a separator
)

In [4]:
# Preview the first rows of the loaded table.
raw_dataset.head()

Unnamed: 0,MPG,Cylinders,Displacement,Horsepower,Weight,Acceleration,Model Year,Origin
0,18.0,8,307.0,130.0,3504.0,12.0,70,1
1,15.0,8,350.0,165.0,3693.0,11.5,70,1
2,18.0,8,318.0,150.0,3436.0,11.0,70,1
3,16.0,8,304.0,150.0,3433.0,12.0,70,1
4,17.0,8,302.0,140.0,3449.0,10.5,70,1


In [5]:
# Count the missing (NaN) values in each column.
raw_dataset.isna().sum()

MPG             0
Cylinders       0
Displacement    0
Horsepower      6
Weight          0
Acceleration    0
Model Year      0
Origin          0
dtype: int64

In [6]:
# Discard every row that contains at least one missing value.
raw_dataset = raw_dataset.dropna(axis=0, how='any')

In [7]:
# Re-check the per-column missing-value counts after dropping incomplete rows.
raw_dataset.isna().sum()

MPG             0
Cylinders       0
Displacement    0
Horsepower      0
Weight          0
Acceleration    0
Model Year      0
Origin          0
dtype: int64

In [8]:
# Work on an independent deep copy so later edits do not touch raw_dataset.
dataset = raw_dataset.copy(deep=True)

In [9]:
# Split into train and test sets: sample 80% of the rows for training and keep
# the remaining rows (those whose index was not sampled) for testing.
# A fixed random_state makes the split, and every statistic and metric derived
# from it, reproducible across kernel restarts.

train_data = dataset.sample(frac=0.8, random_state=0)
test_data = dataset.drop(train_data.index)

In [10]:
# Summary statistics (count, mean, std, min, quartiles, max) of each training column.
train_stats = train_data.describe()

In [11]:
# Remove the statistics of the target column so only feature statistics remain;
# the removed column is returned and therefore displayed as the cell output.
# NOTE(review): pop() mutates train_stats in place, so re-running this cell
# raises KeyError once 'MPG' is gone.
train_stats.pop('MPG')

count    314.000000
mean      23.647134
std        7.921637
min       10.000000
25%       17.500000
50%       23.000000
75%       29.000000
max       46.600000
Name: MPG, dtype: float64

In [12]:
# Flip the table so each feature is a row and each statistic is a column.
train_stats = train_stats.T

In [13]:
# Show the per-feature statistics table.
train_stats

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Cylinders,314.0,5.43949,1.728681,3.0,4.0,4.0,8.0,8.0
Displacement,314.0,192.066879,106.027388,68.0,98.0,140.5,293.25,455.0
Horsepower,314.0,103.94586,38.737425,46.0,75.0,92.0,129.0,230.0
Weight,314.0,2958.324841,857.099837,1613.0,2219.25,2750.0,3596.25,5140.0
Acceleration,314.0,15.505096,2.781116,8.0,13.5,15.5,17.075,24.8
Model Year,314.0,76.047771,3.601687,70.0,73.0,76.0,79.0,82.0
Origin,314.0,1.589172,0.807321,1.0,1.0,1.0,2.0,3.0


In [14]:
def norm(x, stats=None):
  """Standardize ``x`` with per-feature mean and standard deviation.

  Args:
    x: DataFrame (or Series) whose column labels match the index of ``stats``.
    stats: Table with one row per feature and ``'mean'`` / ``'std'`` columns.
      Defaults to the notebook-level ``train_stats`` so existing ``norm(x)``
      calls keep working unchanged.

  Returns:
    ``(x - mean) / std``, aligned on the feature labels.
  """
  if stats is None:
    # Resolved at call time, so the function can be defined before
    # train_stats exists.
    stats = train_stats
  return (x - stats['mean']) / stats['std']

In [15]:
# Detach the target column (MPG) from both splits; pop() removes it in place
# and returns it as the label series.
train_label, test_label = train_data.pop('MPG'), test_data.pop('MPG')

# Scale the remaining feature columns of both splits with norm().
train_data, test_data = norm(train_data), norm(test_data)



In [16]:
# Model: a stack of fully connected (Dense) layers ending in a single linear
# output unit.
# The input width is read from the training frame instead of being hard-coded,
# and it is declared through an explicit Input layer rather than the legacy
# `input_dim` keyword on the first Dense layer.

model = models.Sequential([
  layers.Input(shape=(train_data.shape[1],)),
  layers.Dense(64, activation='relu'),
  layers.Dense(32, activation='relu'),
  layers.Dense(1)
])

In [17]:
# Optimize mean squared error with Adam and track mean absolute error.
model.compile(
  optimizer='adam',
  loss='mse',
  metrics=['mae'],
)

In [18]:
# Fit for 100 epochs in mini-batches of 16, holding out 20% of the training
# rows for validation; the value returned by fit() is kept in `history`.
history = model.fit(
  x=train_data,
  y=train_label,
  batch_size=16,
  epochs=100,
  validation_split=0.2,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78