In [1]:
import numpy as np
import pandas as pd
import math

# import tensorflow.compat.v1 as tf
# tf.disable_v2_behavior()
import tensorflow as tf
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [2]:
pd.options.display.float_format = '{:.2f}'.format
# pd.options.display.max_rows = None  # 모든 row 내용 print
pd.options.display.max_rows = 20

In [3]:
# Provide the names for the columns since the CSV file with the data does not have a header row.
feature_names = ['symboling', 'normalized-losses', 'make', 'fuel-type',
        'aspiration', 'num-doors', 'body-style', 'drive-wheels',
        'engine-location', 'wheel-base', 'length', 'width', 'height', 'weight',
        'engine-type', 'num-cylinders', 'engine-size', 'fuel-system', 'bore',
        'stroke', 'compression-ratio', 'horsepower', 'peak-rpm', 'city-mpg',
        'highway-mpg', 'price']

In [4]:
# Load in the data from a CSV file that is comma separated.
car_data = pd.read_csv('https://storage.googleapis.com/mledu-datasets/cars_data.csv',
                     sep=',', names=feature_names, header=None, encoding='latin-1')

In [5]:
car_data = car_data.reindex(np.random.permutation(car_data.index))
# or equivalently,
# car_data = car_data.sample(frac=1)

In [6]:
print("Data set loaded. Num examples: ", len(car_data))

Data set loaded. Num examples:  205


In [7]:
car_data.head()

Unnamed: 0,symboling,normalized-losses,make,fuel-type,aspiration,num-doors,body-style,drive-wheels,engine-location,wheel-base,...,engine-size,fuel-system,bore,stroke,compression-ratio,horsepower,peak-rpm,city-mpg,highway-mpg,price
182,2,122,volkswagen,diesel,std,two,sedan,fwd,front,97.3,...,97,idi,3.01,3.40,23.0,52,4800,37,46,7775
157,0,91,toyota,gas,std,four,hatchback,fwd,front,95.7,...,98,2bbl,3.19,3.03,9.0,70,4800,30,37,7198
55,3,150,mazda,gas,std,two,hatchback,rwd,front,95.3,...,70,4bbl,?,?,9.4,101,6000,17,23,10945
87,1,125,mitsubishi,gas,turbo,four,sedan,fwd,front,96.3,...,110,spdi,3.17,3.46,7.5,116,5500,23,30,9279
82,3,?,mitsubishi,gas,turbo,two,hatchback,fwd,front,95.9,...,156,spdi,3.58,3.86,7.0,145,5000,19,24,12629


In [8]:
LABEL = 'price'

numeric_feature_names = ['symboling', 'normalized-losses', 'wheel-base',
        'length', 'width', 'height', 'weight', 'engine-size', 'horsepower',
        'peak-rpm', 'city-mpg', 'highway-mpg', 'bore', 'stroke',
         'compression-ratio']

In [9]:
# Run to inspect numeric features.
car_data[numeric_feature_names]

Unnamed: 0,symboling,normalized-losses,wheel-base,length,width,height,weight,engine-size,horsepower,peak-rpm,city-mpg,highway-mpg,bore,stroke,compression-ratio
182,2,122,97.30,171.70,65.50,55.70,2261,97,52,4800,37,46,3.01,3.40,23.00
157,0,91,95.70,166.30,64.40,52.80,2109,98,70,4800,30,37,3.19,3.03,9.00
55,3,150,95.30,169.00,65.70,49.60,2380,70,101,6000,17,23,?,?,9.40
87,1,125,96.30,172.40,65.40,51.60,2403,110,116,5500,23,30,3.17,3.46,7.50
82,3,?,95.90,173.20,66.30,50.20,2833,156,145,5000,19,24,3.58,3.86,7.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
166,1,168,94.50,168.70,64.00,52.60,2300,98,112,6600,26,29,3.24,3.08,9.40
94,1,128,94.50,165.30,63.80,54.50,1951,97,69,5200,31,37,3.15,3.29,9.40
1,3,?,88.60,168.80,64.10,48.80,2548,130,111,5000,21,27,3.47,2.68,9.00
195,-1,74,104.30,188.80,67.20,57.50,3034,141,114,5400,23,28,3.78,3.15,9.50


In [10]:
car_data[numeric_feature_names].describe()

Unnamed: 0,symboling,wheel-base,length,width,height,weight,engine-size,city-mpg,highway-mpg,compression-ratio
count,205.0,205.0,205.0,205.0,205.0,205.0,205.0,205.0,205.0,205.0
mean,0.83,98.76,174.05,65.91,53.72,2555.57,126.91,25.22,30.75,10.14
std,1.25,6.02,12.34,2.15,2.44,520.68,41.64,6.54,6.89,3.97
min,-2.0,86.6,141.1,60.3,47.8,1488.0,61.0,13.0,16.0,7.0
25%,0.0,94.5,166.3,64.1,52.0,2145.0,97.0,19.0,25.0,8.6
50%,1.0,97.0,173.2,65.5,54.1,2414.0,120.0,24.0,30.0,9.0
75%,2.0,102.4,183.1,66.9,55.5,2935.0,141.0,30.0,34.0,9.4
max,3.0,120.9,208.1,72.3,59.8,4066.0,326.0,49.0,54.0,23.0


In [11]:
# Coerce the numeric features to numbers. This is necessary because the model crashes because not all the values are numeric.
for feature_name in numeric_feature_names + [LABEL]:
    car_data[feature_name] = pd.to_numeric(car_data[feature_name], errors ='coerce')

"""
errors: error는 총 3개의 옵션이 존재합니다.
- errors = 'ignore' -> 만약 숫자로 변경할 수 없는 데이터라면 숫자로 변경하지 않고 원본 데이터를 그대로 반환합니다.
- errors = 'coerce' -> 만약 숫자로 변경할 수 없는 데이터라면 기존 데이터를 지우고 NaN으로 설정하여 반환합니다.
- errors = 'raise' -> 만약 숫자로 변경할 수 없는 데이터라면 에러를 일으키며 코드를 중단합니다."""
    
# Fill missing values with 0.
# Is this an OK thing to do? You may want to come back and revisit this decision later.
car_data.fillna(0, inplace=True)

In [12]:
car_data.shape

(205, 26)

In [13]:
X = car_data[numeric_feature_names].copy()
y = car_data['price'].copy()

In [14]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2 )

### normalization

In [15]:
car_data_norm = pd.DataFrame()
for feature_name in numeric_feature_names + [LABEL]:
    car_data_norm[feature_name] = (car_data[feature_name]-car_data[feature_name].mean())/car_data[feature_name].std()

In [16]:
car_data_norm.head()

Unnamed: 0,symboling,normalized-losses,wheel-base,length,width,height,weight,engine-size,horsepower,peak-rpm,city-mpg,highway-mpg,bore,stroke,compression-ratio,price
182,0.94,0.42,-0.24,-0.19,-0.19,0.81,-0.57,-0.72,-1.25,-0.4,1.8,2.21,-0.48,0.38,3.24,-0.64
157,-0.67,-0.11,-0.51,-0.63,-0.7,-0.38,-0.86,-0.69,-0.81,-0.4,0.73,0.91,-0.14,-0.29,-0.29,-0.71
55,1.74,0.9,-0.57,-0.41,-0.1,-1.69,-0.34,-1.37,-0.05,1.33,-1.26,-1.13,-6.1,-5.81,-0.19,-0.25
87,0.13,0.47,-0.41,-0.13,-0.24,-0.87,-0.29,-0.41,0.31,0.61,-0.34,-0.11,-0.18,0.49,-0.67,-0.45
82,1.74,-1.67,-0.47,-0.07,0.18,-1.44,0.53,0.7,1.02,-0.11,-0.95,-0.98,0.59,1.22,-0.79,-0.04


In [17]:
X_norm = car_data_norm.copy()
y_norm = X_norm.pop('price')
X_train_norm, X_test_norm, y_train_norm, y_test_norm = train_test_split(X_norm, y_norm, test_size=0.2)

In [18]:
X_train_norm_dict = {name:np.array(value) for name, value in X_train_norm.items()}
y_train_norm_dict = {y_train_norm.name:y_train_norm.values}

In [19]:
feature_columns = []
feature_columns = [
    tf.feature_column.numeric_column(feature_name)
    for feature_name in ['symboling', 'width']
]

In [20]:
len(feature_columns)

2

In [21]:
feature_layer = tf.keras.layers.DenseFeatures(feature_columns)

In [22]:
X_train_dict = {name:np.array(value) for name, value in X_train.items()}

In [23]:
print(feature_layer(X_train_norm_dict))

tf.Tensor(
[[-0.669832   -0.23671637]
 [ 0.13318297 -0.7961038 ]
 [-1.472847    0.2760554 ]
 [ 0.13318297  2.5602207 ]
 [ 1.7392129  -0.42317885]
 [-1.472847    2.7000675 ]
 [-1.472847    2.0474489 ]
 [ 0.13318297 -0.98256624]
 [ 0.13318297  0.4625179 ]
 [ 0.13318297 -0.98256624]
 [ 0.93619794 -0.19010076]
 [ 1.7392129  -0.8427194 ]
 [ 0.93619794 -0.93595064]
 [ 0.13318297 -1.0757974 ]
 [-0.669832    0.2760554 ]
 [-0.669832   -0.32994762]
 [ 1.7392129   0.2760554 ]
 [-0.669832   -0.93595064]
 [ 1.7392129   0.18282418]
 [ 0.93619794  0.2294398 ]
 [ 1.7392129   1.1151365 ]
 [ 0.13318297 -0.98256624]
 [ 0.13318297  0.2760554 ]
 [-0.669832    0.2760554 ]
 [-1.472847   -0.6096413 ]
 [-0.669832   -0.7028725 ]
 [-0.669832    2.0474489 ]
 [-1.472847    0.2760554 ]
 [-0.669832    0.2760554 ]
 [ 0.13318297 -0.7961038 ]
 [-2.275862    0.6023647 ]
 [ 0.13318297 -0.98256624]
 [ 0.93619794  0.18282418]
 [ 0.93619794 -0.19010076]
 [ 0.13318297 -0.98256624]
 [ 1.7392129   0.2760554 ]
 [ 1.7392129  -0.

In [24]:
feature_columns = []
feature_columns = [
    tf.feature_column.numeric_column(feature_name)
    for feature_name in numeric_feature_names
]
feature_layer = tf.keras.layers.DenseFeatures(feature_columns)

### convert dataframe to dict
when using feature_layer, <span style='color:red'> use dict </span> for input data in model.fit

In [25]:
X_train_norm_dict = {name:np.array(value) for name, value in X_train_norm.items()}
y_train_norm_dict = {y_train_norm.name:y_train_norm.values}

In [26]:
print(feature_layer(X_train_norm_dict))

tf.Tensor(
[[ 0.66362983 -0.18640871 -0.28764477 ... -0.32758275 -0.29170555
  -0.23671637]
 [-0.43862268  0.8835773  -0.28764477 ... -1.1726311  -0.939355
  -0.7961038 ]
 [ 0.00975122 -0.9506844  -0.23729281 ...  1.1435698   0.9537743
   0.2760554 ]
 ...
 [ 0.51417184 -0.6449741  -0.21211682 ...  0.38878787  0.0570288
   0.2760554 ]
 [-0.2518002  -1.4092499  -0.79116434 ...  0.95535445  0.12345439
   0.92867404]
 [ 0.66362983 -1.2563947  -0.66528445 ...  1.1550932   0.9205615
   0.6023647 ]], shape=(164, 15), dtype=float32)


In [27]:
model = tf.keras.models.Sequential()
model.add(feature_layer)
model.add(tf.keras.layers.Dense(units=3, input_shape=(1,), activation='relu'))
# model.add(tf.keras.layers.Dense(units=1, activation='relu')) 
#                         feature_layer일 경우 input_dim은 무조건 1:  바로 앞의 layer 수이므로 항상 1
model.add(tf.keras.layers.Dense(units=1))
model.compile(tf.keras.optimizers.Adam(learning_rate=0.01),
             loss='mean_squared_error',
             metrics=[tf.keras.metrics.MeanSquaredError()])

In [28]:
# X_train_norm_dict = {name:np.array(value) for name, value in car_data_norm.items()}
# # Caution: dict 만들때 주의
# y_train_norm_dict = np.array(X_train_norm_dict.pop('price'))

###  feature column사용시 <span style='color:red'> .fit 입력 주의 </span>
 feature column 사용시는 <span style='color:red'>  y value는 array만 사용</span>. x value는 dict 사용

In [29]:
batch_size=16
epochs=500
model.fit(X_train_norm_dict, y_train_norm_dict.values(), epochs=epochs, batch_size= batch_size)
# feature column 사용시는 y value는 array만 사용. x value는 dict 사용

Epoch 1/500
Consider rewriting this model with the Functional API.
Consider rewriting this model with the Functional API.
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500


Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78/500
Epoch 79/500
Epoch 80/500
Epoch 81/500
Epoch 82/500
Epoch 83/500
Epoch 84/500
Epoch 85/500
Epoch 86/500
Epoch 87/500
Epoch 88/500
Epoch 89/500
Epoch 90/500
Epoch 91/500
Epoch 92/500
Epoch 93/500
Epoch 94/500
Epoch 95/500
Epoch 96/500
Epoch 97/500
Epoch 98/500
Epoch 99/500
Epoch 100/500
Epoch 101/500
Epoch 102/500
Epoch 103/500
Epoch 104/500
Epoch 105/500
Epoch 106/500
Epoch 107/500
Epoch 108/500
Epoch 109/500
Epoch 110/500
Epoch 111/500
Epoch 112/500
Epoch 113/500
Epoch 114/500
Epoch 115/500
Epoch 116/500
Epoch 117/500
Epoch 118/500
Epoch 119/500
Epoch 120/500
Epoch 121/500
Epoch 122/500
Epoch 123/500
Epoch 124/500


Epoch 125/500
Epoch 126/500
Epoch 127/500
Epoch 128/500
Epoch 129/500
Epoch 130/500
Epoch 131/500
Epoch 132/500
Epoch 133/500
Epoch 134/500
Epoch 135/500
Epoch 136/500
Epoch 137/500
Epoch 138/500
Epoch 139/500
Epoch 140/500
Epoch 141/500
Epoch 142/500
Epoch 143/500
Epoch 144/500
Epoch 145/500
Epoch 146/500
Epoch 147/500
Epoch 148/500
Epoch 149/500
Epoch 150/500
Epoch 151/500
Epoch 152/500
Epoch 153/500
Epoch 154/500
Epoch 155/500
Epoch 156/500
Epoch 157/500
Epoch 158/500
Epoch 159/500
Epoch 160/500
Epoch 161/500
Epoch 162/500
Epoch 163/500
Epoch 164/500
Epoch 165/500
Epoch 166/500
Epoch 167/500
Epoch 168/500
Epoch 169/500
Epoch 170/500
Epoch 171/500
Epoch 172/500
Epoch 173/500
Epoch 174/500
Epoch 175/500
Epoch 176/500
Epoch 177/500
Epoch 178/500
Epoch 179/500
Epoch 180/500
Epoch 181/500
Epoch 182/500
Epoch 183/500
Epoch 184/500
Epoch 185/500
Epoch 186/500
Epoch 187/500
Epoch 188/500
Epoch 189/500
Epoch 190/500
Epoch 191/500
Epoch 192/500
Epoch 193/500
Epoch 194/500
Epoch 195/500
Epoch 

Epoch 197/500
Epoch 198/500
Epoch 199/500
Epoch 200/500
Epoch 201/500
Epoch 202/500
Epoch 203/500
Epoch 204/500
Epoch 205/500
Epoch 206/500
Epoch 207/500
Epoch 208/500
Epoch 209/500
Epoch 210/500
Epoch 211/500
Epoch 212/500
Epoch 213/500
Epoch 214/500
Epoch 215/500
Epoch 216/500
Epoch 217/500
Epoch 218/500
Epoch 219/500
Epoch 220/500
Epoch 221/500
Epoch 222/500
Epoch 223/500
Epoch 224/500
Epoch 225/500
Epoch 226/500
Epoch 227/500
Epoch 228/500
Epoch 229/500
Epoch 230/500
Epoch 231/500
Epoch 232/500
Epoch 233/500
Epoch 234/500
Epoch 235/500
Epoch 236/500
Epoch 237/500
Epoch 238/500
Epoch 239/500
Epoch 240/500
Epoch 241/500
Epoch 242/500
Epoch 243/500
Epoch 244/500
Epoch 245/500
Epoch 246/500
Epoch 247/500
Epoch 248/500
Epoch 249/500
Epoch 250/500
Epoch 251/500
Epoch 252/500
Epoch 253/500
Epoch 254/500
Epoch 255/500
Epoch 256/500
Epoch 257/500
Epoch 258/500
Epoch 259/500
Epoch 260/500
Epoch 261/500
Epoch 262/500
Epoch 263/500
Epoch 264/500
Epoch 265/500
Epoch 266/500
Epoch 267/500
Epoch 

Epoch 269/500
Epoch 270/500
Epoch 271/500
Epoch 272/500
Epoch 273/500
Epoch 274/500
Epoch 275/500
Epoch 276/500
Epoch 277/500
Epoch 278/500
Epoch 279/500
Epoch 280/500
Epoch 281/500
Epoch 282/500
Epoch 283/500
Epoch 284/500
Epoch 285/500
Epoch 286/500
Epoch 287/500
Epoch 288/500
Epoch 289/500
Epoch 290/500
Epoch 291/500
Epoch 292/500
Epoch 293/500
Epoch 294/500
Epoch 295/500
Epoch 296/500
Epoch 297/500
Epoch 298/500
Epoch 299/500
Epoch 300/500
Epoch 301/500
Epoch 302/500
Epoch 303/500
Epoch 304/500
Epoch 305/500
Epoch 306/500
Epoch 307/500
Epoch 308/500
Epoch 309/500
Epoch 310/500
Epoch 311/500
Epoch 312/500
Epoch 313/500
Epoch 314/500
Epoch 315/500
Epoch 316/500
Epoch 317/500
Epoch 318/500
Epoch 319/500
Epoch 320/500
Epoch 321/500
Epoch 322/500
Epoch 323/500
Epoch 324/500
Epoch 325/500
Epoch 326/500
Epoch 327/500
Epoch 328/500
Epoch 329/500
Epoch 330/500
Epoch 331/500
Epoch 332/500
Epoch 333/500
Epoch 334/500
Epoch 335/500
Epoch 336/500
Epoch 337/500
Epoch 338/500
Epoch 339/500
Epoch 

Epoch 341/500
Epoch 342/500
Epoch 343/500
Epoch 344/500
Epoch 345/500
Epoch 346/500
Epoch 347/500
Epoch 348/500
Epoch 349/500
Epoch 350/500
Epoch 351/500
Epoch 352/500
Epoch 353/500
Epoch 354/500
Epoch 355/500
Epoch 356/500
Epoch 357/500
Epoch 358/500
Epoch 359/500
Epoch 360/500
Epoch 361/500
Epoch 362/500
Epoch 363/500
Epoch 364/500
Epoch 365/500
Epoch 366/500
Epoch 367/500
Epoch 368/500
Epoch 369/500
Epoch 370/500
Epoch 371/500
Epoch 372/500
Epoch 373/500
Epoch 374/500
Epoch 375/500
Epoch 376/500
Epoch 377/500
Epoch 378/500
Epoch 379/500
Epoch 380/500
Epoch 381/500
Epoch 382/500
Epoch 383/500
Epoch 384/500
Epoch 385/500
Epoch 386/500
Epoch 387/500
Epoch 388/500
Epoch 389/500
Epoch 390/500
Epoch 391/500
Epoch 392/500
Epoch 393/500
Epoch 394/500
Epoch 395/500
Epoch 396/500
Epoch 397/500
Epoch 398/500
Epoch 399/500
Epoch 400/500
Epoch 401/500
Epoch 402/500
Epoch 403/500
Epoch 404/500
Epoch 405/500
Epoch 406/500
Epoch 407/500
Epoch 408/500
Epoch 409/500
Epoch 410/500
Epoch 411/500
Epoch 

Epoch 413/500
Epoch 414/500
Epoch 415/500
Epoch 416/500
Epoch 417/500
Epoch 418/500
Epoch 419/500
Epoch 420/500
Epoch 421/500
Epoch 422/500
Epoch 423/500
Epoch 424/500
Epoch 425/500
Epoch 426/500
Epoch 427/500
Epoch 428/500
Epoch 429/500
Epoch 430/500
Epoch 431/500
Epoch 432/500
Epoch 433/500
Epoch 434/500
Epoch 435/500
Epoch 436/500
Epoch 437/500
Epoch 438/500
Epoch 439/500
Epoch 440/500
Epoch 441/500
Epoch 442/500
Epoch 443/500
Epoch 444/500
Epoch 445/500
Epoch 446/500
Epoch 447/500
Epoch 448/500
Epoch 449/500
Epoch 450/500
Epoch 451/500
Epoch 452/500
Epoch 453/500
Epoch 454/500
Epoch 455/500
Epoch 456/500
Epoch 457/500
Epoch 458/500
Epoch 459/500
Epoch 460/500
Epoch 461/500
Epoch 462/500
Epoch 463/500
Epoch 464/500
Epoch 465/500
Epoch 466/500
Epoch 467/500
Epoch 468/500
Epoch 469/500
Epoch 470/500
Epoch 471/500
Epoch 472/500
Epoch 473/500
Epoch 474/500
Epoch 475/500
Epoch 476/500
Epoch 477/500
Epoch 478/500
Epoch 479/500
Epoch 480/500
Epoch 481/500
Epoch 482/500
Epoch 483/500
Epoch 

Epoch 485/500
Epoch 486/500
Epoch 487/500
Epoch 488/500
Epoch 489/500
Epoch 490/500
Epoch 491/500
Epoch 492/500
Epoch 493/500
Epoch 494/500
Epoch 495/500
Epoch 496/500
Epoch 497/500
Epoch 498/500
Epoch 499/500
Epoch 500/500


<tensorflow.python.keras.callbacks.History at 0x1ded33f60a0>

In [30]:
X_test_norm_dict = {name:np.array(value) for name, value in X_test_norm.items()}
y_test_norm_dict = {y_test_norm.name:y_test_norm.values}

In [31]:
model.evaluate(X_test_norm_dict, y_test_norm_dict.values())

Consider rewriting this model with the Functional API.


[0.10722622275352478, 0.10722622275352478]