In [173]:
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
import pandas as pd
import numpy as np

In [174]:
## 데이터 로딩
from urllib.request import Request, urlopen, urlretrieve
URL = 'https://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data'
FILE = 'mpg_data'
ret = urlretrieve(URL, FILE)

In [175]:
# re(정규표현식) => \s
df=pd.read_csv(URL, sep='\s+', header=None)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 398 entries, 0 to 397
Data columns (total 9 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   0       398 non-null    float64
 1   1       398 non-null    int64  
 2   2       398 non-null    float64
 3   3       398 non-null    object 
 4   4       398 non-null    float64
 5   5       398 non-null    float64
 6   6       398 non-null    int64  
 7   7       398 non-null    int64  
 8   8       398 non-null    object 
dtypes: float64(4), int64(3), object(2)
memory usage: 28.1+ KB


In [176]:
# 일부 데이터 확인
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,18.0,8,307.0,130.0,3504.0,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350.0,165.0,3693.0,11.5,70,1,buick skylark 320
2,18.0,8,318.0,150.0,3436.0,11.0,70,1,plymouth satellite
3,16.0,8,304.0,150.0,3433.0,12.0,70,1,amc rebel sst
4,17.0,8,302.0,140.0,3449.0,10.5,70,1,ford torino


In [177]:
# 컬럼명 변경
df.rename(columns={0:'mpg', 1:'cylinders', 2:'displacement',
                   3:'horsepower', 4:'weight', 5:'acceleration',
                   6:'model year', 7:'origin', 8:'car name'}, inplace=True)

In [178]:
# 차이름 피처 제거
df.drop('car name', axis = 1, inplace=True)
df

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model year,origin
0,18.0,8,307.0,130.0,3504.0,12.0,70,1
1,15.0,8,350.0,165.0,3693.0,11.5,70,1
2,18.0,8,318.0,150.0,3436.0,11.0,70,1
3,16.0,8,304.0,150.0,3433.0,12.0,70,1
4,17.0,8,302.0,140.0,3449.0,10.5,70,1
...,...,...,...,...,...,...,...,...
393,27.0,4,140.0,86.00,2790.0,15.6,82,1
394,44.0,4,97.0,52.00,2130.0,24.6,82,2
395,32.0,4,135.0,84.00,2295.0,11.6,82,1
396,28.0,4,120.0,79.00,2625.0,18.6,82,1


In [180]:
# 상관관계 파악
# df.corr()

In [181]:
# 컬럼 : object -> float6
# '?'가 포함되어 있어서 안됨 => 결측치로써 대체해주어야
# df = df.replace('?', np.nan)
# 여기서는 '?'를 0으로 바꿔줌
df = df.replace('?', 0)

# 해당 컬럼 : object -> float64
df['horsepower'] = pd.to_numeric(df['horsepower'])
# df['horsepower'] = df['horsepower'].astype(int)

In [182]:
# 0으로 바꾼 결측치를 평균치로 대체
dfMean = df['horsepower'].mean()
df['horsepower'] = df['horsepower'].replace(0, dfMean)

In [183]:
df['horsepower'].unique()

array([130.        , 165.        , 150.        , 140.        ,
       198.        , 220.        , 215.        , 225.        ,
       190.        , 170.        , 160.        ,  95.        ,
        97.        ,  85.        ,  88.        ,  46.        ,
        87.        ,  90.        , 113.        , 200.        ,
       210.        , 193.        , 102.89447236, 100.        ,
       105.        , 175.        , 153.        , 180.        ,
       110.        ,  72.        ,  86.        ,  70.        ,
        76.        ,  65.        ,  69.        ,  60.        ,
        80.        ,  54.        , 208.        , 155.        ,
       112.        ,  92.        , 145.        , 137.        ,
       158.        , 167.        ,  94.        , 107.        ,
       230.        ,  49.        ,  75.        ,  91.        ,
       122.        ,  67.        ,  83.        ,  78.        ,
        52.        ,  61.        ,  93.        , 148.        ,
       129.        ,  96.        ,  71.        ,  98.  

In [184]:
df['mpg'].dtypes

dtype('float64')

In [185]:
# 해당 컬럼들 unique()값을 확인하여,
# 혹시나 수치형으로 나타난 범주 데이터 확인 (확실한 컬럼 제외 가능)
colnames = ['mpg', 'cylinders', 'displacement', 'horsepower', 'weight',
       'acceleration', 'model year', 'origin']
for n in colnames:
    print('{} : {}\n'.format(n, df[n].dtypes), df[n].unique(),"\n")
    
# 해당 결과로 수치형으로 나타난 범주형 피처가
# - 'model year'와 'origin'이라는 것을 알 수 있음


mpg : float64
 [18.  15.  16.  17.  14.  24.  22.  21.  27.  26.  25.  10.  11.   9.
 28.  19.  12.  13.  23.  30.  31.  35.  20.  29.  32.  33.  17.5 15.5
 14.5 22.5 24.5 18.5 29.5 26.5 16.5 31.5 36.  25.5 33.5 20.5 30.5 21.5
 43.1 36.1 32.8 39.4 19.9 19.4 20.2 19.2 25.1 20.6 20.8 18.6 18.1 17.7
 27.5 27.2 30.9 21.1 23.2 23.8 23.9 20.3 21.6 16.2 19.8 22.3 17.6 18.2
 16.9 31.9 34.1 35.7 27.4 25.4 34.2 34.5 31.8 37.3 28.4 28.8 26.8 41.5
 38.1 32.1 37.2 26.4 24.3 19.1 34.3 29.8 31.3 37.  32.2 46.6 27.9 40.8
 44.3 43.4 36.4 44.6 40.9 33.8 32.7 23.7 23.6 32.4 26.6 25.8 23.5 39.1
 39.  35.1 32.3 37.7 34.7 34.4 29.9 33.7 32.9 31.6 28.1 30.7 24.2 22.4
 34.  38.  44. ] 

cylinders : int64
 [8 4 6 3 5] 

displacement : float64
 [307.  350.  318.  304.  302.  429.  454.  440.  455.  390.  383.  340.
 400.  113.  198.  199.  200.   97.  110.  107.  104.  121.  360.  140.
  98.  232.  225.  250.  351.  258.  122.  116.   79.   88.   71.   72.
  91.   97.5  70.  120.   96.  108.  155.   68.  114.  

In [186]:
# orgin 피처가 수치로 나타낸 범주형으로 판단
df['origin'] = df['origin'].astype(object)
df = pd.get_dummies(df)

In [187]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 398 entries, 0 to 397
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   mpg           398 non-null    float64
 1   cylinders     398 non-null    int64  
 2   displacement  398 non-null    float64
 3   horsepower    398 non-null    float64
 4   weight        398 non-null    float64
 5   acceleration  398 non-null    float64
 6   model year    398 non-null    int64  
 7   origin_1      398 non-null    uint8  
 8   origin_2      398 non-null    uint8  
 9   origin_3      398 non-null    uint8  
dtypes: float64(5), int64(2), uint8(3)
memory usage: 23.1 KB


In [188]:
# 데이터 분리
X = df.drop('mpg', axis=1)
y = df['mpg']
print('X.shape: {},  y.shape: {}'.format(X.shape, y.shape))

X.shape: (398, 9),  y.shape: (398,)


In [189]:
# 스케일링 (원-핫-인코딩을 한 origin피처 가 0과 1로 되어 있으니, MinMaxScaler로 스케일)
from sklearn.preprocessing import MinMaxScaler
minmax = MinMaxScaler()
minmax.fit(X)
X_scaled = minmax.transform(X)

In [None]:
# # 스케일링
# from sklearn.preprocessing import StandardScaler

# std = StandardScaler()
# std.fit(X_train)
# X_train_scaled = std.transform(X_train)
# X_test_scaled = std.transform(X_test)

In [190]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y)
# , random_state =10

In [191]:
import warnings
warnings.filterwarnings(action='ignore')   # 경고무시

# 필터 타입에 해당하는 sklearn에 존재하는 모든 모델 이름과 객체 리스트로 반환
from sklearn.utils import all_estimators
models = all_estimators(type_filter = 'regressor')

# 각 모델들 훈련시키고 정확도 추출
scores = []
for name, model in models:
    try:
        # 모델 객체 생성
        md = model()
        # 학습
        md.fit(X_train, y_train)
        # 평가
        result = md.score(X_test, y_test)
    
        scores.append((name, np.round(result,3)))
    except:
        pass

In [192]:
scores

[('ARDRegression', 0.805),
 ('AdaBoostRegressor', 0.83),
 ('BaggingRegressor', 0.857),
 ('BayesianRidge', 0.801),
 ('CCA', 0.792),
 ('DecisionTreeRegressor', 0.768),
 ('DummyRegressor', -0.004),
 ('ElasticNet', 0.392),
 ('ElasticNetCV', 0.797),
 ('ExtraTreeRegressor', 0.74),
 ('ExtraTreesRegressor', 0.875),
 ('GammaRegressor', 0.38),
 ('GaussianProcessRegressor', -30.032),
 ('GradientBoostingRegressor', 0.85),
 ('HistGradientBoostingRegressor', 0.835),
 ('HuberRegressor', 0.796),
 ('KNeighborsRegressor', 0.857),
 ('KernelRidge', 0.769),
 ('Lars', 0.801),
 ('LarsCV', 0.755),
 ('Lasso', 0.506),
 ('LassoCV', 0.802),
 ('LassoLars', -0.004),
 ('LassoLarsCV', 0.803),
 ('LassoLarsIC', 0.801),
 ('LinearRegression', 0.801),
 ('LinearSVR', 0.74),
 ('MLPRegressor', 0.118),
 ('NuSVR', 0.79),
 ('OrthogonalMatchingPursuit', 0.728),
 ('OrthogonalMatchingPursuitCV', 0.802),
 ('PLSCanonical', -1.274),
 ('PLSRegression', 0.754),
 ('PassiveAggressiveRegressor', 0.713),
 ('PoissonRegressor', 0.803),
 ('Qu

### [1] 모델 구성

In [194]:
model = Sequential(name='AutoMPG')
# Auto MPG => 공백은 '\'로 들어가기 때문에 주의

In [195]:
l1 = Dense(300, activation='relu', input_shape=(9,))
# 들어가는 학습값 (학습은 X값이니까)
# X인 data의 피처개수는 9개니까 input_shape에는 9가 배열로 들어가야함

In [196]:
l2 = Dense(100, activation='relu')

In [197]:
l3 = Dense(1)
# 회귀, 1개의 값만 출력해야 하니까
# , activation='softmax'

In [198]:
model.add(l1)
model.add(l2)
model.add(l3)

In [199]:
## 모델 구성 확인 => summary()
model.summary()

Model: "AutoMPG"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_27 (Dense)            (None, 300)               3000      
                                                                 
 dense_28 (Dense)            (None, 100)               30100     
                                                                 
 dense_29 (Dense)            (None, 1)                 101       
                                                                 
Total params: 33,201
Trainable params: 33,201
Non-trainable params: 0
_________________________________________________________________


### [2] 모델 생성

In [201]:
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['accuracy'])
# 타겟에 대한 내용 sp~ 다중~

### [3] 모델 학습

In [202]:
model.fit(X_train, y_train, epochs=500)

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

Epoch 79/500
Epoch 80/500
Epoch 81/500
Epoch 82/500
Epoch 83/500
Epoch 84/500
Epoch 85/500
Epoch 86/500
Epoch 87/500
Epoch 88/500
Epoch 89/500
Epoch 90/500
Epoch 91/500
Epoch 92/500
Epoch 93/500
Epoch 94/500
Epoch 95/500
Epoch 96/500
Epoch 97/500
Epoch 98/500
Epoch 99/500
Epoch 100/500
Epoch 101/500
Epoch 102/500
Epoch 103/500
Epoch 104/500
Epoch 105/500
Epoch 106/500
Epoch 107/500
Epoch 108/500
Epoch 109/500
Epoch 110/500
Epoch 111/500
Epoch 112/500
Epoch 113/500
Epoch 114/500
Epoch 115/500
Epoch 116/500
Epoch 117/500
Epoch 118/500
Epoch 119/500
Epoch 120/500
Epoch 121/500
Epoch 122/500
Epoch 123/500
Epoch 124/500
Epoch 125/500
Epoch 126/500
Epoch 127/500
Epoch 128/500
Epoch 129/500
Epoch 130/500
Epoch 131/500
Epoch 132/500
Epoch 133/500
Epoch 134/500
Epoch 135/500
Epoch 136/500
Epoch 137/500
Epoch 138/500
Epoch 139/500
Epoch 140/500
Epoch 141/500
Epoch 142/500
Epoch 143/500
Epoch 144/500
Epoch 145/500
Epoch 146/500
Epoch 147/500
Epoch 148/500
Epoch 149/500
Epoch 150/500
Epoch 151/500

Epoch 156/500
Epoch 157/500
Epoch 158/500
Epoch 159/500
Epoch 160/500
Epoch 161/500
Epoch 162/500
Epoch 163/500
Epoch 164/500
Epoch 165/500
Epoch 166/500
Epoch 167/500
Epoch 168/500
Epoch 169/500
Epoch 170/500
Epoch 171/500
Epoch 172/500
Epoch 173/500
Epoch 174/500
Epoch 175/500
Epoch 176/500
Epoch 177/500
Epoch 178/500
Epoch 179/500
Epoch 180/500
Epoch 181/500
Epoch 182/500
Epoch 183/500
Epoch 184/500
Epoch 185/500
Epoch 186/500
Epoch 187/500
Epoch 188/500
Epoch 189/500
Epoch 190/500
Epoch 191/500
Epoch 192/500
Epoch 193/500
Epoch 194/500
Epoch 195/500
Epoch 196/500
Epoch 197/500
Epoch 198/500
Epoch 199/500
Epoch 200/500
Epoch 201/500
Epoch 202/500
Epoch 203/500
Epoch 204/500
Epoch 205/500
Epoch 206/500
Epoch 207/500
Epoch 208/500
Epoch 209/500
Epoch 210/500
Epoch 211/500
Epoch 212/500
Epoch 213/500
Epoch 214/500
Epoch 215/500
Epoch 216/500
Epoch 217/500
Epoch 218/500
Epoch 219/500
Epoch 220/500
Epoch 221/500
Epoch 222/500
Epoch 223/500
Epoch 224/500
Epoch 225/500
Epoch 226/500
Epoch 

Epoch 233/500
Epoch 234/500
Epoch 235/500
Epoch 236/500
Epoch 237/500
Epoch 238/500
Epoch 239/500
Epoch 240/500
Epoch 241/500
Epoch 242/500
Epoch 243/500
Epoch 244/500
Epoch 245/500
Epoch 246/500
Epoch 247/500
Epoch 248/500
Epoch 249/500
Epoch 250/500
Epoch 251/500
Epoch 252/500
Epoch 253/500
Epoch 254/500
Epoch 255/500
Epoch 256/500
Epoch 257/500
Epoch 258/500
Epoch 259/500
Epoch 260/500
Epoch 261/500
Epoch 262/500
Epoch 263/500
Epoch 264/500
Epoch 265/500
Epoch 266/500
Epoch 267/500
Epoch 268/500
Epoch 269/500
Epoch 270/500
Epoch 271/500
Epoch 272/500
Epoch 273/500
Epoch 274/500
Epoch 275/500
Epoch 276/500
Epoch 277/500
Epoch 278/500
Epoch 279/500
Epoch 280/500
Epoch 281/500
Epoch 282/500
Epoch 283/500
Epoch 284/500
Epoch 285/500
Epoch 286/500
Epoch 287/500
Epoch 288/500
Epoch 289/500
Epoch 290/500
Epoch 291/500
Epoch 292/500
Epoch 293/500
Epoch 294/500
Epoch 295/500
Epoch 296/500
Epoch 297/500
Epoch 298/500
Epoch 299/500
Epoch 300/500
Epoch 301/500
Epoch 302/500
Epoch 303/500
Epoch 

Epoch 310/500
Epoch 311/500
Epoch 312/500
Epoch 313/500
Epoch 314/500
Epoch 315/500
Epoch 316/500
Epoch 317/500
Epoch 318/500
Epoch 319/500
Epoch 320/500
Epoch 321/500
Epoch 322/500
Epoch 323/500
Epoch 324/500
Epoch 325/500
Epoch 326/500
Epoch 327/500
Epoch 328/500
Epoch 329/500
Epoch 330/500
Epoch 331/500
Epoch 332/500
Epoch 333/500
Epoch 334/500
Epoch 335/500
Epoch 336/500
Epoch 337/500
Epoch 338/500
Epoch 339/500
Epoch 340/500
Epoch 341/500
Epoch 342/500
Epoch 343/500
Epoch 344/500
Epoch 345/500
Epoch 346/500
Epoch 347/500
Epoch 348/500
Epoch 349/500
Epoch 350/500
Epoch 351/500
Epoch 352/500
Epoch 353/500
Epoch 354/500
Epoch 355/500
Epoch 356/500
Epoch 357/500
Epoch 358/500
Epoch 359/500
Epoch 360/500
Epoch 361/500
Epoch 362/500
Epoch 363/500
Epoch 364/500
Epoch 365/500
Epoch 366/500
Epoch 367/500
Epoch 368/500
Epoch 369/500
Epoch 370/500
Epoch 371/500
Epoch 372/500
Epoch 373/500
Epoch 374/500
Epoch 375/500
Epoch 376/500
Epoch 377/500
Epoch 378/500
Epoch 379/500
Epoch 380/500
Epoch 

Epoch 386/500
Epoch 387/500
Epoch 388/500
Epoch 389/500
Epoch 390/500
Epoch 391/500
Epoch 392/500
Epoch 393/500
Epoch 394/500
Epoch 395/500
Epoch 396/500
Epoch 397/500
Epoch 398/500
Epoch 399/500
Epoch 400/500
Epoch 401/500
Epoch 402/500
Epoch 403/500
Epoch 404/500
Epoch 405/500
Epoch 406/500
Epoch 407/500
Epoch 408/500
Epoch 409/500
Epoch 410/500
Epoch 411/500
Epoch 412/500
Epoch 413/500
Epoch 414/500
Epoch 415/500
Epoch 416/500
Epoch 417/500
Epoch 418/500
Epoch 419/500
Epoch 420/500
Epoch 421/500
Epoch 422/500
Epoch 423/500
Epoch 424/500
Epoch 425/500
Epoch 426/500
Epoch 427/500
Epoch 428/500
Epoch 429/500
Epoch 430/500
Epoch 431/500
Epoch 432/500
Epoch 433/500
Epoch 434/500
Epoch 435/500
Epoch 436/500
Epoch 437/500
Epoch 438/500
Epoch 439/500
Epoch 440/500
Epoch 441/500
Epoch 442/500
Epoch 443/500
Epoch 444/500
Epoch 445/500
Epoch 446/500
Epoch 447/500
Epoch 448/500
Epoch 449/500
Epoch 450/500
Epoch 451/500
Epoch 452/500
Epoch 453/500
Epoch 454/500
Epoch 455/500
Epoch 456/500
Epoch 

Epoch 463/500
Epoch 464/500
Epoch 465/500
Epoch 466/500
Epoch 467/500
Epoch 468/500
Epoch 469/500
Epoch 470/500
Epoch 471/500
Epoch 472/500
Epoch 473/500
Epoch 474/500
Epoch 475/500
Epoch 476/500
Epoch 477/500
Epoch 478/500
Epoch 479/500
Epoch 480/500
Epoch 481/500
Epoch 482/500
Epoch 483/500
Epoch 484/500
Epoch 485/500
Epoch 486/500
Epoch 487/500
Epoch 488/500
Epoch 489/500
Epoch 490/500
Epoch 491/500
Epoch 492/500
Epoch 493/500
Epoch 494/500
Epoch 495/500
Epoch 496/500
Epoch 497/500
Epoch 498/500
Epoch 499/500
Epoch 500/500


<keras.callbacks.History at 0x2258216a430>

### [4] 평가

In [203]:
model.evaluate(X_test, y_test)



[6.170392036437988, 0.0]

### [5]  예측

In [204]:
model.predict([[8, 318.0, 165.0, 3436.0, 12.5, 74, 1,0,0]])



array([[-291.90356]], dtype=float32)

In [205]:
y_proba = model.predict(X_test)
y_proba.round(2)



array([[38.06],
       [25.78],
       [13.56],
       [13.66],
       [21.43],
       [21.4 ],
       [21.15],
       [14.2 ],
       [15.7 ],
       [24.22],
       [32.13],
       [27.  ],
       [30.85],
       [15.82],
       [27.73],
       [29.95],
       [15.38],
       [32.26],
       [19.48],
       [31.27],
       [22.44],
       [24.76],
       [28.71],
       [22.85],
       [31.74],
       [23.31],
       [23.09],
       [31.69],
       [20.23],
       [34.58],
       [18.76],
       [26.38],
       [14.88],
       [18.54],
       [26.77],
       [25.86],
       [18.03],
       [18.14],
       [12.89],
       [19.74],
       [19.14],
       [39.74],
       [21.77],
       [10.92],
       [28.43],
       [25.38],
       [36.32],
       [19.63],
       [18.64],
       [13.69],
       [34.31],
       [32.65],
       [34.38],
       [25.33],
       [15.24],
       [13.2 ],
       [29.59],
       [27.51],
       [14.16],
       [22.56],
       [12.78],
       [15.81],
       [