# 선형회귀 (자동차 연비 예측)
---

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers

In [None]:
# !gdown https://raw.githubusercontent.com/devdio/datasets/main/auto-mpg.csv

In [None]:
mpg = pd.read_csv('/content/auto-mpg.csv', na_values=['?'])
mpg.shape

(398, 9)

In [None]:
mpg.isna().sum()

mpg             0
cylinders       0
displacement    0
horsepower      6
weight          0
acceleration    0
model year      0
origin          0
car name        0
dtype: int64

In [None]:
mpg.info

<bound method DataFrame.info of       mpg  cylinders  displacement  horsepower  weight  acceleration  \
0    18.0          8         307.0       130.0    3504          12.0   
1    15.0          8         350.0       165.0    3693          11.5   
2    18.0          8         318.0       150.0    3436          11.0   
3    16.0          8         304.0       150.0    3433          12.0   
4    17.0          8         302.0       140.0    3449          10.5   
..    ...        ...           ...         ...     ...           ...   
393  27.0          4         140.0        86.0    2790          15.6   
394  44.0          4          97.0        52.0    2130          24.6   
395  32.0          4         135.0        84.0    2295          11.6   
396  28.0          4         120.0        79.0    2625          18.6   
397  31.0          4         119.0        82.0    2720          19.4   

     model year  origin                   car name  
0            70       1  chevrolet chevelle malibu

In [None]:
mpg.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model year,origin,car name
0,18.0,8,307.0,130.0,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350.0,165.0,3693,11.5,70,1,buick skylark 320
2,18.0,8,318.0,150.0,3436,11.0,70,1,plymouth satellite
3,16.0,8,304.0,150.0,3433,12.0,70,1,amc rebel sst
4,17.0,8,302.0,140.0,3449,10.5,70,1,ford torino


In [None]:
mpg['horsepower'].unique()

array([130., 165., 150., 140., 198., 220., 215., 225., 190., 170., 160.,
        95.,  97.,  85.,  88.,  46.,  87.,  90., 113., 200., 210., 193.,
        nan, 100., 105., 175., 153., 180., 110.,  72.,  86.,  70.,  76.,
        65.,  69.,  60.,  80.,  54., 208., 155., 112.,  92., 145., 137.,
       158., 167.,  94., 107., 230.,  49.,  75.,  91., 122.,  67.,  83.,
        78.,  52.,  61.,  93., 148., 129.,  96.,  71.,  98., 115.,  53.,
        81.,  79., 120., 152., 102., 108.,  68.,  58., 149.,  89.,  63.,
        48.,  66., 139., 103., 125., 133., 138., 135., 142.,  77.,  62.,
       132.,  84.,  64.,  74., 116.,  82.])

In [None]:
mpg['origin'].value_counts()

1    249
3     79
2     70
Name: origin, dtype: int64

In [None]:
mpg.columns

Index(['mpg', 'cylinders', 'displacement', 'horsepower', 'weight',
       'acceleration', 'model year', 'origin', 'car name'],
      dtype='object')

In [None]:
x = mpg.drop(['mpg', 'origin', 'car name'], axis=1)

In [None]:
y = mpg['mpg']

In [None]:
# 테스트 셋 나누기

from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=2022)

print(x_train.shape, x_test.shape)

(318, 6) (80, 6)


In [None]:
# 정규화
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
x_train_s = scaler.fit_transform(x_train)
y_train = y_train.values

# 모델 만들기

In [None]:
model = keras.Sequential([
    layers.Dense(64,activation = 'relu', input_shape=(6,)),
    layers.Dense(32,activation = 'relu'),
    layers.Dense(8)

    

    # 64는 unit의 개수이다.
    # activation은 활성화 함수이다. 시그모이드랑 relu 등등이있음
])

In [None]:
model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_13 (Dense)            (None, 64)                448       
                                                                 
 dense_14 (Dense)            (None, 32)                2080      
                                                                 
 dense_15 (Dense)            (None, 8)                 264       
                                                                 
Total params: 2,792
Trainable params: 2,792
Non-trainable params: 0
_________________________________________________________________


In [None]:
model.compile(
    optimizer='adam',
    loss='mse',
    metrics=['mse', 'mae']
)

In [None]:
# 학습

EPOCHS = 200
BATCH_SIZE = 32

model.fit(x_train, y_train,
          epochs = EPOCHS,
          batch_size = BATCH_SIZE,
          validation_split=0.2,
          verbose = 1)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

<keras.callbacks.History at 0x7f9d787cc4c0>

In [None]:
model.evaluate(x_train, y_train)



[nan, nan, nan]

In [None]:
def plot_history(history.history):
  hist = pd.DataFrame(history,history)
  hist['epoch']= history.epochshist.head()

  plt.plot(hist['epoch'],hist['mse'], label='Train MSE')
  plt.plot(hist['epoch'],hist['mse'], label='Val MSE')

SyntaxError: ignored

In [None]:
# 로그 확인
histroy.history.keys()

In [None]:
plt.history(history)

In [None]:
## 예측

x_test_s = scaler.transform(x_test)
y_test = y_test.values

In [None]:
y_pred = model.predict(x_test_s)
y_pred

In [None]:
y_test.shape

In [None]:
from sklearn.metrics import mean_squared_error
mean_squared_error(y_test, y_pred)