In [1]:
import pandas as pd
import numpy as np

# 데이터 불러오기

In [2]:
data = pd.read_csv('../data/auto_mpg.csv')
data.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model year,origin,car name
0,18.0,8,307.0,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350.0,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318.0,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304.0,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302.0,140,3449,10.5,70,1,ford torino


# 난수를 고정 - np.random.seed

In [3]:
np.random.seed(7)

# '?' 제거

In [4]:
data['horsepower'][30:34]

30     90
31     95
32      ?
33    100
Name: horsepower, dtype: object

### '?'는 문자이므로 median() 함수를 바로 쓸 수 없어 먼저 0으로 바꿔 준 후에
### median() 함수를 적용해 준다.

In [5]:
data['horsepower'] = data['horsepower'].replace('?',0)

In [6]:
data['horsepower']= data['horsepower'].replace(0,data['horsepower'].median())

In [7]:
data['horsepower'][30:34]

30     90
31     95
32     92
33    100
Name: horsepower, dtype: object

# 자동차 이름 전처리

In [8]:
from sklearn.preprocessing import LabelEncoder

In [9]:
le = LabelEncoder()
data['car name'] = le.fit_transform(data['car name'])
data['car name']

0       49
1       36
2      231
3       14
4      161
      ... 
393    153
394    301
395    119
396    159
397     69
Name: car name, Length: 398, dtype: int32

In [10]:
data.shape

(398, 9)

# x, y로 데이터 나누기

In [11]:
y = data['mpg']
x = data.drop(['mpg'],axis='columns')

In [12]:
x.shape

(398, 8)

In [13]:
y.shape

(398,)

# x 값 StandardScaler로 표준화!

In [14]:
from sklearn.preprocessing import StandardScaler

In [15]:
scaler = StandardScaler()
x = scaler.fit_transform(x)
x

array([[ 1.49819126,  1.0906037 ,  0.67358934, ..., -1.62742629,
        -0.71514478, -1.11374457],
       [ 1.49819126,  1.5035143 ,  1.59026573, ..., -1.62742629,
        -0.71514478, -1.25918548],
       [ 1.49819126,  1.19623199,  1.19740442, ..., -1.62742629,
        -0.71514478,  0.92242821],
       ...,
       [-0.85632057, -0.56103873, -0.53118534, ...,  1.62198339,
        -0.71514478, -0.33060119],
       [-0.85632057, -0.70507731, -0.66213911, ...,  1.62198339,
        -0.71514478,  0.11690931],
       [-0.85632057, -0.71467988, -0.58356685, ...,  1.62198339,
        -0.71514478, -0.88998932]])

# train, test set 나누기

In [16]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x,
                                                    y,
                                                    random_state=42,
                                                    test_size=.2)

In [17]:
print(f'x_train: {x_train.shape}\nx_test: {x_test.shape}\n\ny_train: {y_train.shape}\ny_test: {y_test.shape}')

x_train: (318, 8)
x_test: (80, 8)

y_train: (318,)
y_test: (80,)


# Build a CNN model only Dense

In [None]:
from keras.models import Sequential
from keras.layers import Dense 

Using TensorFlow backend.


In [None]:
model = Sequential()

In [None]:
model.add(Dense(12,
                input_dim=x_train.shape[1],
                activation='relu'))

In [None]:
model.add(Dense(8,
                activation='relu'))

In [None]:
model.add(Dense(1,
                activation='relu'))

# Compile the model

In [None]:
model.compile(loss='mean_squared_error',
              optimizer='adam',
              metrics=['accuracy'])

# Fit the model

In [None]:
history = model.fit(x_train,
                    y_train,
                    epochs=60,
                    verbose=1,
                    validation_data=(x_test,y_test))

# Prediction

In [None]:
y_pred = model.predict(x_test)
y_pred

# Evaluation

In [None]:
mod_name = model.metrics_names
mod_name

In [None]:
score = model.evaluate(x_test, y_test)
score

In [None]:
print(f'{mod_name[0]}: {score[0]}\n{mod_name[1]}: {score[1]}')

# Graph

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
fig, ax = plt.subplots(1,2,figsize=(17,6))
ax = ax.ravel()

ax[0].set_title('mean_squared_error Chart')
ax[0].plot(history.history['acc'], label='train')
ax[0].plot(history.history['val_acc'], label='test')
ax[0].legend(loc='best')

ax[1].set_title('Loss Chart')
ax[1].plot(history.history['loss'], label='train')
ax[1].plot(history.history['val_loss'], label='test')
ax[1].legend(loc='best')

plt.show()