# 기반 준비

## 모듈 임포팅

In [28]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import optimizers
from tensorflow.keras.layers import Dense

## 데이터 다운로드

In [29]:
!wget https://github.com/dhrim/keras_howto_2021/raw/master/common/iris.csv

--2021-07-25 10:41:39--  https://github.com/dhrim/keras_howto_2021/raw/master/common/iris.csv
Resolving github.com (github.com)... 140.82.112.4
Connecting to github.com (github.com)|140.82.112.4|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/dhrim/keras_howto_2021/master/common/iris.csv [following]
--2021-07-25 10:41:39--  https://raw.githubusercontent.com/dhrim/keras_howto_2021/master/common/iris.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2218 (2.2K) [text/plain]
Saving to: ‘iris.csv.1’


2021-07-25 10:41:40 (34.1 MB/s) - ‘iris.csv.1’ saved [2218/2218]



In [30]:
!head iris.csv

septal_length,septal_width,petal_length,petal_width,class
6.4,2.8,5.6,2.2,2
5.0,2.3,3.3,1.0,1
4.9,2.5,4.5,1.7,2
4.9,3.1,1.5,0.1,0
5.7,3.8,1.7,0.3,0
4.4,3.2,1.3,0.2,0
5.4,3.4,1.5,0.4,0
6.9,3.1,5.1,2.3,2
6.7,3.1,4.4,1.4,1


## 데이터 로딩

In [31]:
# Read the downloaded CSV into a DataFrame and preview its first five rows.
iris = pd.read_csv(filepath_or_buffer="iris.csv")
iris.head(n=5)

Unnamed: 0,septal_length,septal_width,petal_length,petal_width,class
0,6.4,2.8,5.6,2.2,2
1,5.0,2.3,3.3,1.0,1
2,4.9,2.5,4.5,1.7,2
3,4.9,3.1,1.5,0.1,0
4,5.7,3.8,1.7,0.3,0


In [32]:
# Work on the underlying NumPy array from here on instead of the DataFrame.
data = iris.to_numpy()
# A single print with a newline separator shows the shape, then the first five rows.
print(data.shape, data[:5], sep="\n")

(120, 5)
[[6.4 2.8 5.6 2.2 2. ]
 [5.  2.3 3.3 1.  1. ]
 [4.9 2.5 4.5 1.7 2. ]
 [4.9 3.1 1.5 0.1 0. ]
 [5.7 3.8 1.7 0.3 0. ]]


## x, y 분리 전에 섞기

In [33]:
# Shuffle the rows of `data` in place *before* it is cut into x and y, so each
# feature row stays attached to its class label.
# NOTE(review): no random seed is set in the visible cells, so the row order
# (and every downstream split and metric) changes from run to run - confirm
# that this is intended.
np.random.shuffle(data)
# Show the first five rows after shuffling.
print(data[:5])

[[5.8 2.7 5.1 1.9 2. ]
 [7.7 2.6 6.9 2.3 2. ]
 [6.8 3.  5.5 2.  1. ]
 [5.7 2.8 4.1 1.3 1. ]
 [6.8 3.2 5.9 2.3 2. ]]


## x, y로 분리

In [6]:
# Cut the array after the fourth column: the first four columns are the
# features (x) and the remaining column is the class label (y). Both pieces
# stay two-dimensional.
x, y = np.split(data, [4], axis=1)

In [7]:
# Confirm the split: x keeps the feature columns, y keeps the label column.
print(x.shape, y.shape, sep="\n")

(120, 4)
(120, 1)


In [8]:
# Show the first five feature rows followed by their five labels.
print(x[:5], y[:5], sep="\n")

[[6.4 2.8 5.6 2.2]
 [5.  2.3 3.3 1. ]
 [4.9 2.5 4.5 1.7]
 [4.9 3.1 1.5 0.1]
 [5.7 3.8 1.7 0.3]]
[[2.]
 [1.]
 [2.]
 [0.]
 [0.]]


## x, y 쌍을 섞기

In [37]:
# Shuffle x and y with one shared permutation so every feature row keeps its
# own label; shuffling the two arrays independently would break the pairing.
indexes = np.arange(x.shape[0])
print(indexes[:10])  # first ten row indices before shuffling

np.random.shuffle(indexes)
print(indexes[:10])  # the same ten slots after the in-place shuffle

# Indexing with the permuted index array reorders both arrays identically.
x = x[indexes]
y = y[indexes]

[0 1 2 3 4 5 6 7 8 9]
[118  68  13   6  77  71   1 110  41  23]


# 데이터 분리

## 수작업으로 데이터 분리

train:valid:test를 8:1:1로 쪼갬

먼저 8:2로 쪼개고, 나머지 2를 다시 1:1로 쪼갬

In [9]:
# Step 1: the first 80% of the rows become the training set, the rest is held out.
split_index = int(len(x) * 0.8)
train_x, train_y = x[:split_index], y[:split_index]
remain_x, remain_y = x[split_index:], y[split_index:]

# Step 2: cut the held-out rows in half - the first half is the validation
# set, the second half is the test set.
split_index = int(len(remain_x) * 0.5)
print(split_index)
valid_x, valid_y = remain_x[:split_index], remain_y[:split_index]
test_x, test_y = remain_x[split_index:], remain_y[split_index:]

12


In [10]:
# Row counts of the three manual partitions: train, validation, test.
print(train_x.shape, valid_x.shape, test_x.shape, sep="\n")

(96, 4)
(12, 4)
(12, 4)


## scikit의 train_test_split()으로 분리

In [11]:
from sklearn.model_selection import train_test_split

# Same 8:1:1 partition as the manual version, done with scikit-learn:
# first set aside 20% of the rows ...
train_x, remain_x, train_y, remain_y = train_test_split(
    x, y,
    test_size=0.2,
    shuffle=True,
)

# ... then divide that 20% evenly into validation and test sets.
valid_x, test_x, valid_y, test_y = train_test_split(
    remain_x, remain_y,
    test_size=0.5,
    shuffle=True,
)


In [12]:
# The scikit-learn split should report the same row counts as the manual one.
print(train_x.shape, valid_x.shape, test_x.shape, sep="\n")

(96, 4)
(12, 4)
(12, 4)


# validation 데이터 적용

## fit()의 validation_data 사용

In [13]:
from sklearn.model_selection import train_test_split

# Fresh train/validation/test partition for the validation_data experiment.
# Stage 1 keeps 80% of the rows for training and holds out the other 20%.
train_x, remain_x, train_y, remain_y = train_test_split(
    x, y, test_size=0.2, shuffle=True
)

# Stage 2 halves the held-out rows: one half validates during training, the
# other half is kept for the final evaluation.
valid_x, test_x, valid_y, test_y = train_test_split(
    remain_x, remain_y, test_size=0.5, shuffle=True
)


In [14]:
# Small fully connected classifier: 4 input features -> two hidden ReLU layers
# of 10 units each -> 3 softmax outputs. `Dense` is the same layer class the
# notebook imports at the top.
model = keras.Sequential([
    Dense(10, activation='relu', input_shape=(4,)),
    Dense(10, activation='relu'),
    Dense(3, activation='softmax'),
])

In [15]:
# The labels are plain integer class ids (not one-hot vectors), hence the
# sparse variant of categorical cross-entropy. Accuracy is tracked as a metric.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [16]:
# Train for 100 epochs. The explicitly supplied validation pair is scored at
# the end of every epoch and is not used for training.
model.fit(
    x=train_x,
    y=train_y,
    epochs=100,
    validation_data=(valid_x, valid_y),
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<tensorflow.python.keras.callbacks.History at 0x7f0e7617ed90>

In [17]:
# Final score on the test rows, which were passed to fit() neither as
# training data nor as validation data.
loss, acc = model.evaluate(x=test_x, y=test_y)

# Report the two values returned by evaluate().
print("loss :", loss)
print("acc :", acc)


loss : 0.3896573781967163
acc : 1.0


## fit()의 validation_split 사용


In [23]:
from sklearn.model_selection import train_test_split

# Only a train/test split is made here (10% of the rows go to the test set);
# the validation rows are taken out of the training portion later by fit().
train_x, test_x, train_y, test_y = train_test_split(
    x, y,
    test_size=0.1,
    shuffle=True,
)


In [24]:
# Rebuild the same 4 -> 10 -> 10 -> 3 network from scratch so this experiment
# does not start from the weights trained in the previous section.
model = keras.Sequential()
model.add(Dense(units=10, activation='relu', input_shape=(4,)))
model.add(Dense(units=10, activation='relu'))
model.add(Dense(units=3, activation='softmax'))

In [25]:
# Same training setup as before: Adam optimizer, sparse categorical
# cross-entropy for integer class labels, accuracy reported as a metric.
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [26]:
# No explicit validation set this time: validation_split=0.2 asks Keras to
# hold out 20% of train_x/train_y itself. Per the Keras docs that slice is
# taken from the end of the arrays before shuffling, so the data must already
# be in random order (train_test_split above shuffled it).
model.fit(
    x=train_x,
    y=train_y,
    epochs=100,
    validation_split=0.2,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<tensorflow.python.keras.callbacks.History at 0x7f0e64510c10>

In [27]:
# Score the validation_split model on the test rows set aside by
# train_test_split before training.
loss, acc = model.evaluate(x=test_x, y=test_y)

# Report the two values returned by evaluate().
print("loss :", loss)
print("acc :", acc)


loss : 0.5014492869377136
acc : 0.9166666865348816
