# 기반 준비

## 모듈 임포팅

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import optimizers
from tensorflow.keras.layers import Dense, Flatten

## 속성 데이터 준비

### 데이터 다운로드

In [2]:
!wget https://github.com/dhrim/keras_howto_2021/raw/master/common/iris.csv

--2021-07-25 12:57:29--  https://github.com/dhrim/keras_howto_2021/raw/master/common/iris.csv
Resolving github.com (github.com)... 140.82.113.4
Connecting to github.com (github.com)|140.82.113.4|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/dhrim/keras_howto_2021/master/common/iris.csv [following]
--2021-07-25 12:57:29--  https://raw.githubusercontent.com/dhrim/keras_howto_2021/master/common/iris.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2218 (2.2K) [text/plain]
Saving to: ‘iris.csv.5’


2021-07-25 12:57:29 (37.2 MB/s) - ‘iris.csv.5’ saved [2218/2218]



In [3]:
!head iris.csv

septal_length,septal_width,petal_length,petal_width,class
6.4,2.8,5.6,2.2,2
5.0,2.3,3.3,1.0,1
4.9,2.5,4.5,1.7,2
4.9,3.1,1.5,0.1,0
5.7,3.8,1.7,0.3,0
4.4,3.2,1.3,0.2,0
5.4,3.4,1.5,0.4,0
6.9,3.1,5.1,2.3,2
6.7,3.1,4.4,1.4,1


### 데이터 로딩

In [4]:
iris = pd.read_csv("iris.csv")
iris.head()

Unnamed: 0,septal_length,septal_width,petal_length,petal_width,class
0,6.4,2.8,5.6,2.2,2
1,5.0,2.3,3.3,1.0,1
2,4.9,2.5,4.5,1.7,2
3,4.9,3.1,1.5,0.1,0
4,5.7,3.8,1.7,0.3,0


In [5]:
iris_data = iris.to_numpy()
print(iris_data.shape)
print(iris_data[:5])

(120, 5)
[[6.4 2.8 5.6 2.2 2. ]
 [5.  2.3 3.3 1.  1. ]
 [4.9 2.5 4.5 1.7 2. ]
 [4.9 3.1 1.5 0.1 0. ]
 [5.7 3.8 1.7 0.3 0. ]]


### x, y 분리 전에 섞기

In [6]:
np.random.shuffle(iris_data)
print(iris_data[:5])

[[4.6 3.6 1.  0.2 0. ]
 [6.5 3.2 5.1 2.  2. ]
 [4.8 3.4 1.6 0.2 0. ]
 [5.  3.5 1.6 0.6 0. ]
 [6.5 3.  5.8 2.2 2. ]]


### x, y로 분리

In [7]:
iris_raw_x = iris_data[:,:4]
iris_raw_y = iris_data[:,4:]

In [8]:
print(iris_raw_x.shape)
print(iris_raw_y.shape)

(120, 4)
(120, 1)


In [9]:
print(iris_raw_x[:5])
print(iris_raw_y[:5])

[[4.6 3.6 1.  0.2]
 [6.5 3.2 5.1 2. ]
 [4.8 3.4 1.6 0.2]
 [5.  3.5 1.6 0.6]
 [6.5 3.  5.8 2.2]]
[[0.]
 [2.]
 [0.]
 [0.]
 [2.]]


# 속성 데이터

## 아무것도 적용 하지 않은

In [25]:
x = iris_raw_x
y = iris_raw_y

In [26]:
from sklearn.model_selection import train_test_split

train_x, test_x, train_y, test_y = train_test_split(x, y, test_size=0.1, shuffle=True)


In [27]:
model = keras.Sequential([
    keras.layers.Dense(10, activation='relu', input_shape=(4,)),
    keras.layers.Dense(10, activation='relu'),
    keras.layers.Dense(3, activation='softmax')
])

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

model.fit(train_x, train_y, epochs=10, validation_split=0.2)

loss, acc = model.evaluate(test_x, test_y)

print("loss :", loss)
print("acc :", acc)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
loss : 1.159281849861145
acc : 0.1666666716337204


## Normalization 적용

각 속성 별로 개별 데이터 이다. 각 컬럼 별로 해주어야 한다.

In [28]:
df_x = pd.DataFrame(iris_raw_x)
df_x_scaled = (df_x - df_x.min()) / (df_x.max() - df_x.min())
x = df_x_scaled.to_numpy()

y = iris_raw_y

In [29]:
from sklearn.model_selection import train_test_split

train_x, test_x, train_y, test_y = train_test_split(x, y, test_size=0.1, shuffle=True)


In [30]:
model = keras.Sequential([
    keras.layers.Dense(10, activation='relu', input_shape=(4,)),
    keras.layers.Dense(10, activation='relu'),
    keras.layers.Dense(3, activation='softmax')
])

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

model.fit(train_x, train_y, epochs=10, validation_split=0.2)

loss, acc = model.evaluate(test_x, test_y)

print("loss :", loss)
print("acc :", acc)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
loss : 1.0720347166061401
acc : 0.5833333134651184


## Standardization 적용

column 별로 해주기 위해 pandas를 사용한다.


In [31]:
df_x = pd.DataFrame(iris_raw_x)
df_x_scaled = (df_x - df_x.mean()) / (df_x.std())
x = df_x_scaled.to_numpy()

y = iris_raw_y

In [32]:
from sklearn.model_selection import train_test_split

train_x, test_x, train_y, test_y = train_test_split(x, y, test_size=0.1, shuffle=True)


In [33]:
model = keras.Sequential([
    keras.layers.Dense(10, activation='relu', input_shape=(4,)),
    keras.layers.Dense(10, activation='relu'),
    keras.layers.Dense(3, activation='softmax')
])

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])


model.fit(train_x, train_y, epochs=10, validation_split=0.2)

loss, acc = model.evaluate(test_x, test_y)

print("loss :", loss)
print("acc :", acc)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
loss : 0.9685394167900085
acc : 0.5


# 영상 데이터

## 아무것도 적용 하지 않은

In [19]:
(train_x, train_y), (test_x, test_y) = tf.keras.datasets.mnist.load_data()


In [20]:
model = keras.Sequential()
model.add(Flatten(input_shape=(28, 28)))
model.add(Dense(10, activation='relu'))
model.add(Dense(10, activation='relu'))
model.add(Dense(10, activation='softmax'))

model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])
model.summary()

model.fit(train_x, train_y, epochs=5, verbose=1, batch_size=128)

loss, acc = model.evaluate(test_x, test_y)
print("loss=",loss)
print("acc=",acc)

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 784)               0         
_________________________________________________________________
dense_9 (Dense)              (None, 10)                7850      
_________________________________________________________________
dense_10 (Dense)             (None, 10)                110       
_________________________________________________________________
dense_11 (Dense)             (None, 10)                110       
Total params: 8,070
Trainable params: 8,070
Non-trainable params: 0
_________________________________________________________________
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
loss= 1.8452115058898926
acc= 0.29660001397132874


## Normalization 적용

0 ~ 1 사이의 값으로 변경한다.

In [21]:
(train_x, train_y), (test_x, test_y) = tf.keras.datasets.mnist.load_data()

all_x = np.vstack([train_x, test_x])
max = np.max(all_x)
min = np.min(all_x)

train_x = (train_x - min) / (max - min)
test_x = (test_x - min) / (max - min)

In [22]:
model = keras.Sequential()
model.add(Flatten(input_shape=(28, 28)))
model.add(Dense(10, activation='relu'))
model.add(Dense(10, activation='relu'))
model.add(Dense(10, activation='softmax'))

model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])
model.summary()

model.fit(train_x, train_y, epochs=5, verbose=1, batch_size=128)

loss, acc = model.evaluate(test_x, test_y)
print("loss=",loss)
print("acc=",acc)

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_1 (Flatten)          (None, 784)               0         
_________________________________________________________________
dense_12 (Dense)             (None, 10)                7850      
_________________________________________________________________
dense_13 (Dense)             (None, 10)                110       
_________________________________________________________________
dense_14 (Dense)             (None, 10)                110       
Total params: 8,070
Trainable params: 8,070
Non-trainable params: 0
_________________________________________________________________
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
loss= 0.25403323769569397
acc= 0.9277999997138977


## Standardization 적용

In [23]:
(train_x, train_y), (test_x, test_y) = tf.keras.datasets.mnist.load_data()

all_x = np.vstack([train_x, test_x])
max = np.max(all_x)
min = np.min(all_x)

train_x = (train_x - min) / (max - min)
test_x = (test_x - min) / (max - min)

In [24]:
model = keras.Sequential()
model.add(Flatten(input_shape=(28, 28)))
model.add(Dense(10, activation='relu'))
model.add(Dense(10, activation='relu'))
model.add(Dense(10, activation='softmax'))

model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])
model.summary()

model.fit(train_x, train_y, epochs=5, verbose=1, batch_size=128)

loss, acc = model.evaluate(test_x, test_y)
print("loss=",loss)
print("acc=",acc)

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_2 (Flatten)          (None, 784)               0         
_________________________________________________________________
dense_15 (Dense)             (None, 10)                7850      
_________________________________________________________________
dense_16 (Dense)             (None, 10)                110       
_________________________________________________________________
dense_17 (Dense)             (None, 10)                110       
Total params: 8,070
Trainable params: 8,070
Non-trainable params: 0
_________________________________________________________________
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
loss= 0.27409493923187256
acc= 0.9212999939918518
