# 정확도 높이기
- digits dataset
- random seed 2021
- test_size=0.2

In [1]:
import os

import numpy as np
import pandas as pd
import tensorflow as tf
# One seed value reused for NumPy, TensorFlow and the train/test split
# (passed as random_state further down).
seed = 2021 
# Seed NumPy's global random generator.
np.random.seed(seed)
# Seed TensorFlow's global random generator.
tf.random.set_seed(seed)

In [2]:
from sklearn.datasets import load_digits
# Load scikit-learn's bundled digits dataset. Later cells read its
# .data, .target and .feature_names attributes.
digits = load_digits()

In [3]:
# Tabular view of the pixel features with the label appended as the last
# column. In the visible cells it is only used for inspection; scaling and
# training read digits.data / digits.target directly.
df = pd.DataFrame(digits.data, columns=digits.feature_names).assign(
    target=digits.target
)
df.head()

Unnamed: 0,pixel_0_0,pixel_0_1,pixel_0_2,pixel_0_3,pixel_0_4,pixel_0_5,pixel_0_6,pixel_0_7,pixel_1_0,pixel_1_1,...,pixel_6_7,pixel_7_0,pixel_7_1,pixel_7_2,pixel_7_3,pixel_7_4,pixel_7_5,pixel_7_6,pixel_7_7,target
0,0.0,0.0,5.0,13.0,9.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,6.0,13.0,10.0,0.0,0.0,0.0,0
1,0.0,0.0,0.0,12.0,13.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,11.0,16.0,10.0,0.0,0.0,1
2,0.0,0.0,0.0,4.0,15.0,12.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,3.0,11.0,16.0,9.0,0.0,2
3,0.0,0.0,7.0,15.0,13.0,1.0,0.0,0.0,0.0,8.0,...,0.0,0.0,0.0,7.0,13.0,13.0,9.0,0.0,0.0,3
4,0.0,0.0,0.0,1.0,11.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,2.0,16.0,4.0,0.0,0.0,4


In [4]:
# Number of samples per digit label, ordered by label, to check class balance.
df['target'].value_counts().sort_index()

0    178
1    182
2    177
3    183
4    181
5    182
6    181
7    179
8    174
9    180
Name: target, dtype: int64

In [5]:
from sklearn.preprocessing import MinMaxScaler

# Rescale each pixel column with MinMaxScaler's default settings.
# NOTE(review): the scaler is fitted on the complete dataset, and the
# train/test split happens only afterwards, so test rows contribute to the
# scaling parameters. Consider fitting on the training part only.
scaler = MinMaxScaler().fit(digits.data)
X_scaled = scaler.transform(digits.data)

In [6]:
from tensorflow.keras.utils import to_categorical
# One-hot encode the integer labels; the models below end in a 10-unit
# softmax layer and are trained with categorical_crossentropy.
y_onehot = to_categorical(digits.target)
# Preview the first ten encoded rows.
y_onehot[:10]

array([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]], dtype=float32)

In [7]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing. The split is stratified on the
# (one-hot) labels and seeded so it is repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X_scaled,
    y_onehot,
    test_size=0.2,
    random_state=seed,
    stratify=y_onehot,
)
# Show the shape of every part as a tuple of shapes.
tuple(part.shape for part in (X_train, X_test, Y_train, Y_test))

((1437, 64), (360, 64), (1437, 10), (360, 10))

### 모델 정의 및 설정

In [8]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [9]:
# Baseline network: 64 input features -> 32 -> 24 -> 12 ReLU units,
# followed by a 10-unit softmax output layer.
model = Sequential([
    Dense(32, input_dim=64, activation='relu'),
    Dense(24, activation='relu'),
    Dense(12, activation='relu'),
    Dense(10, activation='softmax')
])
# Print the layer-by-layer summary (output shapes and parameter counts).
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 32)                2080      
_________________________________________________________________
dense_1 (Dense)              (None, 24)                792       
_________________________________________________________________
dense_2 (Dense)              (None, 12)                300       
_________________________________________________________________
dense_3 (Dense)              (None, 10)                130       
Total params: 3,302
Trainable params: 3,302
Non-trainable params: 0
_________________________________________________________________


In [10]:
# Adam optimizer with categorical cross-entropy (targets are one-hot);
# accuracy is tracked as an additional metric.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

### 모델 저장 관련 설정

In [11]:
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
modelpath = 'model/best_digits.h5'
# Create the checkpoint directory up front: on a fresh checkout it does not
# exist yet, and not every Keras release creates it before writing the file.
os.makedirs(os.path.dirname(modelpath), exist_ok=True)
# Overwrite the checkpoint file only when the monitored validation loss improves.
checkpointer = ModelCheckpoint(modelpath, monitor='val_loss', save_best_only=True)
# Stop training once validation loss has gone 30 epochs without improving.
early_stopping = EarlyStopping(monitor='val_loss', patience=30)

### 모델 학습 및 저장

In [12]:
# Train the baseline model silently. validation_split=0.2 sets aside part of
# the training data to produce the validation metrics the callbacks rely on;
# epochs=300 is an upper bound because early stopping may end the run sooner.
history = model.fit(
    x=X_train,
    y=Y_train,
    validation_split=0.2,
    epochs=300,
    batch_size=300,
    callbacks=[checkpointer, early_stopping],
    verbose=0,
)

### 모델 평가

In [29]:
from tensorflow.keras.models import load_model
# Evaluate the checkpoint saved during training rather than the weights left
# in `model` after the final epoch. Reusing `modelpath` keeps the file that
# was written and the file that is evaluated from drifting apart.
best_model = load_model(modelpath)
# The model was compiled with one loss and one metric, so evaluate() returns
# [loss, accuracy]; index 1 is the test accuracy.
acc = best_model.evaluate(X_test, Y_test)
print(f'정확도: {acc[1]}')

정확도: 0.9611111283302307


### 모델2

In [14]:
# Second, wider and deeper variant: 64 input features -> 80 -> 55 -> 35 -> 15
# ReLU units, followed by a 10-unit softmax output layer.
model2 = Sequential([
    Dense(80, input_dim=64, activation='relu'),
    Dense(55, activation='relu'),
    Dense(35, activation='relu'),
    Dense(15, activation='relu'),
    Dense(10, activation='softmax')
])
# Print the layer-by-layer summary (output shapes and parameter counts).
model2.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 80)                5200      
_________________________________________________________________
dense_5 (Dense)              (None, 55)                4455      
_________________________________________________________________
dense_6 (Dense)              (None, 35)                1960      
_________________________________________________________________
dense_7 (Dense)              (None, 15)                540       
_________________________________________________________________
dense_8 (Dense)              (None, 10)                160       
Total params: 12,315
Trainable params: 12,315
Non-trainable params: 0
_________________________________________________________________


In [15]:
# Same training configuration as the baseline: Adam, categorical
# cross-entropy, accuracy metric.
model2.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [16]:
modelpath2 = 'model/best_digits2.h5'
# Create the checkpoint directory up front: on a fresh checkout it does not
# exist yet, and not every Keras release creates it before writing the file.
os.makedirs(os.path.dirname(modelpath2), exist_ok=True)
# Overwrite the checkpoint file only when the monitored validation loss improves.
checkpointer2 = ModelCheckpoint(modelpath2, monitor='val_loss', save_best_only=True)
# Stop training once validation loss has gone 30 epochs without improving.
early_stopping2 = EarlyStopping(monitor='val_loss', patience=30)

In [23]:
# Train the second model silently with a larger batch and a higher epoch cap
# than the baseline; early stopping may end the run before 500 epochs.
history2 = model2.fit(
    x=X_train,
    y=Y_train,
    validation_split=0.2,
    epochs=500,
    batch_size=600,
    callbacks=[checkpointer2, early_stopping2],
    verbose=0,
)

In [30]:
# Evaluate the checkpoint saved for the second model. Reusing `modelpath2`
# keeps the file that was written and the file that is evaluated in sync.
best_model2 = load_model(modelpath2)
# evaluate() returns [loss, accuracy] for this compile configuration, so
# index 1 is the test accuracy.
acc2 = best_model2.evaluate(X_test, Y_test)
print(f'정확도: {acc2[1]}')

정확도: 0.9638888835906982


### 모델3

In [19]:
# Re-display the shapes of the train/test parts before defining the third model.
X_train.shape, X_test.shape, Y_train.shape, Y_test.shape

((1437, 64), (360, 64), (1437, 10), (360, 10))

In [20]:
# Third, smaller variant: 64 input features -> 32 -> 16 ReLU units,
# followed by a 10-unit softmax output layer.
model3=Sequential([
    Dense(32, input_dim=64, activation='relu'),
    Dense(16, activation='relu'),
    Dense(10, activation='softmax'),
])
# Print the layer-by-layer summary (output shapes and parameter counts).
model3.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_9 (Dense)              (None, 32)                2080      
_________________________________________________________________
dense_10 (Dense)             (None, 16)                528       
_________________________________________________________________
dense_11 (Dense)             (None, 10)                170       
Total params: 2,778
Trainable params: 2,778
Non-trainable params: 0
_________________________________________________________________


In [21]:
# Same training configuration as the other models: Adam, categorical
# cross-entropy, accuracy metric.
model3.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [22]:
modelpath3 = 'model/best_digits3.h5'
# Create the checkpoint directory up front: on a fresh checkout it does not
# exist yet, and not every Keras release creates it before writing the file.
os.makedirs(os.path.dirname(modelpath3), exist_ok=True)
# Overwrite the checkpoint file only when the monitored validation loss improves.
checkpointer3 = ModelCheckpoint(modelpath3, monitor='val_loss', verbose=0, save_best_only=True)
# Stop training once validation loss has gone 40 epochs without improving.
early_stopping3 = EarlyStopping(monitor='val_loss', patience=40)

In [25]:
# Train the third model silently; epochs=900 is only an upper bound because
# early stopping may end the run sooner.
history3 = model3.fit(
    x=X_train,
    y=Y_train,
    validation_split=0.2,
    epochs=900,
    batch_size=600,
    callbacks=[checkpointer3, early_stopping3],
    verbose=0,
)

In [31]:
# Evaluate the checkpoint saved for the third model. Reusing `modelpath3`
# keeps the file that was written and the file that is evaluated in sync.
best_model3 = load_model(modelpath3)
# evaluate() returns [loss, accuracy] for this compile configuration, so
# index 1 is the test accuracy.
acc3 = best_model3.evaluate(X_test, Y_test)
print(f'정확도: {acc3[1]}')

정확도: 0.9583333134651184
