# 초음파 광물 데이터 - 이진 분류

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
seed = 2023
np.random.seed(seed)

- 데이터 전처리

In [3]:
# 파일 업로드
from google.colab import files
up = files.upload()

Saving sonar.csv to sonar.csv


In [4]:
df = pd.read_csv('sonar.csv', header=None)
df.head(3)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,51,52,53,54,55,56,57,58,59,60
0,0.02,0.0371,0.0428,0.0207,0.0954,0.0986,0.1539,0.1601,0.3109,0.2111,...,0.0027,0.0065,0.0159,0.0072,0.0167,0.018,0.0084,0.009,0.0032,R
1,0.0453,0.0523,0.0843,0.0689,0.1183,0.2583,0.2156,0.3481,0.3337,0.2872,...,0.0084,0.0089,0.0048,0.0094,0.0191,0.014,0.0049,0.0052,0.0044,R
2,0.0262,0.0582,0.1099,0.1083,0.0974,0.228,0.2431,0.3771,0.5598,0.6194,...,0.0232,0.0166,0.0095,0.018,0.0244,0.0316,0.0164,0.0095,0.0078,R


In [5]:
df[60].value_counts()

M    111
R     97
Name: 60, dtype: int64

In [6]:
# X data: scaling
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_scaled = scaler.fit_transform(df.iloc[:, :-1].values)     # 행 전체, 마지막열 제외한 모든 열을 넘파이 배열로 만들어 줌

In [7]:
# Y data: label encoding
from sklearn.preprocessing import LabelEncoder
y_labeled = LabelEncoder().fit_transform(df[60].values)

In [8]:
# Train, Test dataset 분리
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y_labeled, stratify=y_labeled, test_size=0.2, random_state=seed
)
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((166, 60), (42, 60), (166,), (42,))

## 모델 정의/설정/학습/평가

In [11]:
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

### Case 1. 은닉층 2개

In [10]:
model1 = Sequential([ 
    Dense(80, input_dim=60, activation='relu'),
    Dense(12, activation='relu'),
    Dense(1, activation='sigmoid')
])
model1.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 80)                4880      
                                                                 
 dense_1 (Dense)             (None, 12)                972       
                                                                 
 dense_2 (Dense)             (None, 1)                 13        
                                                                 
Total params: 5,865
Trainable params: 5,865
Non-trainable params: 0
_________________________________________________________________


In [12]:
model1.compile('adam', 'binary_crossentropy', ['accuracy'])
model_path1 = 'best_model1.h5'
mc1 = ModelCheckpoint(
    filepath=model_path1,               # 저장할 파일 이름
    monitor='val_loss',                 # validation dataset의 loss값 기준
    verbose=1,                          # 화면에 잔뜩 표시
    save_best_only=True                 # best model만 저장
)
es1 = EarlyStopping(
    monitor='val_loss',
    patience=20                     # 20 epoch 동안 val_loss가 좋아지지 않으면 강제 종료
)

In [13]:
hist1 = model1.fit(X_train, y_train, validation_split=0.2,
                   epochs=200, batch_size=100, verbose=0, 
                   callbacks=[mc1, es1])


Epoch 1: val_loss improved from inf to 0.67009, saving model to best_model1.h5

Epoch 2: val_loss improved from 0.67009 to 0.65214, saving model to best_model1.h5

Epoch 3: val_loss improved from 0.65214 to 0.63652, saving model to best_model1.h5

Epoch 4: val_loss improved from 0.63652 to 0.62343, saving model to best_model1.h5

Epoch 5: val_loss improved from 0.62343 to 0.61150, saving model to best_model1.h5

Epoch 6: val_loss improved from 0.61150 to 0.59939, saving model to best_model1.h5

Epoch 7: val_loss improved from 0.59939 to 0.58854, saving model to best_model1.h5

Epoch 8: val_loss improved from 0.58854 to 0.57803, saving model to best_model1.h5

Epoch 9: val_loss improved from 0.57803 to 0.56736, saving model to best_model1.h5

Epoch 10: val_loss improved from 0.56736 to 0.55658, saving model to best_model1.h5

Epoch 11: val_loss improved from 0.55658 to 0.54613, saving model to best_model1.h5

Epoch 12: val_loss improved from 0.54613 to 0.53493, saving model to best_mod

In [14]:
best_model1 = load_model(model_path1)
best_model1.evaluate(X_test, y_test)



[0.3833175003528595, 0.8571428656578064]

### Case 2. 은닉층 4개

In [15]:
model2 = Sequential([ 
    Dense(80, input_dim=60, activation='relu'),
    Dense(48, activation='relu'),
    Dense(20, activation='relu'),
    Dense(8, activation='relu'),
    Dense(1, activation='sigmoid')
])
model2.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 80)                4880      
                                                                 
 dense_4 (Dense)             (None, 48)                3888      
                                                                 
 dense_5 (Dense)             (None, 20)                980       
                                                                 
 dense_6 (Dense)             (None, 8)                 168       
                                                                 
 dense_7 (Dense)             (None, 1)                 9         
                                                                 
Total params: 9,925
Trainable params: 9,925
Non-trainable params: 0
_________________________________________________________________


In [16]:
model2.compile('adam', 'binary_crossentropy', ['accuracy'])
model_path2 = 'best_model2.h5'
mc2 = ModelCheckpoint(filepath=model_path2, verbose=1, save_best_only=True)
es2 = EarlyStopping(monitor='val_loss', patience=20)
hist2 = model2.fit(X_train, y_train, validation_split=0.2,
                   epochs=200, batch_size=100, verbose=0, 
                   callbacks=[mc2, es2])


Epoch 1: val_loss improved from inf to 0.65999, saving model to best_model2.h5

Epoch 2: val_loss improved from 0.65999 to 0.65190, saving model to best_model2.h5

Epoch 3: val_loss improved from 0.65190 to 0.64315, saving model to best_model2.h5

Epoch 4: val_loss improved from 0.64315 to 0.63315, saving model to best_model2.h5

Epoch 5: val_loss improved from 0.63315 to 0.62134, saving model to best_model2.h5

Epoch 6: val_loss improved from 0.62134 to 0.60891, saving model to best_model2.h5

Epoch 7: val_loss improved from 0.60891 to 0.59569, saving model to best_model2.h5

Epoch 8: val_loss improved from 0.59569 to 0.58144, saving model to best_model2.h5

Epoch 9: val_loss improved from 0.58144 to 0.56698, saving model to best_model2.h5

Epoch 10: val_loss improved from 0.56698 to 0.55316, saving model to best_model2.h5

Epoch 11: val_loss improved from 0.55316 to 0.53991, saving model to best_model2.h5

Epoch 12: val_loss improved from 0.53991 to 0.52615, saving model to best_mod

In [19]:
best_model2 = load_model(model_path2)
best_model2.evaluate(X_test, y_test)



[0.49250778555870056, 0.8571428656578064]

### Case 3. 은닉층 6개

In [20]:
model3 = Sequential([ 
    Dense(100, input_dim=60, activation='relu'),
    Dense(64, activation='relu'),
    Dense(40, activation='relu'),
    Dense(20, activation='relu'),
    Dense(10, activation='relu'),
    Dense(4, activation='relu'),
    Dense(1, activation='sigmoid')
])
model3.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_8 (Dense)             (None, 100)               6100      
                                                                 
 dense_9 (Dense)             (None, 64)                6464      
                                                                 
 dense_10 (Dense)            (None, 40)                2600      
                                                                 
 dense_11 (Dense)            (None, 20)                820       
                                                                 
 dense_12 (Dense)            (None, 10)                210       
                                                                 
 dense_13 (Dense)            (None, 4)                 44        
                                                                 
 dense_14 (Dense)            (None, 1)                

In [21]:
model3.compile('adam', 'binary_crossentropy', ['accuracy'])
model_path3 = 'best_model3.h5'
mc3 = ModelCheckpoint(filepath=model_path3, verbose=1, save_best_only=True)
es3 = EarlyStopping(monitor='val_loss', patience=20)
hist3 = model3.fit(X_train, y_train, validation_split=0.2,
                   epochs=200, batch_size=100, verbose=0, 
                   callbacks=[mc3, es3])




Epoch 1: val_loss improved from inf to 0.67360, saving model to best_model3.h5

Epoch 2: val_loss improved from 0.67360 to 0.65937, saving model to best_model3.h5

Epoch 3: val_loss improved from 0.65937 to 0.64619, saving model to best_model3.h5

Epoch 4: val_loss improved from 0.64619 to 0.63346, saving model to best_model3.h5

Epoch 5: val_loss improved from 0.63346 to 0.62015, saving model to best_model3.h5

Epoch 6: val_loss improved from 0.62015 to 0.60404, saving model to best_model3.h5

Epoch 7: val_loss improved from 0.60404 to 0.58472, saving model to best_model3.h5

Epoch 8: val_loss improved from 0.58472 to 0.56420, saving model to best_model3.h5

Epoch 9: val_loss improved from 0.56420 to 0.54450, saving model to best_model3.h5

Epoch 10: val_loss improved from 0.54450 to 0.52469, saving model to best_model3.h5

Epoch 11: val_loss improved from 0.52469 to 0.50431, saving model to best_model3.h5

Epoch 12: val_loss improved from 0.50431 to 0.48229, saving model to best_mod

In [22]:
best_model3 = load_model(model_path3)
best_model3.evaluate(X_test, y_test)



[0.47107401490211487, 0.8809523582458496]