In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import Conv1D, Dense, Flatten, Input, MaxPooling1D
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical

# 1. Load the data
# 'id' is dropped before modelling (presumably a row identifier, not a feature).
data = pd.read_csv('abalone.csv')
data = data.drop(columns=['id'])

# Drop rows with missing values BEFORE one-hot encoding. get_dummies encodes a
# missing 'Sex' as all-zero dummy columns (no NaN is left behind), so a dropna()
# placed after the encoding would silently keep those rows.
data = data.dropna()
data = pd.get_dummies(data, columns=['Sex'])

# 2. Separate features and target
X = data.drop(columns=['Rings']).values
y = data['Rings'].values
y = to_categorical(y)  # one-hot encode the integer ring counts (one column per class)

# 3. Train/test split (80/20, fixed seed so the split is reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [2]:
# Cast every split (features and one-hot targets) to float32 in a single pass.
# Note: no NaN handling happens in this cell; only the dtype changes.
X_train, y_train, X_test, y_test = (
    split.astype('float32') for split in (X_train, y_train, X_test, y_test)
)

In [3]:
# 4. Build the CNN
# Each sample is treated as a sequence of n_features steps with one channel.
# The input shape is declared with an explicit Input layer rather than passing
# `input_shape=` to Conv1D, which Keras warns against for Sequential models.
model = Sequential([
    Input(shape=(X_train.shape[1], 1)),
    Conv1D(filters=64, kernel_size=2, activation='relu'),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(y.shape[1], activation='softmax'),  # one output unit per one-hot class column
])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [4]:
# 5. Compile the model: Adam optimizer, categorical cross-entropy on the
# one-hot targets, accuracy tracked as the metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# 6. Train the model
# Append a trailing channel axis of size 1 so each sample matches the model's
# (n_features, 1) input shape.
X_train = X_train.reshape(*X_train.shape[:2], 1)
X_test = X_test.reshape(*X_test.shape[:2], 1)

model.fit(x=X_train, y=y_train, batch_size=32, epochs=20)


Epoch 1/20
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.1577 - loss: 2.8277
Epoch 2/20
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.2030 - loss: 2.3292
Epoch 3/20
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.2326 - loss: 2.2496
Epoch 4/20
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.2386 - loss: 2.1551
Epoch 5/20
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.2282 - loss: 2.1572
Epoch 6/20
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.2324 - loss: 2.1164
Epoch 7/20
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.2452 - loss: 2.1008
Epoch 8/20
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.2593 - loss: 2.0889
Epoch 9/20
[1m105/105[0m [32m━━━━━━━━

<keras.src.callbacks.history.History at 0x1d960735610>

In [5]:

# 7. Sanity-check the dtype and shape of the test targets before evaluation
for label, value in (("y_test dtype:", y_test.dtype), ("y_test shape:", y_test.shape)):
    print(label, value)


y_test dtype: float32
y_test shape: (836, 30)


In [6]:
# 8. Evaluate on the held-out test split.
# evaluate() returns the loss followed by the compiled metrics (accuracy here).
test_scores = model.evaluate(x=X_test, y=y_test)
loss, accuracy = test_scores
print(f'Loss: {loss}, Accuracy: {accuracy}')

[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.2606 - loss: 2.0140  
Loss: 1.990964412689209, Accuracy: 0.2715311050415039


### 실행은 되지만 정확도가 낮다 (테스트 정확도 약 27%).

#### 입력 데이터의 스케일을 조정해보자. StandardScaler로 각 특성을 평균 0, 표준편차 1이 되도록 표준화해보겠다.

In [7]:
# Second experiment: same CNN, but with standardized features and longer training.

# 1. Load the data
data = pd.read_csv('abalone.csv')
data = data.drop(columns=['id'])
# Drop missing rows before encoding: get_dummies turns a missing 'Sex' into
# all-zero dummy columns, which a later dropna() could no longer detect.
data = data.dropna()
data = pd.get_dummies(data, columns=['Sex'])

# 2. Separate features and one-hot encoded target, cast to float32
X = data.drop(columns=['Rings']).values.astype('float32')
y = to_categorical(data['Rings'].values).astype('float32')

# 3. Split BEFORE scaling so the test rows never influence the scaler.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize (zero mean, unit variance per feature). The scaler is fitted on
# the training split only and then applied unchanged to the test split;
# fitting on the full dataset would leak test-set statistics into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Append a trailing channel axis of size 1 so each sample has shape (n_features, 1).
X_train = X_train.reshape(*X_train.shape[:2], 1)
X_test = X_test.reshape(*X_test.shape[:2], 1)

# 4. Build the CNN (explicit Input layer instead of `input_shape=` on Conv1D,
# which Keras warns against for Sequential models).
model = Sequential([
    Input(shape=(X_train.shape[1], 1)),
    Conv1D(filters=64, kernel_size=2, activation='relu'),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(y.shape[1], activation='softmax'),  # one output unit per one-hot class column
])

# 5. Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# 6. Train the model (epochs raised from 20 to 50, batch size from 32 to 64)
model.fit(X_train, y_train, epochs=50, batch_size=64)

# 7. Evaluate on the held-out test split
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Loss: {loss}, Accuracy: {accuracy}')


Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.1565 - loss: 2.9833
Epoch 2/50
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.2579 - loss: 2.2267
Epoch 3/50
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.2368 - loss: 2.1313
Epoch 4/50
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.2524 - loss: 2.0593
Epoch 5/50
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.2677 - loss: 2.0384
Epoch 6/50
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.2683 - loss: 2.0285
Epoch 7/50
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.2726 - loss: 1.9972
Epoch 8/50
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.2683 - loss: 1.9839
Epoch 9/50
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1

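### 표준화(StandardScaler)와 에포크 증가를 적용해 보았지만 성능 향상은 미미하다.
### 이 외에도 EarlyStopping, 다층 CNN, 드롭아웃, 학습률 조정, ReduceLROnPlateau 콜백 추가 등 여러 방법으로 모델을 조정해 보았지만 큰 개선은 없었다. 표 형태 데이터는 열(특성)의 순서에 의미가 없어 인접한 특성끼리 묶어 보는 Conv1D의 이점을 살리기 어려우므로, 이 문제는 CNN으로 풀기에 부적합한 것 같다.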