### 전복 데이터의 분류 및 회귀 모델을 CNN과 LSTM으로 작성하라

In [26]:
#데이터 로드 및 전처리

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import numpy as np

In [27]:
# Load the abalone dataset from a local CSV file into a DataFrame.
# NOTE(review): the path below is absolute and machine-specific, so this cell
# only runs where that exact file location exists.
url = "C:/Users/dnwjd/OneDrive/Desktop/CSE_6/딥러닝/AI-class-main/AI-class-main/abalone.csv"
data = pd.read_csv(filepath_or_buffer=url)

In [28]:
# Replace the categorical 'Sex' column with integer codes, in place.
# NOTE(review): integer codes put an arbitrary order on a nominal feature;
# one-hot encoding may be a better fit — confirm before changing.
label_encoder = LabelEncoder()
sex_codes = label_encoder.fit_transform(data['Sex'])  # 'M', 'F', 'I' -> integer codes
data['Sex'] = sex_codes

In [29]:
# Separate model inputs from the prediction target.
# Every column except 'Rings' (the target) and 'id' is used as a feature.
feature_frame = data.drop(columns=['Rings', 'id'])
X = feature_frame.to_numpy()
y = data['Rings'].to_numpy()  # 'Rings' is the label

In [30]:
# Hold out 20% of the rows as a test set; the fixed random_state keeps the
# split the same from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [31]:
# Standardize the features with StandardScaler.
# The scaler is fitted on the training split only and then applied to both
# splits, so the test data does not influence the fitted statistics.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [32]:
# Derive binary classification targets from the ring counts:
# 1 when Rings >= 10, otherwise 0.
y_train_class = np.greater_equal(y_train, 10).astype(int)
y_test_class = np.greater_equal(y_test, 10).astype(int)

print("Data preprocessing completed!")

Data preprocessing completed!


In [33]:
#CNN 분류

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Dense, Flatten

In [34]:
# Conv1D expects 3D input shaped (samples, timesteps, features).
# Conv1D needs at least 2 timesteps for the sequence axis to be meaningful, so
# the feature axis is used as the timestep axis and a trailing axis of size 1
# is appended as the single channel.
X_train_cnn = np.expand_dims(X_train_scaled, axis=-1)
X_test_cnn = np.expand_dims(X_test_scaled, axis=-1)

In [35]:
# Build the CNN binary classifier.
# The input shape is declared with an explicit Input layer instead of passing
# `input_shape=` to Conv1D, which recent Keras versions warn against for
# Sequential models. The layer stack itself is unchanged.
from tensorflow.keras.layers import Input

cnn_class_model = Sequential()
cnn_class_model.add(Input(shape=(X_train_scaled.shape[1], 1)))  # (timesteps, features)
cnn_class_model.add(Conv1D(filters=64, kernel_size=2, activation='relu'))
cnn_class_model.add(MaxPooling1D(pool_size=2))
cnn_class_model.add(Flatten())
cnn_class_model.add(Dense(32, activation='relu'))
cnn_class_model.add(Dense(1, activation='sigmoid'))  # single sigmoid unit: binary classification

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [36]:
# Configure training: Adam optimizer, binary cross-entropy loss, and accuracy
# as the reported metric.
cnn_class_model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [37]:
# Train on the training split for 50 epochs in mini-batches of 32, printing
# per-epoch progress. The call is left as the cell's final expression so the
# returned History object is still displayed.
cnn_class_model.fit(
    x=X_train_cnn,
    y=y_train_class,
    batch_size=32,
    epochs=50,
    verbose=1,
)

Epoch 1/50
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7379 - loss: 0.5847
Epoch 2/50
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7459 - loss: 0.5108
Epoch 3/50
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7659 - loss: 0.4922
Epoch 4/50
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7531 - loss: 0.4941
Epoch 5/50
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7627 - loss: 0.4936
Epoch 6/50
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7632 - loss: 0.4836
Epoch 7/50
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7727 - loss: 0.4765
Epoch 8/50
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7779 - loss: 0.4628
Epoch 9/50
[1m105/105[0m [32m━━━━━━━━

<keras.src.callbacks.history.History at 0x216fbff3cb0>

In [38]:
# Evaluate on the held-out test split and report accuracy as a percentage.
# The model was compiled with a single metric, so evaluate() yields the loss
# followed by the accuracy.
loss, accuracy = cnn_class_model.evaluate(x=X_test_cnn, y=y_test_class)
print(f'CNN Classification Accuracy: {accuracy * 100:.2f}%')

[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7927 - loss: 0.4214
CNN Classification Accuracy: 77.03%


In [39]:
# LSTM 분류

from tensorflow.keras.layers import LSTM

In [43]:
# LSTM expects 3D input shaped (samples, timesteps, features).
# A timestep axis of length 1 is inserted, so each sample is a single timestep
# carrying all of its features.
X_train_lstm = np.expand_dims(X_train_scaled, axis=1)
X_test_lstm = np.expand_dims(X_test_scaled, axis=1)

In [44]:
# Build the LSTM binary classifier.
# The input shape is declared with an explicit Input layer instead of passing
# `input_shape=` to the LSTM layer, which recent Keras versions warn against
# for Sequential models. The layer stack itself is unchanged.
from tensorflow.keras.layers import Input

lstm_class_model = Sequential()
lstm_class_model.add(Input(shape=(1, X_train_scaled.shape[1])))  # (timesteps=1, features)
lstm_class_model.add(LSTM(64, activation='relu'))
lstm_class_model.add(Dense(32, activation='relu'))
lstm_class_model.add(Dense(1, activation='sigmoid'))  # single sigmoid unit: binary classification

In [45]:
# Configure training: Adam optimizer, binary cross-entropy loss, and accuracy
# as the reported metric.
lstm_class_model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [46]:
# Train on the training split for 50 epochs in mini-batches of 32, printing
# per-epoch progress. The call is left as the cell's final expression so the
# returned History object is still displayed.
lstm_class_model.fit(
    x=X_train_lstm,
    y=y_train_class,
    batch_size=32,
    epochs=50,
    verbose=1,
)

Epoch 1/50
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.6951 - loss: 0.6189
Epoch 2/50
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7344 - loss: 0.5101
Epoch 3/50
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7633 - loss: 0.4833
Epoch 4/50
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7828 - loss: 0.4627
Epoch 5/50
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7993 - loss: 0.4354
Epoch 6/50
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8035 - loss: 0.4349
Epoch 7/50
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8040 - loss: 0.4207
Epoch 8/50
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7963 - loss: 0.4245
Epoch 9/50
[1m105/105[0m [32m━━━━━━━━

<keras.src.callbacks.history.History at 0x216fbff3110>

In [47]:
# Evaluate on the held-out test split and report accuracy as a percentage.
# The model was compiled with a single metric, so evaluate() yields the loss
# followed by the accuracy.
loss, accuracy = lstm_class_model.evaluate(x=X_test_lstm, y=y_test_class)
print(f'LSTM Classification Accuracy: {accuracy * 100:.2f}%')

[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8164 - loss: 0.3894 
LSTM Classification Accuracy: 80.02%
