In [1]:
# 데이터 로드 및 전처리
import os

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tensorflow.keras.layers import Input
from tensorflow.keras.utils import set_random_seed

In [8]:
# Load the abalone dataset; the CSV's first row is treated as the header row.
# The file location can be overridden through the ABALONE_CSV environment variable
# so the notebook is not tied to a single machine; when the variable is unset the
# original local path is used, so existing behaviour is unchanged.
url = os.environ.get(
    "ABALONE_CSV",
    "C:/Users/dnwjd/OneDrive/Desktop/CSE_6/딥러닝/AI-class-main/AI-class-main/abalone.csv",
)
data = pd.read_csv(url, header=0)  # header=0: column names come from the first row

In [9]:
# EDA: list the dtype pandas inferred for every column.
column_dtypes = data.dtypes
print(column_dtypes)

id                  int64
Sex                object
Length            float64
Diameter          float64
Height            float64
Whole_weight      float64
Shucked_weight    float64
Viscera_weight    float64
Shell_weight      float64
Rings               int64
dtype: object


In [10]:
# EDA: count the missing values in each column.
missing_per_column = data.isnull().sum()
print(missing_per_column)

id                0
Sex               0
Length            0
Diameter          0
Height            0
Whole_weight      0
Shucked_weight    0
Viscera_weight    0
Shell_weight      0
Rings             0
dtype: int64


In [11]:
# 2. Label-encode the categorical 'Sex' column ('M', 'F', 'I') as integer codes.
label_encoder = LabelEncoder()
label_encoder.fit(data['Sex'])
# Overwrite the string column with the fitted encoder's integer codes.
data['Sex'] = label_encoder.transform(data['Sex'])

In [12]:
# 3. Separate the features from the label.
# Features: every column except the 'Rings' target and the 'id' column.
feature_columns = data.drop(columns=['Rings', 'id'])
X = feature_columns.values
# Label: the 'Rings' column.
y = data['Rings'].values

In [13]:
# 4. Hold out 20% of the rows as a test set; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [14]:
# 5. Standardise the features with StandardScaler.
# The scaler is fitted on the training rows only; that same fitted scaler then
# transforms both the training rows and the test rows.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

print("Data scaling successful!")

Data scaling successful!


In [15]:
# 머신러닝 분류 

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

In [16]:
# 6. Build binary labels for classification: 1 when Rings >= 10, otherwise 0.
y_train_class, y_test_class = (
    (rings >= 10).astype(int) for rings in (y_train, y_test)
)

In [17]:
# 7. Train the random-forest classifier on the scaled training features.
# random_state is fixed (same seed as the train/test split) so the forest's
# randomised tree construction, and therefore the reported metrics, are
# repeatable after a kernel restart.
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train_scaled, y_train_class)

In [18]:
# 8. Predict on the test set and print the per-class precision / recall / F1 report.
y_pred_class = clf.predict(X_test_scaled)
report_text = classification_report(y_test_class, y_pred_class)
print(report_text)

              precision    recall  f1-score   support

           0       0.80      0.76      0.78       421
           1       0.77      0.80      0.79       415

    accuracy                           0.78       836
   macro avg       0.78      0.78      0.78       836
weighted avg       0.78      0.78      0.78       836



In [19]:
# 머신러닝 회귀

from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

In [20]:
# 9. Train the random-forest regressor on the scaled training features,
# predicting the raw 'Rings' values.
# random_state is fixed (same seed as the train/test split) so the reported
# MSE / R2 are repeatable after a kernel restart.
reg = RandomForestRegressor(random_state=42)
reg.fit(X_train_scaled, y_train)

In [21]:
# 10. Predict on the test set and report the mean squared error and R^2 score.
y_pred_reg = reg.predict(X_test_scaled)
mse, r2 = (
    metric(y_test, y_pred_reg) for metric in (mean_squared_error, r2_score)
)
print(f'MSE: {mse}, R2: {r2}')

MSE: 4.981766028708134, R2: 0.5397995967059739


In [22]:
# 신경망 분류

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [23]:
# 11. Build the neural-network classifier.
# Seed the Python, NumPy and backend RNGs before any layer is created so weight
# initialisation (and later batch shuffling) is repeatable on a fresh run.
set_random_seed(42)
model_class = Sequential()
# Declare the input width with an explicit Input object; passing input_shape= to
# the first Dense layer makes Keras emit a UserWarning.
model_class.add(Input(shape=(X_train_scaled.shape[1],)))
model_class.add(Dense(64, activation='relu'))
model_class.add(Dense(32, activation='relu'))
model_class.add(Dense(1, activation='sigmoid'))  # sigmoid output for binary classification

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [24]:
# 12. Compile: Adam optimiser, binary cross-entropy loss, accuracy as the tracked metric.
model_class.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# 13. Train for 50 epochs in mini-batches of 32, printing per-epoch progress.
model_class.fit(
    X_train_scaled,
    y_train_class,
    batch_size=32,
    epochs=50,
    verbose=1,
)

# 14. Evaluate on the test set; the result unpacks into the loss and the accuracy metric.
test_metrics = model_class.evaluate(X_test_scaled, y_test_class)
loss, accuracy = test_metrics
print(f'Classification Accuracy: {accuracy * 100:.2f}%')

Epoch 1/50
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.7344 - loss: 0.5482
Epoch 2/50
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7595 - loss: 0.4877
Epoch 3/50
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7751 - loss: 0.4704
Epoch 4/50
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8013 - loss: 0.4319
Epoch 5/50
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8000 - loss: 0.4323
Epoch 6/50
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7827 - loss: 0.4432
Epoch 7/50
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7911 - loss: 0.4357
Epoch 8/50
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8017 - loss: 0.4282
Epoch 9/50
[1m105/105[0m [32m━━━━━━━━

In [25]:
# Neural-network regression

# 15. Build the neural-network regressor.
# Seed the Python, NumPy and backend RNGs before any layer is created so weight
# initialisation (and later batch shuffling) is repeatable on a fresh run.
set_random_seed(42)
model_reg = Sequential()
# Declare the input width with an explicit Input object; passing input_shape= to
# the first Dense layer makes Keras emit a UserWarning.
model_reg.add(Input(shape=(X_train_scaled.shape[1],)))
model_reg.add(Dense(64, activation='relu'))
model_reg.add(Dense(32, activation='relu'))
model_reg.add(Dense(1))  # regression: linear output, no activation

In [26]:
# 16. Compile the regression network: Adam optimiser, mean-squared-error loss.
model_reg.compile(loss='mse', optimizer='adam')

In [27]:
# 17. Train the regression network for 50 epochs in mini-batches of 32,
# printing per-epoch progress.
model_reg.fit(
    X_train_scaled,
    y_train,
    batch_size=32,
    epochs=50,
    verbose=1,
)

Epoch 1/50
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 83.4781
Epoch 2/50
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 19.1695
Epoch 3/50
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 7.2892
Epoch 4/50
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 6.2196
Epoch 5/50
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 5.7602
Epoch 6/50
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 5.5122
Epoch 7/50
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 5.3068
Epoch 8/50
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 5.0972
Epoch 9/50
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 5.1421
Epoch 10/50
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - 

<keras.src.callbacks.history.History at 0x1d874901970>

In [28]:
# 18. Evaluate the regression network on the test set. The model was compiled with
# an 'mse' loss and no extra metrics, so the value returned is the test-set MSE.
mse = model_reg.evaluate(x=X_test_scaled, y=y_test)
print(f'Regression MSE: {mse}')

[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 4.5527  
Regression MSE: 4.417262077331543
