In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

In [4]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/abalone.csv')

In [6]:
print(df.head())

   id Sex  Length  Diameter  Height  Whole_weight  Shucked_weight  \
0   0   M   0.455     0.365   0.095        0.5140          0.2245   
1   1   M   0.350     0.265   0.090        0.2255          0.0995   
2   2   F   0.530     0.420   0.135        0.6770          0.2565   
3   3   M   0.440     0.365   0.125        0.5160          0.2155   
4   4   I   0.330     0.255   0.080        0.2050          0.0895   

   Viscera_weight  Shell_weight  Rings  
0          0.1010         0.150     15  
1          0.0485         0.070      7  
2          0.1415         0.210      9  
3          0.1140         0.155     10  
4          0.0395         0.055      7  


In [7]:
# 성별(Sex) 데이터를 숫자형으로 변환
label_encoder = LabelEncoder()
df['Sex'] = label_encoder.fit_transform(df['Sex'])

# 특징(X)과 타겟(y)을 분리합니다. 여기서 Rings는 나이를 의미하는 타겟 값
X = df.drop('Rings', axis=1)
y = df['Rings']

# 데이터를 표준화
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


In [8]:
# 훈련 세트와 테스트 세트로 데이터를 나눔
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

In [9]:
# 랜덤 포레스트 회귀 모델을 사용하여 데이터를 학습
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

regressor = RandomForestRegressor(n_estimators=100, random_state=42)
regressor.fit(X_train, y_train)

In [10]:
# 테스트 세트에 대한 예측
y_pred = regressor.predict(X_test)

# 평균 제곱 오차와 결정 계수를 계산
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f'평균 제곱 오차 (머신러닝): {mse}')

평균 제곱 오차 (머신러닝): 3.804511602870814


In [11]:
# Rings 값을 나이에 따라 범주형 데이터로 변환
def categorize_rings(rings):
    if rings <= 8:
        return 'Young'
    elif rings <= 10:
        return 'Adult'
    else:
        return 'Old'

# 나이를 범주형으로 변환한 데이터를 y_class에 저장
y_class = y.apply(categorize_rings)

In [12]:
# 범주형 나이 데이터를 숫자형으로 변환
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y_class)

In [13]:
# 훈련 세트와 테스트 세트로 데이터를 나눔
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_encoded, test_size=0.2, random_state=42)

In [14]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

classifier = RandomForestClassifier(n_estimators=100, random_state=42)
classifier.fit(X_train, y_train)

In [15]:
# 테스트 세트에 대한 예측을 수행
y_pred = classifier.predict(X_test)

# 분류 모델의 정확도를 계산
accuracy = accuracy_score(y_test, y_pred)

print(f'정확도 (머신러닝): {accuracy}')
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))

정확도 (머신러닝): 0.687799043062201
              precision    recall  f1-score   support

       Adult       0.57      0.55      0.56       281
         Old       0.69      0.75      0.72       276
       Young       0.80      0.77      0.79       279

    accuracy                           0.69       836
   macro avg       0.69      0.69      0.69       836
weighted avg       0.69      0.69      0.69       836



In [16]:
# TensorFlow를 사용하여 신경망 모델을 생성
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [17]:
# 회귀 신경망 모델을 정의
model = Sequential()
model.add(Dense(64, activation='relu', input_shape=(X_train.shape[1],)))
model.add(Dense(32, activation='relu'))
model.add(Dense(1))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [18]:
# 모델을 컴파일하고 학습
model.compile(optimizer='adam', loss='mean_squared_error')
history = model.fit(X_train, y_train, epochs=100, batch_size=32, validation_split=0.1)

Epoch 1/100
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 0.7777 - val_loss: 0.4979
Epoch 2/100
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.4778 - val_loss: 0.4832
Epoch 3/100
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.4780 - val_loss: 0.4800
Epoch 4/100
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.4548 - val_loss: 0.4748
Epoch 5/100
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.4333 - val_loss: 0.4763
Epoch 6/100
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.4559 - val_loss: 0.4693
Epoch 7/100
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.4331 - val_loss: 0.4661
Epoch 8/100
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.4157 - val_loss: 0.4730
Epoch 9/100
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━

In [19]:
# 테스트 세트에 대한 예측을 수행
y_pred = model.predict(X_test)

# 신경망 회귀 모델의 평균 제곱 오차와 결정 계수를 계산
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f'평균 제곱 오차 (신경망): {mse}')

[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
평균 제곱 오차 (신경망): 0.5010673436891393


In [20]:
# 분류를 위한 신경망 모델을 정의
model = Sequential()
model.add(Dense(64, activation='relu', input_shape=(X_train.shape[1],)))
model.add(Dense(32, activation='relu'))
model.add(Dense(3, activation='softmax'))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [21]:
# 모델을 컴파일하고 학습합
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
history = model.fit(X_train, y_train, epochs=100, batch_size=32, validation_split=0.1)

Epoch 1/100
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.5418 - loss: 0.9272 - val_accuracy: 0.5194 - val_loss: 0.8752
Epoch 2/100
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6020 - loss: 0.8079 - val_accuracy: 0.5373 - val_loss: 0.8667
Epoch 3/100
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6369 - loss: 0.7818 - val_accuracy: 0.6090 - val_loss: 0.8224
Epoch 4/100
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6589 - loss: 0.7277 - val_accuracy: 0.6149 - val_loss: 0.8019
Epoch 5/100
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6541 - loss: 0.7316 - val_accuracy: 0.6119 - val_loss: 0.8077
Epoch 6/100
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6711 - loss: 0.7015 - val_accuracy: 0.6179 - val_loss: 0.7895
Epoch 7/100
[1m94/94[0m [32m━━━

In [22]:
# 테스트 세트에서 모델을 평가
loss, accuracy = model.evaluate(X_test, y_test)
print(f'정확도 (신경망): {accuracy}')

[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6749 - loss: 0.7027 
정확도 (신경망): 0.6698564887046814
