# 전복 데이터 CNN 회귀

## 1. 데이터 불러오기

In [27]:
import pandas as pd

# Load the abalone dataset from a local CSV file into a DataFrame.
csv_path = r"C:\Users\DoHyeonjik\GachonUniv\3-2\datasets\DL\abalone.csv"
df = pd.read_csv(csv_path)
df  # last expression of the cell, so the notebook renders the DataFrame

Unnamed: 0,id,Sex,Length,Diameter,Height,Whole_weight,Shucked_weight,Viscera_weight,Shell_weight,Rings
0,0,M,0.455,0.365,0.095,0.5140,0.2245,0.1010,0.1500,15
1,1,M,0.350,0.265,0.090,0.2255,0.0995,0.0485,0.0700,7
2,2,F,0.530,0.420,0.135,0.6770,0.2565,0.1415,0.2100,9
3,3,M,0.440,0.365,0.125,0.5160,0.2155,0.1140,0.1550,10
4,4,I,0.330,0.255,0.080,0.2050,0.0895,0.0395,0.0550,7
...,...,...,...,...,...,...,...,...,...,...
4172,4172,F,0.565,0.450,0.165,0.8870,0.3700,0.2390,0.2490,11
4173,4173,M,0.590,0.440,0.135,0.9660,0.4390,0.2145,0.2605,10
4174,4174,M,0.600,0.475,0.205,1.1760,0.5255,0.2875,0.3080,9
4175,4175,F,0.625,0.485,0.150,1.0945,0.5310,0.2610,0.2960,10


In [28]:
# Inspect the DataFrame's dimensions and its column labels
print(df.shape, df.columns, sep="\n")

(4177, 10)
Index(['id', 'Sex', 'Length', 'Diameter', 'Height', 'Whole_weight',
       'Shucked_weight', 'Viscera_weight', 'Shell_weight', 'Rings'],
      dtype='object')


## 2. 데이터 전처리

In [29]:
# Label-encode the categorical 'Sex' column into integer codes, in place.
# In the output printed for this cell the codes are F -> 0, I -> 1, M -> 2.
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
df['Sex'] = le.fit_transform(df['Sex'])
print(df['Sex'])
print(df)

0       2
1       2
2       0
3       2
4       1
       ..
4172    0
4173    2
4174    2
4175    0
4176    2
Name: Sex, Length: 4177, dtype: int32
        id  Sex  Length  Diameter  Height  Whole_weight  Shucked_weight  \
0        0    2   0.455     0.365   0.095        0.5140          0.2245   
1        1    2   0.350     0.265   0.090        0.2255          0.0995   
2        2    0   0.530     0.420   0.135        0.6770          0.2565   
3        3    2   0.440     0.365   0.125        0.5160          0.2155   
4        4    1   0.330     0.255   0.080        0.2050          0.0895   
...    ...  ...     ...       ...     ...           ...             ...   
4172  4172    0   0.565     0.450   0.165        0.8870          0.3700   
4173  4173    2   0.590     0.440   0.135        0.9660          0.4390   
4174  4174    2   0.600     0.475   0.205        1.1760          0.5255   
4175  4175    0   0.625     0.485   0.150        1.0945          0.5310   
4176  4176    2   0.710    

In [30]:
# Separate the target column ('Rings') from the features.
# 'id' is dropped from the features as well.
df_label = df['Rings']
df_data = df.drop(columns=['Rings', 'id'])
print(df_data.shape, df_label.shape)

(4177, 8) (4177,)


In [31]:
# Split into training and test sets: 20% of the rows are held out for
# testing, and random_state is fixed so the split is reproducible.
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test= train_test_split(df_data, df_label, test_size=0.2, random_state=42)
print(x_train.shape, x_test.shape)
print(y_train.shape, y_test.shape)

(3341, 8) (836, 8)
(3341,) (836,)


In [32]:
# Prepare the inputs for split_sequence
# 1) Within each set, append the labels to the features as a final column
import numpy as np

y_train, y_test = np.array(y_train), np.array(y_test)
train_set = np.column_stack((x_train, y_train))
test_set = np.column_stack((x_test, y_test))
print(train_set.shape, test_set.shape, sep="\n")

(3341, 9)
(836, 9)


In [33]:
#split_sequence() 작성
from collections import Counter

def split_sequence(sequence, n_steps):
    """Cut a 2-D array into overlapping windows of ``n_steps`` consecutive rows.

    The last column of ``sequence`` is treated as the label column and all
    other columns as features. Windows advance one row at a time.

    Args:
        sequence: 2-D array-like of shape (n_rows, n_features + 1).
        n_steps: Number of consecutive rows per window; must be at least 1.

    Returns:
        A tuple ``(x, y)`` of NumPy arrays. ``x`` has shape
        (n_windows, n_steps, n_features) and holds the feature columns of
        each window. ``y`` has shape (n_windows,) and holds, for each window,
        the most frequent value of the label column; on a tie the value that
        appears first in the window wins. When ``n_steps`` exceeds the number
        of rows, both arrays are empty but keep these shapes.

    Raises:
        ValueError: If ``n_steps`` is smaller than 1.
    """
    if n_steps < 1:
        # A zero-length window has no label to take the mode of.
        raise ValueError(f"n_steps must be at least 1, got {n_steps}")

    sequence = np.asarray(sequence)
    n_windows = len(sequence) - n_steps + 1

    if n_windows <= 0:
        # Keep the output rank stable so downstream shape handling still works.
        n_features = max(sequence.shape[1] - 1, 0) if sequence.ndim == 2 else 0
        empty_x = np.empty((0, n_steps, n_features), dtype=sequence.dtype)
        empty_y = np.empty((0,), dtype=sequence.dtype)
        return empty_x, empty_y

    windows, labels = [], []
    for start in range(n_windows):
        stop = start + n_steps
        windows.append(sequence[start:stop, :-1])
        # Counter preserves first-seen order, so ties resolve to the earliest value.
        labels.append(Counter(sequence[start:stop, -1]).most_common(1)[0][0])

    return np.array(windows), np.array(labels)

In [34]:
# Apply split_sequence to the training and test sets.
# x_train/y_train and x_test/y_test are rebound here: from this point on they
# hold windows of n_steps consecutive rows and one label per window.
n_steps = 5
x_train, y_train = split_sequence(train_set, n_steps)
x_test, y_test = split_sequence(test_set, n_steps)
print(x_train.shape, y_train.shape)
print(x_test.shape, y_test.shape)

(3337, 5, 8) (3337,)
(832, 5, 8) (832,)


In [35]:
# Inspect the windowed training data: container type, features, then labels
print(type(x_train), x_train, y_train, sep="\n")

<class 'numpy.ndarray'>
[[[1.     0.55   0.445  ... 0.288  0.1365 0.21  ]
  [1.     0.475  0.355  ... 0.2535 0.091  0.14  ]
  [0.     0.305  0.225  ... 0.0585 0.0335 0.045 ]
  [1.     0.275  0.2    ... 0.0565 0.013  0.035 ]
  [2.     0.495  0.38   ... 0.263  0.1425 0.215 ]]

 [[1.     0.475  0.355  ... 0.2535 0.091  0.14  ]
  [0.     0.305  0.225  ... 0.0585 0.0335 0.045 ]
  [1.     0.275  0.2    ... 0.0565 0.013  0.035 ]
  [2.     0.495  0.38   ... 0.263  0.1425 0.215 ]
  [1.     0.505  0.38   ... 0.2595 0.1435 0.18  ]]

 [[0.     0.305  0.225  ... 0.0585 0.0335 0.045 ]
  [1.     0.275  0.2    ... 0.0565 0.013  0.035 ]
  [2.     0.495  0.38   ... 0.263  0.1425 0.215 ]
  [1.     0.505  0.38   ... 0.2595 0.1435 0.18  ]
  [0.     0.655  0.5    ... 0.508  0.314  0.445 ]]

 ...

 [[0.     0.6    0.48   ... 0.4575 0.2435 0.3135]
  [2.     0.525  0.405  ... 0.316  0.1455 0.28  ]
  [0.     0.49   0.4    ... 0.256  0.1325 0.145 ]
  [0.     0.67   0.55   ... 0.5425 0.3035 0.4   ]
  [2.     0.51

## 3. 모델 생성 및 학습

In [36]:
# Build the model
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Input, MaxPooling2D, Flatten

model = Sequential()

# Declare the input shape with an explicit Input layer rather than the
# input_shape argument on Conv2D, which recent Keras versions warn about.
# Each sample is a 5x8 window with a single channel.
model.add(Input(shape=(5, 8, 1)))
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
# Single output unit with no activation: one continuous value per sample.
model.add(Dense(1))

# Mean squared error is the training loss; mean absolute error is also reported.
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

model.summary()

  super().__init__(


In [37]:
# Train for 20 epochs, holding out 10% of the training windows for validation.
# The model declares a (5, 8, 1) input while x_train is (N, 5, 8), so add the
# trailing channel axis explicitly instead of relying on Keras to adjust the
# input rank.
history = model.fit(x_train[..., np.newaxis], y_train, epochs=20, batch_size=32, validation_split=0.1, shuffle=True)

Epoch 1/20
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 52.0022 - mae: 6.1168 - val_loss: 6.7976 - val_mae: 1.9252
Epoch 2/20
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 7.5302 - mae: 1.9947 - val_loss: 6.4509 - val_mae: 1.8917
Epoch 3/20
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 6.4953 - mae: 1.8683 - val_loss: 6.1237 - val_mae: 1.7749
Epoch 4/20
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 6.8773 - mae: 1.8577 - val_loss: 5.8012 - val_mae: 1.7376
Epoch 5/20
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 6.4540 - mae: 1.8333 - val_loss: 5.6763 - val_mae: 1.7138
Epoch 6/20
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 6.3857 - mae: 1.8308 - val_loss: 5.7950 - val_mae: 1.6957
Epoch 7/20
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 5.9956 

In [38]:
# Report [loss, mae] on the held-out test windows. The channel axis is added
# explicitly so the data matches the model's declared (5, 8, 1) input.
print(model.evaluate(x_test[..., np.newaxis], y_test))

[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 6.0673 - mae: 1.6824  
[5.250033378601074, 1.6455206871032715]
