In [9]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
import pandas as pd
from sklearn.model_selection import train_test_split

In [7]:
# Load the fish dataset from its relative location and preview the first rows.
path = '../data/fish.csv'
fish_df = pd.read_csv(filepath_or_buffer=path)
fish_df.head(n=5)

Unnamed: 0,Species,Weight,Length,Diagonal,Height,Width
0,Bream,242.0,25.4,30.0,11.52,4.02
1,Bream,290.0,26.3,31.2,12.48,4.3056
2,Bream,340.0,26.5,31.1,12.3778,4.6961
3,Bream,363.0,29.0,33.5,12.73,4.4555
4,Bream,430.0,29.0,34.0,12.444,5.134


In [14]:
from sklearn.preprocessing import LabelEncoder

In [15]:
# Build the feature matrix and the label vector from the loaded frame.
# 'Unnamed: 0' is the row-index column that came along from the CSV (it shows
# up as 0, 1, 2, ... in the head() output). It is not a fish measurement, and
# if the file is ordered by species it can leak label information into the
# model, so it is removed together with the label column.
# errors='ignore' keeps this cell working if the frame is ever loaded without
# that column (e.g. with index_col=0).
feature = (
    fish_df
    .drop(columns=['Species'])
    .drop(columns=['Unnamed: 0'], errors='ignore')
)
target = fish_df['Species']

# Encode the string species labels as integers.
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(target)

In [16]:
# Hold out a test set (default split size, fixed seed for reproducibility).
# The split is done on the raw string labels in `target`.
X_train, X_test, y_train, y_test = train_test_split(
    feature,
    target,
    random_state=30,
)

In [17]:
# Two-step pipeline: scale the features, then classify with an SVM.
# MinMaxScaler is only a placeholder default; the grid search explores the
# scaler choice together with the other options.
pipe = Pipeline(
    steps=[
        ('scaler', MinMaxScaler()),
        ('model', SVC()),
    ]
)

In [18]:
# Parameter grid for the search.
# 'scaler' replaces the whole pipeline step; the 'model__*' keys set
# hyper-parameters on the SVC step.
param_grid = {
    'scaler': [
        StandardScaler(),
        MinMaxScaler(),
        RobustScaler(),
    ],
    'model__kernel': ['poly', 'rbf'],
    'model__C': [0.001, 0.01, 0.1, 1, 10, 100],
    'model__gamma': [0.001, 0.01, 0.1, 1, 10, 100],
}

In [19]:
# Configure the grid search: 5-fold cross-validation scored on accuracy,
# with n_jobs=-1 to run the fits in parallel.
grid_search = GridSearchCV(
    estimator=pipe,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

In [20]:
# Report the winning hyper-parameters (including the chosen scaler) and the
# corresponding cross-validation score.
print("Best parameters found: ", grid_search.best_params_)
print(f"Best cross-validation accuracy: {grid_search.best_score_:.2f}")

# Evaluate the best pipeline found by the search on the held-out test data.
best_model = grid_search.best_estimator_
test_accuracy = best_model.score(X_test, y_test)
print(f"Test set accuracy: {test_accuracy:.2f}")

Best parameters found:  {'model__C': 0.1, 'model__gamma': 100, 'model__kernel': 'poly', 'scaler': MinMaxScaler()}
Best cross-validation accuracy: 0.94
Test set accuracy: 0.82
