In [1]:
import pickle 
import pandas as pd

def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``."""
    # NOTE(review): unpickling can execute arbitrary code; only load files
    # that this project produced itself.
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Load the three pickled "step4_importance" artifacts (presumably the train
# features, train labels and test features -- confirm against the notebook
# step that wrote them).
train_importance = _load_pickle("titanic_step4_importance_train.pickle")
train_y = _load_pickle("titanic_step4_importance_train_y.pickle")
test_importance = _load_pickle("titanic_step4_importance_test.pickle")



In [2]:
import numpy as np

from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from scipy import stats

from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.naive_bayes import GaussianNB
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
import warnings

warnings.filterwarnings("ignore")


#### SVC 하이퍼 파라미터
- SVC는 클래스 사이의 마진(margin)이 최대가 되는 하이퍼 플레인(결정 경계)을 구하는 것이 목표
- 하이퍼 플레인을 일직선(선형) 또는 곡률을 가진 선(비선형)으로 할지 kernel 파라미터로 선택 가능
- 주요 하이퍼 파라미터
    - C: 규제(regularization) 강도의 역수 — 값이 작을수록 규제가 강해지고, 클수록 훈련 데이터의 오분류를 덜 허용
    - gamma: 커널 계수 — 값이 클수록 훈련 데이터셋에 더 세밀하게 fit하는 결정 경계를 만듦

#### RandomizedSearchCV + SVC
- 같은 탐색 횟수라면 Grid Search보다 좋은 하이퍼 파라미터를 찾는 경우가 많다고 평가됨
- 하지만 수행 시간이 상당히 걸릴 수 있음 

In [5]:
# Search space for the SVC: every trial draws C and gamma independently from
# continuous uniform distributions (scipy's uniform(loc, scale) spans
# [loc, loc + scale]).
hyper_params = dict(
    C=stats.uniform(loc=0, scale=50),
    gamma=stats.uniform(loc=0, scale=1),
)

# Randomized search over 100 sampled candidates, each scored by 5-fold
# cross-validated accuracy. The estimator and the sampler are both seeded.
gd = RandomizedSearchCV(
    SVC(random_state=2025),
    hyper_params,
    n_iter=100,
    scoring="accuracy",
    cv=5,  # an integer cv uses stratified K-fold internally for a classifier
    n_jobs=-1,  # run the fits in parallel on all available cores
    random_state=2025,
)
gd.fit(train_importance, train_y)

# Best mean CV accuracy, then the parameters that achieved it (one per line).
print(gd.best_score_, gd.best_params_, sep="\n")

# Per-candidate summary: sampled parameters next to their mean CV accuracy.
df = pd.DataFrame(gd.cv_results_)
print(df.loc[:, ["params", "mean_test_score"]])

0.8339087314041805
{'C': np.float64(4.889400554283291), 'gamma': np.float64(0.1516847042112811)}
                                               params  mean_test_score
0   {'C': 6.77440818389809, 'gamma': 0.88785170273...         0.817086
1   {'C': 46.630281994325124, 'gamma': 0.445568164...         0.821549
2   {'C': 19.41177730569913, 'gamma': 0.2575964353...         0.822666
3   {'C': 32.86837927355188, 'gamma': 0.4926169375...         0.821549
4   {'C': 48.21192096250286, 'gamma': 0.8009844748...         0.820444
..                                                ...              ...
95  {'C': 14.63028322882729, 'gamma': 0.7955577851...         0.820444
96  {'C': 17.165184253749988, 'gamma': 0.156595114...         0.819315
97  {'C': 31.259768801363897, 'gamma': 0.212003570...         0.822666
98  {'C': 27.108447939999504, 'gamma': 0.875683423...         0.819321
99  {'C': 28.86465178921625, 'gamma': 0.4122925309...         0.821549

[100 rows x 2 columns]
