### 필요 패키지 설치

In [1]:
!pip install wandb

Collecting wandb
  Downloading wandb-0.18.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.6 kB)
Collecting click!=8.0.0,>=7.1 (from wandb)
  Downloading click-8.1.7-py3-none-any.whl.metadata (3.0 kB)
Collecting docker-pycreds>=0.4.0 (from wandb)
  Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl.metadata (1.8 kB)
Collecting gitpython!=3.1.29,>=1.0.0 (from wandb)
  Downloading GitPython-3.1.43-py3-none-any.whl.metadata (13 kB)
Collecting protobuf!=4.21.0,!=5.28.0,<6,>=3.19.0 (from wandb)
  Downloading protobuf-5.28.2-cp38-abi3-manylinux2014_x86_64.whl.metadata (592 bytes)
Collecting pyyaml (from wandb)
  Downloading PyYAML-6.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.1 kB)
Collecting requests<3,>=2.0.0 (from wandb)
  Downloading requests-2.32.3-py3-none-any.whl.metadata (4.6 kB)
Collecting sentry-sdk>=1.0.0 (from wandb)
  Downloading sentry_sdk-2.14.0-py2.py3-none-any.whl.metadata (9.7 kB)
Collecting setproctitle (from wandb)
  

### wandb 로그인


wandb에 로그인합니다. 실행 시 표시되는 프롬프트에 API 키를 입력합니다.

In [2]:
import wandb

# Log in to Weights & Biases. In the recorded run this prompted for an API key
# and appended it to the user's netrc file.
wandb.login()

[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /data/ephemeral/home/.netrc


True

### hyperparameter_tune

튜닝할 파라미터의 이름, 자료형, 범위 등을 설정합니다.

In [None]:
# Sweep definition: search strategy, objective metric and candidate hyperparameter values.
#
# Parameter-name prefixes identify the estimator each value is passed to by the
# training function:
#   lg_  -> LogisticRegression
#   rf_  -> RandomForestClassifier
#   dc_  -> DecisionTreeClassifier
#   knn_ -> KNeighborsClassifier

sweep_config = {
    # Sample each trial's combination at random from the value lists below.
    'method': 'random',
    'metric': {
        # Must match a key the training function actually logs. The function logs
        # 'train_accuracy', 'valid_accuracy' and 'cv_metric' -- never plain 'accuracy' --
        # so the validation accuracy is used as the sweep objective.
        'name': 'valid_accuracy',
        'goal': 'maximize'
    },
    'parameters': {
        'lg_max_iter': {
            'values': [300, 500, 700]
        },
        'lg_class_weight': {
            'values': [None, 'balanced']
        },
        'rf_n_estimators': {
            'values': [50, 100, 200]
        },
        'rf_max_depth': {
            'values': [10, 20, 30]
        },
        'rf_min_samples_split': {
            'values': [2, 5, 10]
        },
        'dc_min_samples_split': {
            'values': [2, 4, 8]
        },
        'dc_max_depth': {
            'values': [10, 20, 40]
        },
        'knn_n_neighbors': {
            'values': [4, 6, 8]
        },
        'knn_leaf_size': {
            'values': [20, 30, 40]
        }
    }
}

# Register the sweep under the "hyperparameter_tune" project and keep the
# returned id so the agent cell can attach to it.
sweep_id = wandb.sweep(sweep_config, project="hyperparameter_tune")

# Show the sweep id as the cell output.
sweep_id

#### sweep 학습

In [None]:
#### Temporary block for loading the training data.
#### NOTE(review): the training function reads x_train, y_train, x_valid and y_valid as
#### globals, but none of them is defined in the visible notebook -- load them in this cell.


In [None]:
from sklearn.ensemble import VotingClassifier, RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier

from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, f1_score
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier

import numpy as np


def soft_voting(predictions):
    """Pick, per sample, the column with the largest summed probability across models.

    Args:
        predictions: Sequence of equally shaped arrays, one per model
            (presumably ``predict_proba`` outputs of shape (n_samples, n_classes)).

    Returns:
        1-D array holding, for every sample, the index of the winning column.
        These are column indices, not class labels.
    """
    summed_probs = np.sum(predictions, axis=0)
    return summed_probs.argmax(axis=1)


def wandb_training_function(random_state=42):
    """Run one sweep trial: fit the four base classifiers and log soft-voting accuracy.

    Hyperparameters are read from the config of the run started by ``wandb.init()``.
    The trial logs ``train_accuracy`` and ``valid_accuracy`` first, then ``cv_metric``
    (the mean of the collected validation scores; only one score is collected here).

    NOTE(review): ``x_train``, ``y_train``, ``x_valid`` and ``y_valid`` are read as
    globals and are not defined in the visible notebook; they must exist before the
    sweep agent calls this function.

    Args:
        random_state: Seed passed to the random forest and the decision tree so that
            differences between trials come from the hyperparameters rather than
            from seed noise. The sweep agent calls this function without arguments,
            so the default is what a sweep uses.
    """
    with wandb.init() as run:
        params = run.config

        # Base estimators, each configured from its own prefixed sweep parameters.
        lg = LogisticRegression(max_iter=params['lg_max_iter'],
                                class_weight=params['lg_class_weight'])

        rf = RandomForestClassifier(n_estimators=params['rf_n_estimators'],
                                    max_depth=params['rf_max_depth'],
                                    min_samples_split=params['rf_min_samples_split'],
                                    random_state=random_state)

        dc = DecisionTreeClassifier(min_samples_split=params['dc_min_samples_split'],
                                    max_depth=params['dc_max_depth'],
                                    random_state=random_state)

        knn = KNeighborsClassifier(n_neighbors=params['knn_n_neighbors'],
                                   leaf_size=params['knn_leaf_size'])

        models = [lg, rf, dc, knn]

        # Fit every base model on the same training split.
        for model in models:
            model.fit(x_train, y_train)

        # soft_voting returns column indices of the summed probability matrix, not
        # labels. Every model was fitted on the same y_train, so they share the same
        # sorted classes_; map the indices back to labels before scoring. Without
        # this the accuracy is only correct when the labels are exactly 0..n-1.
        class_labels = models[0].classes_

        preds_train = [model.predict_proba(x_train) for model in models]
        preds_valid = [model.predict_proba(x_valid) for model in models]
        voting_preds_train = class_labels[soft_voting(preds_train)]
        voting_preds_valid = class_labels[soft_voting(preds_valid)]

        # Score the ensemble on both splits.
        train_accuracy = accuracy_score(y_train, voting_preds_train)
        valid_accuracy = accuracy_score(y_valid, voting_preds_valid)

        run.log({"train_accuracy": train_accuracy, "valid_accuracy": valid_accuracy})

        # Only a single validation score is collected, so the mean equals it.
        val_scores = [valid_accuracy]
        run.log({"cv_metric": np.mean(val_scores)})

In [None]:
# Number of sweep trials this agent runs.
count = 20

# Start an agent in this kernel; it calls the training function once per trial.
wandb.agent(
    sweep_id,
    function=wandb_training_function,
    count=count,
)

### 하이퍼 파라미터 분석

ensemble에 사용할 모델인 KNeighborsClassifier, DecisionTreeClassifier, RandomForestClassifier, LogisticRegression에 대한 hyperparameter 최적화를 진행하였음.
아래는 각 모델별 학습 그래프이다.

<center>
<img src='hyperparameter_img/dc.png'/><br>
<img src='hyperparameter_img/knn.png'/><br>
<img src='hyperparameter_img/lg.png'/><br>
<img src='hyperparameter_img/rf.png'/><br>
<img src='hyperparameter_img/all.png'/><br>

</center>

아래는 0.4224의 정확도를 가진 파라미터 값이다.

KNeighborsClassifier(leaf_size = 20, n_neighbors = 4)
DecisionTreeClassifier(max_depth = 20, min_samples_split=2)
RandomForestClassifier(max_depth = 10,min_samples_split = 10, n_estimators = 50)
LogisticRegression(class_weight=None, max_iter=300)

위 파라미터로 soft-voting 앙상블을 진행하였으나 이전보다 더 낮은 정확도(accuracy)를 도출하였다.
