## Basic Modules

In [None]:
### Utils
import os
import glob
import re
import warnings
import joblib
import pandas as pd
import numpy as np
import datetime as dt
# Silence every warning raised from this point on.
warnings.filterwarnings('ignore')
# Render floats with two decimal places in pandas output.
pd.options.display.float_format = '{:.2f}'.format
# Show up to 100 rows and 100 columns, with a display width of 1000 characters.
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', 100)
pd.set_option('display.width', 1000)

### Visualizations
import matplotlib.pyplot as plt
import seaborn as sns
import plotly as py
import plotly.graph_objects as go
import plotly.express as px
import plotly.offline as pyo
from plotly.subplots import make_subplots
from chart_studio.plotly import plot, iplot
plt.style.use('fivethirtyeight')

### ML/DL Models
import tensorflow as tf
from xgboost import XGBRegressor, XGBClassifier
from lightgbm import LGBMRegressor, LGBMClassifier

### Data preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.preprocessing import LabelEncoder


### Install and load W&B (experiment tracking) and WIT (model analysis)

In [None]:
!pip install wandb -q
!pip install witwidget

In [None]:
import wandb
from witwidget.notebook.visualization import WitConfigBuilder
from witwidget.notebook.visualization import WitWidget

## W&B (Weights & Biases)

In [None]:
# Log in to W&B before any run is created.
# NOTE(review): presumably prompts for an API key when none is configured — confirm.
wandb.login()

### Basic

In [None]:
# Initialize a run (starts experiment tracking)
wandb.init(
    project="wandb test",
    config={
        'estimators':99999,
        'learning_rate':0.01
    }
)

# Read the hyperparameters back from the run config
config = wandb.config
# XGBoost's scikit-learn API calls the number of trees `n_estimators`; the W&B
# config key stays 'estimators' so the logged config keeps the same schema.
params = {'n_estimators':config.estimators, 'learning_rate':config.learning_rate}

# Train the model and predict
# NOTE(review): x_train, y_train, x_test, y_test and accuracy_func are not defined
# in the visible cells; they must exist before this cell is run.
model = XGBRegressor(**params)
model.fit(x_train, y_train)
y_pred = model.predict(x_test)
accuracy = accuracy_func(y_test, y_pred)

# Log the metric and close the run
wandb.log({'accuracy':accuracy})
wandb.finish()

### HPO (Hyperparameter Optimization)

In [None]:
# Declare which hyperparameters the sweep should tune and the values to try
sweep_config = dict(
    # method: random or grid
    method='grid',
    parameters=dict(
        max_depth=dict(values=[4, 6, 8, 10]),
        min_child_weight=dict(values=[1, 3, 5]),
    ),
)

# Register the sweep and keep its id for the agent
sweep_id = wandb.sweep(sweep=sweep_config, project='wandb hpo test')

# Training function executed for each sweep trial
def train():
    """Train one XGBRegressor from the current W&B config and log its accuracy.

    The defaults below are passed to ``wandb.init``; while a sweep is running,
    the values chosen by the sweep overwrite the matching defaults. Defaults
    are provided for the swept keys (``max_depth``, ``min_child_weight``) so
    the function also works when called outside of a sweep.

    NOTE(review): relies on x_train, y_train, x_test, y_test and accuracy_func
    being defined elsewhere in the notebook; they are not visible in this cell.
    """
    config_defaults = {
        'estimators':99999,
        'learning_rate':0.01,
        # XGBoost's documented defaults, used when no sweep overrides them
        'max_depth':6,
        'min_child_weight':1,
        'seed':2201
    }

    # NOTE(review): confirm the installed wandb version still accepts `magic`.
    wandb.init(
        project="wandb sweep",
        config=config_defaults,
        magic=True
    )
    # During a sweep the default parameters are overwritten by the sweep values
    config = wandb.config

    # Map the run config onto XGBoost's argument names. The swept parameters and
    # the seed must be forwarded here, otherwise every trial trains the same model.
    params = {
        'n_estimators':config.estimators,
        'learning_rate':config.learning_rate,
        'max_depth':config.max_depth,
        'min_child_weight':config.min_child_weight,
        'random_state':config.seed
    }

    # Train the model and predict
    model = XGBRegressor(**params)
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)
    accuracy = accuracy_func(y_test, y_pred)

    # Log the metric for this trial
    wandb.log({'accuracy':accuracy})

# Run the hyperparameter tuning: the W&B agent calls train() for the sweep identified by sweep_id
wandb.agent(sweep_id, function=train)

## WIT (What-If Tool)

In [None]:
# Creates a tf feature spec from the dataframe and columns specified
def create_feature_spec(df, columns=None):
    """Build a ``tf.io.FixedLenFeature`` spec for the given dataframe columns.

    Args:
        df: pandas DataFrame whose column dtypes decide the feature types.
        columns: iterable of column names to include (list, array or Index).
            When ``None``, every column of ``df`` is used.

    Returns:
        dict mapping each column name to a scalar ``tf.io.FixedLenFeature``:
        integer columns use ``tf.int64``, floating-point columns use
        ``tf.float32`` and every other dtype uses ``tf.string``.
    """
    # `is None` rather than `== None`: comparing an array or Index with `==`
    # is element-wise and cannot be used as an `if` condition.
    if columns is None:
        columns = df.columns.values.tolist()

    feature_spec = {}
    for f in columns:
        col_dtype = df[f].dtype
        # Match on the dtype family so that int32/float32/nullable columns are not
        # misclassified as strings (the identity check only matched int64/float64).
        if pd.api.types.is_integer_dtype(col_dtype):
            tf_dtype = tf.int64
        elif pd.api.types.is_float_dtype(col_dtype):
            tf_dtype = tf.float32
        else:
            tf_dtype = tf.string
        feature_spec[f] = tf.io.FixedLenFeature(shape=(), dtype=tf_dtype)
    return feature_spec

In [None]:
# Build the feature spec from every column of the training data.
# NOTE(review): `train` is expected to be the training DataFrame here, but an earlier
# cell in this notebook defines a function named `train`; make sure the DataFrame is
# bound to this name (or one of the two is renamed) before running this cell.
feature_spec = create_feature_spec(train)

In [None]:
# Build the What-If Tool configuration from the test set.
# NOTE: both assignments target `config_builder`, so the classification builder
# replaces the regression one; keep only the variant that matches your model.

# regression
config_builder = (
    WitConfigBuilder(np.array(test).tolist(), test.columns.to_list())
    .set_estimator_and_feature_spec(regressor_model, feature_spec=feature_spec)
    .set_target_feature('targetCol')
    .set_model_type('regression')
)
# classification
config_builder = (
    WitConfigBuilder(np.array(test).tolist(), test.columns.to_list())
    .set_estimator_and_feature_spec(classifier_model, feature_spec=feature_spec)
    .set_target_feature('targetCol')
    .set_label_vocab(['value0', 'value1'])
)

WitWidget(config_builder, height=800)