In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import numpy as np
import xgboost as xgb
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import f1_score
from xgboost import plot_importance


%matplotlib inline

In [4]:
gm_10 = pd.read_csv("/content/drive/MyDrive/YSAL/2023-1/LOL/GM10_fix.csv")
gm_15 = pd.read_csv("/content/drive/MyDrive/YSAL/2023-1/LOL/GM15_fix.csv")

In [5]:
drop_gm10 = gm_10.drop(['Unnamed: 0', 'gameId','redWins',
                   "blueFirstTowerLane",'blueDragonType','redFirstTowerLane','redDragonType'], axis=1)
drop_gm15 = gm_15.drop(['Unnamed: 0', 'gameId','redWins',
                   "blueFirstTowerLane",'blueDragonType','redFirstTowerLane','redDragonType'], axis=1)

In [7]:
def train_test(df):
  y_train = df['blueWins']
  x_train = df.drop(['blueWins'], axis=1)
  return x_train, y_train

In [8]:
x_train, y_train = train_test(drop_gm15)

val_size=0.3

x_train, x_valid, y_train, y_valid = train_test_split(x_train, y_train, test_size = val_size, random_state=42)

# Xgboost

In [9]:
pip install bayesian-optimization

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting bayesian-optimization
  Downloading bayesian_optimization-1.4.2-py3-none-any.whl (17 kB)
Collecting colorama>=0.4.6
  Downloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)
Installing collected packages: colorama, bayesian-optimization
Successfully installed bayesian-optimization-1.4.2 colorama-0.4.6


In [10]:
from bayes_opt import BayesianOptimization

In [11]:
# 탐색 대상 함수 (XGBRegressor)
def XGB_cv(max_depth,learning_rate, n_estimators, gamma
            ,min_child_weight, subsample
            ,colsample_bytree, silent=True, nthread=-1):

    # 모델 정의
    model = xgb.XGBClassifier(max_depth=int(max_depth),
                            learning_rate=learning_rate,
                            n_estimators=int(n_estimators),
                            gamma=gamma,
                            min_child_weight=min_child_weight,
                            subsample=subsample,
                            colsample_bytree=colsample_bytree, 
                            nthread=nthread
                            )
    # 모델 훈련
    model.fit(x_train, y_train)

    # 예측값 출력
    y_pred= model.predict(x_valid)

    return f1_score(y_valid, y_pred)

In [20]:
pbounds = {'max_depth': (3, 7),
                'learning_rate': (0.01, 0.3),
                'n_estimators': (20,100),
                'gamma': (0, 100),
                'min_child_weight': (0, 3),
                'subsample': (0.5, 1),
                'colsample_bytree' :(0.2, 1)
                }

xg_bo=BayesianOptimization(f=XGB_cv, pbounds=pbounds, verbose=2, random_state=1 )    

xg_bo.maximize(init_points=2, n_iter=100)

|   iter    |  target   | colsam... |   gamma   | learni... | max_depth | min_ch... | n_esti... | subsample |
-------------------------------------------------------------------------------------------------------------
| [0m1        [0m | [0m0.6317   [0m | [0m0.5336   [0m | [0m72.03    [0m | [0m0.01003  [0m | [0m4.209    [0m | [0m0.4403   [0m | [0m27.39    [0m | [0m0.5931   [0m |
| [95m2        [0m | [95m0.6855   [0m | [95m0.4764   [0m | [95m39.68    [0m | [95m0.1663   [0m | [95m4.677    [0m | [95m2.056    [0m | [95m36.36    [0m | [95m0.9391   [0m |
| [0m3        [0m | [0m0.6409   [0m | [0m0.2046   [0m | [0m40.19    [0m | [0m0.1041   [0m | [0m3.836    [0m | [0m2.294    [0m | [0m37.26    [0m | [0m0.6836   [0m |
| [95m4        [0m | [95m0.6938   [0m | [95m0.8121   [0m | [95m10.95    [0m | [95m0.1614   [0m | [95m6.073    [0m | [95m2.061    [0m | [95m30.64    [0m | [95m0.601    [0m |
| [0m5        [0m | [0m0.6778  

# Random Forest

In [13]:
from sklearn.ensemble import RandomForestClassifier

In [14]:
def bo_params_rf(max_samples,n_estimators,max_features, max_depth):
    
    params = {
        'max_samples': max_samples,
        'max_features':max_features,
        'max_depth':int(max_depth),
        'n_estimators':int(n_estimators)
    }
    clf = RandomForestClassifier(**params)
    
    clf.fit(x_train, y_train)

    # 예측값 출력
    y_pred= clf.predict(x_valid)

    return f1_score(y_valid, y_pred)

In [15]:
pbounds = {'max_samples':(0.5,1),
        'max_features':(0.5,1),
        'max_depth' : (3, 7),
        'n_estimators':(10,200)}

rf_bo = BayesianOptimization(bo_params_rf, pbounds=pbounds, verbose=2, random_state=1)
rf_bo.maximize(init_points=2, n_iter=100)

|   iter    |  target   | max_depth | max_fe... | max_sa... | n_esti... |
-------------------------------------------------------------------------
| [0m1        [0m | [0m0.7806   [0m | [0m4.668    [0m | [0m0.8602   [0m | [0m0.5001   [0m | [0m67.44    [0m |
| [0m2        [0m | [0m0.7687   [0m | [0m3.587    [0m | [0m0.5462   [0m | [0m0.5931   [0m | [0m75.66    [0m |
| [0m3        [0m | [0m0.7781   [0m | [0m4.922    [0m | [0m0.756    [0m | [0m0.6562   [0m | [0m67.59    [0m |
| [0m4        [0m | [0m0.7689   [0m | [0m3.523    [0m | [0m0.7864   [0m | [0m0.6083   [0m | [0m67.12    [0m |
| [0m5        [0m | [0m0.7668   [0m | [0m4.949    [0m | [0m0.7672   [0m | [0m0.7738   [0m | [0m67.53    [0m |
| [0m6        [0m | [0m0.7725   [0m | [0m3.085    [0m | [0m0.9039   [0m | [0m0.9824   [0m | [0m143.4    [0m |
| [0m7        [0m | [0m0.7732   [0m | [0m4.458    [0m | [0m0.5112   [0m | [0m0.5762   [0m | [0m194.6    [0m 

In [21]:
print(xg_bo.max)
print(rf_bo.max)

{'target': 0.6978967495219885, 'params': {'colsample_bytree': 0.3559933667008637, 'gamma': 8.834445524484702, 'learning_rate': 0.2553193067332952, 'max_depth': 5.272480565844037, 'min_child_weight': 1.721934014761535, 'n_estimators': 61.74875769041427, 'subsample': 0.9674259123440412}}
{'target': 0.7818012999071494, 'params': {'max_depth': 6.722808724081828, 'max_features': 0.9562452054707964, 'max_samples': 0.8774713674032177, 'n_estimators': 140.97627307723155}}


# 다른 데이터에 적용

In [17]:
x_train, y_train = train_test(drop_gm10)

val_size=0.3

x_train, x_valid, y_train, y_valid = train_test_split(x_train, y_train, test_size = val_size, random_state=42)

xgboost

In [23]:
params = {'colsample_bytree': 0.3559933667008637, 'gamma': 8.834445524484702, 'learning_rate': 0.2553193067332952, 'max_depth': 5, 'min_child_weight': 1.721934014761535, 'n_estimators': 62, 'subsample': 0.9674259123440412}

model = xgb.XGBClassifier(**params)
model.fit(x_train, y_train)
valid_pred = model.predict(x_valid)
print("F1 score: %.2f%%" % (f1_score(y_valid, valid_pred) * 100.0))
print("Accuracy: %.2f%%" % (accuracy_score(y_valid, valid_pred) * 100.0))

F1 score: 69.79%
Accuracy: 72.62%


Rf

In [24]:
params = {'max_depth': 7, 'max_features': 0.9562452054707964, 'max_samples': 0.8774713674032177, 'n_estimators': 141}

clf = RandomForestClassifier(**params)
model.fit(x_train, y_train)
valid_pred = model.predict(x_valid)
print("F1 sore: %.2f%%" % (f1_score(y_valid, valid_pred) * 100.0))
print("Accuracy: %.2f%%" % (accuracy_score(y_valid, valid_pred) * 100.0))

F1 sore: 69.79%
Accuracy: 72.62%
