In [2]:
import pandas as pd

from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.metrics import f1_score
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

from xgboost import XGBClassifier

In [16]:
# Load the pre-processed dataset (path is relative to the notebook's working directory).
df = pd.read_csv('../data/processed.csv')

# Features are every column except PlayerID (presumably a row identifier -- confirm)
# and the target column EngagementLevel.
X = df.drop(columns=['PlayerID', 'EngagementLevel'])
y = df['EngagementLevel']

# Stratified 80/20 hold-out split with a fixed seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    shuffle=True,
    random_state=42,
)

# A bare shape tuple in the middle of a cell is never displayed, so check the
# split explicitly instead: no rows lost, features and labels stay aligned.
assert len(X_train) + len(X_test) == len(X)
assert len(X_train) == len(y_train) and len(X_test) == len(y_test)

# Partition the feature columns by dtype so that every column is routed to
# exactly one transformer. ColumnTransformer defaults to remainder='drop', so a
# column that appears in neither list would be silently discarded.
num_cols = X.select_dtypes(include='number').columns.to_list()
cat_cols = X.select_dtypes(exclude='number').columns.to_list()

# Numeric features: standardise to zero mean / unit variance.
num_pipe = Pipeline([
    ('scl', StandardScaler()),
])

# Non-numeric features: one-hot encode. handle_unknown='ignore' makes a category
# that was not seen during fit encode as all zeros instead of raising, which can
# otherwise happen on the hold-out set or inside cross-validation folds.
cat_pipe = Pipeline([
    ('enc', OneHotEncoder(handle_unknown='ignore')),
])

# Apply each sub-pipeline to its own column group and concatenate the results.
processor = ColumnTransformer([
    ('num', num_pipe, num_cols),
    ('cat', cat_pipe, cat_cols),
])

# Gradient-boosted classifier. `random_state` is the seed parameter documented
# for the XGBoost scikit-learn wrapper; `seed` is not a declared parameter there.
# NOTE(review): recent xgboost releases expect integer-encoded class labels
# (0..n_classes-1) -- confirm EngagementLevel is already numeric in processed.csv.
xgb = XGBClassifier(objective='multi:softmax', random_state=42)

# Full modelling pipeline: preprocessing ('prs') followed by the estimator ('algo').
# The step names are what the '<step>__<param>' keys of a parameter search refer to.
pipeline = Pipeline([
    ('prs', processor),
    ('algo', xgb),
])

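# Disabled candidate hyper-parameter grid (presumably for RandomizedSearchCV); the 'algo__' prefix targets the XGBClassifier step of `pipeline`.
# params = {'algo__n_estimators':[100,200,300],'algo__learning_rate':[0.1,0.2,0.3],'algo__max_depth':[2,4,8]}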