In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix,ConfusionMatrixDisplay,classification_report,recall_score,precision_score,f1_score
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from xgboost import XGBClassifier

In [2]:
# Read ../data/processed.csv into a DataFrame and preview its first five rows.
df = pd.read_csv(filepath_or_buffer='../data/processed.csv')
df.head(n=5)

Unnamed: 0,PlayerID,Age,Gender,Location,GameGenre,PlayTimeHours,InGamePurchases,GameDifficulty,SessionsPerWeek,AvgSessionDurationMinutes,PlayerLevel,AchievementsUnlocked,EngagementLevel
0,9000,43,Male,Other,Strategy,16.271119,0,Medium,6,108,79,25,1
1,9001,29,Female,USA,Strategy,5.525961,0,Medium,5,144,11,10,1
2,9002,22,Female,USA,Sports,8.223755,0,Easy,16,142,35,41,2
3,9003,35,Male,USA,Action,5.265351,1,Easy,9,85,57,47,1
4,9004,33,Male,Europe,Action,15.531945,0,Medium,2,131,95,37,1


In [3]:
# Target is the EngagementLevel column; features are every remaining column
# except PlayerID (presumably a row identifier with no predictive meaning).
y = df['EngagementLevel']
X = df.drop(columns=['PlayerID', 'EngagementLevel'])

# Hold out 20% of the rows for testing. Stratifying on y keeps the class
# proportions the same in both parts, and random_state fixes the shuffle so
# the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    shuffle=True,
    stratify=y,
)

# Show the shape of each part as a sanity check.
tuple(part.shape for part in (X_train, X_test, y_train, y_test))

((32027, 11), (8007, 11), (32027,), (8007,))

In [10]:
# Split the feature columns by dtype so each group gets its own preprocessing.
# 'number' matches every numeric dtype (int32, float32, ... as well as
# int64/float64); every other column is treated as categorical.  Selecting by
# exact dtype names instead would leave columns of any other dtype (category,
# bool, narrower numerics) in neither list, and ColumnTransformer drops
# unlisted columns by default, so such features would silently vanish.
num_cols = X.select_dtypes(include='number').columns.to_list()
cat_cols = X.select_dtypes(exclude='number').columns.to_list()

# Numeric features: standardise to zero mean and unit variance.
num_pipe = Pipeline([
    ('scl', StandardScaler()),
])

# Categorical features: one-hot encode.  handle_unknown='ignore' encodes a
# category that was not seen during fit as all zeros instead of raising an
# error at predict time.
cat_pipe = Pipeline([
    ('enc', OneHotEncoder(handle_unknown='ignore')),
])

In [17]:
# Candidate classifiers, both seeded for reproducibility.  XGBClassifier is
# seeded through random_state, the documented scikit-learn-style parameter,
# so both models are configured the same way.
models = [
    RandomForestClassifier(random_state=42),
    XGBClassifier(objective='multi:softmax', random_state=42),
]

# One record (dict of metrics) per model; turned into a DataFrame at the end.
# NOTE(review): the models are compared on the hold-out test set, so the
# winner's score is an optimistic estimate of its real-world performance.
# Consider cross-validation on the training set for model selection.
results = []

for model in models:
    # Build a fresh preprocessor for each model so no fitted state is shared
    # between the two pipelines.
    processor = ColumnTransformer([
        ('num', num_pipe, num_cols),
        ('cat', cat_pipe, cat_cols),
    ])

    # Preprocessing and estimator are fitted together on the training data
    # only, so scaling/encoding statistics never see the test rows.
    pipeline = Pipeline([
        ('prs', processor),
        ('algo', model),
    ])
    pipeline.fit(X_train, y_train)
    y_pred = pipeline.predict(X_test)

    # Metrics are averaged across classes weighted by class support.
    # (Support-weighted recall is numerically equal to overall accuracy.)
    results.append({
        "Name": type(model).__name__,
        "Recall Score": recall_score(y_test, y_pred, average='weighted'),
        "Precision Score": precision_score(y_test, y_pred, average='weighted'),
        "F1 Score": f1_score(y_test, y_pred, average='weighted'),
    })

# Side-by-side comparison table, one row per model.
pd.DataFrame(results)





Unnamed: 0,Name,Recall Score,Precision Score,F1 Score
0,RandomForestClassifier,0.898589,0.899685,0.898097
1,XGBClassifier,0.917572,0.917867,0.917294


Berdasarkan evaluasi pada data uji, XGBoost menunjukkan performa terbaik dalam mengklasifikasikan level engagement pemain (F1 tertimbang 0,917 dibandingkan 0,898 pada Random Forest). Model ini berpotensi meningkatkan efektivitas strategi retensi melalui segmentasi yang lebih akurat, sehingga mendukung optimalisasi biaya kampanye serta peningkatan lifetime value pemain.