In [108]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split

In [109]:
# Read the match table from a CSV located in the current working directory.
DATA_FILE = 'all_match.csv'
df = pd.read_csv(DATA_FILE)

In [110]:
# Re-encode the outcome column as integers: False -> 0, True -> 1.
# Values that are neither True nor False are left untouched by replace().
win_codes = {False: 0, True: 1}
df['win'] = df['win'].replace(win_codes)

In [111]:
# Features: every column except the target ('win') and the 'match_id' column.
X = df.drop(columns=['win', 'match_id'])
# Target: the encoded outcome column.
y = df['win']

In [112]:
# Hold out 10% of the rows for evaluation; the fixed seed makes the split
# reproducible across runs.
split_parts = train_test_split(X, y, test_size=0.1, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [113]:
# Random forest with default hyper-parameters and a fixed seed,
# trained on the training split.
best_rf_model = RandomForestClassifier(random_state=42)
best_rf_model.fit(X=X_train, y=y_train)

In [114]:
# Score the random forest on the held-out split.
y_rf_pred = best_rf_model.predict(X=X_test)
accuracy_rf = accuracy_score(y_true=y_test, y_pred=y_rf_pred)

print(f'Random Forest Accuracy: {accuracy_rf}')

Random Forest Accuracy: 0.9914263445050663


In [115]:
# Gradient boosting with default hyper-parameters and a fixed seed,
# trained on the same training split as the random forest.
best_gb_model = GradientBoostingClassifier(random_state=42)
best_gb_model.fit(X=X_train, y=y_train)

In [116]:
# Score the gradient-boosting model on the held-out split.
y_gb_pred = best_gb_model.predict(X=X_test)
accuracy_gb = accuracy_score(y_true=y_test, y_pred=y_gb_pred)

print(f'Gradient Boosting Accuracy: {accuracy_gb}')

Gradient Boosting Accuracy: 0.950896336710834


In [118]:
# Stacked ensemble: fresh (unfitted) random-forest and gradient-boosting
# estimators act as base learners, and a logistic regression combines their
# cross-validated (5-fold) predictions.
meta_model = LogisticRegression()
base_models = [
    ('rf', RandomForestClassifier(random_state=42)),
    ('gb', GradientBoostingClassifier(random_state=42)),
]
stacking_model = StackingClassifier(
    estimators=base_models,
    final_estimator=meta_model,
    cv=5,
)

In [119]:
# Train the stacked ensemble (base learners plus meta-model) on the training split.
stacking_model.fit(X=X_train, y=y_train)

In [120]:
# Held-out predictions from the stacked ensemble: per-class probabilities
# first, then the hard class labels.
y_pred_proba_stacking = stacking_model.predict_proba(X=X_test)
y_pred_stacking = stacking_model.predict(X=X_test)

In [121]:
# Display the per-class probabilities for the held-out rows: one row per
# sample, one column per class (in stacking_model.classes_ order).
y_pred_proba_stacking

array([[9.99109423e-01, 8.90577218e-04],
       [9.94160066e-01, 5.83993440e-03],
       [5.28059607e-02, 9.47194039e-01],
       ...,
       [2.12834672e-02, 9.78716533e-01],
       [6.77518320e-04, 9.99322482e-01],
       [1.24641895e-03, 9.98753581e-01]])

In [122]:
# AUC-ROC of the stacked ensemble on the held-out split. The score is computed
# from column 1 of predict_proba, i.e. the probability of the second class in
# stacking_model.classes_ (label 1 after the 0/1 encoding of 'win').
# roc_auc_score comes from the notebook's top import cell, so a fresh
# Restart & Run All does not depend on a mid-notebook import.
auc_roc_stacking = roc_auc_score(y_test, y_pred_proba_stacking[:, 1])
print(f'Stacking Model AUC-ROC: {auc_roc_stacking}')

Stacking Model AUC-ROC: 0.9998955093312597


In [126]:
# Build a single-row frame for an ad-hoc prediction. The frame is laid out with
# the training columns; any training column not supplied here would come out
# as NaN and is then filled with False.
sample_match = {'radian_team': 8291895, 'dire_team': 8255888}
testt = pd.DataFrame([sample_match], columns=X_train.columns).fillna(False)

In [127]:
# Display the single-row frame to check its columns and values before predicting.
testt

Unnamed: 0,radian_team,dire_team
0,8291895,8255888


In [128]:
# Class probabilities for the single hand-built row; columns follow
# stacking_model.classes_ order.
stacking_model.predict_proba(X=testt)

array([[0.16586645, 0.83413355]])

In [None]:
# Probability pair kept as a literal for reference (presumably pasted from an
# earlier prediction -- TODO confirm which input produced it).
# The original cell used the bare name `array`, which is never imported and
# raises NameError when the notebook is run top to bottom; np.array keeps the
# same values and makes the cell executable.
reference_proba = np.array([[0.98941756, 0.01058244]])
reference_proba