In [None]:
import pandas as pd
from utils import DATA_DIR

# Both prediction exports are read from DATA_DIR/pro_ensembling.
folder = DATA_DIR + "/pro_ensembling"

solo_queue_csv = folder + "/comp-games-with-predictions_solo_queue.csv"
clash_csv = folder + "/comp-games-with-predictions_clash.csv"

solo_queue_df = pd.read_csv(solo_queue_csv)
clash_df = pd.read_csv(clash_csv)

# A notebook cell renders only its final expression, so a bare `.head()` in
# the middle of the cell is computed and thrown away. `display` is the
# built-in IPython helper; it makes the first preview actually show up.
display(solo_queue_df.head())

clash_df.head()



In [None]:
# Show the column index of the solo-queue frame loaded above.
print(solo_queue_df.columns)

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, roc_auc_score, log_loss
from typing import Tuple, Dict
import matplotlib.pyplot as plt


def evaluate_predictions(
    y_true: np.ndarray, y_pred: np.ndarray, y_prob: np.ndarray
) -> Dict[str, float]:
    """Score predictions with accuracy, ROC AUC and log loss.

    Args:
        y_true: Ground-truth labels.
        y_pred: Hard class predictions; only used for the accuracy score.
        y_prob: Predicted scores; used for both ROC AUC and log loss.

    Returns:
        A dict with the keys "accuracy", "roc_auc" and "log_loss".
    """
    accuracy = accuracy_score(y_true, y_pred)
    roc_auc = roc_auc_score(y_true, y_prob)
    cross_entropy = log_loss(y_true, y_prob)
    return {"accuracy": accuracy, "roc_auc": roc_auc, "log_loss": cross_entropy}


# First, let's create a baseline using just pre-draft chances
def evaluate_baseline(df: pd.DataFrame) -> Dict[str, float]:
    """Score the pre-draft win chance on its own, as a reference point.

    Reads "blue_team_won" as the label and "Blue Win Pre Draft" as the
    predicted probability. The hard prediction is 1 when that probability is
    strictly greater than 0.5, otherwise 0.

    Returns:
        The metric dict produced by ``evaluate_predictions``.
    """
    labels = df["blue_team_won"].values
    pre_draft_prob = df["Blue Win Pre Draft"].values
    return evaluate_predictions(
        labels, (pre_draft_prob > 0.5).astype(int), pre_draft_prob
    )


# Prepare features for the full model
def prepare_features(df: pd.DataFrame) -> Tuple[np.ndarray, np.ndarray]:
    """Build the feature matrix and label vector for the ensembling model.

    The four feature columns are, in order: "solo_queue_prediction",
    "clash_prediction", their difference (solo queue minus clash) and
    "Blue Win Pre Draft".

    Args:
        df: Frame containing the three source columns above plus
            "blue_team_won". It is not modified.

    Returns:
        ``(X, y)`` where ``X`` has shape (n_samples, 4) and ``y`` has shape
        (n_samples,).
    """
    features = [
        "solo_queue_prediction",
        "clash_prediction",
        "prediction_diff",
        "Blue Win Pre Draft",
    ]

    # assign() works on a copy, so the derived column never leaks into the
    # caller's frame (and no SettingWithCopyWarning when `df` is a filtered
    # slice of another frame).
    enriched = df.assign(
        prediction_diff=df["solo_queue_prediction"] - df["clash_prediction"]
    )

    X = enriched[features].values
    y = enriched["blue_team_won"].values

    return X, y


# Join the solo-queue export with the clash predictions on the "id" column.
# The inner join keeps only ids that appear in both frames.
solo_queue_renamed = solo_queue_df.rename(
    columns={
        "model_prediction": "solo_queue_prediction",
        "model_confidence": "solo_queue_confidence",
    }
)
clash_predictions = clash_df[["id", "model_prediction"]].rename(
    columns={"model_prediction": "clash_prediction"}
)
combined_df = solo_queue_renamed.merge(clash_predictions, on="id", how="inner")

# Close games: pre-draft win chance within [0.35, 0.65], both bounds included.
close_games_df = combined_df[combined_df["Blue Win Pre Draft"].between(0.35, 0.65)]

print(f"Total games: {len(combined_df)}")
close_games_pct = len(close_games_df) / len(combined_df) * 100
print(f"Close games: {len(close_games_df)} ({close_games_pct:.1f}%)")


# Reference scores obtained from the pre-draft win chance alone, on all games.
print("Baseline (Pre-draft only) Metrics:")
baseline_metrics = evaluate_baseline(combined_df)
for metric, value in baseline_metrics.items():
    print("{}: {:.4f}".format(metric, value))

# Prepare full model data
X, y = prepare_features(combined_df)

# Hold out the test set first; it is used for reporting only.
X_train_full, X_test, y_train_full, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, shuffle=True
)

# Carve a validation split out of the remaining rows. Early stopping picks the
# number of boosting rounds from an evaluation set; if that set is the test
# set, the reported test metrics are optimistically biased.
X_train, X_val, y_train, y_val = train_test_split(
    X_train_full, y_train_full, test_size=0.2, random_state=42, shuffle=True
)

# Train XGBoost model with early stopping
model = XGBClassifier(
    n_estimators=1000,  # Upper bound; early stopping decides the actual count
    learning_rate=0.01,  # Lower learning rate to prevent overfitting
    max_depth=3,
    random_state=42,
    eval_metric=["logloss", "error"],
    early_stopping_rounds=50,
)

# XGBoost uses the LAST entry of eval_set (and the last metric in eval_metric)
# for early stopping, so the validation split goes last. Train and test keep
# positions 0 and 1, so evals_result() still reports them as "validation_0"
# and "validation_1"; the test set is monitored but never drives stopping.
eval_set = [(X_train, y_train), (X_test, y_test), (X_val, y_val)]
model.fit(
    X_train,
    y_train,
    eval_set=eval_set,
    verbose=True,
)

# Hard labels and class-1 probabilities for the train and test splits.
train_preds, test_preds = (model.predict(split) for split in (X_train, X_test))
train_probs, test_probs = (
    model.predict_proba(split)[:, 1] for split in (X_train, X_test)
)

# Report the same metric set for each split.
print("\nFull Model Metrics:")
print("Training Set:")
train_metrics = evaluate_predictions(y_train, train_preds, train_probs)
for metric, value in train_metrics.items():
    print("{}: {:.4f}".format(metric, value))

print("\nTest Set:")
test_metrics = evaluate_predictions(y_test, test_preds, test_probs)
for metric, value in test_metrics.items():
    print("{}: {:.4f}".format(metric, value))

# Per-round evaluation history recorded by fit(): "validation_0" is the first
# eval_set entry (train), "validation_1" the second (test).
results = model.evals_result()
epochs = len(results["validation_0"]["error"])
x_axis = range(epochs)

# One panel per tracked metric, side by side.
fig, axes = plt.subplots(1, 2, figsize=(12, 5))
panels = [
    ("error", "Classification Error", "Model Error vs Epoch"),
    ("logloss", "Log Loss", "Log Loss vs Epoch"),
]
for ax, (metric_key, y_label, panel_title) in zip(axes, panels):
    ax.plot(x_axis, results["validation_0"][metric_key], label="Train")
    ax.plot(x_axis, results["validation_1"][metric_key], label="Test")
    ax.legend()
    ax.set_xlabel("Epoch")
    ax.set_ylabel(y_label)
    ax.set_title(panel_title)

plt.tight_layout()
plt.show()

# Display labels, listed in the same order as the columns of the feature
# matrix built by prepare_features.
feature_names = [
    "Solo Queue Prediction",
    "Clash Prediction",
    "Prediction Difference",
    "Pre-Draft Win Chance",
]

print("\nFeature Importances:")
importance_dict = {
    name: score for name, score in zip(feature_names, model.feature_importances_)
}
# Highest importance first.
ranked_importances = sorted(
    importance_dict.items(), key=lambda pair: pair[1], reverse=True
)
for feature, importance in ranked_importances:
    print("{}: {:.4f}".format(feature, importance))

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, roc_auc_score, log_loss
from typing import Tuple, Dict


def evaluate_predictions(
    y_true: np.ndarray, y_pred: np.ndarray, y_prob: np.ndarray
) -> Dict[str, float]:
    """Score predictions with accuracy, ROC AUC and log loss.

    Args:
        y_true: Ground-truth labels.
        y_pred: Hard class predictions; only used for the accuracy score.
        y_prob: Predicted scores; used for both ROC AUC and log loss.

    Returns:
        A dict with the keys "accuracy", "roc_auc" and "log_loss".
    """
    scores = {}
    scores["accuracy"] = accuracy_score(y_true, y_pred)
    scores["roc_auc"] = roc_auc_score(y_true, y_prob)
    scores["log_loss"] = log_loss(y_true, y_prob)
    return scores


# Rebuild the merged frame: solo-queue rows joined to the clash predictions on
# the "id" column (inner join, so only ids present in both frames are kept).
solo_queue_renamed = solo_queue_df.rename(
    columns={
        "model_prediction": "solo_queue_prediction",
        "model_confidence": "solo_queue_confidence",
    }
)
clash_predictions = clash_df[["id", "model_prediction"]].rename(
    columns={"model_prediction": "clash_prediction"}
)
combined_df = solo_queue_renamed.merge(clash_predictions, on="id", how="inner")

# Close games: pre-draft win chance within [0.40, 0.60], both bounds included.
close_games_df = combined_df[combined_df["Blue Win Pre Draft"].between(0.40, 0.60)]

print(f"Total games: {len(combined_df)}")
close_games_pct = len(close_games_df) / len(combined_df) * 100
print(f"Close games: {len(close_games_df)} ({close_games_pct:.1f}%)")

# Baseline restricted to the close games. evaluate_baseline is not defined in
# this cell; it has to be in the kernel already from an earlier cell.
print("\nBaseline (Pre-draft only) Metrics on Close Games:")
baseline_metrics = evaluate_baseline(close_games_df)
for metric, value in baseline_metrics.items():
    print("{}: {:.4f}".format(metric, value))


# Prepare features for the full model
def prepare_features(df: pd.DataFrame) -> Tuple[np.ndarray, np.ndarray]:
    """Build the feature matrix and label vector for the ensembling model.

    The four feature columns are, in order: "solo_queue_prediction",
    "clash_prediction", their difference (solo queue minus clash) and
    "Blue Win Pre Draft".

    Args:
        df: Frame containing the three source columns above plus
            "blue_team_won". It is not modified.

    Returns:
        ``(X, y)`` where ``X`` has shape (n_samples, 4) and ``y`` has shape
        (n_samples,).
    """
    features = [
        "solo_queue_prediction",
        "clash_prediction",
        "prediction_diff",
        "Blue Win Pre Draft",
    ]

    # This function is called on a filtered slice of the merged frame; writing
    # a column into that slice raises SettingWithCopyWarning and mutates the
    # caller's data. assign() builds the derived column on a copy instead.
    enriched = df.assign(
        prediction_diff=df["solo_queue_prediction"] - df["clash_prediction"]
    )

    X = enriched[features].values
    y = enriched["blue_team_won"].values

    return X, y


# Features and labels come from the close games only; 20% is held out.
X, y = prepare_features(close_games_df)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, shuffle=True
)

# Hyperparameters for the close-games classifier.
xgb_params = dict(
    n_estimators=1000,
    learning_rate=0.001,
    max_depth=2,
    random_state=42,
    eval_metric=["logloss", "error"],
)
model = XGBClassifier(**xgb_params)

# Both splits are passed as evaluation sets so fit() logs the metrics for each.
eval_set = [(X_train, y_train), (X_test, y_test)]
model.fit(X_train, y_train, eval_set=eval_set, verbose=True)

# Hard labels and class-1 probabilities for the train and test splits.
train_preds, test_preds = (model.predict(split) for split in (X_train, X_test))
train_probs, test_probs = (
    model.predict_proba(split)[:, 1] for split in (X_train, X_test)
)

# Report the same metric set for each split.
print("\nFull Model Metrics on Close Games:")
print("Training Set:")
train_metrics = evaluate_predictions(y_train, train_preds, train_probs)
for metric, value in train_metrics.items():
    print("{}: {:.4f}".format(metric, value))

print("\nTest Set:")
test_metrics = evaluate_predictions(y_test, test_preds, test_probs)
for metric, value in test_metrics.items():
    print("{}: {:.4f}".format(metric, value))

# Display labels, listed in the same order as the columns of the feature
# matrix built by prepare_features.
feature_names = [
    "Solo Queue Prediction",
    "Clash Prediction",
    "Prediction Difference",
    "Pre-Draft Win Chance",
]

print("\nFeature Importances:")
importance_dict = {
    name: score for name, score in zip(feature_names, model.feature_importances_)
}
# Highest importance first.
ranked_importances = sorted(
    importance_dict.items(), key=lambda pair: pair[1], reverse=True
)
for feature, importance in ranked_importances:
    print("{}: {:.4f}".format(feature, importance))

# Mean pre-draft win chance next to the mean of the blue_team_won label.
print("\nDetailed Analysis of Close Games:")
avg_pre_draft = close_games_df["Blue Win Pre Draft"].mean()
print(f"Average pre-draft win chance: {avg_pre_draft:.4f}")
blue_win_rate = close_games_df["blue_team_won"].mean()
print(f"Actual blue side win rate: {blue_win_rate:.4f}")

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import LabelEncoder
from typing import Dict, List, Tuple

def prepare_features_for_error_analysis(df: pd.DataFrame) -> Tuple[np.ndarray, Dict[str, LabelEncoder]]:
    """Label-encode the categorical columns and assemble the feature matrix.

    A separate ``LabelEncoder`` is fitted on each categorical column (region,
    the two team names and the five per-role name columns for each side), so
    integer codes are not comparable across columns.

    Args:
        df: Source frame; it is not modified.

    Returns:
        A tuple ``(X, encoders)``. ``X`` holds "Blue Win Pre Draft" and
        "Red Win Pre Draft" followed by the encoded categorical columns;
        ``encoders`` maps each categorical column name to its fitted encoder.
    """
    sides = ("blue", "red")
    roles = ("top", "jungle", "mid", "bot", "support")
    categorical_columns = ["region_name", "blue_team_name", "red_team_name"] + [
        f"{side}_{role}_name" for side in sides for role in roles
    ]

    # Fit one encoder per column and collect the integer codes.
    encoders = {}
    encoded_columns = {}
    for col in categorical_columns:
        encoder = LabelEncoder()
        encoded_columns[col] = encoder.fit_transform(df[col])
        encoders[col] = encoder

    # assign() returns a copy with the categorical columns replaced by codes.
    encoded_df = df.assign(**encoded_columns)

    features = ["Blue Win Pre Draft", "Red Win Pre Draft"] + categorical_columns
    return encoded_df[features].values, encoders

# Feature matrix plus the per-column encoders fitted on the solo-queue frame.
X, encoders = prepare_features_for_error_analysis(solo_queue_df)

# Two regression targets taken from the same rows.
y_error = solo_queue_df["model_error"].values
y_confidence = solo_queue_df["model_confidence"].values

# A single split call keeps features and both targets aligned row for row.
(
    X_train,
    X_test,
    y_error_train,
    y_error_test,
    y_conf_train,
    y_conf_test,
) = train_test_split(X, y_error, y_confidence, test_size=0.2, random_state=42)

# Both regressors share the same hyperparameters.
regressor_params = dict(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=5,
    random_state=42,
)
error_model = XGBRegressor(**regressor_params)
confidence_model = XGBRegressor(**regressor_params)

# Fit each model on its own target.
error_model.fit(X_train, y_error_train)
confidence_model.fit(X_train, y_conf_train)

# Held-out predictions.
error_preds = error_model.predict(X_test)
conf_preds = confidence_model.predict(X_test)

# Held-out MSE and R2 for each regressor.
print("Model Error Prediction Performance:")
error_mse = mean_squared_error(y_error_test, error_preds)
print(f"MSE: {error_mse:.4f}")
error_r2 = r2_score(y_error_test, error_preds)
print(f"R2 Score: {error_r2:.4f}")

print("\nModel Confidence Prediction Performance:")
conf_mse = mean_squared_error(y_conf_test, conf_preds)
print(f"MSE: {conf_mse:.4f}")
conf_r2 = r2_score(y_conf_test, conf_preds)
print(f"R2 Score: {conf_r2:.4f}")

# Display labels, in the same order as the feature columns returned by
# prepare_features_for_error_analysis.
feature_names = [
    "Blue Win Pre Draft", "Red Win Pre Draft",
    "Region", "Blue Team", "Red Team",
    "Blue Top", "Blue Jungle", "Blue Mid", "Blue Bot", "Blue Support",
    "Red Top", "Red Jungle", "Red Mid", "Red Bot", "Red Support",
]

# Ten largest importances per model, highest first.
print("\nTop 10 Features Contributing to Model Error:")
error_importance = {
    name: score for name, score in zip(feature_names, error_model.feature_importances_)
}
top_error_features = sorted(
    error_importance.items(), key=lambda pair: pair[1], reverse=True
)[:10]
for feature, importance in top_error_features:
    print("{}: {:.4f}".format(feature, importance))

print("\nTop 10 Features Contributing to Model Confidence:")
conf_importance = {
    name: score
    for name, score in zip(feature_names, confidence_model.feature_importances_)
}
top_conf_features = sorted(
    conf_importance.items(), key=lambda pair: pair[1], reverse=True
)[:10]
for feature, importance in top_conf_features:
    print("{}: {:.4f}".format(feature, importance))

# High-error games: rows whose model_error is strictly above the 75th
# percentile of that column.
high_error_threshold = np.percentile(solo_queue_df["model_error"], 75)
is_high_error = solo_queue_df["model_error"] > high_error_threshold
high_error_games = solo_queue_df[is_high_error]

print("\nAnalysis of High-Error Games:")
print(f"Number of high-error games: {len(high_error_games)}")

# Five most frequent team names per side.
print("\nMost common teams in high-error games:")
for side_header, team_column in (
    ("Blue side:", "blue_team_name"),
    ("\nRed side:", "red_team_name"),
):
    print(side_header)
    print(high_error_games[team_column].value_counts().head())

# Three most frequent names per role and side.
print("\nMost common champions in high-error games:")
for role in ["top", "jungle", "mid", "bot", "support"]:
    print(f"\n{role.capitalize()} lane:")
    blue_champs = high_error_games[f"blue_{role}_name"].value_counts().head(3)
    red_champs = high_error_games[f"red_{role}_name"].value_counts().head(3)
    print("Blue side:", dict(blue_champs))
    print("Red side:", dict(red_champs))

# Mean error and row count per region, worst region first.
print("\nAverage model error by region:")
region_errors = (
    solo_queue_df.groupby("region_name")["model_error"]
    .agg(["mean", "count"])
    .sort_values("mean", ascending=False)
)
print(region_errors)