In [52]:
# Eternal Return - Improved Team Win Rate Prediction Model

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

In [53]:
print("Loading data...")
df = pd.read_csv('players_data.csv')  # Replace with your actual CSV file name
print("Initial DataFrame shape:", df.shape)

Loading data...
Initial DataFrame shape: (311265, 22)


In [54]:
print("Grouping data into teams...")
team_df = df.groupby(['gameId', 'gameRank']).agg({
    'character': lambda x: list(x),
    'weapon': lambda x: list(x),
    'itemWeapon': lambda x: list(x),
    'itemChest': lambda x: list(x),
    'itemHead': lambda x: list(x),
    'itemArm': lambda x: list(x),
    'itemLeg': lambda x: list(x),
    'TeamKill': 'sum',
    'Kill': 'sum',
    'Death': 'sum',
    'Assist': 'sum',
    'Dmg_Player': 'sum',
    'Dmg_Monster': 'sum'
}).reset_index()
print("Grouped DataFrame shape:", team_df.shape)

Grouping data into teams...
Grouped DataFrame shape: (89375, 15)


In [55]:
print("Filtering for complete teams (3 players)...")
team_df = team_df[team_df['character'].apply(len) == 3].copy()
print("Filtered DataFrame shape:", team_df.shape)

Filtering for complete teams (3 players)...
Filtered DataFrame shape: (70458, 15)


In [56]:
print("Creating regression target and character features...")
team_df['win_rate_proxy'] = 1 / team_df['gameRank']
team_df[['char_1', 'char_2', 'char_3']] = pd.DataFrame(team_df['character'].tolist(), index=team_df.index)

Creating regression target and character features...


In [57]:
# Drop original list-based columns
team_df.drop(columns=['character', 'weapon', 'itemWeapon', 'itemChest', 'itemHead', 'itemArm', 'itemLeg'], inplace=True)
print("Final processed team_df shape:", team_df.shape)


Final processed team_df shape: (70458, 12)


In [58]:
cat_features = ['char_1', 'char_2', 'char_3']
num_features = ['TeamKill', 'Kill', 'Death', 'Assist', 'Dmg_Player', 'Dmg_Monster']
X = team_df[cat_features + num_features]
y = team_df['win_rate_proxy']

In [59]:
print("Setting up preprocessing pipeline...")
preprocessor = ColumnTransformer(transformers=[
    ('num', SimpleImputer(strategy='mean'), num_features),
    ('cat', Pipeline([
        ('imputer', SimpleImputer(strategy='most_frequent')),
        ('encoder', OneHotEncoder(handle_unknown='ignore'))
    ]), cat_features)
])


Setting up preprocessing pipeline...


In [44]:
# Input features & target
X = team_df[cat_features + num_features]
y = team_df['win_rate_proxy']

In [60]:
print("Splitting data and training Random Forest model...")
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = Pipeline(steps=[
    ('preprocessing', preprocessor),
    ('regressor', RandomForestRegressor(n_estimators=200, max_depth=10, random_state=42))
])


Splitting data and training Random Forest model...


In [None]:
#train
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [61]:
model.fit(X_train, y_train)

In [65]:
y_pred = model.predict(X_test)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)
print(f"\nModel Evaluation:")
print(f"RMSE: {rmse:.4f}")
print(f"R^2 Score: {r2:.4f}")


Model Evaluation:
RMSE: 0.1456
R^2 Score: 0.7312


In [None]:
def predict_win_rate_with_stats():
    """
    Prompts user for 3 character names and their stats to predict win rate.
    Requires input of team stats (Kills, Deaths, Damage, etc.).
    """
    print("Enter the names of 3 characters in your team.")
    
    # Input characters
    char_1 = input("Character 1: ").strip()
    char_2 = input("Character 2: ").strip()
    char_3 = input("Character 3: ").strip()

    # Input team stats
    print("\nEnter the stats for the team.")
    team_kills = int(input("Total Team Kills: "))
    team_deaths = int(input("Total Team Deaths: "))
    team_assists = int(input("Total Team Assists: "))
    team_damage_player = float(input("Total Team Damage to Players: "))
    team_damage_monster = float(input("Total Team Damage to Monsters: "))

    # Build input DataFrame with both characters and stats
    input_data = pd.DataFrame([{
        'char_1': char_1,
        'char_2': char_2,
        'char_3': char_3,
        'TeamKill': team_kills,
        'Kill': team_kills,  # For now, assume team kills = individual kills
        'Death': team_deaths,
        'Assist': team_assists,
        'Dmg_Player': team_damage_player,
        'Dmg_Monster': team_damage_monster
    }])

    # Model prediction
    prediction = model.predict(input_data)[0]
    predicted_rank = 1 / prediction if prediction != 0 else float('inf')

    print(f"\nPredicted Win Rate Proxy (1/gameRank): {prediction:.4f}")
    print(f"≈ Expected Game Rank: {predicted_rank:.2f}")
    return prediction


In [70]:
predict_win_rate_with_stats()


Enter the names of 3 characters in your team.

Enter the stats for the team.


ValueError: invalid literal for int() with base 10: ''