In [2]:
import pandas as pd
import numpy as np 
from sklearn.preprocessing import StandardScaler, LabelEncoder

pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

In [3]:
df = pd.read_csv('../Data/LCK_Tournament.csv')

In [4]:
#Drop columns not important
df = df.drop(['year', 'league', 'datacompleteness', 'split', 'playoffs', 'game', 'patch',
              'side', 'position', 'teamid'], axis=1)

Feature Engineering

In [5]:
# Convert 'date' column to datetime format
df['date'] = pd.to_datetime(df['date'])

# Combine all multi kills features into one
# Define the list of multi kills features to combine
multi_kills_features = ["doublekills", "triplekills", "quadrakills", "pentakills"]

# Initialize a new column to hold the combined multi kills
df["multikills"] = 0

# Iterate through each multi kills feature and add its value to the combined column
for feature in multi_kills_features:
    df["multikills"] += df[feature]

# Drop the individual multi kills features as they are now combined
df = df.drop(multi_kills_features, axis=1)


In [7]:
def calculate_team_champion_stats(data):
    """Calculate overall champion statistics for each team regardless of pick order"""
    team_champ_stats = {}
    
    # Get all pick columns
    pick_cols = [f"pick{i}" for i in range(1, 6)]
    
    # Calculate stats for each team-champion combination
    for team in data["teamname"].unique():
        team_data = data[data["teamname"] == team]
        champion_stats = {}
        
        # Combine all picks to get total champion usage
        for pick_col in pick_cols:
            for champ in team_data[pick_col].unique():
                if champ not in champion_stats:
                    champion_stats[champ] = {"games": 0, "wins": 0}
                
                champ_games = team_data[team_data[pick_col] == champ]
                champion_stats[champ]["games"] += len(champ_games)
                champion_stats[champ]["wins"] += champ_games["result"].sum()
        
        team_champ_stats[team] = champion_stats
    
    return team_champ_stats

# Calculate team-champion statistics
team_champion_stats = calculate_team_champion_stats(df)

# Add statistics columns for each pick
for i in range(1, 6):
    pick_col = f"pick{i}"
    winrate_col = f"winrate_{pick_col}"
    count_col = f"count_{pick_col}"
    
    def get_champ_stats(row):
        team_stats = team_champion_stats.get(row["teamname"], {})
        champ_stats = team_stats.get(row[pick_col], {"games": 0, "wins": 0})
        
        games = champ_stats["games"]
        winrate = champ_stats["wins"] / games if games > 0 else 0
        
        return pd.Series({
            winrate_col: winrate,
            count_col: games
        })
    
    df[[ winrate_col, count_col ]] = df.apply(get_champ_stats, axis=1)

# Display example of processed data
print("\nExample of team-champion statistics:")
print(df[["teamname", 
          "pick1", "winrate_pick1", "count_pick1",
          "pick2", "winrate_pick2", "count_pick2"]].head())

# Save processed data
df.to_csv("../Data/processed_for_prediction.csv", index=False)



Example of team-champion statistics:
            teamname   pick1  winrate_pick1  count_pick1     pick2  \
0                DRX  Lucian       0.222222          9.0   Orianna   
1  Nongshim RedForce   Milio       0.250000          8.0      Azir   
2                DRX  Lucian       0.222222          9.0      Nami   
3  Nongshim RedForce   Milio       0.250000          8.0        Vi   
4              Gen.G   Corki       0.843750         32.0  Aphelios   

   winrate_pick2  count_pick2  
0       0.000000          4.0  
1       0.176471         17.0  
2       0.200000          5.0  
3       0.444444          9.0  
4       0.750000         12.0  


In [3]:
import pandas as pd
df = pd.read_csv('../Data/processed_for_prediction.csv')