In [30]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

def get_top_players_by_year(
    year: int,
    data_folder: str = "data",
    *,
    min_rounds: int = 50,
    top_n: int = 10,
) -> dict:
    """
    Load one year's VCT stats CSV and rank the best players per tournament.

    For every tournament in the file, rows with fewer than ``min_rounds``
    rounds or with a missing metric are discarded, the remaining rows are
    averaged per player, each metric is min-max scaled to [0, 1] within
    that tournament, and the unweighted mean of the scaled metrics becomes
    the player's "Composite Score".

    Args:
        year (int): Year to analyze; the file ``{data_folder}/{year}.csv``
            is read.
        data_folder (str): Folder where yearly CSVs are stored.
        min_rounds (int): Minimum "Rounds Played" a row needs to be kept.
        top_n (int): Number of players returned per tournament.

    Returns:
        dict: {tournament_name: DataFrame} where each DataFrame holds the
        ``top_n`` players sorted by descending "Composite Score", with the
        columns "Player", the seven scaled metrics, and "Composite Score".
        Tournaments with no qualifying rows are omitted.

    Raises:
        FileNotFoundError: If the CSV for ``year`` does not exist.
        KeyError: If a required column is missing from the CSV.
    """
    stats = pd.read_csv(f"{data_folder}/{year}.csv")

    # Percentage columns may arrive as strings such as "25%"; strip the sign
    # and convert to a fraction. Unparseable values become NaN.
    percent_columns = ['Headshot %', 'Clutch Success %', 'Kill, Assist, Trade, Survive %']
    for col in percent_columns:
        if col in stats.columns:
            stripped = stats[col].map(
                lambda value: value.rstrip('%') if isinstance(value, str) else value
            )
            stats[col] = pd.to_numeric(stripped, errors='coerce') / 100

    # Recalculate K:D Ratio. A row with zero deaths would yield +/-inf (or NaN
    # for 0/0); inf survives dropna() and breaks min-max scaling, so such rows
    # are masked to NaN here and removed with the other incomplete rows below.
    deaths = stats['Deaths']
    stats['K:D Ratio'] = (stats['Kills'] / deaths).where(deaths != 0)

    # Metrics that feed the composite score (all weighted equally).
    extended_metrics = [
        'Kill, Assist, Trade, Survive %',
        'Average Combat Score',
        'Clutch Success %',
        'Average Damage Per Round',
        'K:D Ratio',
        'Headshot %',
        'First Kills'
    ]

    top_by_tournament = {}

    for tournament in stats['Tournament'].dropna().unique():
        rows = stats[stats['Tournament'] == tournament]

        # Keep rows with enough rounds and a complete set of metrics.
        rows = rows[rows['Rounds Played'] >= min_rounds]
        rows = rows.dropna(subset=extended_metrics)
        if rows.empty:
            continue

        # One row per player: average each metric over that player's rows.
        per_player = rows.groupby('Player')[extended_metrics].mean()

        # Min-max scale each metric within the tournament. A metric that is
        # constant across players has zero range; dividing by 1 instead maps
        # it to 0 rather than NaN.
        lows = per_player.min()
        spans = per_player.max() - lows
        spans = spans.mask(spans == 0, 1.0)
        scaled = (per_player - lows) / spans

        scaled['Composite Score'] = scaled[extended_metrics].mean(axis=1)

        top_by_tournament[tournament] = (
            scaled.reset_index()
            .sort_values('Composite Score', ascending=False)
            .head(top_n)
            .reset_index(drop=True)
        )

    return top_by_tournament


In [41]:
# Build the per-tournament rankings for 2024. data_folder is left at its
# default, so this reads "data/2024.csv" relative to the working directory.
top_2024 = get_top_players_by_year(2024)

# View top 10 players from a specific tournament.
# The result is a plain dict keyed by tournament name, so this lookup raises
# KeyError if the name is not present (tournaments with no qualifying rows
# are left out of the dict). As the last expression in the cell, the
# DataFrame is what the notebook renders.
top_2024["Valorant Champions 2024"]


Unnamed: 0,Player,"Kill, Assist, Trade, Survive %",Average Combat Score,Clutch Success %,Average Damage Per Round,K:D Ratio,Headshot %,First Kills,Composite Score
0,Derke,0.9,1.0,0.612745,0.982665,1.0,0.414986,0.829508,0.819986
1,Meteor,0.727273,0.896527,0.400327,0.919104,0.987276,1.0,0.372678,0.757598
2,zekken,0.681818,0.932976,0.546218,0.881443,0.755183,0.337176,0.875878,0.715813
3,Chronicle,0.845455,0.802977,0.328431,0.891658,0.90017,0.56196,0.584262,0.70213
4,primmie,0.863636,0.854004,0.318627,0.774648,0.935992,0.942363,0.180328,0.695657
5,ZmjjKK,0.665289,0.91199,0.276292,0.870186,0.792448,0.341106,1.0,0.693902
6,CHICHOO,0.990909,0.714387,0.42402,0.702059,0.878244,0.596542,0.411148,0.673901
7,RieNs,0.891608,0.618492,0.465686,0.669639,0.722255,0.975615,0.338462,0.668822
8,keznit,0.845455,0.953225,0.348039,1.0,0.703494,0.354467,0.353443,0.65116
9,aspas,0.823232,0.726435,0.351307,0.74696,0.770978,0.510086,0.621129,0.650018
