In [20]:
import os
import pandas as pd
from dotenv import load_dotenv

from mktools.get_data import load_data_pd
from mktools.stats import calculate_npi, calculate_all_stats

load_dotenv()

# Load Data from the Google Sheet
df = load_data_pd(
    sheet_name="data",
    sheet_id=os.environ["SHEET_ID"],
    usecols=[
        "UID",
        "SUID",
        "NAME",
        "CHARACTER",
        "MAP",
        "PLACE",
        "PLAYERS",
        "DATE",
        "SEASON",
    ],
)

In [16]:
# Calculate the overall NPI number for each player per season per game type
npi_df = calculate_npi(initial_df=df)

npi_df

Unnamed: 0,NAME,PLACE,PLAYERS,SEASON,COUNT,TOTAL_PLAYED,PERCENT,NPI_INTERMEDIATE,NPI
0,Cooper,1,2,0,18,24,75.000000,0.750000,1.250000
1,Cooper,2,2,0,6,24,25.000000,0.500000,1.250000
2,Connor,1,2,0,21,31,67.741935,0.677419,1.322581
3,Connor,2,2,0,10,31,32.258065,0.645161,1.322581
4,Regan,1,2,0,7,11,63.636364,0.636364,1.363636
...,...,...,...,...,...,...,...,...,...
1362,Graber,4,4,10,6,7,85.714286,3.428571,3.857143
1363,Graber,3,4,10,1,7,14.285714,0.428571,3.857143
1364,Randy,4,4,10,8,9,88.888889,3.555556,3.888889
1365,Randy,3,4,10,1,9,11.111111,0.333333,3.888889


In [21]:
# Define the column name mappings for each possible place
place_dict = {1: "WINS", 2: "2NDS", 3: "3RDS", 4: "4THS"}

# Calculate the total stats values for each player, season and game type
all_stats_df = calculate_all_stats(
    initial_df=npi_df,
    place_mapping_ref=place_dict,
    wins_column=place_dict[1],
    seconds_column=place_dict[2],
    thirds_column=place_dict[3],
    fourths_column=place_dict[4],
)

In [30]:
def calculate_all_win_rates(stats_df: pd.DataFrame) -> pd.DataFrame:

    sdf = stats_df.copy()

    all_wr_df = (
        sdf.groupby(by=["NAME", "PLAYERS"])
        .agg(
            TOTAL_WINS=pd.NamedAgg("WINS", "sum"),
            TOTAL_GAMES_PLAYED=pd.NamedAgg("TOTAL_PLAYED", "sum"),
            OVERALL_WIN_RATE=pd.NamedAgg("WINS_PERCENTAGE", "mean"),
            OVERALL_2NDS=pd.NamedAgg("2NDS", "sum"),
            OVERALL_3RDS=pd.NamedAgg("3RDS", "sum"),
            OVERALL_4THS=pd.NamedAgg("4THS", "sum"),
            OVERALL_2NDS_RATE=pd.NamedAgg("2NDS_PERCENTAGE", "mean"),
            OVERALL_3RDS_RATE=pd.NamedAgg("3RDS_PERCENTAGE", "mean"),
            OVERALL_4THS_RATE=pd.NamedAgg("4THS_PERCENTAGE", "mean"),
        )
        .reset_index()
        .sort_values(
            by=[
                "PLAYERS",
                "OVERALL_WIN_RATE",
            ],
            ascending=[False, False],
        )
        .reset_index(drop=True)
    )

    return all_wr_df

Unnamed: 0,NAME,PLAYERS,TOTAL_WINS,TOTAL_GAMES_PLAYED,OVERALL_WIN_RATE,OVERALL_2NDS,OVERALL_3RDS,OVERALL_4THS,OVERALL_2NDS_RATE,OVERALL_3RDS_RATE,OVERALL_4THS_RATE
0,Cooper,4,811.0,1771,44.273049,451.0,301.0,208.0,26.617169,17.444565,11.665218
1,Matt,4,428.0,1045,40.576899,278.0,224.0,115.0,25.679362,22.166837,11.576901
2,Regan,4,440.0,1345,31.819209,382.0,336.0,187.0,28.768469,25.363452,14.048869
3,Chandler,4,40.0,141,31.694246,38.0,37.0,26.0,32.801472,21.616909,13.887373
4,Luke,4,65.0,244,28.824472,74.0,60.0,45.0,31.341625,22.641234,17.192669
...,...,...,...,...,...,...,...,...,...,...,...
87,Domingo,2,0.0,4,0.000000,4.0,0.0,0.0,100.000000,0.000000,0.000000
88,Garrett,2,0.0,2,0.000000,2.0,0.0,0.0,100.000000,0.000000,0.000000
89,Randy,2,0.0,1,0.000000,0.0,0.0,1.0,0.000000,0.000000,100.000000
90,Robert,2,0.0,1,0.000000,1.0,0.0,0.0,100.000000,0.000000,0.000000


In [31]:
all_wr_df[all_wr_df["TOTAL_GAMES_PLAYED"] == all_wr_df["TOTAL_GAMES_PLAYED"].max()]

Unnamed: 0,NAME,PLAYERS,TOTAL_WINS,TOTAL_GAMES_PLAYED,OVERALL_WIN_RATE,OVERALL_2NDS,OVERALL_3RDS,OVERALL_4THS,OVERALL_2NDS_RATE,OVERALL_3RDS_RATE,OVERALL_4THS_RATE
7,Blake,4,467.0,2188,21.175623,655.0,630.0,436.0,30.240596,28.688485,19.895297


In [34]:
all_wr_df[all_wr_df["NAME"] == "Blake"]["TOTAL_GAMES_PLAYED"].sum()

3918

In [36]:
all_wr_df[all_wr_df["NAME"] == "Martin"]

Unnamed: 0,NAME,PLAYERS,TOTAL_WINS,TOTAL_GAMES_PLAYED,OVERALL_WIN_RATE,OVERALL_2NDS,OVERALL_3RDS,OVERALL_4THS,OVERALL_2NDS_RATE,OVERALL_3RDS_RATE,OVERALL_4THS_RATE
12,Martin,4,1.0,16,8.333333,2.0,6.0,7.0,12.5,43.055556,36.111111
65,Martin,3,0.0,3,0.0,0.0,3.0,0.0,0.0,100.0,0.0


In [35]:
all_wr_df[all_wr_df["NAME"] == "Cole"]["TOTAL_GAMES_PLAYED"].sum()

2369

In [27]:
all_wr_df[all_wr_df["NAME"] == "Graber"]

Unnamed: 0,NAME,PLAYERS,TOTAL_WINS,TOTAL_GAMES_PLAYED,OVERALL_WIN_RATE,OVERALL_2NDS_RATE,OVERALL_3RDS_RATE,OVERALL_4THS_RATE
10,Graber,4,48.0,309,11.442677,26.833027,26.075643,35.648653
49,Graber,3,16.0,131,7.535937,45.658727,46.805336,0.0
84,Graber,2,5.0,28,9.010989,89.89011,1.098901,0.0


In [28]:
all_wr_df[all_wr_df["NAME"] == "Domingo"]

Unnamed: 0,NAME,PLAYERS,TOTAL_WINS,TOTAL_GAMES_PLAYED,OVERALL_WIN_RATE,OVERALL_2NDS_RATE,OVERALL_3RDS_RATE,OVERALL_4THS_RATE
25,Domingo,4,0.0,200,0.0,2.058233,18.988746,78.953021
60,Domingo,3,0.0,54,0.0,17.083333,82.916667,0.0
87,Domingo,2,0.0,4,0.0,100.0,0.0,0.0


In [17]:
first_place_df = npi_df[npi_df["PLACE"] == 1].copy()

In [18]:
df[df["PLAYERS"] == 1]

Unnamed: 0,UID,SUID,NAME,CHARACTER,MAP,PLACE,PLAYERS,DATE,SEASON


In [19]:
first_place_df.groupby(by=["NAME", "PLAYERS"]).agg(
    OVERALL_WIN_RATE=pd.NamedAgg("PERCENT", "mean"),
    TOTAL_GAMES_PLAYED=pd.NamedAgg("TOTAL_PLAYED", "sum"),
).reset_index().sort_values(
    by=[
        "PLAYERS",
        "OVERALL_WIN_RATE",
    ],
    ascending=[False, False],
).reset_index(
    drop=True
)

Unnamed: 0,NAME,PLAYERS,OVERALL_WIN_RATE,TOTAL_GAMES_PLAYED
0,Martin,4,50.0,2
1,Cooper,4,44.273049,1771
2,Matt,4,40.576899,1045
3,Chandler,4,38.737412,138
4,Konnor,4,34.521886,49
5,Regan,4,31.819209,1345
6,Luke,4,31.706919,231
7,Cole,4,27.185923,1415
8,Connor,4,21.454947,1472
9,Blake,4,21.175623,2188
