In [3]:
import os
from datetime import datetime
from urllib.parse import quote

import numpy as np
import pandas as pd
import polars as pl
from dotenv import load_dotenv
from plotly import express as px

load_dotenv()

True

In [4]:
def load_data(sheet_name: str, sheet_id: str) -> pl.DataFrame:
    """Load the specified sheet from Google Drive as a Polars DataFrame.

    Args:
        sheet_name (str): Raw (not URL-encoded) name of the sheet tab to load.
            The name is percent-encoded before it is placed in the request URL,
            so names containing spaces, `&`, `#`, etc. do not corrupt the query.
        sheet_id (str): Google Drive Sheets ID. Unique value stored in the `.env` file.
            The environment variable is accessible with `dotenv.load_dotenv()` and
            `os.environ["<YOUR_SHEET_ID>"]`.

    Returns:
        pl.DataFrame: Polars DataFrame with the columns UID, SUID, NAME, CHARACTER,
        MAP, PLACE, PLAYERS and DATE read from the sheet's CSV export.
    """

    # Percent-encode the tab name: it is interpolated into a query string, where
    # an unescaped space, `&` or `#` would produce an invalid or truncated URL.
    encoded_sheet_name = quote(sheet_name, safe="")

    # Construct the Google Drive CSV-export URL from `sheet_id` and the encoded name
    url = (
        f"https://docs.google.com/spreadsheets/d/{sheet_id}"
        f"/gviz/tq?tqx=out:csv&sheet={encoded_sheet_name}"
    )

    # Load the data from the URL as a Polars DataFrame.
    # NOTE: `fill_nan` only rewrites floating-point NaN values (never nulls), so
    # replacing NaN with `np.nan` leaves the data unchanged; empty cells stay null.
    # NOTE(review): unlike `load_data_pd`, the "SEASON" column is not requested
    # here - confirm whether that is intentional.
    df = pl.read_csv(
        url,
        columns=["UID", "SUID", "NAME", "CHARACTER", "MAP", "PLACE", "PLAYERS", "DATE"],
    ).fill_nan(np.nan)

    return df


def load_data_pd(sheet_name: str, sheet_id: str) -> pd.DataFrame:
    """Load the specified sheet from Google Drive as a pandas DataFrame.

    Args:
        sheet_name (str): Raw (not URL-encoded) name of the sheet tab to load.
            The name is percent-encoded before it is placed in the request URL,
            so names containing spaces, `&`, `#`, etc. do not corrupt the query.
        sheet_id (str): Google Drive Sheets ID. Unique value stored in the `.env` file.
            The environment variable is accessible with `dotenv.load_dotenv()` and
            `os.environ["<YOUR_SHEET_ID>"]`.

    Returns:
        pd.DataFrame: pandas DataFrame with the columns UID, SUID, NAME, CHARACTER,
        MAP, PLACE, PLAYERS, DATE and SEASON read from the sheet's CSV export.
    """

    # Percent-encode the tab name: it is interpolated into a query string, where
    # an unescaped space, `&` or `#` would produce an invalid or truncated URL.
    encoded_sheet_name = quote(sheet_name, safe="")

    # Construct the Google Drive CSV-export URL from `sheet_id` and the encoded name
    url = (
        f"https://docs.google.com/spreadsheets/d/{sheet_id}"
        f"/gviz/tq?tqx=out:csv&sheet={encoded_sheet_name}"
    )

    # Load the data from the URL as a pandas DataFrame, keeping only the columns
    # used by the analysis. Missing values are represented as `np.nan`.
    df = pd.read_csv(
        url,
        usecols=[
            "UID",
            "SUID",
            "NAME",
            "CHARACTER",
            "MAP",
            "PLACE",
            "PLAYERS",
            "DATE",
            "SEASON",
        ],
    ).fillna(np.nan)

    return df

In [5]:
# %%timeit
# Fetch the "data" sheet as a pandas DataFrame. The sheet ID is read from the
# SHEET_ID environment variable; `os.environ[...]` raises KeyError if it is unset.
panda_df = load_data_pd(sheet_name="data", sheet_id=os.environ["SHEET_ID"])

In [6]:
# Show the loaded frame as the cell's output (bare last expression).
panda_df

Unnamed: 0,UID,SUID,NAME,CHARACTER,MAP,PLACE,PLAYERS,DATE,SEASON
0,1,1,Cole,Toad,Sherbet Land,4,4,2021-09-20,0
1,1,1,Connor,Yoshi,Sherbet Land,2,4,2021-09-20,0
2,1,1,Cooper,Peach,Sherbet Land,1,4,2021-09-20,0
3,1,1,Triston,Bowser,Sherbet Land,3,4,2021-09-20,0
4,2,1,Cole,Toad,Kalimari Desert,4,4,2021-09-20,0
...,...,...,...,...,...,...,...,...,...
19374,5652,21,Regan,Toad,Banshee Boardwalk,4,4,2024-05-24,10
19375,5653,21,Cooper,Wario,Royal Raceway,1,4,2024-05-24,10
19376,5653,21,Matt,Toad,Royal Raceway,2,4,2024-05-24,10
19377,5653,21,Regan,Yoshi,Royal Raceway,3,4,2024-05-24,10


In [7]:
# Keep only the result rows whose NAME is exactly "Matt".
mdf = panda_df.loc[panda_df["NAME"].eq("Matt")]

In [8]:
# Show only the result rows whose NAME is exactly "Cooper".
panda_df.loc[panda_df["NAME"].eq("Cooper")]

Unnamed: 0,UID,SUID,NAME,CHARACTER,MAP,PLACE,PLAYERS,DATE,SEASON
2,1,1,Cooper,Peach,Sherbet Land,1,4,2021-09-20,0
6,2,1,Cooper,Yoshi,Kalimari Desert,1,4,2021-09-20,0
10,3,1,Cooper,Toad,Yoshi Valley,1,4,2021-09-20,0
15,4,1,Cooper,Mario,Wario Stadium,1,4,2021-09-20,0
18,5,1,Cooper,Luigi,Choco Mountain,1,3,2021-09-20,0
...,...,...,...,...,...,...,...,...,...
19360,5649,21,Cooper,Yoshi,Yoshi Valley,1,3,2024-05-24,10
19363,5650,21,Cooper,Luigi,Koopa Troopa Beach,1,4,2024-05-24,10
19367,5651,21,Cooper,Toad,Bowser's Castle,1,4,2024-05-24,10
19371,5652,21,Cooper,D.K.,Banshee Boardwalk,1,4,2024-05-24,10


In [13]:
# Check whether any NAME value contains the text "Cooper".
# `str.contains` treats its argument as a pattern and matches substrings, so
# this is a looser test than the exact `== "Cooper"` comparison used elsewhere.
panda_df["NAME"].str.contains("Cooper").any()

True

In [10]:
# Number of result rows recorded per player name, most frequent first.
# NOTE(review): the recorded output lists "Duncan P" twice (10 and 1), so two
# distinct strings are present - possibly a whitespace difference; confirm and
# normalise the names if so.
panda_df["NAME"].value_counts()

NAME
Blake        3871
Cooper       2956
Connor       2396
Cole         2341
Regan        2137
Matt         1525
Triston       805
Jake          546
Graber        460
Anthony       348
Luke          326
Domingo       238
Colton        230
Chandler      226
Robert        209
Miller        111
Konnor         90
Ben            84
Sam            81
Randy          54
Joey           51
Andrew         45
Caskey         43
Chloe          36
Hughes         35
Kayla          18
Austin         18
Martin         17
Garrett        17
Coop W         16
Mikey          16
Justin         11
Duncan P       10
Mitch           7
Billy           2
Kieran          2
Duncan P        1
Name: count, dtype: int64

In [34]:
pdf = panda_df.copy()

# Collect every race (all rows sharing one UID) in which BOTH Cooper and Matt
# have a result row.
#
# A single `groupby` pass replaces re-filtering the whole frame once per UID.
# `sort=False` keeps the groups in order of first appearance, i.e. the same
# order `pdf["UID"].unique()` would give, and each group keeps its original
# rows and index.
#
# Names are compared exactly (set membership), matching the `== "Cooper"` /
# `== "Matt"` filters used elsewhere in this notebook; substring matching
# would also accept names that merely contain those strings.
vs_list = [
    race_rows
    for _, race_rows in pdf.groupby("UID", sort=False)
    if {"Cooper", "Matt"}.issubset(race_rows["NAME"])
]

In [35]:
# Stack the per-race frames row-wise into one head-to-head table; the original
# row index of every result is kept.
vs_df = pd.concat(vs_list, axis=0)

vs_df

Unnamed: 0,UID,SUID,NAME,CHARACTER,MAP,PLACE,PLAYERS,DATE,SEASON
23,7,1,Blake,Yoshi,Banshee Boardwalk,1,4,2021-09-20,0
24,7,1,Connor,Toad,Banshee Boardwalk,4,4,2021-09-20,0
25,7,1,Cooper,Bowser,Banshee Boardwalk,3,4,2021-09-20,0
26,7,1,Matt,Peach,Banshee Boardwalk,2,4,2021-09-20,0
27,8,1,Blake,Yoshi,Toad's Turnpike,2,3,2021-09-20,0
...,...,...,...,...,...,...,...,...,...
19325,5639,20,Blake,Toad,Toad's Turnpike,3,3,2024-05-23,10
19375,5653,21,Cooper,Wario,Royal Raceway,1,4,2024-05-24,10
19376,5653,21,Matt,Toad,Royal Raceway,2,4,2024-05-24,10
19377,5653,21,Regan,Yoshi,Royal Raceway,3,4,2024-05-24,10


In [36]:
# First ten rows of the head-to-head table.
vs_df.iloc[:10]

Unnamed: 0,UID,SUID,NAME,CHARACTER,MAP,PLACE,PLAYERS,DATE,SEASON
23,7,1,Blake,Yoshi,Banshee Boardwalk,1,4,2021-09-20,0
24,7,1,Connor,Toad,Banshee Boardwalk,4,4,2021-09-20,0
25,7,1,Cooper,Bowser,Banshee Boardwalk,3,4,2021-09-20,0
26,7,1,Matt,Peach,Banshee Boardwalk,2,4,2021-09-20,0
27,8,1,Blake,Yoshi,Toad's Turnpike,2,3,2021-09-20,0
28,8,1,Cooper,Bowser,Toad's Turnpike,1,3,2021-09-20,0
29,8,1,Matt,Toad,Toad's Turnpike,3,3,2021-09-20,0
30,9,1,Blake,Yoshi,D.K.'s Jungle,3,3,2021-09-20,0
31,9,1,Cooper,Wario,D.K.'s Jungle,2,3,2021-09-20,0
32,9,1,Matt,Toad,D.K.'s Jungle,1,3,2021-09-20,0


In [50]:
# Rows of the head-to-head table where the player finished in first place.
vs_df.loc[vs_df["PLACE"].eq(1)]

Unnamed: 0,UID,SUID,NAME,CHARACTER,MAP,PLACE,PLAYERS,DATE,SEASON
23,7,1,Blake,Yoshi,Banshee Boardwalk,1,4,2021-09-20,0
28,8,1,Cooper,Bowser,Toad's Turnpike,1,3,2021-09-20,0
32,9,1,Matt,Toad,D.K.'s Jungle,1,3,2021-09-20,0
98,27,3,Cooper,Peach,Koopa Troopa Beach,1,4,2021-09-22,0
103,28,3,Matt,Toad,Royal Raceway,1,4,2021-09-22,0
...,...,...,...,...,...,...,...,...,...
19307,5635,20,Cooper,Peach,Bowser's Castle,1,4,2024-05-23,10
19311,5636,20,Matt,Yoshi,Sherbet Land,1,4,2024-05-23,10
19315,5637,20,Cooper,Luigi,Mario Raceway,1,4,2024-05-23,10
19323,5639,20,Cooper,Mario,Toad's Turnpike,1,3,2024-05-23,10


In [57]:
# Cooper's first-place finishes in the head-to-head races, counted per season.
# Seasons without a Cooper win produce no row.
vs_wins_gb_c = (
    vs_df.loc[lambda races: races["PLACE"].eq(1) & races["NAME"].eq("Cooper")]
    .groupby("SEASON")
    .agg(NAME=("NAME", "first"), wins=("UID", "count"))
    .reset_index()
)

vs_wins_gb_c

Unnamed: 0,SEASON,NAME,wins
0,0,Cooper,30
1,1,Cooper,5
2,2,Cooper,29
3,3,Cooper,16
4,4,Cooper,29
5,5,Cooper,24
6,6,Cooper,31
7,7,Cooper,21
8,8,Cooper,42
9,9,Cooper,33


In [58]:
# Matt's first-place finishes in the head-to-head races, counted per season.
# Seasons without a Matt win produce no row.
vs_wins_gb_m = (
    vs_df.loc[lambda races: races["PLACE"].eq(1) & races["NAME"].eq("Matt")]
    .groupby("SEASON")
    .agg(NAME=("NAME", "first"), wins=("UID", "count"))
    .reset_index()
)

In [63]:
# Per season and player: mean finishing place and number of head-to-head races.
vs_gb = (
    vs_df.groupby(["SEASON", "NAME"])
    .agg(average_place=("PLACE", "mean"), games_played=("UID", "count"))
    .reset_index()
)

# Restrict to the two players being compared and order each season's rows by
# average place, best (lowest) first.
mvc_df = vs_gb.loc[vs_gb["NAME"].isin(["Cooper", "Matt"])].sort_values(
    ["SEASON", "average_place"], ignore_index=True
)

mvc_df

Unnamed: 0,SEASON,NAME,average_place,games_played
0,0,Matt,1.817204,93
1,0,Cooper,2.139785,93
2,1,Matt,1.965517,29
3,1,Cooper,2.346154,26
4,2,Matt,1.974684,79
5,2,Cooper,2.074074,81
6,3,Matt,1.943396,53
7,3,Cooper,2.12963,54
8,4,Cooper,1.745763,59
9,4,Matt,2.050847,59


In [60]:
# Attach Cooper's per-season head-to-head win count to his per-season averages.
#
# `vs_wins_gb_c` only has rows for seasons with at least one win, so an inner
# merge would silently drop any season in which Cooper played but never won.
# Merging from Cooper's rows with `how="left"` keeps every played season and
# records the missing counts as 0 wins.
c_merge = (
    mvc_df[mvc_df["NAME"] == "Cooper"]
    .merge(vs_wins_gb_c, on=["NAME", "SEASON"], how="left", validate="1:1")
    .fillna({"wins": 0})
    .astype({"wins": "int64"})  # the left merge turns the count into float when gaps exist
)

c_merge

Unnamed: 0,NAME,SEASON,average_place,games_played,wins
0,Cooper,0,2.139785,93,30
1,Cooper,1,2.346154,26,5
2,Cooper,2,2.074074,81,29
3,Cooper,3,2.12963,54,16
4,Cooper,4,1.745763,59,29
5,Cooper,5,1.985507,69,24
6,Cooper,6,2.27451,102,31
7,Cooper,7,2.305556,72,21
8,Cooper,8,1.913043,92,42
9,Cooper,9,2.044776,67,33


In [61]:
# Attach Matt's per-season head-to-head win count to his per-season averages.
#
# `vs_wins_gb_m` only has rows for seasons with at least one win, so an inner
# merge would silently drop any season in which Matt played but never won.
# Merging from Matt's rows with `how="left"` keeps every played season and
# records the missing counts as 0 wins.
m_merge = (
    mvc_df[mvc_df["NAME"] == "Matt"]
    .merge(vs_wins_gb_m, on=["NAME", "SEASON"], how="left", validate="1:1")
    .fillna({"wins": 0})
    .astype({"wins": "int64"})  # the left merge turns the count into float when gaps exist
)

m_merge

Unnamed: 0,NAME,SEASON,average_place,games_played,wins
0,Matt,0,1.817204,93,45
1,Matt,1,1.965517,29,10
2,Matt,2,1.974684,79,33
3,Matt,3,1.943396,53,24
4,Matt,4,2.050847,59,19
5,Matt,5,1.884058,69,32
6,Matt,6,2.039216,102,40
7,Matt,7,2.097222,72,29
8,Matt,8,2.108696,92,31
9,Matt,9,2.328358,67,16


In [62]:
# Combine both players' season summaries into one table, ordered by season and
# then by average place (lowest first), with a fresh 0..n-1 index.
cat_cat_df = pd.concat([c_merge, m_merge]).sort_values(
    ["SEASON", "average_place"], ignore_index=True
)

cat_cat_df

Unnamed: 0,NAME,SEASON,average_place,games_played,wins
0,Matt,0,1.817204,93,45
1,Cooper,0,2.139785,93,30
2,Matt,1,1.965517,29,10
3,Cooper,1,2.346154,26,5
4,Matt,2,1.974684,79,33
5,Cooper,2,2.074074,81,29
6,Matt,3,1.943396,53,24
7,Cooper,3,2.12963,54,16
8,Cooper,4,1.745763,59,29
9,Matt,4,2.050847,59,19


# MARK: Overall wins per season (all races, not only Cooper-vs-Matt races)

In [68]:
# Cooper's first-place finishes across ALL races (not only head-to-head ones),
# counted per season. Seasons without a Cooper win produce no row.
# Wins are counted on "UID" (one UID per race) so this table is computed the
# same way as the Matt table and the head-to-head win tables.
panda_df_c_wins = (
    panda_df[(panda_df["PLACE"] == 1) & (panda_df["NAME"] == "Cooper")]
    .groupby(["SEASON"])
    .agg(
        NAME=pd.NamedAgg(column="NAME", aggfunc="first"),
        total_wins=pd.NamedAgg(column="UID", aggfunc="count"),
    )
    .reset_index()
)

panda_df_c_wins

Unnamed: 0,SEASON,NAME,total_wins
0,0,Cooper,188
1,1,Cooper,32
2,2,Cooper,118
3,3,Cooper,90
4,4,Cooper,138
5,5,Cooper,115
6,6,Cooper,133
7,7,Cooper,173
8,8,Cooper,195
9,9,Cooper,200


In [67]:
# Matt's first-place finishes across ALL races (not only head-to-head ones),
# counted per season. Seasons without a Matt win produce no row.
panda_df_m_wins = (
    panda_df.loc[lambda races: races["PLACE"].eq(1) & races["NAME"].eq("Matt")]
    .groupby("SEASON")
    .agg(NAME=("NAME", "first"), total_wins=("UID", "count"))
    .reset_index()
)

panda_df_m_wins

Unnamed: 0,SEASON,NAME,total_wins
0,0,Matt,98
1,1,Matt,88
2,2,Matt,62
3,3,Matt,67
4,4,Matt,45
5,5,Matt,64
6,6,Matt,69
7,7,Matt,47
8,8,Matt,60
9,9,Matt,23
