In [2]:
import os
import numpy as np
import pandas as pd
from datetime import datetime
from dotenv import load_dotenv
from multielo import MultiElo, Player, Tracker
from mktools.get_data import load_data_pd
from alive_progress import alive_it
from mktools.validate_data import validate_bad_uids
import plotly.express as px
from bs4 import BeautifulSoup

# Load variables from a local .env file into the process environment;
# SHEET_ID is read from os.environ when the sheet is loaded.
load_dotenv()

True

In [3]:
# Columns requested from the "data_main" sheet.
main_columns = [
    "UID",
    "SUID",
    "NAME",
    "CHARACTER",
    "MAP",
    "PLACE",
    "PLAYERS",
    "DATE",
    "SEASON",
]

# The sheet id comes from the environment rather than being hard-coded.
df = load_data_pd(
    sheet_name="data_main",
    sheet_id=os.environ["SHEET_ID"],
    usecols=main_columns,
)

# Parse DATE so later cells can sort and diff on real timestamps.
df["DATE"] = pd.to_datetime(df["DATE"])

# validate_bad_uids is a project helper; with return_valid=True its result is
# unpacked here as (invalid, valid).
invalid, valid = validate_bad_uids(df=df, return_valid=True)

In [4]:
# Display the frame unpacked as `valid` from validate_bad_uids.
valid

Unnamed: 0,UID,SUID,NAME,CHARACTER,MAP,PLACE,PLAYERS,DATE,SEASON
0,1,1,Cole,Toad,Sherbet Land,4,4,2021-09-20 00:00:00,0
1,1,1,Connor,Yoshi,Sherbet Land,2,4,2021-09-20 00:00:00,0
2,1,1,Cooper,Peach,Sherbet Land,1,4,2021-09-20 00:00:00,0
3,1,1,Triston,Bowser,Sherbet Land,3,4,2021-09-20 00:00:00,0
4,2,1,Cole,Toad,Kalimari Desert,4,4,2021-09-20 00:00:00,0
...,...,...,...,...,...,...,...,...,...
20933,6373,742,Cooper,Wario,Kalimari Desert,3,4,2024-07-22 23:28:37,11
20934,6373,742,Blake,Yoshi,Kalimari Desert,4,4,2024-07-22 23:28:37,11
20935,6374,742,Cooper,Wario,Frappe Snowland,1,3,2024-07-23 00:13:20,11
20936,6374,742,Regan,Yoshi,Frappe Snowland,2,3,2024-07-23 00:13:20,11


In [5]:
# Sanity check: list any rows whose NAME is missing.
valid[valid["NAME"].isna()]

Unnamed: 0,UID,SUID,NAME,CHARACTER,MAP,PLACE,PLAYERS,DATE,SEASON


In [6]:
# Work on a copy of the validated frame.
vdf = valid.copy()


# keep=False flags every row of a repeated (UID, NAME) pair, i.e. the same
# name appearing more than once under one UID.
msk = vdf.duplicated(subset=["UID", "NAME"], keep=False)

dupe_names = vdf[msk]

dupe_names

Unnamed: 0,UID,SUID,NAME,CHARACTER,MAP,PLACE,PLAYERS,DATE,SEASON
484,133,12,Connor,Peach,Toad's Turnpike,1,4,2021-10-01,0
485,133,12,Connor,Toad,Toad's Turnpike,4,4,2021-10-01,0
1237,348,27,Cole,Yoshi,Bowser's Castle,1,2,2021-11-13,0
1238,348,27,Connor,Peach,Bowser's Castle,2,2,2021-11-13,0
1239,348,27,Cole,Peach,Koopa Troopa Beach,2,2,2021-11-13,0
...,...,...,...,...,...,...,...,...,...
18869,5791,694,Cooper,Peach,D.K.'s Jungle,2,3,2024-06-04,10
20270,6179,725,Konnor,Peach,Mario Raceway,1,2,2024-07-05,11
20271,6179,725,Cooper,Yoshi,Mario Raceway,2,2,2024-07-05,11
20272,6179,725,Konnor,Yoshi,Banshee Boardwalk,1,2,2024-07-05,11


In [7]:
# Every row of any UID that contains a duplicated name (not only the
# duplicated rows themselves).
invalid_names = vdf[vdf["UID"].isin(dupe_names["UID"])]

In [8]:
# Drop every UID that has a duplicated name and renumber the index from 0.
valid_valid = vdf[~vdf["UID"].isin(dupe_names["UID"])].copy().reset_index(drop=True)

In [9]:
# Sanity check: list any remaining rows whose NAME is missing.
valid_valid[valid_valid["NAME"].isna()]

Unnamed: 0,UID,SUID,NAME,CHARACTER,MAP,PLACE,PLAYERS,DATE,SEASON


In [10]:
# keep=False flags every row of a repeated (UID, PLACE) pair, i.e. two
# entries under one UID sharing the same PLACE.
place_msk = valid_valid.duplicated(subset=["UID", "PLACE"], keep=False)

dupe_places = valid_valid[place_msk]

In [11]:
# Every row of any UID that contains a duplicated PLACE.
invalid_places = valid_valid[valid_valid["UID"].isin(dupe_places["UID"])]

invalid_places

Unnamed: 0,UID,SUID,NAME,CHARACTER,MAP,PLACE,PLAYERS,DATE,SEASON
120,33,3,Blake,Bowser,Wario Stadium,2,4,2021-09-22,0
121,33,3,Connor,Toad,Wario Stadium,1,4,2021-09-22,0
122,33,3,Cooper,D.K.,Wario Stadium,4,4,2021-09-22,0
123,33,3,Regan,Peach,Wario Stadium,2,4,2021-09-22,0
1068,300,24,Blake,Bowser,Wario Stadium,1,4,2021-10-10,0
1069,300,24,Cooper,Toad,Wario Stadium,2,4,2021-10-10,0
1070,300,24,Matt,Peach,Wario Stadium,2,4,2021-10-10,0
1071,300,24,Regan,Yoshi,Wario Stadium,3,4,2021-10-10,0
2626,776,63,Regan,Toad,Wario Stadium,1,4,2021-12-15,1
2627,776,63,Cooper,Peach,Wario Stadium,2,4,2021-12-15,1


In [12]:
# Drop every UID that has a duplicated PLACE and renumber the index from 0.
valid_valid_valid = (
    valid_valid[~valid_valid["UID"].isin(invalid_places["UID"])]
    .copy()
    .reset_index(drop=True)
)

In [13]:
# Sanity check: list any remaining rows whose NAME is missing.
valid_valid_valid[valid_valid_valid["NAME"].isna()]

Unnamed: 0,UID,SUID,NAME,CHARACTER,MAP,PLACE,PLAYERS,DATE,SEASON


In [14]:
# Run the UID validator again, this time on the de-duplicated frame.
v_invalid, v_valid = validate_bad_uids(df=valid_valid_valid, return_valid=True)

In [15]:
# Display the frame unpacked as `v_invalid` from the second validator pass.
v_invalid

Unnamed: 0,UID,SUID,NAME,CHARACTER,MAP,PLACE,PLAYERS,DATE,SEASON


In [16]:
# Sanity check: list any rows of the final frame whose NAME is missing.
v_valid[v_valid["NAME"].isna()]

Unnamed: 0,UID,SUID,NAME,CHARACTER,MAP,PLACE,PLAYERS,DATE,SEASON


In [17]:
def _rows_out_of_bounds(frame, n_players):
    """Return a copy of the rows of n_players-games whose PLACE is not in 1..n_players."""
    allowed_places = list(range(1, n_players + 1))
    bad_place = ~frame["PLACE"].isin(allowed_places)
    right_size = frame["PLAYERS"] == n_players
    return frame[bad_place & right_size].copy()


# One out-of-bounds frame per supported game size.
oob_two_p = _rows_out_of_bounds(v_valid, 2)
oob_three_p = _rows_out_of_bounds(v_valid, 3)
oob_four_p = _rows_out_of_bounds(v_valid, 4)

oob_concat = pd.concat([oob_two_p, oob_three_p, oob_four_p])

# Every row of any UID that has at least one out-of-bounds placement.
oob_invalid = v_valid[v_valid["UID"].isin(oob_concat["UID"])]

oob_invalid

Unnamed: 0,UID,SUID,NAME,CHARACTER,MAP,PLACE,PLAYERS,DATE,SEASON


## ALL ELO (3-player games, all seasons)

In [18]:
# Long-format rows for 3-player games only, reduced to UID / NAME / PLACE / DATE.
three_player_rows = v_valid.loc[v_valid["PLAYERS"] == 3]
tdf = (
    three_player_rows.drop(columns=["SUID", "CHARACTER", "PLAYERS", "SEASON", "MAP"])
    .sort_values(by=["UID", "DATE", "PLACE"])
    .copy()
    .reset_index(drop=True)
)

# rename() ignores keys that are not present, so the 4th-place label is harmless here.
place_labels = {1: "1st", 2: "2nd", 3: "3rd", 4: "4th"}

# One row per UID with a column per finishing place holding the player's name.
names_by_place = (
    tdf.pivot(index="UID", columns="PLACE", values="NAME")
    .reset_index()
    .rename(columns=place_labels)
)

# Attach DATE (several rows per UID), then collapse back to one row per UID.
names_with_dates = names_by_place.merge(
    tdf[["UID", "DATE"]], on="UID", how="inner", validate="1:m"
)

# Result is indexed by UID with columns 1st / 2nd / 3rd / date.
pdf = names_with_dates.groupby("UID").first().rename(columns={"DATE": "date"})

In [19]:
# Display the per-UID placement table.
pdf

Unnamed: 0_level_0,1st,2nd,3rd,date
UID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
5,Cooper,Blake,Cole,2021-09-20 00:00:00
8,Cooper,Blake,Matt,2021-09-20 00:00:00
9,Matt,Cooper,Blake,2021-09-20 00:00:00
10,Blake,Cooper,Jake,2021-09-20 00:00:00
11,Cooper,Joey,Blake,2021-09-20 00:00:00
...,...,...,...,...
6350,Cooper,Regan,Matt,2024-07-21 17:39:13
6351,Cooper,Regan,Matt,2024-07-21 18:01:43
6353,Matt,Cooper,Triston,2024-07-21 19:02:42
6358,Domingo,Konnor,Triston,2024-07-21 23:55:41


In [20]:
# Display the per-UID placement table (same output as the previous cell).
pdf

Unnamed: 0_level_0,1st,2nd,3rd,date
UID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
5,Cooper,Blake,Cole,2021-09-20 00:00:00
8,Cooper,Blake,Matt,2021-09-20 00:00:00
9,Matt,Cooper,Blake,2021-09-20 00:00:00
10,Blake,Cooper,Jake,2021-09-20 00:00:00
11,Cooper,Joey,Blake,2021-09-20 00:00:00
...,...,...,...,...
6350,Cooper,Regan,Matt,2024-07-21 17:39:13
6351,Cooper,Regan,Matt,2024-07-21 18:01:43
6353,Matt,Cooper,Triston,2024-07-21 19:02:42
6358,Domingo,Konnor,Triston,2024-07-21 23:55:41


In [21]:
# Gap between each game's timestamp and the previous row's, with rows in
# pdf's order (pdf is indexed by UID).
pi = pdf.reset_index().set_index("date")

pi["diff"] = pi.index.diff()

pi = pi.reset_index()

# Only the first row's gap is expected to be missing. Fill just that column:
# a frame-wide fillna would also overwrite a missing name or date with a
# Timedelta and hide it from later checks on this frame.
pi["diff"] = pi["diff"].fillna(pd.Timedelta(0))

pi

Unnamed: 0,date,UID,1st,2nd,3rd,diff
0,2021-09-20 00:00:00,5,Cooper,Blake,Cole,0 days 00:00:00
1,2021-09-20 00:00:00,8,Cooper,Blake,Matt,0 days 00:00:00
2,2021-09-20 00:00:00,9,Matt,Cooper,Blake,0 days 00:00:00
3,2021-09-20 00:00:00,10,Blake,Cooper,Jake,0 days 00:00:00
4,2021-09-20 00:00:00,11,Cooper,Joey,Blake,0 days 00:00:00
...,...,...,...,...,...,...
1782,2024-07-21 17:39:13,6350,Cooper,Regan,Matt,0 days 20:47:28
1783,2024-07-21 18:01:43,6351,Cooper,Regan,Matt,0 days 00:22:30
1784,2024-07-21 19:02:42,6353,Matt,Cooper,Triston,0 days 01:00:59
1785,2024-07-21 23:55:41,6358,Domingo,Konnor,Triston,0 days 04:52:59


In [22]:
# Show rows whose gap to the previous row is below the threshold.
# NOTE(review): pd.Timedelta(1) with no unit is one nanosecond, so this keeps
# only zero or negative gaps — confirm a one-day threshold was not intended.
pi[pi["diff"] < pd.Timedelta(1)]

Unnamed: 0,date,UID,1st,2nd,3rd,diff
0,2021-09-20,5,Cooper,Blake,Cole,0 days
1,2021-09-20,8,Cooper,Blake,Matt,0 days
2,2021-09-20,9,Matt,Cooper,Blake,0 days
3,2021-09-20,10,Blake,Cooper,Jake,0 days
4,2021-09-20,11,Cooper,Joey,Blake,0 days
...,...,...,...,...,...,...
1767,2024-07-13,6268,Cooper,Colton,Domingo,0 days
1768,2024-07-13,6271,Matt,Cooper,Triston,0 days
1771,2024-07-15,6279,Regan,Cooper,Konnor,0 days
1772,2024-07-15,6289,Regan,Cooper,Konnor,0 days


In [23]:
# A negative gap means a row carries an earlier timestamp than the row before it.
bad_dates = pi[pi["diff"].dt.total_seconds() < 0]

bad_dates

Unnamed: 0,date,UID,1st,2nd,3rd,diff


In [24]:
# Look up the long-format rows whose DATE matches any flagged timestamp.
tdf[tdf["DATE"].isin(bad_dates["date"])]

Unnamed: 0,UID,NAME,PLACE,DATE


In [25]:
# Boolean-mask form of the negative-gap check, one value per row of pi.
pi["diff"].dt.total_seconds() < 0 

0       False
1       False
2       False
3       False
4       False
        ...  
1782    False
1783    False
1784    False
1785    False
1786    False
Name: diff, Length: 1787, dtype: bool

In [26]:
# Sanity check: list any games with a missing date.
pdf[pdf["date"].isna()]

Unnamed: 0_level_0,1st,2nd,3rd,date
UID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1


In [27]:
# One row per UID, one column per PLACE, player name as the value.
p = tdf.pivot(index="UID", columns="PLACE", values="NAME")

# UIDs where at least one place has no name.
p[p.isna().any(axis=1)]

PLACE,1,2,3
UID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1


In [28]:
# Per-UID flag: True where any column of the placement table is missing.
pdf.isna().any(axis=1)

UID
5       False
8       False
9       False
10      False
11      False
        ...  
6350    False
6351    False
6353    False
6358    False
6374    False
Length: 1787, dtype: bool

In [29]:
# Rows of the placement table that have a missing value in any column.
pdf[pdf.isna().any(axis=1)]

Unnamed: 0_level_0,1st,2nd,3rd,date
UID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1


In [147]:
# Rate the games in pdf with multielo: a MultiElo rater built with
# score_function_base=2, driven by a Tracker.
exp_elo = MultiElo(score_function_base=2)
tracker = Tracker(elo_rater=exp_elo)
tracker.process_data(pdf)

# Ratings table as reported by the tracker after processing pdf.
rdf = tracker.get_current_ratings()

In [148]:
# Show ratings ordered by rating, then by games played, both descending.
rdf.sort_values(["rating", "n_games"], ascending=[False, False])

Unnamed: 0,rank,player_id,n_games,rating
0,1,Cooper,906,1469.827488
1,2,Regan,608,1368.71556
2,3,Cole,641,1220.116939
3,4,Matt,425,1215.194401
4,5,Luke,74,1176.412152
5,6,Chandler,62,1158.149079
6,7,Konnor,41,1129.174479
7,8,Blake,1070,1101.013659
8,9,Miller,15,1078.698244
9,10,Connor,666,1032.132955


In [149]:
# Players to include in the history plot.
# NOTE(review): a threshold of 0 presumably keeps every player (n_games looks
# like a count); raise it to hide low-activity players — confirm intent.
filter_df = rdf[rdf["n_games"] >= 0]

In [150]:
# Rating history from the tracker, one row per player_id / date / rating.
track_df = tracker.get_history_df()

# Keep only the players selected in filter_df.
track_df = track_df[track_df["player_id"].isin(filter_df["player_id"])].reset_index(
    drop=True
)


# One line per player: rating over time.
all_fig = px.line(data_frame=track_df, x="date", y="rating", color="player_id", height=800, markers=True)

all_fig


The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.



## SEASON BY TYPE

In [31]:
# Distinct SEASON values present in the loaded (pre-filter) frame.
df["SEASON"].unique()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11], dtype=int64)

In [56]:
# Build one Elo history and one ratings table per (season, player-count) pair.
# Results are collected as ("S<season>_P<players>", frame) tuples.
# NOTE: tdf, pdf, rdf, exp_elo and tracker are reassigned on every pass, so
# after this cell they hold the values from the last pair processed.
seasons_tracked = []
seasons_overall = []

unique_seasons = v_valid["SEASON"].unique()

# Column labels for the pivoted finishing places. rename() ignores keys that
# are absent, so a single mapping serves 2-, 3- and 4-player games.
place_labels = {1: "1st", 2: "2nd", 3: "3rd", 4: "4th"}


for season in alive_it(
    it=unique_seasons,
    total=unique_seasons.shape[0],
    theme="classic",
):

    season_games = v_valid[v_valid["SEASON"] == season]

    for game_type in season_games["PLAYERS"].unique():

        # Previously an unexpected player count skipped every branch and the
        # stale pdf from the prior pass was rated again under a new key.
        if game_type not in (2, 3, 4):
            raise ValueError(
                f"Unsupported PLAYERS value {game_type!r} in season {season!r}; "
                "expected 2, 3 or 4"
            )

        # Long-format rows for this season and game size.
        tdf = (
            season_games[season_games["PLAYERS"] == game_type]
            .drop(columns=["SUID", "CHARACTER", "PLAYERS", "SEASON", "MAP"])
            .sort_values(by=["UID", "PLACE"])
            .copy()
            .reset_index(drop=True)
        )

        # One row per game: a "date" column followed by one name column per place.
        pdf = (
            tdf.pivot(index="UID", columns="PLACE", values="NAME")
            .reset_index()
            .rename(columns=place_labels)
            .merge(tdf[["UID", "DATE"]], on="UID", how="inner", validate="1:m")
            .groupby("UID")
            .first()
            .set_index("DATE")
            .reset_index()
            .rename(columns={"DATE": "date"})
        )

        # A fresh rater and tracker per pair, so nothing carries over between
        # seasons or game sizes.
        exp_elo = MultiElo(score_function_base=2)
        tracker = Tracker(elo_rater=exp_elo)
        tracker.process_data(pdf)
        rdf = tracker.get_current_ratings()
        tr_df = tracker.get_history_df()

        rdf["SEASON"] = season

        t_prefix = f"S{season}_P{game_type}"

        seasons_tracked.append((t_prefix, tr_df))
        seasons_overall.append((t_prefix, rdf))

      
      The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.
      
      
      The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.
      
      
      The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.
      
      The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future v



In [57]:
# Display the collected (prefix, rating-history frame) tuples.
seasons_tracked

[('S0_P4',
       player_id       date       rating
  0       Cooper 2021-09-20  1037.090909
  1       Cooper 2021-09-20  1070.791680
  2       Cooper 2021-09-20  1101.538852
  3       Cooper 2021-09-20  1130.630596
  4       Cooper 2021-09-20  1105.038405
  ...        ...        ...          ...
  1543    Colton 2021-11-25   976.632332
  1544    Colton 2021-11-29   957.838905
  1545    Colton 2021-11-29   948.550127
  1546    Colton 2021-12-04   931.536722
  1547     Mitch 2021-12-09   987.336213
  
  [1548 rows x 3 columns]),
 ('S0_P3',
      player_id       date       rating
  0      Cooper 2021-09-20  1026.666667
  1      Cooper 2021-09-20  1051.536573
  2      Cooper 2021-09-20  1042.161955
  3      Cooper 2021-09-20  1033.375700
  4      Cooper 2021-09-20  1058.011070
  ..        ...        ...          ...
  619    Colton 2021-11-26   936.540256
  620    Colton 2021-11-26   970.756899
  621    Colton 2021-11-26   954.089282
  622    Colton 2021-11-26   938.885496
  623       Sam

In [58]:
# Turn the (prefix, frame) tuples into dicts keyed by prefix.
st_dict = dict(seasons_tracked)
overall_dict = dict(seasons_overall)

In [59]:
# List the season / player-count prefixes that were produced.
st_dict.keys()

dict_keys(['S0_P4', 'S0_P3', 'S0_P2', 'S1_P4', 'S1_P3', 'S1_P2', 'S2_P2', 'S2_P3', 'S2_P4', 'S3_P3', 'S3_P2', 'S3_P4', 'S4_P2', 'S4_P3', 'S4_P4', 'S5_P2', 'S5_P3', 'S5_P4', 'S6_P3', 'S6_P2', 'S6_P4', 'S7_P3', 'S7_P4', 'S7_P2', 'S8_P3', 'S8_P4', 'S8_P2', 'S9_P3', 'S9_P4', 'S9_P2', 'S10_P4', 'S10_P3', 'S10_P2', 'S11_P4', 'S11_P2', 'S11_P3'])

In [42]:
# Write one interactive rating-history chart per season / player-count key.
for k, temp_df in st_dict.items():

    f = px.line(
        data_frame=temp_df,
        x="date",
        y="rating",
        color="player_id",
        height=800,
        markers=True,
        title=k,
    )

    # The key is embedded in the file name as elo_<key>.html.
    f.write_html(rf"C:\Users\Cooper\sandbox\mkstream\assets\fig\elo_{k}.html")

In [86]:
file_path = r"C:\Users\Cooper\sandbox\mkstream\assets\fig"

# List the figure directory, leaving out any file whose name contains
# "all_elo", and sort the remaining names.
html_file_names = pd.Series(
    [entry for entry in os.listdir(file_path) if "all_elo" not in entry]
).sort_values()

In [87]:
# Split each name once on "_" after dropping ".html"; element 1 carries the
# season ("S<n>") and element 2 the player count ("P<n>").
name_parts = [
    file_name.removesuffix(".html").split("_") for file_name in html_file_names
]
season_count = [int(parts[1].replace("S", "")) for parts in name_parts]
players_count = [int(parts[2].replace("P", "")) for parts in name_parts]

# The two lists are in the same order as html_file_names, so they line up
# row for row with the FILE_NAME column.
files_df = pd.DataFrame(
    {"FILE_NAME": html_file_names, "SEASON": season_count, "PLAYERS": players_count}
).sort_values(["SEASON", "PLAYERS"])

In [88]:
# Preview the parsed file table.
files_df.head()

Unnamed: 0,FILE_NAME,SEASON,PLAYERS
0,elo_S0_P2.html,0,2
1,elo_S0_P3.html,0,3
2,elo_S0_P4.html,0,4
9,elo_S1_P2.html,1,2
10,elo_S1_P3.html,1,3


In [83]:
# Split the file table by player count; each subset keeps files_df's
# SEASON ordering and gets a fresh 0-based index.
four_p_df = files_df[files_df["PLAYERS"] == 4].reset_index(drop=True)
three_p_df = files_df[files_df["PLAYERS"] == 3].reset_index(drop=True)
two_p_df = files_df[files_df["PLAYERS"] == 2].reset_index(drop=True)

In [85]:
# For each player count, concatenate that group's per-season figure pages
# into a single "<players>P_all_elo.html" file in the same directory.
for fdf in [four_p_df, three_p_df, two_p_df]:

    # Nothing to merge for this player count. max() of an empty column is NaN
    # and would produce a file named "nanP_all_elo.html".
    if fdf.empty:
        continue

    html_join = []

    for file in fdf["FILE_NAME"]:
        # Parse inside a with-block so each source file is closed straight
        # after it has been read.
        with open(rf"{file_path}\{file}", encoding="utf-8") as in_file:
            html_join.append(BeautifulSoup(in_file, features="html.parser"))

    with open(
        rf"{file_path}\{fdf['PLAYERS'].max()}P_all_elo.html", "w", encoding="utf-8"
    ) as out_file:
        # Join the serialised documents directly. str() of the list would also
        # write the list's brackets and ", " separators into the page.
        out_file.write("\n".join(str(soup) for soup in html_join))