In [2]:
import os
import numpy as np
import pandas as pd
from datetime import datetime
from dotenv import load_dotenv
from multielo import MultiElo, Player, Tracker
from mktools.get_data import load_data_pd
from alive_progress import alive_it
from mktools.validate_data import validate_bad_uids

# Let wide frames render up to 150 columns before pandas elides the middle ones.
pd.set_option("display.max_columns", 150)

In [3]:
# Populate os.environ (SHEET_ID is read below) from the local .env file.
load_dotenv()

# Load the "form_data" sheet of the spreadsheet identified by SHEET_ID.
form_df = load_data_pd(sheet_id=os.environ["SHEET_ID"], sheet_name="form_data")

# Discard every column whose label contains "Unnamed".
unnamed_columns = [label for label in form_df.columns if "Unnamed" in label]
form_df = form_df.drop(columns=unnamed_columns)

form_df

Unnamed: 0,Timestamp,NEW_SESSION,MAP,PLAYERS,PLAYERS_2 [1ST],PLAYERS_2 [2ND],CHARACTERS_2 [1ST],CHARACTERS_2 [2ND],PLAYERS_3 [1ST],PLAYERS_3 [2ND],PLAYERS_3 [3RD],CHARACTERS_3 [1ST],CHARACTERS_3 [2ND],CHARACTERS_3 [3RD],PLAYERS_4 [1ST],PLAYERS_4 [2ND],PLAYERS_4 [3RD],PLAYERS_4 [4TH],CHARACTERS_4 [1ST],CHARACTERS_4 [2ND],CHARACTERS_4 [3RD],CHARACTERS_4 [4TH]
0,7/16/2024 21:20:28,NO,Wario Stadium,2,Cooper,Matt,Peach,Yoshi,,,,,,,,,,,,,,
1,7/16/2024 21:20:52,NO,Koopa Troopa Beach,3,,,,,Cooper,Matt,Blake,Peach,Yoshi,Toad,,,,,,,,
2,7/16/2024 21:21:14,YES,Sherbet Land,4,,,,,,,,,,,Cooper,Matt,Blake,Regan,Peach,Yoshi,Toad,Bowser
3,7/16/2024 21:24:33,NO,Yoshi Valley,3,,,,,Matt,Cooper,Blake,Yoshi,Peach,Toad,,,,,,,,


In [4]:
# Columns read from the "data" sheet; everything else in the sheet is skipped.
mk_data_usecols = [
    "UID",
    "SUID",
    "NAME",
    "CHARACTER",
    "MAP",
    "PLACE",
    "PLAYERS",
    "DATE",
    "SEASON",
]

df = load_data_pd(
    sheet_id=os.environ["SHEET_ID"],
    sheet_name="data",
    usecols=mk_data_usecols,
)

# The last rows carry the most recent UID/SUID values used further down.
df.tail()

Unnamed: 0,UID,SUID,NAME,CHARACTER,MAP,PLACE,PLAYERS,DATE,SEASON
21651,6291,736,Cole,Peach,Sherbet Land,3,4,2024-07-16,11
21652,6291,736,Triston,Bowser,Sherbet Land,4,4,2024-07-16,11
21653,6292,736,Cooper,Toad,Wario Stadium,1,3,2024-07-16,11
21654,6292,736,Connor,Peach,Wario Stadium,2,3,2024-07-16,11
21655,6292,736,Konnor,Yoshi,Wario Stadium,3,3,2024-07-16,11


In [11]:
def forms_data_wide_to_long(
    form_df: pd.DataFrame, mk_data_df: pd.DataFrame
) -> pd.DataFrame:
    """Reshape wide form rows into one row per (form row, finishing place).

    Every row of ``form_df`` receives a fresh ``UID`` that continues the
    sequence after the last row of ``mk_data_df``, and a ``SUID`` that starts
    at the last ``SUID`` of ``mk_data_df`` and is incremented each time
    ``NEW_SESSION`` is ``"YES"``. The per-place ``PLAYERS_<n> [1ST]`` /
    ``CHARACTERS_<n> [1ST]`` ... columns are then melted into ``NAME`` and
    ``CHARACTER`` columns keyed by an integer ``PLACE``.

    Parameters
    ----------
    form_df
        Wide form data. Must contain ``Timestamp``, ``NEW_SESSION``, ``MAP``
        and ``PLAYERS``, plus the ``PLAYERS_<n> [...]`` / ``CHARACTERS_<n> [...]``
        columns for every value ``n`` occurring in ``PLAYERS``. Not mutated.
    mk_data_df
        Existing data; only ``UID`` and ``SUID`` of its last row are read.
        Not mutated.

    Returns
    -------
    pd.DataFrame
        Columns ``UID, SUID, Timestamp, MAP, PLACE, NEW_SESSION, PLAYERS,
        CHARACTER, NAME``, sorted by ``UID`` then ``PLACE`` with a fresh
        ``RangeIndex``. An empty ``form_df`` yields an empty frame with these
        columns instead of an error.

    Raises
    ------
    ValueError
        If ``NEW_SESSION`` holds anything other than ``"YES"`` or ``"NO"``.
    IndexError
        If ``form_df`` has rows but ``mk_data_df`` has none.
    """
    id_columns = ["UID", "SUID", "Timestamp", "MAP"]
    # Column layout that ``pd.wide_to_long(...).reset_index()`` yields below;
    # reused so an empty result looks like a populated one.
    output_columns = [
        *id_columns,
        "PLACE",
        "NEW_SESSION",
        "PLAYERS",
        "CHARACTER",
        "NAME",
    ]

    fdf = form_df.copy()

    # No form rows: ``pd.concat`` would raise on an empty list of frames.
    if fdf.empty:
        return pd.DataFrame(columns=output_columns)

    # Fail loudly on unexpected flags; skipping them would leave the SUID
    # values misaligned with the form rows.
    new_session = fdf["NEW_SESSION"]
    unexpected = new_session[~new_session.isin(["YES", "NO"])]
    if not unexpected.empty:
        raise ValueError(
            "NEW_SESSION must be 'YES' or 'NO'; got unexpected value(s): "
            f"{unexpected.unique().tolist()}"
        )

    # NOTE(review): assumes the last row of mk_data_df carries the highest
    # UID/SUID (i.e. the frame is in insertion order) -- confirm with caller.
    last_uid = int(mk_data_df["UID"].iloc[-1])
    last_suid = int(mk_data_df["SUID"].iloc[-1])

    fdf["UID"] = range(last_uid + 1, last_uid + len(fdf) + 1)
    # Running count of "YES" flags == number of increments applied so far.
    fdf["SUID"] = last_suid + new_session.eq("YES").cumsum().to_numpy()

    long_frames = []

    for game_type in fdf["PLAYERS"].unique():
        # Bring this player count's columns onto the common NAME_<place> /
        # CHARACTER_<place> scheme that ``wide_to_long`` can split on "_".
        renames = {
            f"{source}_{game_type} [{ordinal}]": f"{stub}_{place}"
            for source, stub in (("PLAYERS", "NAME"), ("CHARACTERS", "CHARACTER"))
            for place, ordinal in enumerate(("1ST", "2ND", "3RD", "4TH"), start=1)
        }
        marker = f"_{game_type}"
        game_columns = [col for col in fdf.columns if marker in col]

        subset = fdf.loc[
            fdf["PLAYERS"] == game_type,
            [
                "UID",
                "SUID",
                "Timestamp",
                "NEW_SESSION",
                "MAP",
                "PLAYERS",
                *game_columns,
            ],
        ].rename(columns=renames, errors="ignore")

        long_frames.append(
            pd.wide_to_long(
                df=subset,
                stubnames=["CHARACTER", "NAME"],
                i=id_columns,
                j="PLACE",
                sep="_",
            ).reset_index()
        )

    return (
        pd.concat(long_frames)
        .sort_values(by=["UID", "PLACE"], ascending=[True, True])
        .reset_index(drop=True)
    )

In [12]:
# Reshape the form rows to long format, continuing UID/SUID from the tail of `df`.
wide = forms_data_wide_to_long(
    mk_data_df=df,
    form_df=form_df,
)

In [13]:
# Display the reshaped long-format frame (one row per UID and PLACE).
wide

Unnamed: 0,UID,SUID,Timestamp,MAP,PLACE,NEW_SESSION,PLAYERS,CHARACTER,NAME
0,6293,736,7/16/2024 21:20:28,Wario Stadium,1,NO,2,Peach,Cooper
1,6293,736,7/16/2024 21:20:28,Wario Stadium,2,NO,2,Yoshi,Matt
2,6294,736,7/16/2024 21:20:52,Koopa Troopa Beach,1,NO,3,Peach,Cooper
3,6294,736,7/16/2024 21:20:52,Koopa Troopa Beach,2,NO,3,Yoshi,Matt
4,6294,736,7/16/2024 21:20:52,Koopa Troopa Beach,3,NO,3,Toad,Blake
5,6295,737,7/16/2024 21:21:14,Sherbet Land,1,YES,4,Peach,Cooper
6,6295,737,7/16/2024 21:21:14,Sherbet Land,2,YES,4,Yoshi,Matt
7,6295,737,7/16/2024 21:21:14,Sherbet Land,3,YES,4,Toad,Blake
8,6295,737,7/16/2024 21:21:14,Sherbet Land,4,YES,4,Bowser,Regan
9,6296,737,7/16/2024 21:24:33,Yoshi Valley,1,NO,3,Yoshi,Matt


In [None]:
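# NOTE: scratch prototype of `forms_data_wide_to_long` (defined in an earlier cell);
# kept for reference only -- nothing in this cell is executed.
# last_uid = df.tail(1)["UID"].values[0]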
# last_suid = df.tail(1)["SUID"].values[0]

# l = []

# for s in form_df["NEW_SESSION"]:
#     if s == "NO":
#         l.append(last_suid)
#     if s == "YES":
#         last_suid += 1
#         l.append(last_suid)

# form_df["UID"] = range(last_uid + 1, (last_uid + form_df.shape[0] + 1), 1)

# form_df["SUID"] = l


# two_p_df = form_df[form_df["PLAYERS"] == 2][
#     [
#         "UID",
#         "SUID",
#         "Timestamp",
#         "NEW_SESSION",
#         "MAP",
#         "PLAYERS",
#         *[x for x in form_df.columns if x.__contains__("_2")],
#     ]
# ]

# three_p_df = form_df[form_df["PLAYERS"] == 3][
#     [
#         "UID",
#         "SUID",
#         "Timestamp",
#         "NEW_SESSION",
#         "MAP",
#         "PLAYERS",
#         *[x for x in form_df.columns if x.__contains__("_3")],
#     ]
# ]

# four_p_df = form_df[form_df["PLAYERS"] == 4][
#     [
#         "UID",
#         "SUID",
#         "Timestamp",
#         "NEW_SESSION",
#         "MAP",
#         "PLAYERS",
#         *[x for x in form_df.columns if x.__contains__("_4")],
#     ]
# ]

# t = three_p_df.rename(
#     columns={
#         "PLAYERS_3 [1ST]": "NAME_1",
#         "PLAYERS_3 [2ND]": "NAME_2",
#         "PLAYERS_3 [3RD]": "NAME_3",
#         "PLAYERS_3 [4TH]": "NAME_4",
#         "CHARACTERS_3 [1ST]": "CHARACTER_1",
#         "CHARACTERS_3 [2ND]": "CHARACTER_2",
#         "CHARACTERS_3 [3RD]": "CHARACTER_3",
#         "CHARACTERS_3 [4TH]": "CHARACTER_4",
#     }
# )

# t

In [None]:
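# NOTE: scratch `pd.wide_to_long` call on the prototype frame `t` from the previous
# cell; the same reshape now lives inside `forms_data_wide_to_long`.
# pd.wide_to_long(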
#     df=t,
#     stubnames=["CHARACTER", "NAME"],
#     i=["UID", "SUID", "Timestamp", "MAP"],
#     j="PLACE",
#     sep="_",
# ).reset_index()