In [1]:
import os
import pandas as pd
from dotenv import load_dotenv
from multielo import MultiElo, Tracker
from mktools.get_data import load_data_pd
from mktools.validate_data import validate_bad_uids
import plotly.express as px
from alive_progress import alive_it
from bs4 import BeautifulSoup

# Load variables from the .env file into the process environment
# (SHEET_ID is read from os.environ below — presumably defined there).
load_dotenv()

# Load the "data_main" tab of the Google Sheet, restricted to the columns
# used by the cells below.
df = load_data_pd(
    sheet_name="data_main",
    sheet_id=os.environ["SHEET_ID"],
    usecols=[
        "UID",
        "SUID",
        "NAME",
        "CHARACTER",
        "MAP",
        "PLACE",
        "PLAYERS",
        "DATE",
        "SEASON",
    ],
)

# Parse DATE to datetime, then store it back as a plain string.
# NOTE(review): why a string is needed instead of datetime64 is not recorded
# here — TODO confirm and document the reason.
df["DATE"] = pd.to_datetime(df["DATE"]).astype(str)

# Find UIDs that will break ELO calculation.
# `invalid` is not referenced again in the visible cells; only `valid` is used.
invalid, valid = validate_bad_uids(df=df, return_valid=True)

# Independent copy of the valid rows; the analysis cells below read from vdf.
vdf = valid.copy()

In [2]:
# Preview the validated frame (the output shows one row per player per game UID).
vdf

Unnamed: 0,UID,SUID,NAME,CHARACTER,MAP,PLACE,PLAYERS,DATE,SEASON
0,1,1,Cole,Toad,Sherbet Land,4,4,2021-09-20 00:00:00,0
1,1,1,Connor,Yoshi,Sherbet Land,2,4,2021-09-20 00:00:00,0
2,1,1,Cooper,Peach,Sherbet Land,1,4,2021-09-20 00:00:00,0
3,1,1,Triston,Bowser,Sherbet Land,3,4,2021-09-20 00:00:00,0
4,2,1,Cole,Toad,Kalimari Desert,4,4,2021-09-20 00:00:00,0
...,...,...,...,...,...,...,...,...,...
28520,8382,906,Connor,Peach,Yoshi Valley,2,3,2025-01-05 22:27:59,15
28521,8382,906,Garrett,Toad,Yoshi Valley,3,3,2025-01-05 22:27:59,15
28522,8383,906,Regan,Yoshi,D.K.'s Jungle,1,3,2025-01-05 23:09:11,15
28523,8383,906,Blake,Peach,D.K.'s Jungle,2,3,2025-01-05 23:09:11,15


In [3]:
# NOTE(review): this is True by construction — value_counts("UID") yields one
# row per distinct UID — so it does not validate anything about vdf itself.
vdf.value_counts("UID").reset_index().UID.is_unique

True

In [4]:
# UID repeats once per player row of a game, hence False in the output below.
vdf.UID.is_unique

False

In [5]:
# Columns available on the validated frame.
vdf.columns

Index(['UID', 'SUID', 'NAME', 'CHARACTER', 'MAP', 'PLACE', 'PLAYERS', 'DATE',
       'SEASON'],
      dtype='object')

In [114]:
# Distinct CHARACTER values; the output includes NaN, i.e. some rows have no
# character recorded.
vdf["CHARACTER"].unique().tolist()

['Toad', 'Yoshi', 'Peach', 'Bowser', 'Mario', 'Luigi', 'Wario', 'D.K.', nan]

In [None]:
# Scratch version of the per-game character check: a game is flagged when its
# number of distinct CHARACTER values differs from the largest PLAYERS value
# recorded for that UID.
# NOTE(review): character_cond is overwritten on every iteration and never read
# inside this cell, so only the last UID's result survives the loop.
for uid in vdf["UID"].unique():
    tdf = vdf[vdf["UID"] == uid].copy().reset_index(drop=True)

    character_cond = tdf["CHARACTER"].unique().shape[0] != tdf["PLAYERS"].max()

In [103]:
# Collect every game (UID) whose rows are internally inconsistent: a game with
# N players (the largest PLAYERS value on its rows) should have N distinct
# names, N distinct places and N distinct characters. NaN counts as its own
# distinct value, exactly as Series.unique() would count it.
CHECKED_FIELDS = {"name": "NAME", "place": "PLACE", "character": "CHARACTER"}

bad_dfs = []

# groupby visits each game once (sort=False keeps first-appearance order)
# instead of re-filtering the whole frame for every UID.
for uid, game_rows in vdf.groupby("UID", sort=False):
    tdf = game_rows.copy().reset_index(drop=True)
    expected_players = tdf["PLAYERS"].max()

    # Collect the failing checks as plain labels. Combining scalar booleans
    # with `&` / `~` is fragile: on a Python bool `~True` is -2, not False.
    failed = [
        label
        for label, column in CHECKED_FIELDS.items()
        if bool(tdf[column].nunique(dropna=False) != expected_players)
    ]
    if not failed:
        continue

    # "name" / "name and place" / "name, place and character"
    if len(failed) == 1:
        described = failed[0]
    else:
        described = f"{', '.join(failed[:-1])} and {failed[-1]}"
    print(f"bad {described}")

    # REASON is the failing checks joined in name/place/character order,
    # e.g. "name_character".
    tdf["REASON"] = "_".join(failed)
    bad_dfs.append(tdf)

# pd.concat raises on an empty list, so fall back to an empty frame with the
# same columns when every game is consistent.
# NOTE(review): despite its name, bad_name_data holds games flagged for any
# reason (name, place or character); the name is kept for the cells below.
if bad_dfs:
    bad_name_data = pd.concat(bad_dfs).reset_index(drop=True)
else:
    bad_name_data = vdf.iloc[:0].assign(REASON="")

bad place
bad character
bad character
bad name
bad place
bad character
bad character
bad character
bad name and character
bad character
bad character
bad character
bad character
bad character
bad character
bad name
bad place
bad place
bad place
bad character
bad character
bad character
bad character
bad character
bad name
bad place
bad character
bad character
bad name
bad character
bad character
bad name
bad character
bad name
bad character
bad character
bad character
bad name and place
bad character
bad character
bad name
bad name, place and character
bad character
bad character
bad character
bad character
bad place
bad character
bad character
bad character
bad character
bad character
bad character
bad character
bad character
bad character
bad character
bad character
bad character
bad character
bad character
bad character
bad place and character
bad character
bad character
bad character
bad character
bad character
bad character
bad name
bad character
bad character
bad name
bad name
ba

In [104]:
# Size of the flagged-rows frame (the source columns plus REASON).
bad_name_data.shape

(530, 10)

In [105]:
# Number of distinct games (UIDs) that were flagged.
bad_name_data["UID"].unique().shape[0]

141

In [106]:
# Inspect the first 100 flagged rows.
bad_name_data.head(100)

Unnamed: 0,UID,SUID,NAME,CHARACTER,MAP,PLACE,PLAYERS,DATE,SEASON,REASON
0,33,3,Blake,Bowser,Wario Stadium,2,4,2021-09-22 00:00:00,0,place
1,33,3,Connor,Toad,Wario Stadium,1,4,2021-09-22 00:00:00,0,place
2,33,3,Cooper,D.K.,Wario Stadium,4,4,2021-09-22 00:00:00,0,place
3,33,3,Regan,Peach,Wario Stadium,2,4,2021-09-22 00:00:00,0,place
4,78,8,Blake,Bowser,Sherbet Land,3,4,2021-09-27 00:00:00,0,character
...,...,...,...,...,...,...,...,...,...,...
95,1147,125,Blake,Yoshi,Kalimari Desert,3,4,2022-01-13 00:00:00,1,place
96,1147,125,Matt,Toad,Kalimari Desert,1,4,2022-01-13 00:00:00,1,place
97,1181,130,Chandler,Bowser,Yoshi Valley,1,4,2022-01-18 00:00:00,1,character
98,1181,130,Robert,Peach,Yoshi Valley,2,4,2022-01-18 00:00:00,1,character


In [107]:
# Write the flagged rows to disk for manual review, without the index column.
# NOTE(review): absolute, machine-specific output path.
bad_name_data.to_csv(
    r"C:\Users\Cooper\sandbox\mkstream\form_data_migration\temp_bad_data.csv",
    index=False,
)

In [113]:
# Flagged rows per REASON label (this counts player rows, not games).
bad_name_data["REASON"].value_counts()

REASON
character               408
name                     71
place                    33
name_character            7
name_place                4
name_place_character      4
place_character           3
Name: count, dtype: int64

In [108]:
# Drop every game whose UID was flagged in bad_name_data, then renumber rows.
vdf_two = vdf[~vdf["UID"].isin(bad_name_data["UID"])].copy().reset_index(drop=True)

In [112]:
# One summary row per game (UID): after ordering rows by season, session,
# game and place, take the first value of each game-level column.
vdf_two_gdf = (
    vdf_two.sort_values(["SEASON", "SUID", "UID", "PLACE"])
    .groupby("UID")
    .agg(**{col: (col, "first") for col in ("SUID", "MAP", "DATE", "SEASON")})
    .reset_index()
)

vdf_two_gdf

Unnamed: 0,UID,SUID,MAP,DATE,SEASON
0,1,1,Sherbet Land,2021-09-20 00:00:00,0
1,2,1,Kalimari Desert,2021-09-20 00:00:00,0
2,3,1,Yoshi Valley,2021-09-20 00:00:00,0
3,4,1,Wario Stadium,2021-09-20 00:00:00,0
4,5,1,Choco Mountain,2021-09-20 00:00:00,0
...,...,...,...,...,...
8086,8379,906,Bowser's Castle,2025-01-05 19:38:45,15
8087,8380,906,D.K.'s Jungle,2025-01-05 19:55:31,15
8088,8381,906,Wario Stadium,2025-01-05 21:11:39,15
8089,8382,906,Yoshi Valley,2025-01-05 22:27:59,15


In [111]:
# Wide layout for 2-player games: one row per game, with NAME / CHARACTER /
# DATE spread into one column per finishing PLACE.
df_wide_two_p = (
    vdf_two.loc[lambda games: games["PLAYERS"] == 2]
    .pivot(
        index=["SEASON", "SUID", "UID", "PLAYERS", "MAP"],
        columns=["PLACE"],
        values=["NAME", "CHARACTER", "DATE"],
    )
    .reset_index()
)

df_wide_two_p

Unnamed: 0_level_0,SEASON,SUID,UID,PLAYERS,MAP,NAME,NAME,CHARACTER,CHARACTER,DATE,DATE
PLACE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,1,2,1,2,1,2
0,0,3,36,2,Frappe Snowland,Cooper,Triston,Yoshi,Peach,2021-09-22 00:00:00,2021-09-22 00:00:00
1,0,8,75,2,Mario Raceway,Regan,Cooper,Peach,Luigi,2021-09-27 00:00:00,2021-09-27 00:00:00
2,0,10,104,2,Wario Stadium,Blake,Connor,Yoshi,Peach,2021-09-29 00:00:00,2021-09-29 00:00:00
3,0,10,105,2,Wario Stadium,Blake,Connor,Peach,Yoshi,2021-09-29 00:00:00,2021-09-29 00:00:00
4,0,11,111,2,Bowser's Castle,Cooper,Triston,Wario,Peach,2021-09-30 00:00:00,2021-09-30 00:00:00
...,...,...,...,...,...,...,...,...,...,...,...
1214,15,899,8281,2,Mario Raceway,Konnor,Garrett,Yoshi,Toad,2024-12-28 21:06:04,2024-12-28 21:06:04
1215,15,899,8282,2,Moo Moo Farm,Konnor,Garrett,Yoshi,Toad,2024-12-28 21:07:05,2024-12-28 21:07:05
1216,15,899,8286,2,Toad's Turnpike,Konnor,Regan,Yoshi,Toad,2024-12-28 21:43:29,2024-12-28 21:43:29
1217,15,900,8295,2,Banshee Boardwalk,Regan,Cole,Yoshi,Toad,2024-12-29 20:39:28,2024-12-29 20:39:28


In [None]:
# pd.merge refuses to join frames whose columns have different numbers of
# levels, and df_wide_two_p has two-level (field, PLACE) columns after the
# pivot. Flatten them first: ("NAME", 1) -> "NAME_1", ("UID", "") -> "UID".
df_wide_two_p_flat = df_wide_two_p.copy()
df_wide_two_p_flat.columns = [
    f"{field}_{place}" if place != "" else field
    for field, place in df_wide_two_p_flat.columns
]

# Attach the one-row-per-game summary on the keys both frames share;
# many_to_one makes pandas verify the summary has a single row per key.
pd.merge(
    df_wide_two_p_flat,
    vdf_two_gdf,
    on=["SEASON", "SUID", "UID", "MAP"],
    how="inner",
    validate="many_to_one",
)

In [109]:
# Wide layout for 3-player games: one row per game, with NAME / CHARACTER /
# DATE spread into one column per finishing PLACE.
df_wide_three_p = (
    vdf_two.loc[lambda games: games["PLAYERS"] == 3]
    .pivot(
        index=["SEASON", "SUID", "UID", "PLAYERS", "MAP"],
        columns=["PLACE"],
        values=["NAME", "CHARACTER", "DATE"],
    )
    .reset_index()
)

df_wide_three_p

Unnamed: 0_level_0,SEASON,SUID,UID,PLAYERS,MAP,NAME,NAME,NAME,CHARACTER,CHARACTER,CHARACTER,DATE,DATE,DATE
PLACE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,1,2,3,1,2,3,1,2,3
0,0,1,5,3,Choco Mountain,Cooper,Blake,Cole,Luigi,Yoshi,Peach,2021-09-20 00:00:00,2021-09-20 00:00:00,2021-09-20 00:00:00
1,0,1,8,3,Toad's Turnpike,Cooper,Blake,Matt,Bowser,Yoshi,Toad,2021-09-20 00:00:00,2021-09-20 00:00:00,2021-09-20 00:00:00
2,0,1,9,3,D.K.'s Jungle,Matt,Cooper,Blake,Toad,Wario,Yoshi,2021-09-20 00:00:00,2021-09-20 00:00:00,2021-09-20 00:00:00
3,0,1,10,3,Royal Raceway,Blake,Cooper,Jake,Peach,Wario,Yoshi,2021-09-20 00:00:00,2021-09-20 00:00:00,2021-09-20 00:00:00
4,0,1,11,3,Wario Stadium,Cooper,Joey,Blake,Wario,Yoshi,Toad,2021-09-20 00:00:00,2021-09-20 00:00:00,2021-09-20 00:00:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1958,15,905,8374,3,Wario Stadium,Regan,Konnor,Hughes,Toad,Yoshi,Peach,2025-01-04 22:24:45,2025-01-04 22:24:45,2025-01-04 22:24:45
1959,15,906,8375,3,Sherbet Land,Cooper,Antonio,Garrett,Yoshi,Peach,Toad,2025-01-05 14:35:39,2025-01-05 14:35:39,2025-01-05 14:35:39
1960,15,906,8377,3,Koopa Troopa Beach,Regan,Antonio,Garrett,Yoshi,Toad,Peach,2025-01-05 17:41:17,2025-01-05 17:41:17,2025-01-05 17:41:17
1961,15,906,8382,3,Yoshi Valley,Regan,Connor,Garrett,Yoshi,Peach,Toad,2025-01-05 22:27:59,2025-01-05 22:27:59,2025-01-05 22:27:59


In [110]:
# Wide layout for 4-player games: one row per game, with NAME / CHARACTER /
# DATE spread into one column per finishing PLACE.
df_wide_four_p = (
    vdf_two.loc[lambda games: games["PLAYERS"] == 4]
    .pivot(
        index=["SEASON", "SUID", "UID", "PLAYERS", "MAP"],
        columns=["PLACE"],
        values=["NAME", "CHARACTER", "DATE"],
    )
    .reset_index()
)

df_wide_four_p

Unnamed: 0_level_0,SEASON,SUID,UID,PLAYERS,MAP,NAME,NAME,NAME,NAME,CHARACTER,CHARACTER,CHARACTER,CHARACTER,DATE,DATE,DATE,DATE
PLACE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,1,2,3,4,1,2,3,4,1,2,3,4
0,0,1,1,4,Sherbet Land,Cooper,Connor,Triston,Cole,Peach,Yoshi,Bowser,Toad,2021-09-20 00:00:00,2021-09-20 00:00:00,2021-09-20 00:00:00,2021-09-20 00:00:00
1,0,1,2,4,Kalimari Desert,Cooper,Connor,Triston,Cole,Yoshi,Peach,Bowser,Toad,2021-09-20 00:00:00,2021-09-20 00:00:00,2021-09-20 00:00:00,2021-09-20 00:00:00
2,0,1,3,4,Yoshi Valley,Cooper,Connor,Triston,Cole,Toad,Peach,Bowser,Yoshi,2021-09-20 00:00:00,2021-09-20 00:00:00,2021-09-20 00:00:00,2021-09-20 00:00:00
3,0,1,4,4,Wario Stadium,Cooper,Cole,Blake,Connor,Mario,Toad,Peach,Yoshi,2021-09-20 00:00:00,2021-09-20 00:00:00,2021-09-20 00:00:00,2021-09-20 00:00:00
4,0,1,6,4,Bowser's Castle,Connor,Cole,Cooper,Blake,Toad,Peach,Bowser,Yoshi,2021-09-20 00:00:00,2021-09-20 00:00:00,2021-09-20 00:00:00,2021-09-20 00:00:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4927,15,905,8373,4,D.K.'s Jungle,Regan,Blake,Matt,Garrett,Toad,Yoshi,Bowser,Peach,2025-01-04 22:00:59,2025-01-04 22:00:59,2025-01-04 22:00:59,2025-01-04 22:00:59
4928,15,906,8376,4,Toad's Turnpike,Regan,Cooper,Antonio,Garrett,Yoshi,Toad,Peach,Luigi,2025-01-05 16:58:36,2025-01-05 16:58:36,2025-01-05 16:58:36,2025-01-05 16:58:36
4929,15,906,8378,4,Koopa Troopa Beach,Cooper,Blake,Garrett,Matt,Luigi,Yoshi,Toad,Peach,2025-01-05 19:19:01,2025-01-05 19:19:01,2025-01-05 19:19:01,2025-01-05 19:19:01
4930,15,906,8380,4,D.K.'s Jungle,Cole,Cooper,Blake,Garrett,Toad,Bowser,Yoshi,Peach,2025-01-05 19:55:31,2025-01-05 19:55:31,2025-01-05 19:55:31,2025-01-05 19:55:31


In [23]:
# Small sample (rows with UID below 10) used to try out reshaping approaches.
simple = vdf[vdf.UID < 10].copy()

simple.head()

Unnamed: 0,UID,SUID,NAME,CHARACTER,MAP,PLACE,PLAYERS,DATE,SEASON
0,1,1,Cole,Toad,Sherbet Land,4,4,2021-09-20 00:00:00,0
1,1,1,Connor,Yoshi,Sherbet Land,2,4,2021-09-20 00:00:00,0
2,1,1,Cooper,Peach,Sherbet Land,1,4,2021-09-20 00:00:00,0
3,1,1,Triston,Bowser,Sherbet Land,3,4,2021-09-20 00:00:00,0
4,2,1,Cole,Toad,Kalimari Desert,4,4,2021-09-20 00:00:00,0


In [49]:
# Pivot the sample to one row per game with one column per finishing PLACE.
# Because 3- and 4-player games are mixed here, the 4th-place columns are
# empty (NaN) for the 3-player games, as the output shows.
simple.pivot(
    index=[
        "SEASON",
        "SUID",
        "UID",
        "PLAYERS",
        "MAP",
    ],
    columns=[
        "PLACE",
    ],
    values=["NAME", "CHARACTER", "DATE"],
).reset_index()

Unnamed: 0_level_0,SEASON,SUID,UID,PLAYERS,MAP,NAME,NAME,NAME,NAME,CHARACTER,CHARACTER,CHARACTER,CHARACTER,DATE,DATE,DATE,DATE
PLACE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,1,2,3,4,1,2,3,4,1,2,3,4
0,0,1,1,4,Sherbet Land,Cooper,Connor,Triston,Cole,Peach,Yoshi,Bowser,Toad,2021-09-20 00:00:00,2021-09-20 00:00:00,2021-09-20 00:00:00,2021-09-20 00:00:00
1,0,1,2,4,Kalimari Desert,Cooper,Connor,Triston,Cole,Yoshi,Peach,Bowser,Toad,2021-09-20 00:00:00,2021-09-20 00:00:00,2021-09-20 00:00:00,2021-09-20 00:00:00
2,0,1,3,4,Yoshi Valley,Cooper,Connor,Triston,Cole,Toad,Peach,Bowser,Yoshi,2021-09-20 00:00:00,2021-09-20 00:00:00,2021-09-20 00:00:00,2021-09-20 00:00:00
3,0,1,4,4,Wario Stadium,Cooper,Cole,Blake,Connor,Mario,Toad,Peach,Yoshi,2021-09-20 00:00:00,2021-09-20 00:00:00,2021-09-20 00:00:00,2021-09-20 00:00:00
4,0,1,5,3,Choco Mountain,Cooper,Blake,Cole,,Luigi,Yoshi,Peach,,2021-09-20 00:00:00,2021-09-20 00:00:00,2021-09-20 00:00:00,
5,0,1,6,4,Bowser's Castle,Connor,Cole,Cooper,Blake,Toad,Peach,Bowser,Yoshi,2021-09-20 00:00:00,2021-09-20 00:00:00,2021-09-20 00:00:00,2021-09-20 00:00:00
6,0,1,7,4,Banshee Boardwalk,Blake,Matt,Cooper,Connor,Yoshi,Peach,Bowser,Toad,2021-09-20 00:00:00,2021-09-20 00:00:00,2021-09-20 00:00:00,2021-09-20 00:00:00
7,0,1,8,3,Toad's Turnpike,Cooper,Blake,Matt,,Bowser,Yoshi,Toad,,2021-09-20 00:00:00,2021-09-20 00:00:00,2021-09-20 00:00:00,
8,0,1,9,3,D.K.'s Jungle,Matt,Cooper,Blake,,Toad,Wario,Yoshi,,2021-09-20 00:00:00,2021-09-20 00:00:00,2021-09-20 00:00:00,


In [29]:
# Collapse each game in the sample to a single row: game-level columns keep
# their first value, per-player columns become lists ordered by PLACE
# (the rows are sorted by PLACE within each game before grouping).
simple_gb = (
    simple.sort_values(["SEASON", "SUID", "UID", "PLACE"])
    .groupby("UID")
    .agg(
        **{col: (col, "first") for col in ("SUID", "MAP", "PLAYERS", "DATE", "SEASON")},
        **{col: (col, list) for col in ("NAME", "PLACE", "CHARACTER")},
    )
    .reset_index()
)

simple_gb

Unnamed: 0,UID,SUID,MAP,PLAYERS,DATE,SEASON,NAME,PLACE,CHARACTER
0,1,1,Sherbet Land,4,2021-09-20 00:00:00,0,"[Cooper, Connor, Triston, Cole]","[1, 2, 3, 4]","[Peach, Yoshi, Bowser, Toad]"
1,2,1,Kalimari Desert,4,2021-09-20 00:00:00,0,"[Cooper, Connor, Triston, Cole]","[1, 2, 3, 4]","[Yoshi, Peach, Bowser, Toad]"
2,3,1,Yoshi Valley,4,2021-09-20 00:00:00,0,"[Cooper, Connor, Triston, Cole]","[1, 2, 3, 4]","[Toad, Peach, Bowser, Yoshi]"
3,4,1,Wario Stadium,4,2021-09-20 00:00:00,0,"[Cooper, Cole, Blake, Connor]","[1, 2, 3, 4]","[Mario, Toad, Peach, Yoshi]"
4,5,1,Choco Mountain,3,2021-09-20 00:00:00,0,"[Cooper, Blake, Cole]","[1, 2, 3]","[Luigi, Yoshi, Peach]"
5,6,1,Bowser's Castle,4,2021-09-20 00:00:00,0,"[Connor, Cole, Cooper, Blake]","[1, 2, 3, 4]","[Toad, Peach, Bowser, Yoshi]"
6,7,1,Banshee Boardwalk,4,2021-09-20 00:00:00,0,"[Blake, Matt, Cooper, Connor]","[1, 2, 3, 4]","[Yoshi, Peach, Bowser, Toad]"
7,8,1,Toad's Turnpike,3,2021-09-20 00:00:00,0,"[Cooper, Blake, Matt]","[1, 2, 3]","[Bowser, Yoshi, Toad]"
8,9,1,D.K.'s Jungle,3,2021-09-20 00:00:00,0,"[Matt, Cooper, Blake]","[1, 2, 3]","[Toad, Wario, Yoshi]"


In [34]:
# Long format: every column other than UID / SUID / SEASON is stacked into
# (variable, value) pairs.
simple.melt(id_vars=["UID", "SUID", "SEASON"],)

Unnamed: 0,UID,SUID,SEASON,variable,value
0,1,1,0,NAME,Cole
1,1,1,0,NAME,Connor
2,1,1,0,NAME,Cooper
3,1,1,0,NAME,Triston
4,2,1,0,NAME,Cole
...,...,...,...,...,...
193,8,1,0,DATE,2021-09-20 00:00:00
194,8,1,0,DATE,2021-09-20 00:00:00
195,9,1,0,DATE,2021-09-20 00:00:00
196,9,1,0,DATE,2021-09-20 00:00:00


In [33]:
# Explode only the NAME lists back to one row per player.
# NOTE(review): PLACE and CHARACTER stay as whole lists on every exploded row
# (see output); if one row per player with that player's own place/character
# is wanted, all three columns presumably need exploding together — confirm.
simple_gb[["UID", "NAME", "PLACE", "CHARACTER"]].explode(column="NAME")

Unnamed: 0,UID,NAME,PLACE,CHARACTER
0,1,Cooper,"[1, 2, 3, 4]","[Peach, Yoshi, Bowser, Toad]"
0,1,Connor,"[1, 2, 3, 4]","[Peach, Yoshi, Bowser, Toad]"
0,1,Triston,"[1, 2, 3, 4]","[Peach, Yoshi, Bowser, Toad]"
0,1,Cole,"[1, 2, 3, 4]","[Peach, Yoshi, Bowser, Toad]"
1,2,Cooper,"[1, 2, 3, 4]","[Yoshi, Peach, Bowser, Toad]"
1,2,Connor,"[1, 2, 3, 4]","[Yoshi, Peach, Bowser, Toad]"
1,2,Triston,"[1, 2, 3, 4]","[Yoshi, Peach, Bowser, Toad]"
1,2,Cole,"[1, 2, 3, 4]","[Yoshi, Peach, Bowser, Toad]"
2,3,Cooper,"[1, 2, 3, 4]","[Toad, Peach, Bowser, Yoshi]"
2,3,Connor,"[1, 2, 3, 4]","[Toad, Peach, Bowser, Yoshi]"


In [19]:
# Rebuild the sample, this time keeping the old row index as an "index" column
# so it can serve as the pivot index.
# NOTE(review): this rebinds `simple` to a frame with an extra "index" column,
# so re-running earlier cells that read `simple` (e.g. the melt) after this
# one gives different results.
simple = vdf[vdf.UID < 10].copy().reset_index()

# One row per original row, with every field split into a column per PLAYERS
# value; each row only fills the columns of its own player count.
simple.pivot(
    index="index",
    columns=[
        "PLAYERS",
    ],
    values=[
        "UID",
        "SUID",
        "DATE",
        "MAP",
        "SEASON",
        "NAME",
        "CHARACTER",
        "PLACE",
    ],
)

Unnamed: 0_level_0,UID,UID,SUID,SUID,DATE,DATE,MAP,MAP,SEASON,SEASON,NAME,NAME,CHARACTER,CHARACTER,PLACE,PLACE
PLAYERS,3,4,3,4,3,4,3,4,3,4,3,4,3,4,3,4
index,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
0,,1.0,,1.0,,2021-09-20 00:00:00,,Sherbet Land,,0.0,,Cole,,Toad,,4.0
1,,1.0,,1.0,,2021-09-20 00:00:00,,Sherbet Land,,0.0,,Connor,,Yoshi,,2.0
2,,1.0,,1.0,,2021-09-20 00:00:00,,Sherbet Land,,0.0,,Cooper,,Peach,,1.0
3,,1.0,,1.0,,2021-09-20 00:00:00,,Sherbet Land,,0.0,,Triston,,Bowser,,3.0
4,,2.0,,1.0,,2021-09-20 00:00:00,,Kalimari Desert,,0.0,,Cole,,Toad,,4.0
5,,2.0,,1.0,,2021-09-20 00:00:00,,Kalimari Desert,,0.0,,Connor,,Peach,,2.0
6,,2.0,,1.0,,2021-09-20 00:00:00,,Kalimari Desert,,0.0,,Cooper,,Yoshi,,1.0
7,,2.0,,1.0,,2021-09-20 00:00:00,,Kalimari Desert,,0.0,,Triston,,Bowser,,3.0
8,,3.0,,1.0,,2021-09-20 00:00:00,,Yoshi Valley,,0.0,,Cole,,Yoshi,,4.0
9,,3.0,,1.0,,2021-09-20 00:00:00,,Yoshi Valley,,0.0,,Connor,,Peach,,2.0


In [None]:
# One row per game (UID) with the game's DATE placed under its MAP column.
# vdf holds one row per player, so (UID, MAP) pairs repeat and pivot would
# raise "Index contains duplicate entries"; keep one row per pair first.
df_wide = (
    vdf[["UID", "DATE", "MAP"]]
    .drop_duplicates(subset=["UID", "MAP"])
    .pivot(index="UID", columns=["MAP"])
)

df_wide

In [None]:
# Load the "form_data" tab of the same sheet.
form_df = load_data_pd(sheet_name="form_data", sheet_id=os.environ["SHEET_ID"])

# Drop every column whose header contains "Unnamed".
form_df = form_df.drop(
    columns=[col for col in form_df.columns if "Unnamed" in col]
)

# Parse the submission timestamp so it can be sorted and localised later.
form_df["Timestamp"] = pd.to_datetime(form_df["Timestamp"])

form_df.head()

In [None]:
# Headers containing "[" get their spaces replaced by "_" and their square
# brackets removed; every other header is left as-is.
form_df.columns = [
    x.replace(" ", "_").replace("[", "").replace("]", "") if "[" in x else x
    for x in form_df.columns
]

form_df

In [5]:
# Working copy of the cleaned form responses.
fdf = form_df.copy()

In [6]:
# Split the responses by player count: dfs[0], dfs[1] and dfs[2] hold the
# 2-, 3- and 4-player games respectively, each re-indexed from 0.
dfs = [
    fdf.loc[fdf["PLAYERS"] == game_type].copy().reset_index(drop=True)
    for game_type in (2, 3, 4)
]

In [None]:
# Name the three per-player-count frames (order matches the split above).
two_p, three_p, four_p = dfs

two_p.head()

In [8]:
from typing import Literal


def transform_form_data(
    df: pd.DataFrame, game_type_replace_string: Literal["_2_", "_3_", "_4_"]
) -> pd.DataFrame:
    """Normalise one player-count slice of the form responses.

    Steps, in order:

    1. Drop columns that are entirely empty.
    2. Add a 1-based ``ID`` column numbering the rows in their current order.
    3. Rename columns: a column containing ``game_type_replace_string`` has
       that token replaced by ``"_"`` (its case is left alone); every other
       column is upper-cased.
    4. Convert ``TIMESTAMP`` to UTC. Naive timestamps are interpreted as
       US/Eastern wall-clock time; timestamps that are already tz-aware are
       converted directly, so the function can be re-applied to its own output.

    Args:
        df: Form responses for a single player count. Not modified.
        game_type_replace_string: The player-count token to strip from column
            names, e.g. ``"_2_"``.

    Returns:
        A new DataFrame with the cleaned columns, a leading ``ID`` column and
        a UTC ``TIMESTAMP`` column.

    Raises:
        KeyError: If no column is named ``TIMESTAMP`` after renaming.
    """
    # dropna returns a new frame, so the caller's df is never modified.
    idf_out = (
        df.dropna(axis=1, how="all")
        .reset_index(drop=True)
        .reset_index()
        .rename(columns={"index": "ID"})
    )

    idf_out.columns = [
        (
            x.replace(game_type_replace_string, "_")
            if game_type_replace_string in x
            else x.upper()
        )
        for x in idf_out.columns
    ]

    timestamps = pd.to_datetime(idf_out["TIMESTAMP"])
    # tz_localize raises TypeError on tz-aware data, so only localise naive values.
    if timestamps.dt.tz is None:
        timestamps = timestamps.dt.tz_localize("US/Eastern")
    idf_out["TIMESTAMP"] = timestamps.dt.tz_convert("UTC")

    # reset_index numbers rows from 0; IDs start at 1.
    idf_out["ID"] = idf_out["ID"] + 1

    return idf_out

In [None]:
# Normalise each per-player-count frame, stripping its own "_N_" column token.
two_p_out, three_p_out, four_p_out = (
    transform_form_data(df=frame, game_type_replace_string=token)
    for frame, token in ((two_p, "_2_"), (three_p, "_3_"), (four_p, "_4_"))
)

two_p_out

In [10]:
# Export one CSV per game size (data_2P.csv, data_3P.csv, data_4P.csv).
# index=False matches the other exports in this notebook: these frames already
# carry an explicit ID column, so the positional index is redundant.
# NOTE(review): absolute, machine-specific output directory.
for v, odf in enumerate([two_p_out, three_p_out, four_p_out], start=2):
    odf.to_csv(
        rf"C:\Users\Cooper\sandbox\mkstream\form_data_migration\data_{v}P.csv",
        index=False,
    )

In [None]:
# Character length of each distinct MAP value in the 2-player data.
# Missing values are skipped: len() of a NaN float raises TypeError.
[len(x) for x in two_p_out["MAP"].dropna().unique()]

In [None]:
# Character length of each distinct MAP value in the 4-player data.
# Missing values are skipped: len() of a NaN float raises TypeError.
[len(x) for x in four_p_out["MAP"].dropna().unique()]

In [None]:
# Stack the three per-game-size frames, discard their per-frame IDs and order
# everything by submission time.
cat_df = (
    pd.concat([two_p_out, three_p_out, four_p_out])
    .drop(columns=["ID"])
    .sort_values(by="TIMESTAMP", ignore_index=True)
)

# Fresh 1-based ID over the combined, time-ordered rows, as the first column.
cat_df.insert(0, "ID", range(1, len(cat_df) + 1))

cat_df.head()

In [None]:
# Columns of the combined frame, to pick the export layout from.
cat_df.columns

In [15]:
# Export layout: identifying columns first, then the player and character
# picked for each finishing position (1ST through 4TH).
cat_df_out = cat_df.loc[
    :,
    ["ID", "TIMESTAMP", "NEW_SESSION", "MAP", "PLAYERS"]
    + [f"PLAYERS_{rank}" for rank in ("1ST", "2ND", "3RD", "4TH")]
    + [f"CHARACTERS_{rank}" for rank in ("1ST", "2ND", "3RD", "4TH")],
].copy()

In [None]:
# Preview the frame that is about to be exported.
cat_df_out

In [17]:
# Export the combined form data, ID column included, without the index.
# NOTE(review): absolute, machine-specific output path.
cat_df_out.to_csv(
    r"C:\Users\Cooper\sandbox\mkstream\form_data_migration\form_data.csv",
    index=False,
)

In [18]:
# Same export with the ID column removed.
# NOTE(review): absolute, machine-specific output path.
cat_df_out.drop(columns=["ID"]).to_csv(
    r"C:\Users\Cooper\sandbox\mkstream\form_data_migration\form_data_no_id_col.csv",
    index=False,
)

In [None]:
# Longest CHARACTERS_1ST value (in characters) in the 4-player data.
# Missing values are skipped: len() of a NaN float raises TypeError.
pd.Series([len(x) for x in four_p_out["CHARACTERS_1ST"].dropna().unique()]).max()

In [None]:
# Distinct first-place characters recorded in the 4-player data.
four_p_out["CHARACTERS_1ST"].unique()

In [None]:
# Longest PLAYERS_1ST value (in characters) in the 4-player data.
# Missing values are skipped: len() of a NaN float raises TypeError.
pd.Series([len(x) for x in four_p_out["PLAYERS_1ST"].dropna().unique()]).max()