In [17]:
import os
import numpy as np
import pandas as pd
from dotenv import load_dotenv
from multielo import MultiElo, Tracker
from mktools.get_data import load_data_pd
from mktools.validate_data import validate_bad_uids
from mktools.form_data import fill_new_session
import plotly.express as px
from alive_progress import alive_it
from bs4 import BeautifulSoup
from typing import Literal

# Load variables from the .env file into the environment (SHEET_ID is read from os.environ below)
load_dotenv()

True

## Data Main

In [18]:
# Cutoff between the two data sources, written as US/Eastern wall-clock time and
# converted to UTC. data_main rows dated before it are kept when df_filtered is built.
FIRST_FORM_DATA_DATE = pd.Timestamp('2024-07-17 19:15:48').tz_localize("US/Eastern").tz_convert("UTC")

In [19]:
# Display the cutoff to confirm the UTC conversion
FIRST_FORM_DATA_DATE

Timestamp('2024-07-17 23:15:48+0000', tz='UTC')

In [20]:
# Read the "data_main" tab of the Google Sheet (one row per player per race)
data_main_columns = [
    "UID",
    "SUID",
    "NAME",
    "CHARACTER",
    "MAP",
    "PLACE",
    "PLAYERS",
    "DATE",
    "SEASON",
]
df = load_data_pd(
    sheet_name="data_main",
    sheet_id=os.environ["SHEET_ID"],
    usecols=data_main_columns,
)

# DATE holds naive US/Eastern values: attach the zone, then express in UTC
eastern_dates = pd.to_datetime(df["DATE"]).dt.tz_localize("US/Eastern")
df["DATE"] = eastern_dates.dt.tz_convert("UTC")

# Keep rows from season 11 onward that pre-date the first form submission
before_form_data = df["DATE"] < FIRST_FORM_DATA_DATE
season_11_onward = df["SEASON"] >= 11
df_filtered = df.loc[before_form_data & season_11_onward].reset_index(drop=True).copy()

# Shift every row by eight hours; the check below requires the result to be 12:00 UTC
eight_hours = pd.Timedelta(hours=8)
df_filtered["DATE"] = df_filtered["DATE"] + eight_hours

noon = pd.to_datetime("12:00:00").time()
assert (df_filtered["DATE"].dt.time == noon).all()

# Every session (SUID) must map to a single DATE
dates_per_session = df_filtered.groupby("SUID")["DATE"].nunique()
assert dates_per_session.le(1).all()

df_filtered

Unnamed: 0,UID,SUID,NAME,CHARACTER,MAP,PLACE,PLAYERS,DATE,SEASON
0,6002,711,Blake,Toad,Wario Stadium,1,4,2024-06-21 12:00:00+00:00,11
1,6002,711,Cooper,Bowser,Wario Stadium,2,4,2024-06-21 12:00:00+00:00,11
2,6002,711,Matt,Yoshi,Wario Stadium,3,4,2024-06-21 12:00:00+00:00,11
3,6002,711,Garrett,Peach,Wario Stadium,4,4,2024-06-21 12:00:00+00:00,11
4,6003,711,Cooper,Mario,D.K.'s Jungle,1,4,2024-06-21 12:00:00+00:00,11
...,...,...,...,...,...,...,...,...,...
986,6294,737,Connor,Toad,Toad's Turnpike,3,4,2024-07-17 12:00:00+00:00,11
987,6294,737,Garrett,Peach,Toad's Turnpike,4,4,2024-07-17 12:00:00+00:00,11
988,6295,737,Cooper,Toad,Bowser's Castle,1,3,2024-07-17 12:00:00+00:00,11
989,6295,737,Cole,Yoshi,Bowser's Castle,2,3,2024-07-17 12:00:00+00:00,11


In [21]:
# Distinct UIDs kept per season
(
    df_filtered.groupby("SEASON")["UID"]
    .nunique()
    .reset_index(name="count")
)

Unnamed: 0,SEASON,count
0,11,294


In [22]:
# Find UIDs that will break ELO calculation.
# With return_valid=True the helper's result is unpacked as (invalid, valid).
invalid, valid = validate_bad_uids(df=df_filtered, return_valid=True)

In [23]:
# Show the rows flagged as invalid by validate_bad_uids
invalid

Unnamed: 0,UID,SUID,NAME,CHARACTER,MAP,PLACE,PLAYERS,DATE,SEASON


In [24]:
vdf = valid.copy()

# Per-race consistency checks: a race with N players must have N distinct
# names, N distinct places and N distinct characters. The REASON value is the
# failing check labels joined with "_", in the order listed here.
CONSISTENCY_CHECKS = (("name", "NAME"), ("place", "PLACE"), ("character", "CHARACTER"))

# Console message printed for each REASON value
REASON_MESSAGES = {
    "name_place_character": "bad name, place and character",
    "name_place": "bad name and place",
    "name_character": "bad name and character",
    "place_character": "bad place and character",
    "name": "bad name",
    "place": "bad place",
    "character": "bad character",
}

# One frame per offending race, each tagged with a REASON column. This stays a
# list because pd.concat raises on an empty list; concatenate only if non-empty.
bad_dfs = []

# sort=False keeps the races in order of first appearance
for uid, race in vdf.groupby("UID", sort=False):
    tdf = race.copy().reset_index(drop=True)
    expected_players = tdf["PLAYERS"].max()

    failed_checks = [
        label
        for label, column in CONSISTENCY_CHECKS
        if tdf[column].nunique(dropna=False) != expected_players
    ]

    if not failed_checks:
        continue

    reason = "_".join(failed_checks)
    print(REASON_MESSAGES[reason])
    tdf["REASON"] = reason
    bad_dfs.append(tdf)

In [25]:
# Start again from a fresh copy of the validated rows
vdf = valid.copy()

In [26]:
# One row per race: each finishing place becomes its own NAME<n> / CHARACTER<n> column
race_key_columns = ["SEASON", "SUID", "UID", "PLAYERS", "MAP", "DATE"]
vdf_pivot = vdf.pivot(
    index=race_key_columns,
    columns=["PLACE"],
    values=["NAME", "CHARACTER"],
).reset_index()

# Collapse the two-level column index into plain strings,
# e.g. ("NAME", 1) -> "NAME1" and ("SEASON", "") -> "SEASON"
vdf_pivot.columns = [
    f"{field}{place}" for field, place in vdf_pivot.columns.to_flat_index()
]

vdf_pivot.head()

Unnamed: 0,SEASON,SUID,UID,PLAYERS,MAP,DATE,NAME1,NAME2,NAME3,NAME4,CHARACTER1,CHARACTER2,CHARACTER3,CHARACTER4
0,11,711,6002,4,Wario Stadium,2024-06-21 12:00:00+00:00,Blake,Cooper,Matt,Garrett,Toad,Bowser,Yoshi,Peach
1,11,711,6003,4,D.K.'s Jungle,2024-06-21 12:00:00+00:00,Cooper,Blake,Matt,Domingo,Mario,Peach,Yoshi,Toad
2,11,711,6004,4,Sherbet Land,2024-06-21 12:00:00+00:00,Cooper,Blake,Colton,Garrett,Yoshi,Toad,Bowser,Peach
3,11,711,6005,4,Koopa Troopa Beach,2024-06-21 12:00:00+00:00,Blake,Cooper,Matt,Cole,Yoshi,Toad,Luigi,Peach
4,11,711,6006,4,Yoshi Valley,2024-06-21 12:00:00+00:00,Cooper,Blake,Matt,Cole,Toad,Peach,Luigi,Yoshi


In [27]:
# Minutes inserted between consecutive races of one legacy session. Each
# session (SUID) in df_filtered was asserted to have a single DATE, so evenly
# spaced offsets are added to give its races distinct, ordered timestamps.
MINUTES_BETWEEN_RACES = 5

# Rename pivoted columns
vdf_rename = vdf_pivot.drop(columns=["UID"]).rename(
    columns={
        "DATE": "TIMESTAMP",
        "NAME1": "PLAYERS_1ST",
        "NAME2": "PLAYERS_2ND",
        "NAME3": "PLAYERS_3RD",
        "NAME4": "PLAYERS_4TH",
        "CHARACTER1": "CHARACTERS_1ST",
        "CHARACTER2": "CHARACTERS_2ND",
        "CHARACTER3": "CHARACTERS_3RD",
        "CHARACTER4": "CHARACTERS_4TH",
    }
)

# Add new session dummy column
vdf_rename["NEW_SESSION"] = "NO"

# Make each session's races contiguous: sessions in order of first appearance,
# races in their existing order within a session (stable sort).
session_codes = pd.factorize(vdf_rename["SUID"])[0]
tdf_concat = vdf_rename.iloc[np.argsort(session_codes, kind="stable")].reset_index(
    drop=True
)

# Offset the n-th race of a session (n = 0, 1, 2, ...) by n * MINUTES_BETWEEN_RACES
# minutes from the session's first timestamp.
races_by_session = tdf_concat.groupby("SUID", sort=False)
increment = races_by_session.cumcount() * MINUTES_BETWEEN_RACES
session_start = races_by_session["TIMESTAMP"].transform("first")

tdf_concat["INCREMENT"] = increment
tdf_concat["TIMESTAMP"] = session_start + pd.to_timedelta(increment, unit="min")

tdf_sort = tdf_concat[
    [
        "TIMESTAMP",
        "NEW_SESSION",
        "MAP",
        "PLAYERS",
        "PLAYERS_1ST",
        "PLAYERS_2ND",
        "PLAYERS_3RD",
        "PLAYERS_4TH",
        "CHARACTERS_1ST",
        "CHARACTERS_2ND",
        "CHARACTERS_3RD",
        "CHARACTERS_4TH",
    ]
].copy()

tdf_sort

Unnamed: 0,TIMESTAMP,NEW_SESSION,MAP,PLAYERS,PLAYERS_1ST,PLAYERS_2ND,PLAYERS_3RD,PLAYERS_4TH,CHARACTERS_1ST,CHARACTERS_2ND,CHARACTERS_3RD,CHARACTERS_4TH
0,2024-06-21 12:00:00+00:00,NO,Wario Stadium,4,Blake,Cooper,Matt,Garrett,Toad,Bowser,Yoshi,Peach
1,2024-06-21 12:05:00+00:00,NO,D.K.'s Jungle,4,Cooper,Blake,Matt,Domingo,Mario,Peach,Yoshi,Toad
2,2024-06-21 12:10:00+00:00,NO,Sherbet Land,4,Cooper,Blake,Colton,Garrett,Yoshi,Toad,Bowser,Peach
3,2024-06-21 12:15:00+00:00,NO,Koopa Troopa Beach,4,Blake,Cooper,Matt,Cole,Yoshi,Toad,Luigi,Peach
4,2024-06-21 12:20:00+00:00,NO,Yoshi Valley,4,Cooper,Blake,Matt,Cole,Toad,Peach,Luigi,Yoshi
...,...,...,...,...,...,...,...,...,...,...,...,...
291,2024-07-16 12:05:00+00:00,NO,Sherbet Land,4,Cooper,Connor,Cole,Triston,Yoshi,Toad,Peach,Bowser
292,2024-07-16 12:10:00+00:00,NO,Wario Stadium,3,Cooper,Connor,Konnor,,Toad,Peach,Yoshi,
293,2024-07-16 12:15:00+00:00,NO,Bowser's Castle,2,Cooper,Konnor,,,Peach,Yoshi,,
294,2024-07-17 12:00:00+00:00,NO,Toad's Turnpike,4,Cooper,Cole,Connor,Garrett,Yoshi,Bowser,Toad,Peach


## Form Data

In [28]:
# Read the "form_data" tab of the Google Sheet
form_df = load_data_pd(sheet_name="form_data", sheet_id=os.environ["SHEET_ID"])

# Drop every column whose name contains "Unnamed" or "Score"
form_df = form_df.drop(
    columns=[
        col for col in form_df.columns if "Unnamed" in col or "Score" in col
    ]
)

form_df["Timestamp"] = pd.to_datetime(form_df["Timestamp"])

# For bracketed headers only: spaces become underscores and the brackets are removed
form_df.columns = [
    col.replace(" ", "_").replace("[", "").replace("]", "") if "[" in col else col
    for col in form_df.columns
]

In [29]:
def transform_form_data(
    df: pd.DataFrame, game_type_replace_string: Literal["_2_", "_3_", "_4_"]
) -> pd.DataFrame:
    """Normalise the form responses of one game size to a shared column layout.

    Steps, in order: drop columns that are entirely empty, add a 1-based ``ID``
    column, collapse ``game_type_replace_string`` to ``"_"`` in the column
    names that contain it (all other names are upper-cased), and convert
    ``TIMESTAMP`` from naive US/Eastern values to tz-aware UTC.

    Args:
        df: Form responses for a single game size. Needs a timestamp column
            whose name upper-cases to ``TIMESTAMP``. The frame is not modified.
        game_type_replace_string: Infix to collapse, e.g. ``"_2_"`` turns
            ``PLAYERS_2_1ST`` into ``PLAYERS_1ST``.

    Returns:
        A new DataFrame with ``ID`` as its first column (1..N in input order).

    Raises:
        KeyError: If no ``TIMESTAMP`` column remains after the empty columns
            are dropped (for example when that column is entirely empty).
    """
    # dropna returns a new frame, so the caller's frame is left untouched
    idf_out = (
        df.dropna(axis=1, how="all")
        .reset_index(drop=True)
        .reset_index()
        .rename(columns={"index": "ID"})
    )

    idf_out.columns = [
        (
            col.replace(game_type_replace_string, "_")
            if game_type_replace_string in col
            else col.upper()
        )
        for col in idf_out.columns
    ]

    idf_out["TIMESTAMP"] = (
        pd.to_datetime(idf_out["TIMESTAMP"])
        .dt.tz_localize("US/Eastern")
        .dt.tz_convert("UTC")
    )

    # The positional index starts at 0; IDs start at 1
    idf_out["ID"] = idf_out["ID"] + 1

    return idf_out


# Transform each game size separately (each call collapses its own "_<n>_"
# infix in the column names), then stack the results.
per_game_type = [
    transform_form_data(
        df=form_df[form_df["PLAYERS"] == game_type].reset_index(drop=True),
        game_type_replace_string=f"_{game_type}_",
    )
    for game_type in (2, 3, 4)
]

cat_df = (
    pd.concat(per_game_type)
    .sort_values(by="TIMESTAMP")
    .reset_index(drop=True)
)

# Re-number the rows 1..N in chronological order (replaces the per-game-size IDs)
cat_df["ID"] = range(1, len(cat_df) + 1)

# Select the final column order; .copy() makes the result an independent frame
cat_df = cat_df[
    [
        "ID",
        "TIMESTAMP",
        "NEW_SESSION",
        "MAP",
        "PLAYERS",
        "PLAYERS_1ST",
        "PLAYERS_2ND",
        "PLAYERS_3RD",
        "PLAYERS_4TH",
        "CHARACTERS_1ST",
        "CHARACTERS_2ND",
        "CHARACTERS_3RD",
        "CHARACTERS_4TH",
    ]
].copy()

cat_df

Unnamed: 0,ID,TIMESTAMP,NEW_SESSION,MAP,PLAYERS,PLAYERS_1ST,PLAYERS_2ND,PLAYERS_3RD,PLAYERS_4TH,CHARACTERS_1ST,CHARACTERS_2ND,CHARACTERS_3RD,CHARACTERS_4TH
0,1,2024-07-17 23:15:48+00:00,NO,Koopa Troopa Beach,3,Cooper,Regan,Garrett,,Peach,Yoshi,Toad,
1,2,2024-07-18 00:01:53+00:00,NO,Choco Mountain,4,Cooper,Connor,Blake,Triston,Luigi,Toad,Peach,Yoshi
2,3,2024-07-18 00:15:00+00:00,NO,D.K.'s Jungle,4,Cole,Regan,Cooper,Triston,Toad,Yoshi,Mario,Peach
3,4,2024-07-18 00:42:27+00:00,NO,Kalimari Desert,4,Regan,Cole,Cooper,Blake,Yoshi,Toad,Mario,Peach
4,5,2024-07-18 01:13:04+00:00,NO,Frappe Snowland,3,Cooper,Regan,Blake,,Mario,Toad,Peach,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2350,2351,2025-01-28 00:52:01+00:00,NO,Royal Raceway,4,Regan,Blake,Connor,Cole,Peach,Luigi,Toad,Yoshi
2351,2352,2025-01-28 01:12:10+00:00,NO,Banshee Boardwalk,4,Cooper,Blake,Regan,Domingo,Yoshi,Toad,Mario,Peach
2352,2353,2025-01-28 01:52:17+00:00,NO,Choco Mountain,4,Cooper,Regan,Blake,Domingo,Peach,Mario,Yoshi,Toad
2353,2354,2025-01-28 02:52:54+00:00,NO,Kalimari Desert,4,Domingo,Blake,Regan,Sudur,Toad,Luigi,Mario,Peach


## Combine

In [30]:
# First SEASON value found in data_main for each DATE, with DATE renamed to
# TIMESTAMP so the result can be merged onto the form data.
season_gb = (
    df.groupby("DATE", as_index=False)["SEASON"]
    .first()
    .rename(columns={"DATE": "TIMESTAMP"})
)

season_gb.tail()

Unnamed: 0,TIMESTAMP,SEASON
2998,2025-01-28 00:52:01+00:00,15
2999,2025-01-28 01:12:10+00:00,15
3000,2025-01-28 01:52:17+00:00,15
3001,2025-01-28 02:52:54+00:00,15
3002,2025-01-28 04:40:20+00:00,15


In [34]:
form_data_initial = cat_df.drop(columns="ID")

# Attach SEASON by exact timestamp; validate= rejects duplicate keys on either side
form_data_ready = form_data_initial.merge(
    season_gb, how="inner", on="TIMESTAMP", validate="one_to_one"
)

# An inner join drops unmatched rows silently, so confirm none were lost
assert len(form_data_ready) == len(form_data_initial)

form_data_ready.head()

Unnamed: 0,TIMESTAMP,NEW_SESSION,MAP,PLAYERS,PLAYERS_1ST,PLAYERS_2ND,PLAYERS_3RD,PLAYERS_4TH,CHARACTERS_1ST,CHARACTERS_2ND,CHARACTERS_3RD,CHARACTERS_4TH,SEASON
0,2024-07-17 23:15:48+00:00,NO,Koopa Troopa Beach,3,Cooper,Regan,Garrett,,Peach,Yoshi,Toad,,11
1,2024-07-18 00:01:53+00:00,NO,Choco Mountain,4,Cooper,Connor,Blake,Triston,Luigi,Toad,Peach,Yoshi,11
2,2024-07-18 00:15:00+00:00,NO,D.K.'s Jungle,4,Cole,Regan,Cooper,Triston,Toad,Yoshi,Mario,Peach,11
3,2024-07-18 00:42:27+00:00,NO,Kalimari Desert,4,Regan,Cole,Cooper,Blake,Yoshi,Toad,Mario,Peach,11
4,2024-07-18 01:13:04+00:00,NO,Frappe Snowland,3,Cooper,Regan,Blake,,Mario,Toad,Peach,,11


In [35]:
# Tag every legacy row as season 11.
# NOTE(review): df_filtered was selected with SEASON >= 11 — confirm that no
# later season can appear before the form cutoff.
data_main_ready = tdf_sort.assign(SEASON=11)

data_main_ready.tail()

Unnamed: 0,TIMESTAMP,NEW_SESSION,MAP,PLAYERS,PLAYERS_1ST,PLAYERS_2ND,PLAYERS_3RD,PLAYERS_4TH,CHARACTERS_1ST,CHARACTERS_2ND,CHARACTERS_3RD,CHARACTERS_4TH,SEASON
291,2024-07-16 12:05:00+00:00,NO,Sherbet Land,4,Cooper,Connor,Cole,Triston,Yoshi,Toad,Peach,Bowser,11
292,2024-07-16 12:10:00+00:00,NO,Wario Stadium,3,Cooper,Connor,Konnor,,Toad,Peach,Yoshi,,11
293,2024-07-16 12:15:00+00:00,NO,Bowser's Castle,2,Cooper,Konnor,,,Peach,Yoshi,,,11
294,2024-07-17 12:00:00+00:00,NO,Toad's Turnpike,4,Cooper,Cole,Connor,Garrett,Yoshi,Bowser,Toad,Peach,11
295,2024-07-17 12:05:00+00:00,NO,Bowser's Castle,3,Cooper,Cole,Regan,,Toad,Yoshi,Peach,,11


In [36]:
# Stack the legacy and form rows, then order them chronologically with a fresh 0..N-1 index
combined_sources = pd.concat([data_main_ready, form_data_ready])
data_concat = combined_sources.sort_values(by="TIMESTAMP", ignore_index=True)

data_concat

Unnamed: 0,TIMESTAMP,NEW_SESSION,MAP,PLAYERS,PLAYERS_1ST,PLAYERS_2ND,PLAYERS_3RD,PLAYERS_4TH,CHARACTERS_1ST,CHARACTERS_2ND,CHARACTERS_3RD,CHARACTERS_4TH,SEASON
0,2024-06-21 12:00:00+00:00,NO,Wario Stadium,4,Blake,Cooper,Matt,Garrett,Toad,Bowser,Yoshi,Peach,11
1,2024-06-21 12:05:00+00:00,NO,D.K.'s Jungle,4,Cooper,Blake,Matt,Domingo,Mario,Peach,Yoshi,Toad,11
2,2024-06-21 12:10:00+00:00,NO,Sherbet Land,4,Cooper,Blake,Colton,Garrett,Yoshi,Toad,Bowser,Peach,11
3,2024-06-21 12:15:00+00:00,NO,Koopa Troopa Beach,4,Blake,Cooper,Matt,Cole,Yoshi,Toad,Luigi,Peach,11
4,2024-06-21 12:20:00+00:00,NO,Yoshi Valley,4,Cooper,Blake,Matt,Cole,Toad,Peach,Luigi,Yoshi,11
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2646,2025-01-28 00:52:01+00:00,NO,Royal Raceway,4,Regan,Blake,Connor,Cole,Peach,Luigi,Toad,Yoshi,15
2647,2025-01-28 01:12:10+00:00,NO,Banshee Boardwalk,4,Cooper,Blake,Regan,Domingo,Yoshi,Toad,Mario,Peach,15
2648,2025-01-28 01:52:17+00:00,NO,Choco Mountain,4,Cooper,Regan,Blake,Domingo,Peach,Mario,Yoshi,Toad,15
2649,2025-01-28 02:52:54+00:00,NO,Kalimari Desert,4,Domingo,Blake,Regan,Sudur,Toad,Luigi,Mario,Peach,15


In [37]:
# Row count per season after combining both sources
data_concat["SEASON"].value_counts()

SEASON
11    551
12    549
13    549
14    548
15    454
Name: count, dtype: int64

In [38]:
# Sanity check: no two rows share a timestamp
data_concat["TIMESTAMP"].is_unique

True

In [39]:
# Sanity check: rows are in chronological order
data_concat["TIMESTAMP"].is_monotonic_increasing

True

In [41]:
# Counter-check of the ordering: this is False for the ascending data shown above
data_concat["TIMESTAMP"].is_monotonic_decreasing

False

In [42]:
# fill_new_session is a project helper (mktools.form_data).
# NOTE(review): presumably it derives NEW_SESSION from the gaps between
# timestamps — confirm against the helper's implementation.
data_concat_filled = fill_new_session(
    df=data_concat, timestamp_column_name="TIMESTAMP", drop_window_start_column=True
)

# Convert back to US/Eastern for Postgres (values stay tz-aware)
data_concat_filled["TIMESTAMP"] = data_concat_filled["TIMESTAMP"].dt.tz_convert(
    "US/Eastern"
)

data_concat_filled

Unnamed: 0,TIMESTAMP,NEW_SESSION,MAP,PLAYERS,PLAYERS_1ST,PLAYERS_2ND,PLAYERS_3RD,PLAYERS_4TH,CHARACTERS_1ST,CHARACTERS_2ND,CHARACTERS_3RD,CHARACTERS_4TH,SEASON
0,2024-06-21 08:00:00-04:00,YES,Wario Stadium,4,Blake,Cooper,Matt,Garrett,Toad,Bowser,Yoshi,Peach,11
1,2024-06-21 08:05:00-04:00,NO,D.K.'s Jungle,4,Cooper,Blake,Matt,Domingo,Mario,Peach,Yoshi,Toad,11
2,2024-06-21 08:10:00-04:00,NO,Sherbet Land,4,Cooper,Blake,Colton,Garrett,Yoshi,Toad,Bowser,Peach,11
3,2024-06-21 08:15:00-04:00,NO,Koopa Troopa Beach,4,Blake,Cooper,Matt,Cole,Yoshi,Toad,Luigi,Peach,11
4,2024-06-21 08:20:00-04:00,NO,Yoshi Valley,4,Cooper,Blake,Matt,Cole,Toad,Peach,Luigi,Yoshi,11
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2646,2025-01-27 19:52:01-05:00,NO,Royal Raceway,4,Regan,Blake,Connor,Cole,Peach,Luigi,Toad,Yoshi,15
2647,2025-01-27 20:12:10-05:00,NO,Banshee Boardwalk,4,Cooper,Blake,Regan,Domingo,Yoshi,Toad,Mario,Peach,15
2648,2025-01-27 20:52:17-05:00,NO,Choco Mountain,4,Cooper,Regan,Blake,Domingo,Peach,Mario,Yoshi,Toad,15
2649,2025-01-27 21:52:54-05:00,NO,Kalimari Desert,4,Domingo,Blake,Regan,Sudur,Toad,Luigi,Mario,Peach,15


In [43]:
# Rows per (SEASON, NEW_SESSION) pair
data_concat_filled[["SEASON", "NEW_SESSION"]].value_counts()

SEASON  NEW_SESSION
12      NO             508
11      NO             507
13      NO             507
14      NO             499
15      NO             415
14      YES             49
11      YES             44
13      YES             42
12      YES             41
15      YES             39
Name: count, dtype: int64

In [44]:
# Inspect the converted column; its dtype should now carry the US/Eastern zone
data_concat_filled["TIMESTAMP"]

0      2024-06-21 08:00:00-04:00
1      2024-06-21 08:05:00-04:00
2      2024-06-21 08:10:00-04:00
3      2024-06-21 08:15:00-04:00
4      2024-06-21 08:20:00-04:00
                  ...           
2646   2025-01-27 19:52:01-05:00
2647   2025-01-27 20:12:10-05:00
2648   2025-01-27 20:52:17-05:00
2649   2025-01-27 21:52:54-05:00
2650   2025-01-27 23:40:20-05:00
Name: TIMESTAMP, Length: 2651, dtype: datetime64[ns, US/Eastern]

In [50]:
new_session_df = data_concat_filled.copy()

# 1 on rows flagged "YES" (a new session starts), 0 everywhere else
starts_session = new_session_df["NEW_SESSION"].eq("YES")
new_session_df["temp_session"] = starts_session.astype(int)

# The running total of session starts gives each session a sequential id
new_session_df["SUID"] = new_session_df["temp_session"].cumsum()

In [54]:
# List the columns available for the export selection
new_session_df.columns

Index(['TIMESTAMP', 'NEW_SESSION', 'MAP', 'PLAYERS', 'PLAYERS_1ST',
       'PLAYERS_2ND', 'PLAYERS_3RD', 'PLAYERS_4TH', 'CHARACTERS_1ST',
       'CHARACTERS_2ND', 'CHARACTERS_3RD', 'CHARACTERS_4TH', 'SEASON',
       'temp_session', 'SUID'],
      dtype='object')

In [55]:
# Columns written to the export, in output order (temp_session is left out)
export_columns = [
    "TIMESTAMP",
    "NEW_SESSION",
    "SUID",
    "MAP",
    "PLAYERS",
    "PLAYERS_1ST",
    "PLAYERS_2ND",
    "PLAYERS_3RD",
    "PLAYERS_4TH",
    "CHARACTERS_1ST",
    "CHARACTERS_2ND",
    "CHARACTERS_3RD",
    "CHARACTERS_4TH",
    "SEASON",
]
out_df = new_session_df.loc[:, export_columns].copy()

In [56]:
# Folder for the migration CSVs. The default is the original local path; set
# MIGRATION_OUTPUT_DIR (for example in .env) to write somewhere else.
form_data_output_dir = os.environ.get(
    "MIGRATION_OUTPUT_DIR",
    r"C:\Users\Cooper\sandbox\mkstream\form_data_migration",
)

out_df.to_csv(
    os.path.join(form_data_output_dir, "form_data_valid.csv"),
    index=False,
)

## Images

In [60]:
# Site-relative URL ("/maps/<file name>") for every entry in the maps folder.
# `os` is already imported in the first cell, so it is not re-imported here.
map_icons = [f"/maps/{x}" for x in os.listdir(r"\\wsl$\Ubuntu\root\learn\mk\public\maps")]

In [69]:
# Distinct map names present in the export, in order of first appearance
out_df["MAP"].drop_duplicates().to_list()

['Wario Stadium',
 "D.K.'s Jungle",
 'Sherbet Land',
 'Koopa Troopa Beach',
 'Yoshi Valley',
 'Banshee Boardwalk',
 'Royal Raceway',
 'Kalimari Desert',
 "Bowser's Castle",
 "Toad's Turnpike",
 'Frappe Snowland',
 'Choco Mountain',
 'Mario Raceway',
 'Moo Moo Farm',
 'Luigi Raceway']

In [71]:
# Hand-maintained list of map names: the fifteen shown in the export above plus "Rainbow Road"
all_map_names = [
    "Wario Stadium",
    "D.K.'s Jungle",
    "Sherbet Land",
    "Koopa Troopa Beach",
    "Yoshi Valley",
    "Banshee Boardwalk",
    "Royal Raceway",
    "Kalimari Desert",
    "Bowser's Castle",
    "Toad's Turnpike",
    "Frappe Snowland",
    "Choco Mountain",
    "Mario Raceway",
    "Moo Moo Farm",
    "Luigi Raceway",
    "Rainbow Road",
]

# Alphabetical order with a fresh 0..N-1 index
map_names_sorted = pd.Series(sorted(all_map_names))

In [72]:
# Icon URLs in alphabetical order with a fresh 0..N-1 index
map_paths_sorted = pd.Series(sorted(map_icons))

In [75]:
# Names and icon paths are paired purely by sorted position. If the counts
# differ, the frame would be padded with NaN instead of failing, so check first.
# NOTE(review): equal counts do not prove each name sits next to its own icon —
# inspect the displayed pairs before exporting.
assert len(map_names_sorted) == len(map_paths_sorted), (
    f"{len(map_names_sorted)} map names but {len(map_paths_sorted)} icon files"
)

maps_df = pd.DataFrame(
    {
        "MAP": map_names_sorted,
        "IMAGE_URL": map_paths_sorted,
    }
)

maps_df

Unnamed: 0,MAP,IMAGE_URL
0,Banshee Boardwalk,/maps/MK64_Banshee_Boardwalk_Icon.png
1,Bowser's Castle,/maps/MK64_Bowser_Castle_Icon.png
2,Choco Mountain,/maps/MK64_Choco_Mountain_Icon.png
3,D.K.'s Jungle,/maps/MK64_DK_Jungle_Parkway_Icon.png
4,Frappe Snowland,/maps/MK64_Frappe_Snowland_Icon.png
5,Kalimari Desert,/maps/MK64_Kalimari_Desert_Icon.png
6,Koopa Troopa Beach,/maps/MK64_Koopa_Troopa_Beach_Icon.png
7,Luigi Raceway,/maps/MK64_Luigi_Raceway_Icon.png
8,Mario Raceway,/maps/MK64_Mario_Raceway_Icon.png
9,Moo Moo Farm,/maps/MK64_Moo_Moo_Farm_Icon.png


In [76]:
# Folder for the migration CSVs. The default is the original local path; set
# MIGRATION_OUTPUT_DIR (for example in .env) to write somewhere else.
maps_output_dir = os.environ.get(
    "MIGRATION_OUTPUT_DIR",
    r"C:\Users\Cooper\sandbox\mkstream\form_data_migration",
)

maps_df.to_csv(
    os.path.join(maps_output_dir, "maps_valid.csv"),
    index=False,
)