In [1]:
import os
import pandas as pd
from dotenv import load_dotenv
from multielo import MultiElo, Tracker
from mktools.get_data import load_data_pd
from mktools.validate_data import validate_bad_uids
import plotly.express as px
from alive_progress import alive_it
from bs4 import BeautifulSoup

# Load Variables from .env file
load_dotenv()

# Load data_main from google sheet
df = load_data_pd(
    sheet_name="data_main",
    sheet_id=os.environ["SHEET_ID"],
    usecols=[
        "UID",
        "SUID",
        "NAME",
        "CHARACTER",
        "MAP",
        "PLACE",
        "PLAYERS",
        "DATE",
        "SEASON",
    ],
)

# Convert date to string for... TODO: Update comment
df["DATE"] = pd.to_datetime(df["DATE"]).astype(str)

# Find UIDs that will break ELO calculation
invalid, valid = validate_bad_uids(df=df, return_valid=True)

vdf = valid.copy()

In [2]:
vdf

Unnamed: 0,UID,SUID,NAME,CHARACTER,MAP,PLACE,PLAYERS,DATE,SEASON
0,1,1,Cole,Toad,Sherbet Land,4,4,2021-09-20 00:00:00,0
1,1,1,Connor,Yoshi,Sherbet Land,2,4,2021-09-20 00:00:00,0
2,1,1,Cooper,Peach,Sherbet Land,1,4,2021-09-20 00:00:00,0
3,1,1,Triston,Bowser,Sherbet Land,3,4,2021-09-20 00:00:00,0
4,2,1,Cole,Toad,Kalimari Desert,4,4,2021-09-20 00:00:00,0
...,...,...,...,...,...,...,...,...,...
26959,7969,862,Colton,Yoshi,Yoshi Valley,4,4,2024-11-21 22:13:09,14
26960,7970,862,Blake,Toad,Wario Stadium,1,4,2024-11-22 00:03:49,14
26961,7970,862,Domingo,Yoshi,Wario Stadium,2,4,2024-11-22 00:03:49,14
26962,7970,862,Konnor,Bowser,Wario Stadium,3,4,2024-11-22 00:03:49,14


In [6]:
form_df = load_data_pd(sheet_name="form_data", sheet_id=os.environ["SHEET_ID"])

form_df = form_df.drop(
    columns=[x for x in form_df.columns if x.__contains__("Unnamed")]
)

form_df

Unnamed: 0,Timestamp,NEW_SESSION,MAP,PLAYERS,PLAYERS_2 [1ST],PLAYERS_2 [2ND],CHARACTERS_2 [1ST],CHARACTERS_2 [2ND],PLAYERS_3 [1ST],PLAYERS_3 [2ND],...,CHARACTERS_3 [2ND],CHARACTERS_3 [3RD],PLAYERS_4 [1ST],PLAYERS_4 [2ND],PLAYERS_4 [3RD],PLAYERS_4 [4TH],CHARACTERS_4 [1ST],CHARACTERS_4 [2ND],CHARACTERS_4 [3RD],CHARACTERS_4 [4TH]
0,7/17/2024 19:15:48,NO,Koopa Troopa Beach,3,,,,,Cooper,Regan,...,Yoshi,Toad,,,,,,,,
1,7/17/2024 20:01:53,NO,Choco Mountain,4,,,,,,,...,,,Cooper,Connor,Blake,Triston,Luigi,Toad,Peach,Yoshi
2,7/17/2024 20:15:00,NO,D.K.'s Jungle,4,,,,,,,...,,,Cole,Regan,Cooper,Triston,Toad,Yoshi,Mario,Peach
3,7/17/2024 20:42:27,NO,Kalimari Desert,4,,,,,,,...,,,Regan,Cole,Cooper,Blake,Yoshi,Toad,Mario,Peach
4,7/17/2024 21:13:04,NO,Frappe Snowland,3,,,,,Cooper,Regan,...,Toad,Peach,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1670,11/21/2024 19:09:42,NO,Frappe Snowland,4,,,,,,,...,,,Regan,Connor,Cole,Domingo,Yoshi,Peach,Toad,Bowser
1671,11/21/2024 19:38:11,NO,Kalimari Desert,4,,,,,,,...,,,Blake,Colton,Regan,Sudur,Toad,Yoshi,Peach,D.K.
1672,11/21/2024 20:51:27,NO,Wario Stadium,4,,,,,,,...,,,Regan,Connor,Blake,Colton,Toad,Peach,Yoshi,Mario
1673,11/21/2024 22:13:09,NO,Yoshi Valley,4,,,,,,,...,,,Regan,Konnor,Domingo,Colton,Toad,Peach,Bowser,Yoshi


In [8]:
form_df

Unnamed: 0,Timestamp,NEW_SESSION,MAP,PLAYERS,PLAYERS_2 [1ST],PLAYERS_2 [2ND],CHARACTERS_2 [1ST],CHARACTERS_2 [2ND],PLAYERS_3 [1ST],PLAYERS_3 [2ND],...,CHARACTERS_3 [2ND],CHARACTERS_3 [3RD],PLAYERS_4 [1ST],PLAYERS_4 [2ND],PLAYERS_4 [3RD],PLAYERS_4 [4TH],CHARACTERS_4 [1ST],CHARACTERS_4 [2ND],CHARACTERS_4 [3RD],CHARACTERS_4 [4TH]
0,7/17/2024 19:15:48,NO,Koopa Troopa Beach,3,,,,,Cooper,Regan,...,Yoshi,Toad,,,,,,,,
1,7/17/2024 20:01:53,NO,Choco Mountain,4,,,,,,,...,,,Cooper,Connor,Blake,Triston,Luigi,Toad,Peach,Yoshi
2,7/17/2024 20:15:00,NO,D.K.'s Jungle,4,,,,,,,...,,,Cole,Regan,Cooper,Triston,Toad,Yoshi,Mario,Peach
3,7/17/2024 20:42:27,NO,Kalimari Desert,4,,,,,,,...,,,Regan,Cole,Cooper,Blake,Yoshi,Toad,Mario,Peach
4,7/17/2024 21:13:04,NO,Frappe Snowland,3,,,,,Cooper,Regan,...,Toad,Peach,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1670,11/21/2024 19:09:42,NO,Frappe Snowland,4,,,,,,,...,,,Regan,Connor,Cole,Domingo,Yoshi,Peach,Toad,Bowser
1671,11/21/2024 19:38:11,NO,Kalimari Desert,4,,,,,,,...,,,Blake,Colton,Regan,Sudur,Toad,Yoshi,Peach,D.K.
1672,11/21/2024 20:51:27,NO,Wario Stadium,4,,,,,,,...,,,Regan,Connor,Blake,Colton,Toad,Peach,Yoshi,Mario
1673,11/21/2024 22:13:09,NO,Yoshi Valley,4,,,,,,,...,,,Regan,Konnor,Domingo,Colton,Toad,Peach,Bowser,Yoshi


In [11]:
form_df["Timestamp"] = pd.to_datetime(form_df["Timestamp"])

form_df

Unnamed: 0,Timestamp,NEW_SESSION,MAP,PLAYERS,PLAYERS_2 [1ST],PLAYERS_2 [2ND],CHARACTERS_2 [1ST],CHARACTERS_2 [2ND],PLAYERS_3 [1ST],PLAYERS_3 [2ND],...,CHARACTERS_3 [2ND],CHARACTERS_3 [3RD],PLAYERS_4 [1ST],PLAYERS_4 [2ND],PLAYERS_4 [3RD],PLAYERS_4 [4TH],CHARACTERS_4 [1ST],CHARACTERS_4 [2ND],CHARACTERS_4 [3RD],CHARACTERS_4 [4TH]
0,2024-07-17 19:15:48,NO,Koopa Troopa Beach,3,,,,,Cooper,Regan,...,Yoshi,Toad,,,,,,,,
1,2024-07-17 20:01:53,NO,Choco Mountain,4,,,,,,,...,,,Cooper,Connor,Blake,Triston,Luigi,Toad,Peach,Yoshi
2,2024-07-17 20:15:00,NO,D.K.'s Jungle,4,,,,,,,...,,,Cole,Regan,Cooper,Triston,Toad,Yoshi,Mario,Peach
3,2024-07-17 20:42:27,NO,Kalimari Desert,4,,,,,,,...,,,Regan,Cole,Cooper,Blake,Yoshi,Toad,Mario,Peach
4,2024-07-17 21:13:04,NO,Frappe Snowland,3,,,,,Cooper,Regan,...,Toad,Peach,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1670,2024-11-21 19:09:42,NO,Frappe Snowland,4,,,,,,,...,,,Regan,Connor,Cole,Domingo,Yoshi,Peach,Toad,Bowser
1671,2024-11-21 19:38:11,NO,Kalimari Desert,4,,,,,,,...,,,Blake,Colton,Regan,Sudur,Toad,Yoshi,Peach,D.K.
1672,2024-11-21 20:51:27,NO,Wario Stadium,4,,,,,,,...,,,Regan,Connor,Blake,Colton,Toad,Peach,Yoshi,Mario
1673,2024-11-21 22:13:09,NO,Yoshi Valley,4,,,,,,,...,,,Regan,Konnor,Domingo,Colton,Toad,Peach,Bowser,Yoshi


In [12]:
form_df.columns

Index(['Timestamp', 'NEW_SESSION', 'MAP', 'PLAYERS', 'PLAYERS_2 [1ST]',
       'PLAYERS_2 [2ND]', 'CHARACTERS_2 [1ST]', 'CHARACTERS_2 [2ND]',
       'PLAYERS_3 [1ST]', 'PLAYERS_3 [2ND]', 'PLAYERS_3 [3RD]',
       'CHARACTERS_3 [1ST]', 'CHARACTERS_3 [2ND]', 'CHARACTERS_3 [3RD]',
       'PLAYERS_4 [1ST]', 'PLAYERS_4 [2ND]', 'PLAYERS_4 [3RD]',
       'PLAYERS_4 [4TH]', 'CHARACTERS_4 [1ST]', 'CHARACTERS_4 [2ND]',
       'CHARACTERS_4 [3RD]', 'CHARACTERS_4 [4TH]'],
      dtype='object')

In [20]:
form_df.columns = [
    x.replace(" ", "_").replace("[", "").replace("]", "") if x.__contains__("[") else x
    for x in form_df.columns
]

form_df

Unnamed: 0,Timestamp,NEW_SESSION,MAP,PLAYERS,PLAYERS_2_1ST,PLAYERS_2_2ND,CHARACTERS_2_1ST,CHARACTERS_2_2ND,PLAYERS_3_1ST,PLAYERS_3_2ND,...,CHARACTERS_3_2ND,CHARACTERS_3_3RD,PLAYERS_4_1ST,PLAYERS_4_2ND,PLAYERS_4_3RD,PLAYERS_4_4TH,CHARACTERS_4_1ST,CHARACTERS_4_2ND,CHARACTERS_4_3RD,CHARACTERS_4_4TH
0,2024-07-17 19:15:48,NO,Koopa Troopa Beach,3,,,,,Cooper,Regan,...,Yoshi,Toad,,,,,,,,
1,2024-07-17 20:01:53,NO,Choco Mountain,4,,,,,,,...,,,Cooper,Connor,Blake,Triston,Luigi,Toad,Peach,Yoshi
2,2024-07-17 20:15:00,NO,D.K.'s Jungle,4,,,,,,,...,,,Cole,Regan,Cooper,Triston,Toad,Yoshi,Mario,Peach
3,2024-07-17 20:42:27,NO,Kalimari Desert,4,,,,,,,...,,,Regan,Cole,Cooper,Blake,Yoshi,Toad,Mario,Peach
4,2024-07-17 21:13:04,NO,Frappe Snowland,3,,,,,Cooper,Regan,...,Toad,Peach,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1670,2024-11-21 19:09:42,NO,Frappe Snowland,4,,,,,,,...,,,Regan,Connor,Cole,Domingo,Yoshi,Peach,Toad,Bowser
1671,2024-11-21 19:38:11,NO,Kalimari Desert,4,,,,,,,...,,,Blake,Colton,Regan,Sudur,Toad,Yoshi,Peach,D.K.
1672,2024-11-21 20:51:27,NO,Wario Stadium,4,,,,,,,...,,,Regan,Connor,Blake,Colton,Toad,Peach,Yoshi,Mario
1673,2024-11-21 22:13:09,NO,Yoshi Valley,4,,,,,,,...,,,Regan,Konnor,Domingo,Colton,Toad,Peach,Bowser,Yoshi


In [21]:
fdf = form_df.copy()

In [22]:
dfs = []

for game_type in [2, 3, 4]:
    tdf = fdf[fdf["PLAYERS"] == game_type].copy().reset_index(drop=True)

    dfs.append(tdf)

In [24]:
two_p = dfs[0]
three_p = dfs[1]
four_p = dfs[2]

two_p

Unnamed: 0,Timestamp,NEW_SESSION,MAP,PLAYERS,PLAYERS_2_1ST,PLAYERS_2_2ND,CHARACTERS_2_1ST,CHARACTERS_2_2ND,PLAYERS_3_1ST,PLAYERS_3_2ND,...,CHARACTERS_3_2ND,CHARACTERS_3_3RD,PLAYERS_4_1ST,PLAYERS_4_2ND,PLAYERS_4_3RD,PLAYERS_4_4TH,CHARACTERS_4_1ST,CHARACTERS_4_2ND,CHARACTERS_4_3RD,CHARACTERS_4_4TH
0,2024-07-18 21:05:00,NO,Toad's Turnpike,2,Cooper,Regan,Peach,Toad,,,...,,,,,,,,,,
1,2024-07-21 16:32:47,YES,Bowser's Castle,2,Matt,Cooper,Toad,Peach,,,...,,,,,,,,,,
2,2024-07-21 16:56:38,NO,Yoshi Valley,2,Matt,Cooper,Peach,Yoshi,,,...,,,,,,,,,,
3,2024-07-24 13:46:27,YES,Wario Stadium,2,Regan,Martin,Toad,Yoshi,,,...,,,,,,,,,,
4,2024-07-24 13:47:06,NO,Toad's Turnpike,2,Regan,Martin,D.K.,Yoshi,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
111,2024-11-06 02:55:25,NO,Frappe Snowland,2,Konnor,Regan,Yoshi,Toad,,,...,,,,,,,,,,
112,2024-11-08 00:37:30,NO,D.K.'s Jungle,2,Regan,Domingo,Toad,Peach,,,...,,,,,,,,,,
113,2024-11-08 00:45:44,NO,Toad's Turnpike,2,Regan,Domingo,Peach,Toad,,,...,,,,,,,,,,
114,2024-11-08 01:12:53,NO,Toad's Turnpike,2,Regan,Connor,Peach,Toad,,,...,,,,,,,,,,


In [25]:
two_p.columns

Index(['Timestamp', 'NEW_SESSION', 'MAP', 'PLAYERS', 'PLAYERS_2_1ST',
       'PLAYERS_2_2ND', 'CHARACTERS_2_1ST', 'CHARACTERS_2_2ND',
       'PLAYERS_3_1ST', 'PLAYERS_3_2ND', 'PLAYERS_3_3RD', 'CHARACTERS_3_1ST',
       'CHARACTERS_3_2ND', 'CHARACTERS_3_3RD', 'PLAYERS_4_1ST',
       'PLAYERS_4_2ND', 'PLAYERS_4_3RD', 'PLAYERS_4_4TH', 'CHARACTERS_4_1ST',
       'CHARACTERS_4_2ND', 'CHARACTERS_4_3RD', 'CHARACTERS_4_4TH'],
      dtype='object')

In [50]:
two_p_out = (
    two_p.dropna(axis=1, how="all")
    .reset_index(drop=True)
    .reset_index()
    .rename(columns={"index": "ID"})
)

two_p_out.columns = [
    x.replace("_2_", "_") if x.__contains__("_2_") else x for x in two_p_out.columns
]

two_p_out["ID"] = two_p_out["ID"] + 1

two_p_out

Unnamed: 0,ID,Timestamp,NEW_SESSION,MAP,PLAYERS,PLAYERS_1ST,PLAYERS_2ND,CHARACTERS_1ST,CHARACTERS_2ND
0,1,2024-07-18 21:05:00,NO,Toad's Turnpike,2,Cooper,Regan,Peach,Toad
1,2,2024-07-21 16:32:47,YES,Bowser's Castle,2,Matt,Cooper,Toad,Peach
2,3,2024-07-21 16:56:38,NO,Yoshi Valley,2,Matt,Cooper,Peach,Yoshi
3,4,2024-07-24 13:46:27,YES,Wario Stadium,2,Regan,Martin,Toad,Yoshi
4,5,2024-07-24 13:47:06,NO,Toad's Turnpike,2,Regan,Martin,D.K.,Yoshi
...,...,...,...,...,...,...,...,...,...
111,112,2024-11-06 02:55:25,NO,Frappe Snowland,2,Konnor,Regan,Yoshi,Toad
112,113,2024-11-08 00:37:30,NO,D.K.'s Jungle,2,Regan,Domingo,Toad,Peach
113,114,2024-11-08 00:45:44,NO,Toad's Turnpike,2,Regan,Domingo,Peach,Toad
114,115,2024-11-08 01:12:53,NO,Toad's Turnpike,2,Regan,Connor,Peach,Toad


In [51]:
three_p_out = (
    three_p.dropna(axis=1, how="all")
    .reset_index(drop=True)
    .reset_index()
    .rename(columns={"index": "ID"})
)

three_p_out.columns = [
    x.replace("_3_", "_") if x.__contains__("_3_") else x for x in three_p_out.columns
]

three_p_out["ID"] = three_p_out["ID"] + 1

three_p_out

Unnamed: 0,ID,Timestamp,NEW_SESSION,MAP,PLAYERS,PLAYERS_1ST,PLAYERS_2ND,PLAYERS_3RD,CHARACTERS_1ST,CHARACTERS_2ND,CHARACTERS_3RD
0,1,2024-07-17 19:15:48,NO,Koopa Troopa Beach,3,Cooper,Regan,Garrett,Peach,Yoshi,Toad
1,2,2024-07-17 21:13:04,NO,Frappe Snowland,3,Cooper,Regan,Blake,Mario,Toad,Peach
2,3,2024-07-18 19:42:21,NO,Sherbet Land,3,Regan,Blake,Cooper,Yoshi,Peach,Toad
3,4,2024-07-18 22:38:14,NO,Koopa Troopa Beach,3,Regan,Cooper,Konnor,Peach,Toad,Yoshi
4,5,2024-07-20 18:46:58,NO,Kalimari Desert,3,Regan,Triston,Cooper,Peach,Yoshi,Mario
...,...,...,...,...,...,...,...,...,...,...,...
138,139,2024-11-02 15:32:05,NO,D.K.'s Jungle,3,Matt,Luke,Antonio,Yoshi,Toad,Peach
139,140,2024-11-08 13:41:04,YES,Toad's Turnpike,3,Blake,Regan,Matt,Toad,Yoshi,Peach
140,141,2024-11-16 12:10:00,YES,Koopa Troopa Beach,3,Chandler,Luke,Robert,Yoshi,Peach,Toad
141,142,2024-11-16 23:53:58,NO,Mario Raceway,3,Konnor,Blake,Jake,Peach,Yoshi,Toad


In [52]:
four_p_out = (
    four_p.dropna(axis=1, how="all")
    .reset_index(drop=True)
    .reset_index()
    .rename(columns={"index": "ID"})
)

four_p_out.columns = [
    x.replace("_3_", "_") if x.__contains__("_3_") else x.upper()
    for x in four_p_out.columns
]

four_p_out["ID"] = four_p_out["ID"] + 1

four_p_out

Unnamed: 0,ID,TIMESTAMP,NEW_SESSION,MAP,PLAYERS,PLAYERS_4_1ST,PLAYERS_4_2ND,PLAYERS_4_3RD,PLAYERS_4_4TH,CHARACTERS_4_1ST,CHARACTERS_4_2ND,CHARACTERS_4_3RD,CHARACTERS_4_4TH
0,1,2024-07-17 20:01:53,NO,Choco Mountain,4,Cooper,Connor,Blake,Triston,Luigi,Toad,Peach,Yoshi
1,2,2024-07-17 20:15:00,NO,D.K.'s Jungle,4,Cole,Regan,Cooper,Triston,Toad,Yoshi,Mario,Peach
2,3,2024-07-17 20:42:27,NO,Kalimari Desert,4,Regan,Cole,Cooper,Blake,Yoshi,Toad,Mario,Peach
3,4,2024-07-17 23:25:28,NO,Koopa Troopa Beach,4,Cooper,Blake,Triston,Connor,Wario,Toad,Peach,Yoshi
4,5,2024-07-17 23:40:49,NO,Wario Stadium,4,Blake,Cooper,Triston,Regan,Toad,D.K.,Peach,Yoshi
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1411,1412,2024-11-21 19:09:42,NO,Frappe Snowland,4,Regan,Connor,Cole,Domingo,Yoshi,Peach,Toad,Bowser
1412,1413,2024-11-21 19:38:11,NO,Kalimari Desert,4,Blake,Colton,Regan,Sudur,Toad,Yoshi,Peach,D.K.
1413,1414,2024-11-21 20:51:27,NO,Wario Stadium,4,Regan,Connor,Blake,Colton,Toad,Peach,Yoshi,Mario
1414,1415,2024-11-21 22:13:09,NO,Yoshi Valley,4,Regan,Konnor,Domingo,Colton,Toad,Peach,Bowser,Yoshi


In [53]:
for idx, odf in enumerate([two_p_out, three_p_out, four_p_out]):
    v = idx + 2
    odf.to_csv(rf"C:\Users\Cooper\sandbox\mkstream\form_data_migration\data_{v}P.csv")

In [7]:
form_df.dtypes

Timestamp             object
NEW_SESSION           object
MAP                   object
PLAYERS                int64
PLAYERS_2 [1ST]       object
PLAYERS_2 [2ND]       object
CHARACTERS_2 [1ST]    object
CHARACTERS_2 [2ND]    object
PLAYERS_3 [1ST]       object
PLAYERS_3 [2ND]       object
PLAYERS_3 [3RD]       object
CHARACTERS_3 [1ST]    object
CHARACTERS_3 [2ND]    object
CHARACTERS_3 [3RD]    object
PLAYERS_4 [1ST]       object
PLAYERS_4 [2ND]       object
PLAYERS_4 [3RD]       object
PLAYERS_4 [4TH]       object
CHARACTERS_4 [1ST]    object
CHARACTERS_4 [2ND]    object
CHARACTERS_4 [3RD]    object
CHARACTERS_4 [4TH]    object
dtype: object

In [3]:
vdf.dtypes

UID           int64
SUID          int64
NAME         object
CHARACTER    object
MAP          object
PLACE         int64
PLAYERS       int64
DATE         object
SEASON        int64
dtype: object

In [4]:
vdf.DATE.dtype

dtype('O')

In [5]:
vdf.DATE[0]

'2021-09-20 00:00:00'