In [1]:
import os
import numpy as np
import pandas as pd
from dotenv import load_dotenv
from mktools.get_data import load_data_pd
from mktools.form_data import form_data_wide_to_long

# Let wide frames (the form sheet has 20+ columns) render without column elision.
pd.set_option("display.max_columns", 150)

In [2]:
# Populate os.environ (SHEET_ID is read below) from the local .env file.
load_dotenv()

# Load the `form_data` sheet.
form_df = load_data_pd(sheet_name="form_data", sheet_id=os.environ["SHEET_ID"])

# Drop every column whose name contains "Unnamed" and normalise Timestamp to a
# plain string, in one chain so the cell is a single load -> clean step.
form_df = (
    form_df
    .drop(columns=[col for col in form_df.columns if "Unnamed" in col])
    .assign(Timestamp=lambda frame: pd.to_datetime(frame["Timestamp"]).astype(str))
)

form_df

Unnamed: 0,Timestamp,NEW_SESSION,MAP,PLAYERS,PLAYERS_2 [1ST],PLAYERS_2 [2ND],CHARACTERS_2 [1ST],CHARACTERS_2 [2ND],PLAYERS_3 [1ST],PLAYERS_3 [2ND],PLAYERS_3 [3RD],CHARACTERS_3 [1ST],CHARACTERS_3 [2ND],CHARACTERS_3 [3RD],PLAYERS_4 [1ST],PLAYERS_4 [2ND],PLAYERS_4 [3RD],PLAYERS_4 [4TH],CHARACTERS_4 [1ST],CHARACTERS_4 [2ND],CHARACTERS_4 [3RD],CHARACTERS_4 [4TH]
0,2024-07-17 19:15:48,NO,Koopa Troopa Beach,3,,,,,Cooper,Regan,Garrett,Peach,Yoshi,Toad,,,,,,,,
1,2024-07-17 20:01:53,NO,Choco Mountain,4,,,,,,,,,,,Cooper,Connor,Blake,Triston,Luigi,Toad,Peach,Yoshi
2,2024-07-17 20:15:00,NO,D.K.'s Jungle,4,,,,,,,,,,,Cole,Regan,Cooper,Triston,Toad,Yoshi,Mario,Peach
3,2024-07-17 20:42:27,NO,Kalimari Desert,4,,,,,,,,,,,Regan,Cole,Cooper,Blake,Yoshi,Toad,Mario,Peach
4,2024-07-17 21:13:04,NO,Frappe Snowland,3,,,,,Cooper,Regan,Blake,Mario,Toad,Peach,,,,,,,,


In [9]:
# Load the `form_data_transform` sheet.
form_transformed_df = load_data_pd(
    sheet_name="form_data_transform", sheet_id=os.environ["SHEET_ID"]
)

# Drop every column whose name contains "Unnamed", then normalise DATE to a
# plain string so it matches the string dates used by the other frames.
form_transformed_df = (
    form_transformed_df
    .drop(columns=[col for col in form_transformed_df.columns if "Unnamed" in col])
    .assign(DATE=lambda frame: pd.to_datetime(frame["DATE"]).astype(str))
)

form_transformed_df

Unnamed: 0,UID,SUID,NAME,CHARACTER,MAP,PLACE,PLAYERS,DATE,SEASON
0,6296,737,Cooper,Peach,Koopa Troopa Beach,1,3,2024-07-17 19:15:48,11
1,6296,737,Regan,Yoshi,Koopa Troopa Beach,2,3,2024-07-17 19:15:48,11
2,6296,737,Garrett,Toad,Koopa Troopa Beach,3,3,2024-07-17 19:15:48,11


In [7]:
# Read the `data_main` sheet straight from the Google Sheets CSV export
# endpoint, bypassing the mktools loader.
sheet_id = os.environ["SHEET_ID"]
sheet_name = "data_main"

# The gviz endpoint serves a single sheet of the spreadsheet as CSV.
url = f"https://docs.google.com/spreadsheets/d/{sheet_id}/gviz/tq?tqx=out:csv&sheet={sheet_name}"

# Keep DATE as text; every other column's dtype is inferred by the parser.
df = pd.read_csv(
    url,
    engine="pyarrow",
    dtype={"DATE": str},
)

df

Unnamed: 0,UID,SUID,NAME,CHARACTER,MAP,PLACE,PLAYERS,DATE,SEASON,SEASON_INDEX,INDEX_CHECK_1,BAD_UIDS,UID_COUNT,UID_REPAIR,INDEX_CHECK_2
0,1,1,Cole,Toad,Sherbet Land,4,4,2021-09-20,0,1,1,False,4.0,1.0,1.0
1,1,1,Connor,Yoshi,Sherbet Land,2,4,2021-09-20,0,1,0,False,4.0,1.0,0.0
2,1,1,Cooper,Peach,Sherbet Land,1,4,2021-09-20,0,1,0,False,4.0,1.0,0.0
3,1,1,Triston,Bowser,Sherbet Land,3,4,2021-09-20,0,1,0,False,4.0,1.0,0.0
4,2,1,Cole,Toad,Kalimari Desert,4,4,2021-09-20,0,2,1,False,4.0,2.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21663,6295,737,Cole,Yoshi,Bowser's Castle,2,3,2024-07-17,11,294,0,False,,,
21664,6295,737,Regan,Peach,Bowser's Castle,3,3,2024-07-17,11,294,0,False,,,
21665,6296,737,Cooper,Peach,Koopa Troopa Beach,1,3,2024-07-17,11,295,1,False,,,
21666,6296,737,Regan,Yoshi,Koopa Troopa Beach,2,3,2024-07-17,11,295,0,False,,,


In [8]:
# Load only the core columns of the `data_main` sheet and normalise DATE
# (parsed as ISO 8601) back to a plain string.
df = load_data_pd(
    sheet_name="data_main",
    sheet_id=os.environ["SHEET_ID"],
    usecols=[
        "UID", "SUID", "NAME", "CHARACTER", "MAP",
        "PLACE", "PLAYERS", "DATE", "SEASON",
    ],
).assign(
    DATE=lambda frame: pd.to_datetime(frame["DATE"], format="ISO8601").astype(str)
)

df.tail()

Unnamed: 0,UID,SUID,NAME,CHARACTER,MAP,PLACE,PLAYERS,DATE,SEASON
21663,6295,737,Cole,Yoshi,Bowser's Castle,2,3,2024-07-17,11
21664,6295,737,Regan,Peach,Bowser's Castle,3,3,2024-07-17,11
21665,6296,737,Cooper,Peach,Koopa Troopa Beach,1,3,2024-07-17,11
21666,6296,737,Regan,Yoshi,Koopa Troopa Beach,2,3,2024-07-17,11
21667,6296,737,Garrett,Toad,Koopa Troopa Beach,3,3,2024-07-17,11


In [None]:
# TODO: unfinished filter. The original statement `ddf = df[df[]]` has an empty
# subscript, which is a SyntaxError, so this cell could never run and broke
# "Restart & Run All". Supply the boolean mask before re-enabling it, e.g.
#     ddf = df[df["<column>"] == <value>]

In [10]:
# Pass the three frames loaded above to the mktools helper; the helper also
# prints a status line as a side effect (see the cell output).
wide = form_data_wide_to_long(
    form_df=form_df,
    mk_data_df=df,
    form_data_transform_df=form_transformed_df,
)

Season 11 Games Remaining: 250


In [11]:
# Display the frame returned by form_data_wide_to_long.
wide

Unnamed: 0,UID,SUID,NAME,CHARACTER,MAP,PLACE,PLAYERS,DATE,SEASON
0,6296,737,Cooper,Peach,Koopa Troopa Beach,1,3,2024-07-17 19:15:48,11
1,6296,737,Regan,Yoshi,Koopa Troopa Beach,2,3,2024-07-17 19:15:48,11
2,6296,737,Garrett,Toad,Koopa Troopa Beach,3,3,2024-07-17 19:15:48,11
0,6297,737,Cooper,Luigi,Choco Mountain,1,4,2024-07-17 20:01:53,11
1,6297,737,Connor,Toad,Choco Mountain,2,4,2024-07-17 20:01:53,11
2,6297,737,Blake,Peach,Choco Mountain,3,4,2024-07-17 20:01:53,11
3,6297,737,Triston,Yoshi,Choco Mountain,4,4,2024-07-17 20:01:53,11
4,6298,737,Cole,Toad,D.K.'s Jungle,1,4,2024-07-17 20:15:00,11
5,6298,737,Regan,Yoshi,D.K.'s Jungle,2,4,2024-07-17 20:15:00,11
6,6298,737,Cooper,Mario,D.K.'s Jungle,3,4,2024-07-17 20:15:00,11


In [None]:
# Peek at the last five rows of the main data frame.
df.tail(5)

In [None]:
# --- Disabled draft (every line below is commented out) ----------------------
# What the draft does: takes the last UID/SUID from `df`, assigns sequential
# UIDs to the rows of `form_df`, bumps SUID whenever NEW_SESSION == "YES",
# splits `form_df` by PLAYERS (2/3/4) and renames the 3-player columns to
# NAME_n / CHARACTER_n.
# NOTE(review): this looks like an inline precursor of
# mktools.form_data.form_data_wide_to_long, which the notebook calls instead —
# confirm, then delete this cell.
# NOTE(review): the rename map lists "PLAYERS_3 [4TH]" / "CHARACTERS_3 [4TH]",
# which are not among the form_df columns displayed earlier in this notebook.
# last_uid = df.tail(1)["UID"].values[0]
# last_suid = df.tail(1)["SUID"].values[0]

# l = []

# for s in form_df["NEW_SESSION"]:
#     if s == "NO":
#         l.append(last_suid)
#     if s == "YES":
#         last_suid += 1
#         l.append(last_suid)

# form_df["UID"] = range(last_uid + 1, (last_uid + form_df.shape[0] + 1), 1)

# form_df["SUID"] = l


# two_p_df = form_df[form_df["PLAYERS"] == 2][
#     [
#         "UID",
#         "SUID",
#         "Timestamp",
#         "NEW_SESSION",
#         "MAP",
#         "PLAYERS",
#         *[x for x in form_df.columns if x.__contains__("_2")],
#     ]
# ]

# three_p_df = form_df[form_df["PLAYERS"] == 3][
#     [
#         "UID",
#         "SUID",
#         "Timestamp",
#         "NEW_SESSION",
#         "MAP",
#         "PLAYERS",
#         *[x for x in form_df.columns if x.__contains__("_3")],
#     ]
# ]

# four_p_df = form_df[form_df["PLAYERS"] == 4][
#     [
#         "UID",
#         "SUID",
#         "Timestamp",
#         "NEW_SESSION",
#         "MAP",
#         "PLAYERS",
#         *[x for x in form_df.columns if x.__contains__("_4")],
#     ]
# ]

# t = three_p_df.rename(
#     columns={
#         "PLAYERS_3 [1ST]": "NAME_1",
#         "PLAYERS_3 [2ND]": "NAME_2",
#         "PLAYERS_3 [3RD]": "NAME_3",
#         "PLAYERS_3 [4TH]": "NAME_4",
#         "CHARACTERS_3 [1ST]": "CHARACTER_1",
#         "CHARACTERS_3 [2ND]": "CHARACTER_2",
#         "CHARACTERS_3 [3RD]": "CHARACTER_3",
#         "CHARACTERS_3 [4TH]": "CHARACTER_4",
#     }
# )

# t

In [None]:
# --- Disabled draft (every line below is commented out) ----------------------
# What the draft does: reshapes `t` (which is only defined in commented-out
# code) with pd.wide_to_long, using the NAME_n / CHARACTER_n stubs and putting
# the suffix after "_" into a PLACE column.
# NOTE(review): presumably superseded by form_data_wide_to_long — confirm,
# then delete this cell.
# pd.wide_to_long(
#     df=t,
#     stubnames=["CHARACTER", "NAME"],
#     i=["UID", "SUID", "Timestamp", "MAP"],
#     j="PLACE",
#     sep="_",
# ).reset_index()