In [1]:
import os
import numpy as np
import pandas as pd
from dotenv import load_dotenv
from multielo import MultiElo, Tracker
from mktools.get_data import load_data_pd
from mktools.validate_data import validate_bad_uids
from mktools.form_data import fill_new_session
import plotly.express as px
from alive_progress import alive_it
from bs4 import BeautifulSoup
from typing import Literal

# Load variables from the .env file into the process environment.
# Later cells read os.environ["SHEET_ID"] — presumably that value is supplied
# by the .env file; confirm.
load_dotenv()

# LAST_FORM_DATA_DATE = pd.Timestamp('1/27/2025 23:40:20').tz_localize("US/Eastern").tz_convert("UTC")

# LAST_FORM_DATA_DATE

True

In [2]:
# Reload the CSV that the final cell of this notebook writes. The two
# session-window columns are dropped on load; TIMESTAMP is parsed as UTC and
# then expressed in US/Eastern.
migrated_data = (
    pd.read_csv(
        r"C:\Users\Cooper\sandbox\mkstream\form_data_migration\form_data_valid.csv"
    )
    .drop(columns=["SUID_WINDOW_START", "SUID_WINDOW_END"])
    .assign(
        TIMESTAMP=lambda frame: pd.to_datetime(
            frame["TIMESTAMP"], utc=True
        ).dt.tz_convert("US/Eastern")
    )
)

In [3]:
# Scratch check: a naive timestamp localized to US/Eastern. The result is only
# displayed, not stored.
pd.Timestamp('2024-07-09 08:40:00').tz_localize("US/Eastern")

Timestamp('2024-07-09 08:40:00-0400', tz='US/Eastern')

In [4]:
# Scratch check: same localization for a winter date. The result is only
# displayed, not stored.
pd.Timestamp("2025-01-29 03:58:15").tz_localize("US/Eastern")

Timestamp('2025-01-29 03:58:15-0500', tz='US/Eastern')

In [5]:
# # Temporarily remove bad data; this filter can be removed later
# migrated_data = migrated_data[
#     ~migrated_data["TIMESTAMP"].isin(
#         [
#             pd.Timestamp("2024-07-09 08:40:00").tz_localize("US/Eastern"),
#             pd.Timestamp("2024-07-09 08:35:00").tz_localize("US/Eastern"),
#         ]
#     )
# ]

In [6]:
# Newest timestamp already present in the migrated data. Later cells keep only
# rows strictly newer than this cutoff.
LAST_FORM_DATA_DATE = migrated_data["TIMESTAMP"].max()

LAST_FORM_DATA_DATE

Timestamp('2025-02-28 20:31:11-0500', tz='US/Eastern')

In [7]:
# Pull the raw form responses from the "form_data" sheet.
form_df = load_data_pd(sheet_name="form_data", sheet_id=os.environ["SHEET_ID"])

# Discard every column whose header contains "Unnamed" or "Score".
form_df = form_df.drop(
    columns=[
        header
        for header in form_df.columns
        if "Unnamed" in header or "Score" in header
    ]
)

# Parse the timestamps and label them as US/Eastern wall-clock time.
# NOTE(review): tz_localize runs with its defaults; per the pandas docs an
# ambiguous wall-clock time (DST fall-back hour) raises by default — confirm
# that is acceptable for late-night submissions.
form_df["Timestamp"] = pd.to_datetime(form_df["Timestamp"]).dt.tz_localize("US/Eastern")

# Only bracketed headers are normalized: spaces become underscores and the
# brackets are removed. All other headers are left untouched.
form_df.columns = [
    (
        header.replace(" ", "_").replace("[", "").replace("]", "")
        if "[" in header
        else header
    )
    for header in form_df.columns
]

In [8]:
# Keep only the submissions recorded strictly after the newest migrated row,
# renumbered from zero.
new_df = form_df.loc[form_df["Timestamp"] > LAST_FORM_DATA_DATE].reset_index(
    drop=True
)

new_df

Unnamed: 0,Timestamp,NEW_SESSION,MAP,PLAYERS,PLAYERS_2_1ST,PLAYERS_2_2ND,CHARACTERS_2_1ST,CHARACTERS_2_2ND,PLAYERS_3_1ST,PLAYERS_3_2ND,...,CHARACTERS_3_2ND,CHARACTERS_3_3RD,PLAYERS_4_1ST,PLAYERS_4_2ND,PLAYERS_4_3RD,PLAYERS_4_4TH,CHARACTERS_4_1ST,CHARACTERS_4_2ND,CHARACTERS_4_3RD,CHARACTERS_4_4TH
0,2025-02-28 20:59:49-05:00,NO,Royal Raceway,4,,,,,,,...,,,Cooper,Cole,Connor,Garrett,Bowser,Yoshi,Toad,Peach


In [9]:
# Stop the run here when the form holds nothing newer than the migrated data.
if len(new_df.index) == 0:
    raise IndexError("No new records to update")

In [10]:
def transform_form_data(
    df: pd.DataFrame, game_type_replace_string: Literal["_2_", "_3_", "_4_"]
) -> pd.DataFrame:
    """Collapse one game type's form columns onto the shared column names.

    Columns that are empty for every row are dropped. Any remaining column
    whose name contains ``game_type_replace_string`` has that marker replaced
    by a single underscore (``PLAYERS_4_1ST`` -> ``PLAYERS_1ST``); every other
    column name is upper-cased. The ``TIMESTAMP`` column is then passed
    through ``pd.to_datetime``.

    Args:
        df: Form rows to reshape (the caller passes the rows of one player
            count at a time).
        game_type_replace_string: Marker that identifies the columns
            belonging to this game type.

    Returns:
        The reshaped frame with a fresh 0..n-1 index, or an empty
        ``pd.DataFrame()`` (no rows, no columns) when ``df`` has no rows.
    """
    if df.shape[0] == 0:
        return pd.DataFrame()

    def _shared_name(column: str) -> str:
        # Game-type columns lose their marker; everything else is upper-cased.
        if game_type_replace_string in column:
            return column.replace(game_type_replace_string, "_")
        return column.upper()

    populated = df.dropna(axis=1, how="all").reset_index(drop=True)
    populated.columns = [_shared_name(column) for column in populated.columns]
    populated["TIMESTAMP"] = pd.to_datetime(populated["TIMESTAMP"])

    return populated

In [11]:
# Split the new submissions by player count so that each group's
# game-type-specific columns can be mapped onto the shared column names.
dfs = []

for game_type in [2, 3, 4]:
    tdf = new_df[new_df["PLAYERS"] == game_type].copy().reset_index(drop=True)

    dfs.append(tdf)

two_p = dfs[0]
three_p = dfs[1]
four_p = dfs[2]

two_p_out = transform_form_data(df=two_p, game_type_replace_string="_2_")
three_p_out = transform_form_data(df=three_p, game_type_replace_string="_3_")
four_p_out = transform_form_data(df=four_p, game_type_replace_string="_4_")

# Column layout expected by the rest of the notebook.
cat_columns = [
    "TIMESTAMP",
    "NEW_SESSION",
    "MAP",
    "PLAYERS",
    "PLAYERS_1ST",
    "PLAYERS_2ND",
    "PLAYERS_3RD",
    "PLAYERS_4TH",
    "CHARACTERS_1ST",
    "CHARACTERS_2ND",
    "CHARACTERS_3RD",
    "CHARACTERS_4TH",
]

cat_df = (
    pd.concat([two_p_out, three_p_out, four_p_out])
    .sort_values(by="TIMESTAMP")
    .reset_index(drop=True)
    # reindex instead of [[...]]: when this batch has no 4-player (or no
    # 3-player) game, the 3RD/4TH columns never appear in the concat, and a
    # plain column selection would raise KeyError. reindex creates the
    # missing columns filled with NaN and otherwise selects the same columns
    # in the same order.
    .reindex(columns=cat_columns)
)

cat_df

Unnamed: 0,TIMESTAMP,NEW_SESSION,MAP,PLAYERS,PLAYERS_1ST,PLAYERS_2ND,PLAYERS_3RD,PLAYERS_4TH,CHARACTERS_1ST,CHARACTERS_2ND,CHARACTERS_3RD,CHARACTERS_4TH
0,2025-02-28 20:59:49-05:00,NO,Royal Raceway,4,Cooper,Cole,Connor,Garrett,Bowser,Yoshi,Toad,Peach


In [12]:
# Fetch only the DATE and SEASON columns of the "data_main" sheet.
season_initial = load_data_pd(
    sheet_name="data_main",
    sheet_id=os.environ["SHEET_ID"],
    usecols=["DATE", "SEASON"],
)

# Parse DATE and label it as US/Eastern wall-clock time so it can be compared
# with the tz-aware cutoff below.
season_initial["DATE"] = pd.to_datetime(season_initial["DATE"]).dt.tz_localize(
    "US/Eastern"
)

# One row per timestamp newer than the cutoff, carrying the first SEASON value
# found for that timestamp. The key is exposed as TIMESTAMP.
season_gb = (
    season_initial.loc[lambda frame: frame["DATE"] > LAST_FORM_DATA_DATE]
    .groupby(["DATE"])[["SEASON"]]
    .first()
    .rename_axis("TIMESTAMP")
    .reset_index()
)

season_gb.tail()

Unnamed: 0,TIMESTAMP,SEASON
0,2025-02-28 20:59:49-05:00,16


In [13]:
# Show the complete per-timestamp season lookup (not just its tail).
season_gb

Unnamed: 0,TIMESTAMP,SEASON
0,2025-02-28 20:59:49-05:00,16


In [14]:
# Attach the season to every new game. validate="1:1" makes pandas raise if a
# TIMESTAMP repeats on either side of the join.
new_data_ready = cat_df.merge(
    season_gb, on="TIMESTAMP", how="inner", validate="1:1"
)

# An inner join silently discards unmatched games, so confirm none went missing.
assert len(new_data_ready) == len(cat_df)

In [15]:
# Re-display the new games as they were before the season merge.
cat_df

Unnamed: 0,TIMESTAMP,NEW_SESSION,MAP,PLAYERS,PLAYERS_1ST,PLAYERS_2ND,PLAYERS_3RD,PLAYERS_4TH,CHARACTERS_1ST,CHARACTERS_2ND,CHARACTERS_3RD,CHARACTERS_4TH
0,2025-02-28 20:59:49-05:00,NO,Royal Raceway,4,Cooper,Cole,Connor,Garrett,Bowser,Yoshi,Toad,Peach


In [16]:
# Merged result: the new games with SEASON attached.
new_data_ready

Unnamed: 0,TIMESTAMP,NEW_SESSION,MAP,PLAYERS,PLAYERS_1ST,PLAYERS_2ND,PLAYERS_3RD,PLAYERS_4TH,CHARACTERS_1ST,CHARACTERS_2ND,CHARACTERS_3RD,CHARACTERS_4TH,SEASON
0,2025-02-28 20:59:49-05:00,NO,Royal Raceway,4,Cooper,Cole,Connor,Garrett,Bowser,Yoshi,Toad,Peach,16


In [17]:
# Last rows of the existing history, for comparison with the new rows.
migrated_data.tail()

Unnamed: 0,TIMESTAMP,NEW_SESSION,SUID,MAP,PLAYERS,PLAYERS_1ST,PLAYERS_2ND,PLAYERS_3RD,PLAYERS_4TH,CHARACTERS_1ST,CHARACTERS_2ND,CHARACTERS_3RD,CHARACTERS_4TH,SEASON
2991,2025-02-28 19:20:29-05:00,NO,253,Sherbet Land,4,Cole,Connor,Garrett,Antonio,Bowser,Yoshi,Peach,Toad,16
2992,2025-02-28 19:34:51-05:00,NO,253,Yoshi Valley,4,Cooper,Cole,Antonio,Garrett,Mario,Peach,Yoshi,Toad,16
2993,2025-02-28 19:40:52-05:00,NO,253,Bowser's Castle,4,Antonio,Blake,Colton,Connor,Bowser,Yoshi,Peach,Toad,16
2994,2025-02-28 19:59:42-05:00,NO,253,D.K.'s Jungle,4,Cooper,Blake,Colton,Cole,Toad,Yoshi,Bowser,Peach,16
2995,2025-02-28 20:31:11-05:00,NO,253,Kalimari Desert,4,Blake,Connor,Colton,Cooper,Toad,Yoshi,Peach,Luigi,16


In [18]:
# First rows of the new data that is about to be appended.
new_data_ready.head()

Unnamed: 0,TIMESTAMP,NEW_SESSION,MAP,PLAYERS,PLAYERS_1ST,PLAYERS_2ND,PLAYERS_3RD,PLAYERS_4TH,CHARACTERS_1ST,CHARACTERS_2ND,CHARACTERS_3RD,CHARACTERS_4TH,SEASON
0,2025-02-28 20:59:49-05:00,NO,Royal Raceway,4,Cooper,Cole,Connor,Garrett,Bowser,Yoshi,Toad,Peach,16


In [19]:
# Append the new games to the history and put everything back in
# chronological order with a fresh 0..n-1 index. The new rows carry no SUID
# yet, so that column holds NaN for them after the concat.
data_concat = pd.concat([migrated_data, new_data_ready]).sort_values(
    by="TIMESTAMP", ignore_index=True
)

data_concat

Unnamed: 0,TIMESTAMP,NEW_SESSION,SUID,MAP,PLAYERS,PLAYERS_1ST,PLAYERS_2ND,PLAYERS_3RD,PLAYERS_4TH,CHARACTERS_1ST,CHARACTERS_2ND,CHARACTERS_3RD,CHARACTERS_4TH,SEASON
0,2024-06-21 08:00:00-04:00,YES,1.0,Wario Stadium,4,Blake,Cooper,Matt,Garrett,Toad,Bowser,Yoshi,Peach,11
1,2024-06-21 08:05:00-04:00,NO,1.0,D.K.'s Jungle,4,Cooper,Blake,Matt,Domingo,Mario,Peach,Yoshi,Toad,11
2,2024-06-21 08:10:00-04:00,NO,1.0,Sherbet Land,4,Cooper,Blake,Colton,Garrett,Yoshi,Toad,Bowser,Peach,11
3,2024-06-21 08:15:00-04:00,NO,1.0,Koopa Troopa Beach,4,Blake,Cooper,Matt,Cole,Yoshi,Toad,Luigi,Peach,11
4,2024-06-21 08:20:00-04:00,NO,1.0,Yoshi Valley,4,Cooper,Blake,Matt,Cole,Toad,Peach,Luigi,Yoshi,11
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2992,2025-02-28 19:34:51-05:00,NO,253.0,Yoshi Valley,4,Cooper,Cole,Antonio,Garrett,Mario,Peach,Yoshi,Toad,16
2993,2025-02-28 19:40:52-05:00,NO,253.0,Bowser's Castle,4,Antonio,Blake,Colton,Connor,Bowser,Yoshi,Peach,Toad,16
2994,2025-02-28 19:59:42-05:00,NO,253.0,D.K.'s Jungle,4,Cooper,Blake,Colton,Cole,Toad,Yoshi,Bowser,Peach,16
2995,2025-02-28 20:31:11-05:00,NO,253.0,Kalimari Desert,4,Blake,Connor,Colton,Cooper,Toad,Yoshi,Peach,Luigi,16


In [20]:
# No two games may share a timestamp, and the rows must already be in
# chronological order.
assert not data_concat["TIMESTAMP"].duplicated().any()
assert data_concat["TIMESTAMP"].is_monotonic_increasing

In [21]:
# fill_new_session is imported from mktools.form_data; its internals are not
# visible in this notebook.
# NOTE(review): judging by the displayed result it adds window_start /
# window_end columns — confirm against the helper's source.
data_concat_filled = fill_new_session(
    df=data_concat, timestamp_column_name="TIMESTAMP", drop_window_start_column=False
)

# # Convert back to EST for Postgres
# data_concat_filled["TIMESTAMP"] = data_concat_filled["TIMESTAMP"].dt.tz_convert(
#     "US/Eastern"
# )

data_concat_filled

Unnamed: 0,TIMESTAMP,NEW_SESSION,SUID,MAP,PLAYERS,PLAYERS_1ST,PLAYERS_2ND,PLAYERS_3RD,PLAYERS_4TH,CHARACTERS_1ST,CHARACTERS_2ND,CHARACTERS_3RD,CHARACTERS_4TH,SEASON,window_start,window_end
0,2024-06-21 08:00:00-04:00,YES,1.0,Wario Stadium,4,Blake,Cooper,Matt,Garrett,Toad,Bowser,Yoshi,Peach,11,2024-06-21 07:00:00-04:00,2024-06-22 06:59:59.999999999-04:00
1,2024-06-21 08:05:00-04:00,NO,1.0,D.K.'s Jungle,4,Cooper,Blake,Matt,Domingo,Mario,Peach,Yoshi,Toad,11,2024-06-21 07:00:00-04:00,2024-06-22 06:59:59.999999999-04:00
2,2024-06-21 08:10:00-04:00,NO,1.0,Sherbet Land,4,Cooper,Blake,Colton,Garrett,Yoshi,Toad,Bowser,Peach,11,2024-06-21 07:00:00-04:00,2024-06-22 06:59:59.999999999-04:00
3,2024-06-21 08:15:00-04:00,NO,1.0,Koopa Troopa Beach,4,Blake,Cooper,Matt,Cole,Yoshi,Toad,Luigi,Peach,11,2024-06-21 07:00:00-04:00,2024-06-22 06:59:59.999999999-04:00
4,2024-06-21 08:20:00-04:00,NO,1.0,Yoshi Valley,4,Cooper,Blake,Matt,Cole,Toad,Peach,Luigi,Yoshi,11,2024-06-21 07:00:00-04:00,2024-06-22 06:59:59.999999999-04:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2992,2025-02-28 19:34:51-05:00,NO,253.0,Yoshi Valley,4,Cooper,Cole,Antonio,Garrett,Mario,Peach,Yoshi,Toad,16,2025-02-28 07:00:00-05:00,2025-03-01 06:59:59.999999999-05:00
2993,2025-02-28 19:40:52-05:00,NO,253.0,Bowser's Castle,4,Antonio,Blake,Colton,Connor,Bowser,Yoshi,Peach,Toad,16,2025-02-28 07:00:00-05:00,2025-03-01 06:59:59.999999999-05:00
2994,2025-02-28 19:59:42-05:00,NO,253.0,D.K.'s Jungle,4,Cooper,Blake,Colton,Cole,Toad,Yoshi,Bowser,Peach,16,2025-02-28 07:00:00-05:00,2025-03-01 06:59:59.999999999-05:00
2995,2025-02-28 20:31:11-05:00,NO,253.0,Kalimari Desert,4,Blake,Connor,Colton,Cooper,Toad,Yoshi,Peach,Luigi,16,2025-02-28 07:00:00-05:00,2025-03-01 06:59:59.999999999-05:00


In [22]:
# Flag each session-opening row with 1 (0 otherwise). The running total of
# those flags then numbers the sessions, overwriting the existing SUID values.
new_session_df = data_concat_filled.assign(
    temp_session=lambda frame: np.where(frame["NEW_SESSION"] == "YES", 1, 0),
    SUID=lambda frame: frame["temp_session"].cumsum(),
)

In [23]:
# Inspect the frame after SUID was rebuilt from the temp_session flags.
new_session_df

Unnamed: 0,TIMESTAMP,NEW_SESSION,SUID,MAP,PLAYERS,PLAYERS_1ST,PLAYERS_2ND,PLAYERS_3RD,PLAYERS_4TH,CHARACTERS_1ST,CHARACTERS_2ND,CHARACTERS_3RD,CHARACTERS_4TH,SEASON,window_start,window_end,temp_session
0,2024-06-21 08:00:00-04:00,YES,1,Wario Stadium,4,Blake,Cooper,Matt,Garrett,Toad,Bowser,Yoshi,Peach,11,2024-06-21 07:00:00-04:00,2024-06-22 06:59:59.999999999-04:00,1
1,2024-06-21 08:05:00-04:00,NO,1,D.K.'s Jungle,4,Cooper,Blake,Matt,Domingo,Mario,Peach,Yoshi,Toad,11,2024-06-21 07:00:00-04:00,2024-06-22 06:59:59.999999999-04:00,0
2,2024-06-21 08:10:00-04:00,NO,1,Sherbet Land,4,Cooper,Blake,Colton,Garrett,Yoshi,Toad,Bowser,Peach,11,2024-06-21 07:00:00-04:00,2024-06-22 06:59:59.999999999-04:00,0
3,2024-06-21 08:15:00-04:00,NO,1,Koopa Troopa Beach,4,Blake,Cooper,Matt,Cole,Yoshi,Toad,Luigi,Peach,11,2024-06-21 07:00:00-04:00,2024-06-22 06:59:59.999999999-04:00,0
4,2024-06-21 08:20:00-04:00,NO,1,Yoshi Valley,4,Cooper,Blake,Matt,Cole,Toad,Peach,Luigi,Yoshi,11,2024-06-21 07:00:00-04:00,2024-06-22 06:59:59.999999999-04:00,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2992,2025-02-28 19:34:51-05:00,NO,253,Yoshi Valley,4,Cooper,Cole,Antonio,Garrett,Mario,Peach,Yoshi,Toad,16,2025-02-28 07:00:00-05:00,2025-03-01 06:59:59.999999999-05:00,0
2993,2025-02-28 19:40:52-05:00,NO,253,Bowser's Castle,4,Antonio,Blake,Colton,Connor,Bowser,Yoshi,Peach,Toad,16,2025-02-28 07:00:00-05:00,2025-03-01 06:59:59.999999999-05:00,0
2994,2025-02-28 19:59:42-05:00,NO,253,D.K.'s Jungle,4,Cooper,Blake,Colton,Cole,Toad,Yoshi,Bowser,Peach,16,2025-02-28 07:00:00-05:00,2025-03-01 06:59:59.999999999-05:00,0
2995,2025-02-28 20:31:11-05:00,NO,253,Kalimari Desert,4,Blake,Connor,Colton,Cooper,Toad,Yoshi,Peach,Luigi,16,2025-02-28 07:00:00-05:00,2025-03-01 06:59:59.999999999-05:00,0


In [24]:
temp_df = new_session_df.copy()

# The row flagged NEW_SESSION == "YES" opens a session. Its window_start is
# the value every other game with the same SUID should share.
opens_session = temp_df["NEW_SESSION"] == "YES"

# Fail with a message that names the sessions lacking an opening row, rather
# than an opaque lookup error.
has_opener = opens_session.groupby(temp_df["SUID"]).transform("any")
if not has_opener.all():
    orphaned = temp_df.loc[~has_opener, "SUID"].unique().tolist()
    raise KeyError(f"No NEW_SESSION == 'YES' row for SUID(s): {orphaned}")

# Blank out window_start on non-opening rows, then broadcast each session's
# first remaining value to all of its rows. This is done in one grouped pass
# instead of re-filtering the whole frame once per session.
session_window_start = (
    temp_df["window_start"]
    .where(opens_session)
    .groupby(temp_df["SUID"])
    .transform("first")
)

temp_out = (
    temp_df.assign(
        window_start=session_window_start,
        # The window closes one nanosecond before the 24-hour mark, so
        # consecutive windows never overlap.
        window_end=session_window_start
        + pd.Timedelta(days=1)
        - pd.Timedelta(nanoseconds=1),
    )
    .sort_values(by=["TIMESTAMP"])
    .reset_index(drop=True)
)


temp_out.tail(20)

Unnamed: 0,TIMESTAMP,NEW_SESSION,SUID,MAP,PLAYERS,PLAYERS_1ST,PLAYERS_2ND,PLAYERS_3RD,PLAYERS_4TH,CHARACTERS_1ST,CHARACTERS_2ND,CHARACTERS_3RD,CHARACTERS_4TH,SEASON,window_start,window_end,temp_session
2977,2025-02-27 19:07:45-05:00,NO,252,Bowser's Castle,4,Cole,Luke,Cooper,Colton,Yoshi,Peach,Toad,Bowser,16,2025-02-27 07:00:00-05:00,2025-02-28 06:59:59.999999999-05:00,0
2978,2025-02-27 19:30:53-05:00,NO,252,Frappe Snowland,4,Cole,Blake,Domingo,Graber,Peach,Yoshi,Toad,Bowser,16,2025-02-27 07:00:00-05:00,2025-02-28 06:59:59.999999999-05:00,0
2979,2025-02-27 20:18:46-05:00,NO,252,Wario Stadium,4,Cooper,Colton,Antonio,Cole,Luigi,Peach,Toad,Yoshi,16,2025-02-27 07:00:00-05:00,2025-02-28 06:59:59.999999999-05:00,0
2980,2025-02-27 21:38:39-05:00,NO,252,Choco Mountain,4,Cooper,Antonio,Colton,Konnor,D.K.,Peach,Yoshi,Toad,16,2025-02-27 07:00:00-05:00,2025-02-28 06:59:59.999999999-05:00,0
2981,2025-02-27 23:15:22-05:00,NO,252,Banshee Boardwalk,3,Antonio,Colton,Cooper,,Peach,Mario,Yoshi,,16,2025-02-27 07:00:00-05:00,2025-02-28 06:59:59.999999999-05:00,0
2982,2025-02-28 00:27:17-05:00,NO,252,Yoshi Valley,4,Cooper,Connor,Cole,Domingo,Yoshi,Toad,Peach,Bowser,16,2025-02-27 07:00:00-05:00,2025-02-28 06:59:59.999999999-05:00,0
2983,2025-02-28 00:46:18-05:00,NO,252,Frappe Snowland,4,Antonio,Domingo,Blake,Randy,Peach,Toad,Bowser,Yoshi,16,2025-02-27 07:00:00-05:00,2025-02-28 06:59:59.999999999-05:00,0
2984,2025-02-28 01:13:24-05:00,NO,252,Toad's Turnpike,4,Regan,Cole,Cooper,Blake,Toad,Peach,Mario,Yoshi,16,2025-02-27 07:00:00-05:00,2025-02-28 06:59:59.999999999-05:00,0
2985,2025-02-28 01:30:23-05:00,NO,252,Frappe Snowland,4,Regan,Antonio,Domingo,Blake,Yoshi,Peach,Bowser,Toad,16,2025-02-27 07:00:00-05:00,2025-02-28 06:59:59.999999999-05:00,0
2986,2025-02-28 01:59:10-05:00,NO,252,Kalimari Desert,4,Antonio,Cooper,Regan,Domingo,Toad,Mario,Peach,Yoshi,16,2025-02-27 07:00:00-05:00,2025-02-28 06:59:59.999999999-05:00,0


In [25]:
# Final export layout: the window columns take their SUID_-prefixed names, and
# only the columns listed below are kept (temp_session is not among them).
out_df = (
    temp_out.rename(
        columns={
            "window_start": "SUID_WINDOW_START",
            "window_end": "SUID_WINDOW_END",
        }
    )[
        [
            "TIMESTAMP",
            "NEW_SESSION",
            "SUID",
            "MAP",
            "PLAYERS",
            "PLAYERS_1ST",
            "PLAYERS_2ND",
            "PLAYERS_3RD",
            "PLAYERS_4TH",
            "CHARACTERS_1ST",
            "CHARACTERS_2ND",
            "CHARACTERS_3RD",
            "CHARACTERS_4TH",
            "SEASON",
            "SUID_WINDOW_START",
            "SUID_WINDOW_END",
        ]
    ]
    .copy()
)

out_df

Unnamed: 0,TIMESTAMP,NEW_SESSION,SUID,MAP,PLAYERS,PLAYERS_1ST,PLAYERS_2ND,PLAYERS_3RD,PLAYERS_4TH,CHARACTERS_1ST,CHARACTERS_2ND,CHARACTERS_3RD,CHARACTERS_4TH,SEASON,SUID_WINDOW_START,SUID_WINDOW_END
0,2024-06-21 08:00:00-04:00,YES,1,Wario Stadium,4,Blake,Cooper,Matt,Garrett,Toad,Bowser,Yoshi,Peach,11,2024-06-21 07:00:00-04:00,2024-06-22 06:59:59.999999999-04:00
1,2024-06-21 08:05:00-04:00,NO,1,D.K.'s Jungle,4,Cooper,Blake,Matt,Domingo,Mario,Peach,Yoshi,Toad,11,2024-06-21 07:00:00-04:00,2024-06-22 06:59:59.999999999-04:00
2,2024-06-21 08:10:00-04:00,NO,1,Sherbet Land,4,Cooper,Blake,Colton,Garrett,Yoshi,Toad,Bowser,Peach,11,2024-06-21 07:00:00-04:00,2024-06-22 06:59:59.999999999-04:00
3,2024-06-21 08:15:00-04:00,NO,1,Koopa Troopa Beach,4,Blake,Cooper,Matt,Cole,Yoshi,Toad,Luigi,Peach,11,2024-06-21 07:00:00-04:00,2024-06-22 06:59:59.999999999-04:00
4,2024-06-21 08:20:00-04:00,NO,1,Yoshi Valley,4,Cooper,Blake,Matt,Cole,Toad,Peach,Luigi,Yoshi,11,2024-06-21 07:00:00-04:00,2024-06-22 06:59:59.999999999-04:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2992,2025-02-28 19:34:51-05:00,NO,253,Yoshi Valley,4,Cooper,Cole,Antonio,Garrett,Mario,Peach,Yoshi,Toad,16,2025-02-28 07:00:00-05:00,2025-03-01 06:59:59.999999999-05:00
2993,2025-02-28 19:40:52-05:00,NO,253,Bowser's Castle,4,Antonio,Blake,Colton,Connor,Bowser,Yoshi,Peach,Toad,16,2025-02-28 07:00:00-05:00,2025-03-01 06:59:59.999999999-05:00
2994,2025-02-28 19:59:42-05:00,NO,253,D.K.'s Jungle,4,Cooper,Blake,Colton,Cole,Toad,Yoshi,Bowser,Peach,16,2025-02-28 07:00:00-05:00,2025-03-01 06:59:59.999999999-05:00
2995,2025-02-28 20:31:11-05:00,NO,253,Kalimari Desert,4,Blake,Connor,Colton,Cooper,Toad,Yoshi,Peach,Luigi,16,2025-02-28 07:00:00-05:00,2025-03-01 06:59:59.999999999-05:00


In [28]:
# Inspect the last 20 rows of the export table.
out_df.tail(20)

Unnamed: 0,TIMESTAMP,NEW_SESSION,SUID,MAP,PLAYERS,PLAYERS_1ST,PLAYERS_2ND,PLAYERS_3RD,PLAYERS_4TH,CHARACTERS_1ST,CHARACTERS_2ND,CHARACTERS_3RD,CHARACTERS_4TH,SEASON,SUID_WINDOW_START,SUID_WINDOW_END
2977,2025-02-27 19:07:45-05:00,NO,252,Bowser's Castle,4,Cole,Luke,Cooper,Colton,Yoshi,Peach,Toad,Bowser,16,2025-02-27 07:00:00-05:00,2025-02-28 06:59:59.999999999-05:00
2978,2025-02-27 19:30:53-05:00,NO,252,Frappe Snowland,4,Cole,Blake,Domingo,Graber,Peach,Yoshi,Toad,Bowser,16,2025-02-27 07:00:00-05:00,2025-02-28 06:59:59.999999999-05:00
2979,2025-02-27 20:18:46-05:00,NO,252,Wario Stadium,4,Cooper,Colton,Antonio,Cole,Luigi,Peach,Toad,Yoshi,16,2025-02-27 07:00:00-05:00,2025-02-28 06:59:59.999999999-05:00
2980,2025-02-27 21:38:39-05:00,NO,252,Choco Mountain,4,Cooper,Antonio,Colton,Konnor,D.K.,Peach,Yoshi,Toad,16,2025-02-27 07:00:00-05:00,2025-02-28 06:59:59.999999999-05:00
2981,2025-02-27 23:15:22-05:00,NO,252,Banshee Boardwalk,3,Antonio,Colton,Cooper,,Peach,Mario,Yoshi,,16,2025-02-27 07:00:00-05:00,2025-02-28 06:59:59.999999999-05:00
2982,2025-02-28 00:27:17-05:00,NO,252,Yoshi Valley,4,Cooper,Connor,Cole,Domingo,Yoshi,Toad,Peach,Bowser,16,2025-02-27 07:00:00-05:00,2025-02-28 06:59:59.999999999-05:00
2983,2025-02-28 00:46:18-05:00,NO,252,Frappe Snowland,4,Antonio,Domingo,Blake,Randy,Peach,Toad,Bowser,Yoshi,16,2025-02-27 07:00:00-05:00,2025-02-28 06:59:59.999999999-05:00
2984,2025-02-28 01:13:24-05:00,NO,252,Toad's Turnpike,4,Regan,Cole,Cooper,Blake,Toad,Peach,Mario,Yoshi,16,2025-02-27 07:00:00-05:00,2025-02-28 06:59:59.999999999-05:00
2985,2025-02-28 01:30:23-05:00,NO,252,Frappe Snowland,4,Regan,Antonio,Domingo,Blake,Yoshi,Peach,Bowser,Toad,16,2025-02-27 07:00:00-05:00,2025-02-28 06:59:59.999999999-05:00
2986,2025-02-28 01:59:10-05:00,NO,252,Kalimari Desert,4,Antonio,Cooper,Regan,Domingo,Toad,Mario,Peach,Yoshi,16,2025-02-27 07:00:00-05:00,2025-02-28 06:59:59.999999999-05:00


In [29]:
# Isolate the rows of the export table whose timestamp came from this run's
# new form submissions.
new_records = out_df.loc[
    lambda frame: frame["TIMESTAMP"].isin(new_df["Timestamp"])
].reset_index(drop=True)

new_records

Unnamed: 0,TIMESTAMP,NEW_SESSION,SUID,MAP,PLAYERS,PLAYERS_1ST,PLAYERS_2ND,PLAYERS_3RD,PLAYERS_4TH,CHARACTERS_1ST,CHARACTERS_2ND,CHARACTERS_3RD,CHARACTERS_4TH,SEASON,SUID_WINDOW_START,SUID_WINDOW_END
0,2025-02-28 20:59:49-05:00,NO,253,Royal Raceway,4,Cooper,Cole,Connor,Garrett,Bowser,Yoshi,Toad,Peach,16,2025-02-28 07:00:00-05:00,2025-03-01 06:59:59.999999999-05:00


In [30]:
# Persist both results without the index. The full table overwrites the CSV
# this notebook loaded at the start; the rows added in this run go to a
# companion file.
for frame, destination in [
    (
        out_df,
        r"C:\Users\Cooper\sandbox\mkstream\form_data_migration\form_data_valid.csv",
    ),
    (
        new_records,
        r"C:\Users\Cooper\sandbox\mkstream\form_data_migration\form_data_valid_new_records.csv",
    ),
]:
    frame.to_csv(destination, index=False)