In [1]:
import os
import numpy as np
import polars as pl
import pandas as pd
from plotly import express as px
from datetime import datetime
from dotenv import load_dotenv

load_dotenv()

True

In [3]:
def load_data(sheet_name: str, sheet_id: str) -> pl.DataFrame:
    """Load specified sheet from Google Drive.

    Args:
        sheet_name (str): Name of the table to be loaded
        sheet_id (str): Google Drive Sheets ID Unique value stored in `.env` file.
        Environment variable is accessible with `dotenv.load_dotenv()` and `os.environ["<YOUR_SHEET_ID>"]`.

    Returns:
        pl.DataFrame: Polars DataFrame loaded from the specified Google Drive sheet.
    """

    # Construct the Google Drive URL using `sheet_id` and `sheet_name`
    url = f"https://docs.google.com/spreadsheets/d/{sheet_id}/gviz/tq?tqx=out:csv&sheet={sheet_name}"

    # Load the data from the URL as a Polars DataFrame
    # Fill Null values with `np.nan`
    df = pl.read_csv(
        url,
        columns=["UID", "SUID", "NAME", "CHARACTER", "MAP", "PLACE", "PLAYERS", "DATE"],
    ).fill_nan(np.nan)

    return df


def load_data_pd(sheet_name: str, sheet_id: str) -> pd.DataFrame:
    """Load specified sheet from Google Drive.

    Args:
        sheet_name (str): Name of the table to be loaded
        sheet_id (str): Google Drive Sheets ID Unique value stored in `.env` file.
        Environment variable is accessible with `dotenv.load_dotenv()` and `os.environ["<YOUR_SHEET_ID>"]`.

    Returns:
        pd.DataFrame: Pandas DataFrame loaded from the specified Google Drive sheet.
    """

    # Construct the Google Drive URL using `sheet_id` and `sheet_name`
    url = f"https://docs.google.com/spreadsheets/d/{sheet_id}/gviz/tq?tqx=out:csv&sheet={sheet_name}"

    # Load the data from the URL as a Polars DataFrame
    # Fill Null values with `np.nan`
    df = pd.read_csv(
        url,
        usecols=[
            "UID",
            "SUID",
            "NAME",
            "CHARACTER",
            "MAP",
            "PLACE",
            "PLAYERS",
            "DATE",
            "SEASON",
        ],
    ).fillna(np.nan)

    return df

In [4]:
# %%timeit
panda_df = load_data_pd(sheet_name="data", sheet_id=os.environ["SHEET_ID"])

In [20]:
unique_seasons = (
    pd.Series(panda_df["SEASON"].unique()).sort_values(ascending=False).values.tolist()
)

unique_seasons

[10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]

In [16]:
new_season_gb = (
    panda_df[panda_df["SEASON"] == panda_df["SEASON"].max()]
    .groupby(["NAME"])
    .agg(
        PLACE=pd.NamedAgg(column="PLACE", aggfunc="mean"),
        GAMES_PLAYED=pd.NamedAgg(column="NAME", aggfunc="count"),
    )
)

new_season_gb

Unnamed: 0_level_0,PLACE,GAMES_PLAYED
NAME,Unnamed: 1_level_1,Unnamed: 2_level_1
Blake,2.386364,44
Cole,2.272727,22
Connor,2.444444,27
Cooper,1.73494,83
Domingo,3.419355,31
Garrett,3.666667,9
Kieran,4.0,2
Konnor,3.6,5
Luke,2.5,2
Matt,2.333333,27


In [12]:
# %%timeit
# polar_df = load_data(sheet_name="data", sheet_id=os.environ["SHEET_ID"])

1.09 s ± 61.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [18]:
panda_df["NAME"].value_counts().index.tolist()

['Blake',
 'Cooper',
 'Connor',
 'Cole',
 'Regan',
 'Matt',
 'Triston',
 'Jake',
 'Graber',
 'Anthony',
 'Luke',
 'Colton',
 'Chandler',
 'Robert',
 'Domingo',
 'Miller',
 'Ben',
 'Sam',
 'Konnor',
 'Joey',
 'Randy',
 'Andrew',
 'Caskey',
 'Chloe',
 'Hughes',
 'Kayla',
 'Austin',
 'Martin',
 'Coop W',
 'Mikey',
 'Garrett',
 'Justin',
 'Duncan P',
 'Mitch',
 'Billy',
 'Kieran',
 'Duncan P ']

In [11]:
names = df["NAME"].value_counts().sort(by="count", descending=True)["NAME"]


names

NAME
str
"""Blake"""
"""Cooper"""
"""Connor"""
"""Cole"""
"""Regan"""
…
"""Duncan P"""
"""Mitch"""
"""Kieran"""
"""Billy"""


In [None]:
px.histogram(data_frame=df, x="NAME").update_xaxes(categoryorder="total descending")

In [14]:
names_unique = (
    df["NAME"].value_counts().sort(by="count", descending=True)["NAME"].to_list()
)
map_names_unique = (
    df["MAP"].value_counts().sort(by="count", descending=True)["MAP"].to_list()
)
char_names_unique = (
    df["CHARACTER"]
    .value_counts()
    .sort(by="count", descending=True)["CHARACTER"]
    .to_list()
)

last_suid = df.select(pl.last("SUID")).item()
last_uid = df.select(pl.last("UID")).item()

In [13]:
names_unique

['Blake',
 'Cooper',
 'Connor',
 'Cole',
 'Regan',
 'Matt',
 'Triston',
 'Jake',
 'Graber',
 'Anthony',
 'Luke',
 'Colton',
 'Chandler',
 'Robert',
 'Domingo',
 'Miller',
 'Ben',
 'Sam',
 'Konnor',
 'Joey',
 'Randy',
 'Andrew',
 'Caskey',
 'Chloe',
 'Hughes',
 'Austin',
 'Kayla',
 'Martin',
 'Mikey',
 'Coop W',
 'Garrett',
 'Justin',
 'Duncan P',
 'Mitch',
 'Kieran',
 'Billy',
 'Duncan P ']

In [15]:
df.select(pl.last("SUID")).item()

7

In [None]:
last_suid = df.select(pl.last("SUID")).item()
last_uid = df.select(pl.last("UID")).item()
last_season = df.select(pl.last("SEASON")).item()

In [None]:
df.filter(pl.col("SEASON") == (last_season - 1)).select(pl.tail(df.columns, 1))

In [None]:
# idx = df[df["SEASON"] == (last_season - 1)]
# print(idx)

In [None]:
datetime.now().strftime("%Y-%m-%d")

In [16]:
players_write = ["Cooper", "Blake", "Connor", "Dom"]
characters_write = ["Yoshi", "Toad", "Peach", "Mario"]

new_data = pl.DataFrame(
    {
        "NAME": players_write,
        "CHARACTER": characters_write,
        "DATE": datetime.now().strftime("%Y-%m-%d"),
    }
)

new_data = new_data.with_columns(
    PLACE=pl.int_range(start=1, end=(1 + new_data.shape[0]), step=1)
)

new_data = new_data.with_columns(PLAYERS=new_data.shape[0])

new_data = new_data[["NAME", "CHARACTER", "PLACE", "PLAYERS", "DATE"]]

nd = df[["NAME", "CHARACTER", "PLACE", "PLAYERS", "DATE"]]

concat_data = pl.concat(items=[new_data, new_data], how="vertical_relaxed")

concat_data

NAME,CHARACTER,PLACE,PLAYERS,DATE
str,str,i64,i32,str
"""Cooper""","""Yoshi""",1,4,"""2024-05-10"""
"""Blake""","""Toad""",2,4,"""2024-05-10"""
"""Connor""","""Peach""",3,4,"""2024-05-10"""
"""Dom""","""Mario""",4,4,"""2024-05-10"""
"""Cooper""","""Yoshi""",1,4,"""2024-05-10"""
"""Blake""","""Toad""",2,4,"""2024-05-10"""
"""Connor""","""Peach""",3,4,"""2024-05-10"""
"""Dom""","""Mario""",4,4,"""2024-05-10"""


In [22]:
cat_df = concat_data.to_pandas()

concat_data.to_pandas().values.tolist()

[['Cooper', 'Yoshi', 1, 4, '2024-05-10'],
 ['Blake', 'Toad', 2, 4, '2024-05-10'],
 ['Connor', 'Peach', 3, 4, '2024-05-10'],
 ['Dom', 'Mario', 4, 4, '2024-05-10'],
 ['Cooper', 'Yoshi', 1, 4, '2024-05-10'],
 ['Blake', 'Toad', 2, 4, '2024-05-10'],
 ['Connor', 'Peach', 3, 4, '2024-05-10'],
 ['Dom', 'Mario', 4, 4, '2024-05-10']]

In [18]:
concat_data.rows()

[('Cooper', 'Yoshi', 1, 4, '2024-05-10'),
 ('Blake', 'Toad', 2, 4, '2024-05-10'),
 ('Connor', 'Peach', 3, 4, '2024-05-10'),
 ('Dom', 'Mario', 4, 4, '2024-05-10'),
 ('Cooper', 'Yoshi', 1, 4, '2024-05-10'),
 ('Blake', 'Toad', 2, 4, '2024-05-10'),
 ('Connor', 'Peach', 3, 4, '2024-05-10'),
 ('Dom', 'Mario', 4, 4, '2024-05-10')]

In [24]:
concat_data.shape[0]

8

In [29]:
type(new_data["PLAYERS"].len())

int

In [26]:
type(concat_data.shape)

bytes

In [19]:
import gspread as gs

gc = gs.service_account()
sheet_id = os.environ["SHEET_ID"]
sheet_name = "data"
url = f"https://docs.google.com/spreadsheets/d/{sheet_id}/gviz/tq?tqx=out:csv&sheet={sheet_name}"
sh = gc.open_by_url(url)
ws = sh.worksheet(title="data")

In [20]:
ws

<Worksheet 'data' id:0>

In [23]:
ws.append_rows(values=concat_data.to_pandas().values.tolist())

{'spreadsheetId': '1wM-5mZqL85VQnhb-W5pV5XIcp53gpE37vrkN_psHiJw',
 'tableRange': 'data!A1:N18818',
 'updates': {'spreadsheetId': '1wM-5mZqL85VQnhb-W5pV5XIcp53gpE37vrkN_psHiJw',
  'updatedRange': 'data!A18819:E18826',
  'updatedRows': 8,
  'updatedColumns': 5,
  'updatedCells': 40}}