<a target="_blank" href="https://colab.research.google.com/github/gox6/colab-demos/blob/main/use-cases/getting-youtube-transcripts.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

### GitHub stars
Resources:
* [Online tool: Daily Stars Explorer](https://emanuelef.github.io/daily-stars-explorer/#/)
* [Medium article: "Unmasking the GitHub Star Story: Track Daily Trends & Break the 40k Limit" by Emanuele Fumagalli](https://medium.com/@emafuma/how-to-get-full-history-of-github-stars-f03cc93183a7)
* [GitHub repo for the Daily Stars Explorer project](https://github.com/emanuelef/daily-stars-explorer)
* [Embedding plotly graph on Medium explained](https://jennifer-banks8585.medium.com/how-to-embed-interactive-plotly-visualizations-on-medium-blogs-710209f93bd)


In [1]:
# Installing Python packages quietly
# Only hvplot is pinned to an exact version; every other package resolves to
# whatever pip picks at install time.
# `1> /dev/null` discards stdout only, so pip notices written to stderr still appear.

!pip install --quiet \
  hvplot==0.10.0 \
  nbformat \
  polars \
  pyarrow \
  pyparsing \
  plotly \
  1> /dev/null

# Second pass upgrades nbformat to the latest release
# (presumably needed by Plotly's notebook rendering — TODO confirm).
!pip install --upgrade nbformat 1> /dev/null


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.1.2[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.1.2[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [2]:
# Importing the packages
import os
from datetime import datetime
from getpass import getpass

import hvplot.polars
import plotly.express as px
import plotly.io as pio
import polars as pl

# Make "notebook" the default Plotly renderer used by `fig.show()`.
# NOTE(review): the original comment said this configures Plotly for VS Code —
# confirm "notebook" is the intended renderer for the target environment.
pio.renderers.default = "notebook"

In [3]:
# Star-history CSV exports, one per repository. Each frame gets a constant
# `package` label so both histories can share a single long-format table.
polars_df = pl.read_csv(
    "https://gist.github.com/gox6/a66338e60103309cb8f215ae9568743e/raw/pola-rs_polars-stars-history.csv"
).with_columns(package=pl.lit("Polars"))
pandas_df = pl.read_csv(
    "https://gist.github.com/gox6/699bd9fdb1f44ef88a8e4fd80b2e7ef9/raw/pandas-dev_pandas-stars-history.csv"
).with_columns(package=pl.lit("Pandas"))

# Stack the two histories, parse the `date` strings into real dates, and
# order the combined frame chronologically.
df = (
    pl.concat([polars_df, pandas_df], how="vertical")
    .with_columns(pl.col("date").str.to_date())
    .sort("date")
)

df

date,day-stars,total-stars,package
date,i64,i64,str
2010-08-24,539,539,"""Pandas"""
2010-08-25,1,540,"""Pandas"""
2010-08-26,1,541,"""Pandas"""
2010-08-27,0,541,"""Pandas"""
2010-08-28,0,541,"""Pandas"""
…,…,…,…
2024-05-05,7,42033,"""Pandas"""
2024-05-06,27,26415,"""Polars"""
2024-05-07,24,26439,"""Polars"""
2024-05-08,28,26467,"""Polars"""


In [4]:
# Summary statistics for every column of the combined star-history frame.
df.describe()

statistic,date,day-stars,total-stars,package
str,str,f64,f64,str
"""count""","""6462""",6462.0,6462.0,"""6462"""
"""null_count""","""0""",0.0,0.0,"""0"""
"""mean""","""2018-08-04""",10.602755,13141.238471,
"""std""",,14.613174,12674.764827,
"""min""","""2010-08-24""",0.0,0.0,"""Pandas"""
"""25%""","""2015-01-25""",3.0,1953.0,
"""50%""","""2019-06-29""",8.0,7746.0,
"""75%""","""2022-02-20""",14.0,22931.0,
"""max""","""2024-05-09""",539.0,42033.0,"""Polars"""


In [9]:
# Line chart of cumulative GitHub stars per package, styled for a dark background.
fig = px.line(
    df,
    x="date",
    y="total-stars",
    color="package",
    color_discrete_map={"Pandas": "#e4048c", "Polars": "#107cff"},
)
fig.update_layout(
    {
        "height": 630,
        "width": 1200,
        "plot_bgcolor": "rgba(0, 0, 0, 0)",
        "paper_bgcolor": "rgb(37, 50, 59)",  # dark blue background
        "legend": {
            "font": {"color": "white", "size": 25},
            "title_text": "",
            "yanchor": "top",
            "y": 0.8,
            "xanchor": "center",
            "x": 0.5,
        },  # white legend
        "title": {
            "text": "Total GitHub Stars in Time",
            "font": {"color": "white", "size": 40},  # white title
            "x": 0.5,  # center title
            "xanchor": "center",  # center title
        },
        "margin": {"l": 100, "r": 100, "t": 100, "b": 100},  # increase margins
    }
)
# The axis-title font is nested under `title` rather than passed as the
# deprecated `titlefont` attribute, which newer plotly releases reject.
fig.update_xaxes(
    showgrid=False,
    showline=True,
    tickfont=dict(color="white", size=20),
    title=dict(text="Time", font=dict(color="white", size=25)),
)
fig.update_yaxes(
    showgrid=False,
    showline=True,
    range=[0, df["total-stars"].max()],
    tickfont=dict(color="white", size=20),
    title=dict(text="GitHub Stars", font=dict(color="white", size=25)),
)

# Label the end of each line with that package's latest star count.
# Relies on `df` being sorted by date, so the last row per package is the newest.
for package in df["package"].unique().to_list():
    last_row = df.filter(pl.col("package") == package).tail(1).row(0, named=True)
    fig.add_annotation(
        x=last_row["date"],
        y=last_row["total-stars"],
        text=str(last_row["total-stars"]),
        showarrow=False,
        font=dict(color="white", size=20),
        yshift=15,
    )

fig.update_traces(line=dict(width=4))

fig.show()

In [12]:
# PyPI download counts for pandas and polars, read from a gist-hosted CSV.
# `datetime` comes from the notebook's import cell.
downloads = (
    pl.read_csv("https://gist.github.com/gox6/033254f79f0b0053029515af4fb298de/raw//pypi_data_on_downloads_of_pandas_and_polars.csv")
    .with_columns(
        [
            # Parse the `date` strings into real dates.
            pl.col("date").str.to_date(),
            # Capitalise project names so they match the labels used in the stars chart.
            pl.col("project").replace({"polars": "Polars", "pandas": "Pandas"}),
        ]
    )
    # Keep only rows dated 2021-01-01 or later.
    .filter(pl.col("date") >= datetime(2021, 1, 1))
    .rename({"project": "package"})
    .sort(by=["date"])
)
downloads


date,package,num_downloads
date,str,i64
2021-01-01,"""Pandas""",38813789
2021-02-01,"""Pandas""",39955590
2021-03-01,"""Pandas""",47419314
2021-03-01,"""Polars""",5049
2021-04-01,"""Pandas""",46373579
…,…,…
2024-03-01,"""Polars""",5253048
2024-04-01,"""Pandas""",213008389
2024-04-01,"""Polars""",7051666
2024-05-01,"""Pandas""",74802956


In [13]:
# Line chart of PyPI download counts per package, styled for a dark background.
fig = px.line(
    downloads,
    x="date",
    y="num_downloads",
    color="package",
    color_discrete_map={"Pandas": "#e4048c", "Polars": "#107cff"},
)
fig.update_layout(
    {
        "plot_bgcolor": "rgba(0, 0, 0, 0)",
        "paper_bgcolor": "rgb(37, 50, 59)",  # dark blue background
        "legend": {
            "font": {"color": "white", "size": 25},
            "title_text": "",
            "yanchor": "top",
            "y": 0.8,
            "xanchor": "center",
            "x": 0.5,
        },  # white legend
        "title": {
            # This chart plots `num_downloads`, so the title names downloads, not stars.
            "text": "PyPI Downloads in Time",
            "font": {"color": "white", "size": 40},  # white title
            "x": 0.5,  # center title
            "xanchor": "center",  # center title
        },
        "margin": {"l": 100, "r": 100, "t": 100, "b": 100},  # increase margins
    }
)
# The axis-title font is nested under `title` rather than passed as the
# deprecated `titlefont` attribute, which newer plotly releases reject.
fig.update_xaxes(
    showgrid=False,
    showline=True,
    tickfont=dict(color="white", size=20),
    title=dict(text="Time", font=dict(color="white", size=25)),
)
fig.update_yaxes(
    showgrid=False,
    showline=True,
    range=[0, downloads["num_downloads"].max()],
    tickfont=dict(color="white", size=20),
    title=dict(text="Downloads", font=dict(color="white", size=25)),
)

# Add annotations for the last values.
# The lookup filters `downloads` (the frame that has `num_downloads`) and relies
# on it being sorted by date, so the last row per package is the newest.
for package in downloads["package"].unique().to_list():
    last_row = (
        downloads.filter(pl.col("package") == package).tail(1).row(0, named=True)
    )
    fig.add_annotation(
        x=last_row["date"],
        y=last_row["num_downloads"],
        text=str(last_row["num_downloads"]),
        showarrow=False,
        font=dict(color="white", size=20),
        yshift=15,
    )
fig.show()