In [1]:
# Auto-reload imported modules: with mode 2, IPython re-imports all modules
# before executing each cell, so edits to local packages take effect without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

## Daily Back Fill

In [None]:
from op_analytics.datasources.github.dataaccess import Github
# Read the three GitHub datasets (pull requests, PR comments, PR reviews) via
# read_polars(). Later cells rely on each result exposing `.schema` and a "dt"
# column; presumably these are Polars DataFrames — confirm in Github dataaccess.
prs_df = Github.PRS.read_polars()
comments_df = Github.PR_COMMENTS.read_polars()
reviews_df = Github.PR_REVIEWS.read_polars()

In [None]:
# Print each dataset's schema underneath a heading naming the dataset.
schema_sections = (
    ("Pull Requests Schema:", prs_df),
    ("\nPR Comments Schema:", comments_df),
    ("\nPR Reviews Schema:", reviews_df),
)
for heading, frame in schema_sections:
    print(heading)
    print(frame.schema)

In [4]:
# Sanity check: the latest "dt" value must be identical in all three datasets.
latest_prs_dt = prs_df["dt"].max()
latest_comments_dt = comments_df["dt"].max()
latest_reviews_dt = reviews_df["dt"].max()
assert latest_prs_dt == latest_comments_dt == latest_reviews_dt, (
    "Max dates should be equal across all datasets"
)

In [None]:
from op_analytics.datasources.github.metrics.compute import compute_all_metrics
# Compute metrics from the three loaded frames. Which metrics are produced is
# decided inside compute_all_metrics (not visible in this notebook).
detailed_metrics = compute_all_metrics(
    prs_df=prs_df,
    comments_df=comments_df,
    reviews_df=reviews_df,
)
# Bare last expression so the notebook renders the result as the cell output.
detailed_metrics

## Visualizations

In [None]:
import polars as pl
import plotly.express as px

def plot_repo_metric_over_time(
    combined_df: pl.DataFrame,
    repo_name: str,
    metric: str,
    x_axis: str = "period_start",
    markers: bool = True
) -> None:
    """
    Show a line chart of one metric for one repo, with one line per period type.

    Rows are restricted to ``repo_name``, ordered by the time column and then by
    ``period_type``, and drawn with Plotly Express so that every distinct
    ``period_type`` value (e.g. 'rolling_week', 'rolling_month') gets its own
    colored line. The figure is displayed immediately; nothing is returned.

    Args:
        combined_df: Polars DataFrame holding rows for several period types.
                     It must contain the columns:
                       - 'repo' (compared against ``repo_name``)
                       - 'period_type' (used for line color and the legend)
                       - the column named by ``x_axis`` (time axis)
                       - the column named by ``metric`` (values to plot)
        repo_name: Value of the 'repo' column to keep (e.g. 'optimism', 'supersim').
        metric: Name of the column to plot on the y-axis (e.g. 'new_prs', 'total_comments').
        x_axis: Name of the column used for the x-axis (default: 'period_start').
        markers: Whether to draw a marker at each data point (default: True).
    """
    # Keep only what the chart needs: rows for this repo, in time order.
    plot_columns = ["period_type", x_axis, metric]
    repo_rows = combined_df.filter(pl.col("repo") == repo_name)
    ordered_rows = repo_rows.sort([x_axis, "period_type"])
    plot_frame = ordered_rows.select(plot_columns).to_pandas()

    # One figure; color="period_type" splits the data into one line per timeframe.
    chart = px.line(
        plot_frame,
        x=x_axis,
        y=metric,
        color="period_type",
        title=f"{metric} over time for '{repo_name}' by all timeframes",
        markers=markers,
    )
    chart.update_layout(
        xaxis_title=x_axis, yaxis_title=metric, legend_title="Timeframe"
    )
    chart.show()

# Example: chart 'stale_ratio' for the 'optimism' repo, using 'period_end'
# (instead of the default 'period_start') as the time axis.
plot_repo_metric_over_time(
    combined_df=detailed_metrics,
    repo_name="optimism",
    metric="stale_ratio",
    x_axis="period_end",
    markers=True
)