In [1]:
from pathlib import Path

import polars as pl

In [4]:
# Data dir
data_dir = (
    Path("../data/per-day-pkg-releases-metrics-one-year.parquet")
    .expanduser()
    .resolve()
    .absolute()
)

# Lazy read parquet files (all files in dir via glob)
lazy_frame = pl.scan_parquet(source=data_dir)

# Lowercase all column names
lazy_frame = lazy_frame.select(pl.all().reverse().name.to_lowercase())

# Rename "normalized_package_manager" to "ecosystem"
lazy_frame = lazy_frame.with_columns(
    pl.col("normalized_package_manager").alias("ecosystem")
)

In [5]:
# Counts from "major releases"
lazy_frame.filter(
    pl.col("ecosystem").is_in(["npm", "pypi", "rubygems"])
).filter(
    pl.col("release_type") != "unknown"
).filter(
    pl.col("release_type").eq("major")
).filter(
    pl.col("num_dependents_plus_180_days").gt(pl.lit(1))
).select(
    pl.col([
        "package_version",
        "num_dependents_on_release_date",
        "num_dependents_plus_90_days",
        "num_dependents_plus_180_days",
        "num_dependents_plus_360_days",
    ])
).collect().sample(10, seed=12)

package_version,num_dependents_on_release_date,num_dependents_plus_90_days,num_dependents_plus_180_days,num_dependents_plus_360_days
str,i64,i64,i64,i64
"""5.0.0""",1136,1149,1150,1149
"""4.0.0""",19,19,19,18
"""3.0.0""",7,7,7,6
"""2.0.0""",4,5,5,5
"""6.0.0""",1062,1085,1056,1028
"""1.0.0""",0,2,2,2
"""10.0.0""",199,196,203,200
"""65.0.0""",17,17,17,17
"""3.0.0""",769,0,796,829
"""7.0.0""",3,3,3,3


In [6]:
# Counts from "minor releases"
lazy_frame.filter(
    pl.col("ecosystem").is_in(["npm", "pypi", "rubygems"])
).filter(
    pl.col("release_type") != "unknown"
).filter(
    pl.col("release_type").eq("minor")
).filter(
    pl.col("num_dependents_plus_180_days").gt(pl.lit(1))
).select(
    pl.col([
        "package_version",
        "num_dependents_on_release_date",
        "num_dependents_plus_90_days",
        "num_dependents_plus_180_days",
        "num_dependents_plus_360_days",
    ])
).collect().sample(10, seed=12)

package_version,num_dependents_on_release_date,num_dependents_plus_90_days,num_dependents_plus_180_days,num_dependents_plus_360_days
str,i64,i64,i64,i64
"""4.37.0""",176,179,180,182
"""11.33.0""",4,4,4,6
"""2.6.0""",10,9,12,16
"""3.1.0""",68,69,69,73
"""8.1.0""",2047,2137,2191,2271
"""1.1.0""",2,4,4,6
"""14.2.0""",3,3,3,3
"""9.7.0""",475,491,495,499
"""1.6.0""",66,72,73,72
"""5.18.0""",389,392,391,397
