In [1]:
import json
import graphviz
import polars as pl
import datetime as dt
from numpy import log

In [2]:
pl.Config.set_tbl_rows(20)
pl.Config.set_tbl_cols(20)
pl.Config.set_float_precision(2)


polars.config.Config

In [3]:
filepath: str = "../data/records05-29_23-57.json"
lf: pl.LazyFrame = pl.read_json(filepath).lazy()

In [4]:
lf: pl.LazyFrame = lf.with_columns(
            date=pl.col("date").cast(pl.Date),
            position=pl.col("position").cast(pl.UInt8),
            record_id=pl.arange(0, pl.len()).sort(descending=True),
            artists=pl.col("artist").str.split("Featuring").list.first().str.split("&"),
            features=pl.col("artist")
            .str.split("Featuring")
            .list.get(index=1, null_on_oob=True)
            .str.split("&"),
        ).filter(
            pl.col("date") >= dt.date(1960, 1, 1)
        ).select(
            [
                "record_id",
                "date",
                "position",
                "song",
                "artists",
                "features",
            ]
        )

In [5]:
maindf = lf.collect()

In [6]:
maindf

record_id,date,position,song,artists,features
i64,date,u8,str,list[str],list[str]
339286,2025-04-26,1,"""Luther""","[""Kendrick Lamar "", "" SZA""]",
339285,2025-04-26,2,"""Die With A Smile""","[""Lady Gaga "", "" Bruno Mars""]",
339284,2025-04-26,3,"""Nokia""","[""Drake""]",
339283,2025-04-26,4,"""Pink Pony Club""","[""Chappell Roan""]",
339282,2025-04-26,5,"""Ordinary""","[""Alex Warren""]",
339281,2025-04-26,6,"""A Bar Song (Tipsy)""","[""Shaboozey""]",
339280,2025-04-26,7,"""Lose Control""","[""Teddy Swims""]",
339279,2025-04-26,8,"""All The Way""","[""BigXthaPlug ""]","["" Bailey Zimmerman""]"
339278,2025-04-26,9,"""Beautiful Things""","[""Benson Boone""]",
339277,2025-04-26,10,"""I'm The Problem""","[""Morgan Wallen""]",


In [7]:
lf: pl.LazyFrame = maindf.lazy().group_by(["song", "artists", "features"]).agg(
    power_score=(1 / pl.col("position")).sum(),
    longevity_score=(1 / pl.col("position").log1p()).sum(),
    weeks_on_chart=pl.len(),
    proportion_top10=((pl.col("position") <= 10).sum() / pl.len()),
    earliest=pl.min("date"),
    latest=pl.max("date"),
)

In [None]:
lf: pl.LazyFrame = lf.with_columns(
    track_id=pl.arange(0, pl.len()).sort(descending=True),
    average_percentile=(pl.col("power_score").rank("ordinal") + pl.col("longevity_score").rank("ordinal")).truediv(2*pl.len()).mul(100),
    longevity_rating=(pl.col("longevity_score").rank("ordinal") / pl.len()).mul(100),
    power_rating=(pl.col("power_score").rank("ordinal") / pl.len()).mul(100),

).sort("average_percentile", descending=True).select(
    [
        "track_id",
        "song",
        "artists",
        "features",
        "average_percentile",
        "power_rating",
        "longevity_rating",
        "weeks_on_chart",
        "proportion_top10",
        "earliest",
        "latest"
    ]
)

In [9]:
lf.collect()

track_id,song,artists,features,average_percentile,power_rating,longevity_rating,weeks_on_chart,proportion_top10,earliest,latest
i64,str,list[str],list[str],f64,f64,f64,u32,f64,date,date
20519,"""A Bar Song (Tipsy)""","[""Shaboozey""]",,100.00,100.00,100.00,58,0.93,2024-04-27,2025-05-31
5018,"""All I Want For Christmas Is Yo…","[""Mariah Carey""]",,100.00,100.00,100.00,67,0.52,2000-01-08,2025-01-04
23654,"""As It Was""","[""Harry Styles""]",,99.99,99.99,99.99,61,0.62,2022-04-16,2023-06-10
11787,"""Last Night""","[""Morgan Wallen""]",,99.99,99.99,99.99,60,0.68,2023-02-11,2024-03-30
10778,"""Uptown Funk!""","[""Mark Ronson ""]","["" Bruno Mars""]",99.98,99.98,99.97,55,0.56,2014-11-29,2016-03-05
1851,"""Stay""","[""The Kid LAROI "", "" Justin Bieber""]",,99.98,99.98,99.98,62,0.69,2021-07-24,2022-10-01
1831,"""Old Town Road""","[""Lil Nas X ""]","["" Billy Ray Cyrus""]",99.98,99.99,99.97,44,0.59,2019-03-16,2020-01-18
9041,"""Shape Of You""","[""Ed Sheeran""]",,99.97,99.97,99.97,58,0.57,2017-01-28,2018-03-03
2079,"""Despacito""","[""Luis Fonsi "", "" Daddy Yankee ""]","["" Justin Bieber""]",99.97,99.98,99.96,51,0.49,2017-02-04,2018-01-20
21121,"""Closer""","[""The Chainsmokers ""]","["" Halsey""]",99.97,99.97,99.96,52,0.62,2016-08-20,2017-08-12


In [10]:
breaks = [1970, 1980, 1990, 2000, 2010, 2020]

In [11]:
lf: pl.LazyFrame = lf.with_columns(
    pl.col("earliest").dt.year().cut(breaks=breaks, labels=[f"{x-10}s" for x in breaks]+["2020s"], left_closed=True).alias("decade")
)

In [12]:
lf.collect()

track_id,song,artists,features,average_percentile,power_rating,longevity_rating,weeks_on_chart,proportion_top10,earliest,latest,decade
i64,str,list[str],list[str],f64,f64,f64,u32,f64,date,date,cat
20688,"""A Bar Song (Tipsy)""","[""Shaboozey""]",,100.00,100.00,100.00,58,0.93,2024-04-27,2025-05-31,"""2020s"""
734,"""All I Want For Christmas Is Yo…","[""Mariah Carey""]",,100.00,100.00,100.00,67,0.52,2000-01-08,2025-01-04,"""2000s"""
3981,"""As It Was""","[""Harry Styles""]",,99.99,99.99,99.99,61,0.62,2022-04-16,2023-06-10,"""2020s"""
29366,"""Last Night""","[""Morgan Wallen""]",,99.99,99.99,99.99,60,0.68,2023-02-11,2024-03-30,"""2020s"""
27860,"""Uptown Funk!""","[""Mark Ronson ""]","["" Bruno Mars""]",99.98,99.98,99.97,55,0.56,2014-11-29,2016-03-05,"""2010s"""
24140,"""Stay""","[""The Kid LAROI "", "" Justin Bieber""]",,99.98,99.98,99.98,62,0.69,2021-07-24,2022-10-01,"""2020s"""
14758,"""Old Town Road""","[""Lil Nas X ""]","["" Billy Ray Cyrus""]",99.98,99.99,99.97,44,0.59,2019-03-16,2020-01-18,"""2010s"""
23371,"""Shape Of You""","[""Ed Sheeran""]",,99.97,99.97,99.97,58,0.57,2017-01-28,2018-03-03,"""2010s"""
1842,"""Despacito""","[""Luis Fonsi "", "" Daddy Yankee ""]","["" Justin Bieber""]",99.97,99.98,99.96,51,0.49,2017-02-04,2018-01-20,"""2010s"""
28934,"""Closer""","[""The Chainsmokers ""]","["" Halsey""]",99.97,99.97,99.96,52,0.62,2016-08-20,2017-08-12,"""2010s"""


In [13]:
lf: pl.LazyFrame = (
    lf.group_by("decade")
    .agg(pl.all().top_k_by(by="average_percentile", k=10)).explode(pl.all().exclude("decade"))
)

In [14]:
lf.collect()

decade,track_id,song,artists,features,average_percentile,power_rating,longevity_rating,weeks_on_chart,proportion_top10,earliest,latest
cat,i64,str,list[str],list[str],f64,f64,f64,u32,f64,date,date
"""1960s""",3943,"""Rockin' Around The Christmas T…","[""Brenda Lee""]",,99.92,99.90,99.95,62,0.47,1960-12-10,2025-01-04
"""1960s""",2481,"""The Twist""","[""Chubby Checker""]",,99.73,99.72,99.74,40,0.65,1960-07-30,1962-04-07
"""1960s""",20953,"""Jingle Bell Rock""","[""Bobby Helms""]",,99.51,99.26,99.75,56,0.48,1960-12-10,2025-01-04
"""1960s""",11052,"""Hey Jude""","[""The Beatles""]",,99.43,99.71,99.16,19,0.74,1968-09-14,1969-01-18
"""1960s""",24994,"""The Theme From ""A Summer Place…","[""Percy Faith And His Orchestra""]",,99.31,99.61,99.02,21,0.57,1960-01-09,1960-05-28
"""1960s""",19449,"""Tossin' And Turnin'""","[""Bobby Lewis""]",,99.24,99.51,98.96,23,0.52,1961-04-22,1961-09-23
"""1960s""",6635,"""I Want To Hold Your Hand""","[""The Beatles""]",,98.93,99.46,98.41,15,0.80,1964-01-18,1964-04-25
"""1960s""",26165,"""Aquarius/Let The Sunshine In (…","[""The 5th Dimension""]",,98.83,99.33,98.33,17,0.65,1969-03-08,1969-06-28
"""1960s""",1829,"""Are You Lonesome To-night?""","[""Elvis Presley With The Jordanaires""]",,98.73,99.31,98.15,16,0.69,1960-11-12,1961-02-25
"""1960s""",16734,"""I Heard It Through The Grapevi…","[""Marvin Gaye""]",,98.67,99.29,98.05,15,0.73,1968-11-23,1969-03-01
