In [23]:
import polars as pl

In [24]:
pl.Config.set_fmt_str_lengths(40)

polars.config.Config

In [25]:
playlist = (
    pl.read_json("spotify-data/Playlist1.json")
    .explode("playlists")
    .unnest("playlists")
    .explode("items")
    .unnest("items")
    .unnest("track")
    .filter(
        pl.col("episode").is_not_null().not_(),  # drop episodes
    )
    .drop("episode", "localTrack", "audiobook", "description", "numberOfFollowers")
)
print(playlist.shape)
playlist.head()

(1150, 7)


name,lastModifiedDate,trackName,artistName,albumName,trackUri,addedDate
str,str,str,str,str,str,str
"""BANOOK""","""2024-01-26""","""Jesus to a Child""","""George Michael""","""Older""","""spotify:track:2SzCxX6M6vDwdEwnHDiTaY""","""2023-07-18"""
"""BANOOK""","""2024-01-26""","""O Willow Waly""","""Ashley Serena""","""O Willow Waly""","""spotify:track:6lI5gSYb2glYqp6SSCdN82""","""2023-07-18"""
"""BANOOK""","""2024-01-26""","""My Confession""","""Josh Groban""","""Closer""","""spotify:track:1jOLSsbFpAQhqMX65cbu2p""","""2023-07-18"""
"""BANOOK""","""2024-01-26""","""A Different Corner""","""George Michael""","""Ladies And Gentlemen... The Best Of Geor…","""spotify:track:6khoye7kysa6cLGY0AO9dO""","""2023-07-18"""
"""BANOOK""","""2024-01-26""","""Cry Me a River""","""Justin Timberlake""","""Justified""","""spotify:track:7Lf7oSEVdzZqTA0kEDSlS5""","""2023-07-18"""


In [26]:
print("No. of distinct playlists in library:", playlist["name"].n_unique())
print("No. of distinct tracks in library:", playlist["trackUri"].n_unique())
print("No. of distinct artists in library:", playlist["artistName"].n_unique())

No. of distinct playlists in library: 21
No. of distinct tracks in library: 1073
No. of distinct artists in library: 629


In [27]:
# preprocess playlist dataset

playlist = playlist.filter(
    pl.col("name").is_in(["Me in 2023"]).not_(),
).with_columns(
    # remove extras from trackName
    pl.col("trackName").str.replace_all(r"\((.?*)\)|- (.?*)", ""),
    # extract extra descriptions from trackName column
    trackDesc=pl.col("trackName").str.extract_all(r"\((.?*)\)|- (.?*)"),
    # create custom trackId column identifier for different tracks
    trackId=pl.col("trackName").add("@").add(pl.col("artistName")),
)
playlist.sample(5)

name,lastModifiedDate,trackName,artistName,albumName,trackUri,addedDate,trackDesc,trackId
str,str,str,str,str,str,str,list[str],str
"""enything""","""2024-10-15""","""Slow Talkin'""","""Haley Heynderickx""","""Among Horses III""","""spotify:track:67QtnDkCfHCmWTwWaHN9hw""","""2024-09-29""",[],"""Slow Talkin'@Haley Heynderickx"""
"""enything""","""2024-10-15""","""New York City""","""Adrianne Lenker""","""b-sides""","""spotify:track:5McZ3LHgxlz8Fpea1oo8Ad""","""2024-09-29""",[],"""New York City@Adrianne Lenker"""
"""imported""","""2024-09-29""","""Mystery of Love""","""Sufjan Stevens""","""Call Me By Your Name (Original Motion Pi…","""spotify:track:4HbeGjbt7u3pvwDk1vN7P0""","""2023-07-05""",[],"""Mystery of Love@Sufjan Stevens"""
"""Theme Music""","""2024-11-13""","""Gullak Theme""","""Anurag Saikia""","""Gullak: Season 1 (Music from the Tvf Ori…","""spotify:track:1gRNd0m1e3VqF2OGSp5YuQ""","""2024-11-13""",[],"""Gullak Theme@Anurag Saikia"""
"""Wcowin""","""2024-04-16""","""Letter""","""iris""","""YOUR FRIENDSHIP MEANS SO MUCH TO ME""","""spotify:track:6UTEi5a5qIYkHQYNptqC0l""","""2024-04-05""",[],"""Letter@iris"""


In [28]:
(
    playlist.group_by("name")
    .agg(
        pl.col("lastModifiedDate").first().str.to_date().dt.strftime("%d %b, %y"),
        pl.len().alias("trackCount"),
        pl.col("artistName").n_unique().alias("artistCount"),
        pl.col("trackName", "artistName").unique().sample(3, with_replacement=True),
        pl.col("addedDate").str.to_date().min(),
    )
    .with_columns(
        pl.col("trackName", "artistName").list.join(" | "),
    )
    .sort("addedDate")
    .drop("addedDate")
    .style.tab_header(
        title="Playlists' Summary",
        subtitle="No. of tracks in each playlist (sorted by creation date)",
    )
)

Playlists' Summary,Playlists' Summary,Playlists' Summary,Playlists' Summary,Playlists' Summary,Playlists' Summary
No. of tracks in each playlist (sorted by creation date),No. of tracks in each playlist (sorted by creation date),No. of tracks in each playlist (sorted by creation date),No. of tracks in each playlist (sorted by creation date),No. of tracks in each playlist (sorted by creation date),No. of tracks in each playlist (sorted by creation date)
name,lastModifiedDate,trackCount,artistCount,trackName,artistName
HNDi,"26 Oct, 24",43,30,Piyu Bole | Tumhe Apna Banane Ka-Chand Chhupa | Hawaa Banke,Mohammed Irfan | Zain Zohaib | Amit Trivedi
English Hai,"01 Nov, 24",88,71,Falling Up | a thousand years | Girls Like You,Dan + Shay | Zedd | Henry Moodie
Jd,"22 Jun, 23",32,26,Bones | Stay | Bara Bara' Bere Bere',The Weeknd | Jennifer Lopez | CNCO
Thend,"04 Nov, 23",21,6,One Right Now | Take My Breath | Pray For Me,The Weeknd | Swedish House Mafia | The Weeknd
Part-EE,"16 Mar, 24",44,39,Sade Dil Vich | Are Rafta Rafta Dekho | Nachde Ne Saare,Majbul Khan | Ram Sampath | Tanishk Bagchi
Yeah! Mode,"01 Nov, 23",24,11,Bach Ke Rehna | Rolex Theme | Vikram,Amar Mangrulkar | Ghibran | Ritviz
हींग-lish,"09 Jan, 23",19,8,Missing Me | Do You Remember | U-n-I,Bali Brahmbhatt | Rishi Rich | Rishi Rich
Mastered,"05 Oct, 24",24,21,Pal | Jaane Kya Dhoondta Hai | Hind Ke Sitara,Stephen Sanchez | Lucky Ali | Advait Nemlekar
PnJbi,"30 Aug, 23",21,17,Champion | Excuses | Chaar Din,Parichay | Shubh | Parichay
think nothing,"14 Nov, 24",146,90,Teri Meri Pehli Sham | O Yara | Callin' U,Arooj Aftab | Ashim Kemson featuring Pierre | Ashish Zachariah


In [29]:
(
    playlist.group_by("name")
    .agg(
        pl.col("trackId").filter(pl.col("trackId").is_duplicated()).unique(),
    )
    .filter(pl.col("trackId").list.len().gt(0))
    .style.tab_header("Playlist containing duplicate Tracks")
)

Playlist containing duplicate Tracks,Playlist containing duplicate Tracks
name,trackId
think exception,['Yad Lagla@Ajay Gogavale']
English Hai,['At My Worst@Pink Sweat$']
Compose It !!,['Cold Little Heart@Michael Kiwanuka']


In [30]:
(
    playlist.group_by("trackId")
    .agg(pl.col("name"))
    .filter(pl.col("name").list.len().gt(2))
    .style.tab_header("Tracks in Multiple Playlist")
)

Tracks in Multiple Playlist,Tracks in Multiple Playlist
trackId,name


## Import saved albums data to compare with playlists


In [31]:
album = (
    pl.read_json("spotify-data/YourLibrary.json")
    .select("albums")
    .explode("albums")
    .unnest("albums")
    .rename({"album": "name"})
    .rename(lambda x: "album" + x.title())
)
print(album.shape)
album.head()

(76, 3)


albumArtist,albumName,albumUri
str,str,str
"""Tommee Profitt""","""In The End""","""spotify:album:4ogDiddFmcxnT4PC9lVEoV"""
"""Various Artists""","""What Are the Odds?""","""spotify:album:33iMwGCgwBkVoi3UZgcfws"""
"""Deepak Peace""","""1947 Se Ak-47 Tak""","""spotify:album:4ovgqD1QkgAIrN3ZsuKmzG"""
"""Sharon Van Etten""","""Are We There""","""spotify:album:6EQWzHbd3EYO8J4EAIJst4"""
"""Agnes Obel""","""Myopia""","""spotify:album:1XFhwj2xUtypgyEqAmTrQV"""


In [37]:
pl_album = playlist.join(album, on="albumName")
print(pl_album.shape)
pl_album.head()

(61, 11)


name,lastModifiedDate,trackName,artistName,albumName,trackUri,addedDate,trackDesc,trackId,albumArtist,albumUri
str,str,str,str,str,str,str,list[str],str,str,str
"""imported""","""2024-09-29""","""invisible string""","""Taylor Swift""","""folklore""","""spotify:track:6VsvKPJ4xjVNKpI8VVZ3SV""","""2023-07-05""",[],"""invisible string@Taylor Swift""","""Taylor Swift""","""spotify:album:2fenSS68JI1h4Fo296JfGr"""
"""imported""","""2024-09-29""","""Mystery of Love""","""Sufjan Stevens""","""Call Me By Your Name (Original Motion Pi…","""spotify:track:4HbeGjbt7u3pvwDk1vN7P0""","""2023-07-05""",[],"""Mystery of Love@Sufjan Stevens""","""Various Artists""","""spotify:album:7K0x1O9gqMQlDwbMkyCCIM"""
"""imported""","""2024-09-29""","""hoax""","""Taylor Swift""","""folklore""","""spotify:track:0YeDG5HnKnG7jpArkzsSPa""","""2023-07-05""",[],"""hoax@Taylor Swift""","""Taylor Swift""","""spotify:album:2fenSS68JI1h4Fo296JfGr"""
"""imported""","""2024-09-29""","""Madhubala""","""Amit Trivedi""","""Songs of Love""","""spotify:track:3hcwpXiJtR7kwDrWllcH0v""","""2024-04-16""",[],"""Madhubala@Amit Trivedi""","""Amit Trivedi""","""spotify:album:0jpXgPEyyujKElDwbCWL7o"""
"""imported""","""2024-09-29""","""Teri Ay""","""Umer Farooq""","""Patang""","""spotify:track:3WBsWNtL054HCFz7UUGK9e""","""2024-04-16""",[],"""Teri Ay@Umer Farooq""","""Umer Farooq""","""spotify:album:2dtBK0Vkm1vrkucjfaqR3I"""


In [39]:
pl_album.select("name", "trackId", "albumName").style

name,trackId,albumName
imported,invisible string@Taylor Swift,folklore
imported,Mystery of Love@Sufjan Stevens,Call Me By Your Name (Original Motion Picture Soundtrack)
imported,hoax@Taylor Swift,folklore
imported,Madhubala@Amit Trivedi,Songs of Love
imported,Teri Ay@Umer Farooq,Patang
imported,Tu Aisa Kaise Hai@Osho Jain,Saar
anything,Flesh & Bone@Anoushka Maskey,Things I Saw in a Dream
anything,Hollow - acoustic@Prateek Kuhad,The Way That Lovers Do
anything,Trampoline@Anoushka Maskey,Things I Saw in a Dream
anything,Things I Saw in a Dream@Anoushka Maskey,Things I Saw in a Dream


In [18]:
artist = (
    pl.read_json("spotify-data/YourLibrary.json")
    .select("artists")
    .explode("artists")
    .unnest("artists")
    .rename(lambda x: "artist" + x.title())
)
print(artist.shape)
artist.head()

(21, 2)


artistName,artistUri
str,str
"""AMAN""","""spotify:artist:2fbOTJal9uKzOkYEWxaiCK"""
"""Adrianne Lenker""","""spotify:artist:4aKWmkWAKviFlyvHYPTNQY"""
"""Agnes Obel""","""spotify:artist:1rKrEdI6GKirxWHxIUPYms"""
"""Anuv Jain""","""spotify:artist:4gdMJYnopf2nEUcanAwstx"""
"""Bayaan""","""spotify:artist:3atMq790wQ7IqjeSO0HFeP"""


In [44]:
playlist.join(artist, on="artistName").select("name", "trackName", "artistName").style

name,trackName,artistName
imported,Mystery of Love,Sufjan Stevens
imported,Faasle,Kaavish
imported,100 words,Prateek Kuhad
imported,Teri Ay,Umer Farooq
imported,Shehron Ke Raaz,Prateek Kuhad
imported,Tu Aisa Kaise Hai,Osho Jain
imported,Uljhe Hue,Osho Jain
imported,Nindiya Re,Kaavish
anything,Hollow,Prateek Kuhad
anything,I Never Knew,Prateek Kuhad
