In [2]:
import requests
import pandas as pd
import base64
import json
from dotenv import load_dotenv
import os
from pathlib import Path
import warnings
warnings.filterwarnings("ignore")


In [2]:
def get_access_token(env_vars_path):
    print("Getting access token...")
    dotenv_path = Path(env_vars_path)
    load_dotenv(dotenv_path=dotenv_path)
    client_id = os.getenv("CLIENT_ID")
    client_secret = os.getenv("CLIENT_SECRET")

    # ---- Get Access Token ----
    auth_str = f"{client_id}:{client_secret}"
    b64_auth_str = base64.b64encode(auth_str.encode()).decode()
    token_url = "https://accounts.spotify.com/api/token"
    data = {"grant_type": "client_credentials"}
    headers = {"Authorization": f"Basic {b64_auth_str}",
            "Content-Type": "application/x-www-form-urlencoded"}

    response = requests.post(token_url, data=data, headers=headers)
    json_result=json.loads(response.content)
    access_token =json_result["access_token"]
    return access_token

access_token=get_access_token(".env")

Getting access token...


In [None]:
df=pd.read_csv(r"C:\Users\21650\Downloads\New folder (4)\spotify_songs.csv",header=0)
df.head()

In [None]:
def get_popularity_scores(token, ids_list):
    headers = {"Authorization": f"Bearer {token}"}
    popularity_scores = []

    # Loop through the list in batches of 50
    for i in range(0, len(ids_list), 50):
        batch_ids = ids_list[i:i+50]
        ids_string = ",".join(batch_ids)
        url = f'https://api.spotify.com/v1/tracks?ids={ids_string}'
        result = requests.get(url, headers=headers)
        json_result = result.json()

        # Append popularity scores in order
        for track in json_result["tracks"]:
            popularity_scores.append(track["popularity"])

    return popularity_scores

# Usage
ids_list = df["track_id"].tolist()
popularity_list = get_popularity_scores(access_token, ids_list)

# Add it back to your DataFrame
df["popularity"] = popularity_list

In [None]:
df.to_csv("all_songs_updated.csv",index=False)

In [3]:
df=pd.read_csv(r"all_songs_updated.csv")
pd.set_option("display.max_columns",None)
df.head()

Unnamed: 0,track_id,track_name,track_artist,track_popularity,track_album_id,track_album_name,track_album_release_date,playlist_name,playlist_id,playlist_genre,playlist_subgenre,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,popularity
0,6ZMda6mscE8eRkuwlw8yFB,Bedsitter,Soft Cell,18,1xza4j8vMQdGqqQdHa30lI,Non Stop Erotic Cabaret (Deluxe Edition),1981-12,"Maxi Pop GOLD (New Wave, Electropop, Synth Po...",2nRWtTI9a2LWjJ9Wy3JZs5,pop,electropop,0.63,0.526,7,-9.121,1,0.0389,0.316,0.00538,0.0747,0.61,145.271,215333,12
1,4GDmAT5ZZyHdBo32UYDIvM,Tainted Love,Soft Cell,53,1xza4j8vMQdGqqQdHa30lI,Non Stop Erotic Cabaret (Deluxe Edition),1981-12,Classic Rock Radio,4lIywN6kXl9KPm3OQ8u8G7,rock,classic rock,0.518,0.51,0,-8.057,0,0.0352,0.476,0.0,0.292,0.651,144.543,153880,44
2,4bnNwCbIo9vxlIpu88KV0K,Der Mussolini - 1998 - Remaster,DAF,2,2nQmZO698ZhHOkxUaCZPJS,Alles Ist Gut,1981-03,Gothic / Industrial / Mittelalter / EBM / Futu...,53CmFroG6MWR5reOOXJX6B,pop,electropop,0.593,0.796,6,-9.404,1,0.0318,0.124,0.494,0.116,0.867,156.308,235107,0
3,6KrMDGTp9CtkMLKyftDUTD,Wavelength - Remastered,Van Morrison,44,47nhRTYeYBKV5mEfXLV4fb,Wavelength,1978-09,Classic Rock Retrogamer,6gUFdcGzKAHyDXY9TKC6cP,rock,classic rock,0.541,0.863,0,-6.999,1,0.0703,0.0736,0.00115,0.463,0.522,142.829,346987,22
4,3JXOMZdeJ7uMR7UfC3BN5T,Top Of The Bill - Live,Scorpions,2,3fWEWigB3tgsGAUN4kNIKf,Tokyo Tapes (50th Anniversary Deluxe Edition),1978-08,This Is Scorpions,37i9dQZF1DWViGKI2U5P2K,rock,hard rock,0.365,0.895,3,-6.118,0,0.129,0.00445,0.675,0.913,0.232,142.665,407333,0


In [4]:
df[df["track_id"].duplicated(keep=False)]

Unnamed: 0,track_id,track_name,track_artist,track_popularity,track_album_id,track_album_name,track_album_release_date,playlist_name,playlist_id,playlist_genre,playlist_subgenre,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,popularity
5,2ccUQnjjNWT0rsNnsBpsCA,Surrender,Cheap Trick,69,5w20U3G3GyWiPvvDeVzkhN,Heaven Tonight,1978-04,The Sound of Album Rock,3yj9YnQGTdnFuKbDyXGDi6,rock,album rock,0.524,0.955,0,-7.081,1,0.0411,0.00717,0.000009,0.6590,0.869,133.849,253733,67
6,2ccUQnjjNWT0rsNnsBpsCA,Surrender,Cheap Trick,69,5w20U3G3GyWiPvvDeVzkhN,Heaven Tonight,1978-04,Classic Rock Legends,3NcxM1LJJdua8AcRxtijNY,rock,classic rock,0.524,0.955,0,-7.081,1,0.0411,0.00717,0.000009,0.6590,0.869,133.849,253733,67
11,3Sm5TYFgMXyXwgAcY2xweX,So Into You,Atlanta Rhythm Section,54,5vC0MxBXL4i9iowdXSx4yO,A Rock And Roll Alternative,1976-12,The Sound of Album Rock,3yj9YnQGTdnFuKbDyXGDi6,rock,album rock,0.622,0.409,5,-13.484,0,0.0362,0.65000,0.023000,0.1420,0.785,86.175,260867,63
12,3Sm5TYFgMXyXwgAcY2xweX,So Into You,Atlanta Rhythm Section,54,5vC0MxBXL4i9iowdXSx4yO,A Rock And Roll Alternative,1976-12,Soft Rock Drive,37i9dQZF1DX6TIU4D13lOY,rock,classic rock,0.622,0.409,5,-13.484,0,0.0362,0.65000,0.023000,0.1420,0.785,86.175,260867,63
20,683b4ikwa62JevCjwrmfg6,Moondance - 2013 Remaster,Van Morrison,69,7diHYi0CglGJekoM3KaWBK,Moondance (Deluxe Edition),1970-02,I didn’t know perm stood for permanent (wave),3e6gYPyrTbaB8BWgSHCt5j,rock,permanent wave,0.606,0.282,9,-12.207,0,0.0339,0.50100,0.000008,0.1520,0.563,67.409,274040,67
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32811,63OFKbMaZSDZ4wtesuuq6f,Born To Be Wild,Steppenwolf,73,6GLHwIp1K3u1zdLOdPRG0W,Steppenwolf,1968,Rock Classics,37i9dQZF1DWXRqgorJj26U,rock,classic rock,0.438,0.710,2,-12.412,1,0.0919,0.26200,0.537000,0.2210,0.530,145.928,210373,13
32812,1Qc7zCw6k2KTvSEl4IKSdP,The Pusher,Steppenwolf,54,6GLHwIp1K3u1zdLOdPRG0W,Steppenwolf,1968,Blues Rock,56dbowk1V5ycS5jW7DSvi5,rock,classic rock,0.467,0.382,0,-13.807,1,0.0264,0.23600,0.125000,0.1870,0.704,81.754,350467,1
32813,63OFKbMaZSDZ4wtesuuq6f,Born To Be Wild,Steppenwolf,73,6GLHwIp1K3u1zdLOdPRG0W,Steppenwolf,1968,Hard Rock Cafe Classics,3sv5ViKoPDNnZRsklzEGMN,rock,hard rock,0.438,0.710,2,-12.412,1,0.0919,0.26200,0.537000,0.2210,0.530,145.928,210373,13
32827,4fQMGlCawbTkH9yPPZ49kP,Green Onions,Booker T. & the M.G.'s,64,2aGFVLz0oQPa3uxCfq9lcU,Green Onions,1962,Supernatural Classic Rock,6oghIlByD49KFGNmNU8GSH,rock,classic rock,0.816,0.514,10,-8.741,1,0.0339,0.76700,0.949000,0.0899,0.912,136.837,176333,69


In [5]:
# Note: id columns can be deleted since they can be replaced with name columns,
# however later on in machine learning id columns can prove helpful in identifying songs/albums/playlists
# without relying on the actual names which can be unreliable in some cases

# df.drop(columns=["track_id","track_album_id","playlist_id","playlist_name"],inplace=True)
# df.tail()

In [6]:
#rename popularity 2023 and 2025 columns
df.rename(columns={"track_popularity":"popularity_2023","popularity":"popularity_2025"},inplace=True)
df.describe(include="object")

Unnamed: 0,track_id,track_name,track_artist,track_album_id,track_album_name,track_album_release_date,playlist_name,playlist_id,playlist_genre,playlist_subgenre
count,32833,32828,32828,32833,32828,32833,32833,32833,32833,32833
unique,28356,23448,10692,22545,19741,4531,449,471,6,24
top,7BKLCZ1jbUBVqRi2FVlTVw,Poison,Martin Garrix,5L1xcowSxwzFUSJzvyMp48,Greatest Hits,1/10/2020,Indie Poptimism,4JkkvMpVl4lSioqQjeAL0q,edm,progressive electro house
freq,10,22,161,42,139,270,308,247,6043,1809


In [7]:
#trim categorical columns
df_obj=df.select_dtypes(include="object")
obj_cols=df_obj.columns.tolist()
for col in obj_cols:
    df[col]=df[col].str.strip()

#recheck for null entries
df.isna().sum()

track_id                     0
track_name                   5
track_artist                 5
popularity_2023              0
track_album_id               0
track_album_name             5
track_album_release_date    65
playlist_name                0
playlist_id                  0
playlist_genre               0
playlist_subgenre            0
danceability                 0
energy                       0
key                          0
loudness                     0
mode                         0
speechiness                  0
acousticness                 0
instrumentalness             0
liveness                     0
valence                      0
tempo                        0
duration_ms                  0
popularity_2025              0
dtype: int64

In [8]:
df[df["track_album_release_date"].isna()]
print("rows count before removal",len(df))
df.drop(df[df["track_album_release_date"].isna()].index,inplace=True) #or use dropna() which is easier
print("rows count after removal",len(df))


rows count before removal 32833
rows count after removal 32768


In [9]:
#will be handled later
# import numpy as np
# for col in ["track_name","track_artist","track_album_name"]:
#     df[col]=df[col].replace(np.nan,"unkown")

In [10]:
df.isna().sum()

track_id                    0
track_name                  5
track_artist                5
popularity_2023             0
track_album_id              0
track_album_name            5
track_album_release_date    0
playlist_name               0
playlist_id                 0
playlist_genre              0
playlist_subgenre           0
danceability                0
energy                      0
key                         0
loudness                    0
mode                        0
speechiness                 0
acousticness                0
instrumentalness            0
liveness                    0
valence                     0
tempo                       0
duration_ms                 0
popularity_2025             0
dtype: int64

In [11]:
import datetime as dt
pd.set_option("display.max_rows",10)
dates=df["track_album_release_date"].value_counts()
dates.reset_index()

Unnamed: 0,track_album_release_date,count
0,1/10/2020,270
1,11/22/2019,244
2,12/6/2019,235
3,12/13/2019,220
4,1/1/2013,219
...,...,...
4514,11/22/2006,1
4515,11/13/2006,1
4516,9/15/2006,1
4517,11/25/2017,1


In [12]:
mask = df["track_album_release_date"].str.fullmatch(r"\d{4}")
year_only_rows = df[mask]
year_only_rows

Unnamed: 0,track_id,track_name,track_artist,popularity_2023,track_album_id,track_album_name,track_album_release_date,playlist_name,playlist_id,playlist_genre,playlist_subgenre,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,popularity_2025
30978,3VM1WBA5RfhvBIAlXtrwxC,真っ赤な太陽,RAMMELLS,29,2BiaouZAxi9H6n2cLEc16d,真っ赤な太陽,2019,Best of 2019 Dance Pop: Japan,37i9dQZF1DXdOtZGKonFlM,pop,dance pop,0.612,0.843,8,-4.552,1,0.0431,0.1520,0.001630,0.4020,0.322,125.997,181187,1
30979,7wFybC8jBH3zE139OpCtpG,Lost in the Fire,Gesaffelstein,20,3gXi45Aift9hCoB6lvuVJb,Lost in the Fire,2019,Electropop 2019,4Bi8VLtaSu0JILliif8lH6,pop,electropop,0.663,0.675,2,-12.159,1,0.0359,0.0863,0.001330,0.1170,0.176,101.004,202093,1
30980,1F2HXIJrE2Xn81OzT2zZvW,Unforgettable,French Montana,14,1udLKdsDr7GRmJU6Cxyt7j,Unforgettable,2017,Indie Poptimism,2QiMewRbSavfZ9MSAYz2h6,pop,indie poptimism,0.726,0.769,6,-5.043,1,0.1230,0.0293,0.010100,0.1040,0.733,97.985,233833,1
30981,0rU1aBF8cQ8xS3H25qWuMz,Kill Jill,Big Boi,1,71tBINhopdR5mLZuLWmmje,Kill Jill,2017,"🔥💵 Hip Hop, Rap, Heavy 808's - New School",3jPkaExIWXQWklcmmF5180,rap,southern hip hop,0.771,0.527,7,-6.798,0,0.2320,0.2050,0.000000,0.0978,0.113,139.934,266857,0
30982,5aRwivY58BBZXIorDujeNd,Bae - KVR Remix,Marcus & Martinus,0,55o36ECoAEuHRrEy7Sicar,Bae (KVR Remix),2017,EDM TROPICAL,3pS63EDS40FVGYL41zAcU4,latin,tropical,0.762,0.562,0,-8.648,0,0.0363,0.0800,0.000000,0.0806,0.683,101.965,197653,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32763,6JWlRd8ZFnXhCzg0mi3N5V,Tin Man,America,38,64sDz3NRE1xYuaIZWAyu4N,America's Greatest Hits - History,1972,70's Classic Rock,76lrxCrKrGDkDDf3SVPnl3,rock,classic rock,0.646,0.531,7,-10.785,1,0.0671,0.5280,0.013300,0.0836,0.655,172.020,209867,44
32764,4wumYSGWMGz0EZazVy9sRd,Ventura Highway,America,39,64sDz3NRE1xYuaIZWAyu4N,America's Greatest Hits - History,1972,70's Classic Rock,76lrxCrKrGDkDDf3SVPnl3,rock,classic rock,0.612,0.761,2,-5.929,1,0.0283,0.1040,0.000200,0.1220,0.838,130.707,211680,46
32765,0Osbam159ngkQww6isBBQJ,A Horse with No Name,America,50,64sDz3NRE1xYuaIZWAyu4N,America's Greatest Hits - History,1972,70's Classic Rock,76lrxCrKrGDkDDf3SVPnl3,rock,classic rock,0.654,0.506,11,-17.180,0,0.0535,0.6860,0.015500,0.1550,0.831,123.179,252240,48
32766,2DnJjbjNTV9Nd5NOa1KGba,You're so Vain,Carly Simon,75,79x0PRGIZv33znrCkPkCZ5,No Secrets,1972,70's Classic Rock,76lrxCrKrGDkDDf3SVPnl3,rock,classic rock,0.659,0.678,0,-8.180,1,0.0313,0.1570,0.000007,0.0784,0.647,106.186,258411,77


In [13]:
import pandas as pd
import numpy as np
from dateutil import parser

def clean_and_standardize_date(x):
    """
    Tries to parse any date-like value into a standardized datetime object.
    Returns NaT for unrecognized or invalid values.
    """
    if pd.isna(x):  # Handle NaN or None
        return np.nan
    
    if isinstance(x, (int, float)):
        # Sometimes years are stored as integers (e.g., 2021)
        if 1000 <= x <= 2100:
            return pd.Timestamp(year=int(x), month=1, day=1)
        return np.nan

    if isinstance(x, str):
        x = x.strip()
        # Handle year-only strings
        if x.isdigit() and 1000 <= int(x) <= 2100:
            return pd.Timestamp(year=int(x), month=1, day=1)
        try:
            return parser.parse(x, fuzzy=True)
        except Exception:
            return np.nan
    
    return np.nan  # For weird objects

# Apply the function
df["track_album_release_date"] = df["track_album_release_date"].apply(clean_and_standardize_date)

# Optional: format as standard ISO strings
df["track_album_release_date"] = df["track_album_release_date"].dt.strftime("%Y-%m-%d")

df["track_album_release_date"]=pd.to_datetime(df["track_album_release_date"])

In [14]:
print(df["track_album_release_date"].isna().sum())
print(df["track_album_release_date"].dtype)

0
datetime64[ns]


In [15]:
pd.set_option("display.max_rows",10)
dates=df["track_album_release_date"].value_counts()
dates.reset_index().sort_values("track_album_release_date")

Unnamed: 0,track_album_release_date,count
4453,1957-01-01,1
2879,1957-03-15,1
4452,1958-03-21,1
4451,1961-10-26,1
4450,1963-03-22,1
...,...,...
229,2020-01-15,28
297,2020-01-16,21
35,2020-01-17,131
2851,2020-01-20,2


In [16]:
def get_season(input_date):
    month = input_date.month
    day = input_date.day

    if (month == 3 and day >= 21) or (month == 4) or (month == 5) or \
       (month == 6 and day < 21):
        return "Spring"
    elif (month == 6 and day >= 21) or (month == 7) or (month == 8) or \
         (month == 9 and day < 21):
        return "Summer"
    elif (month == 9 and day >= 21) or (month == 10) or (month == 11) or \
         (month == 12 and day < 21):
        return "Autumn"
    else:
        return "Winter"

df["season"]=df["track_album_release_date"].apply(func=get_season)
df.loc[df.index.isin(year_only_rows.index), "season"] = None
df["season"].isna().sum()

np.int64(1790)

In [17]:
# Ensure the column is datetime
df["track_album_release_date"] = pd.to_datetime(df["track_album_release_date"], errors="coerce")

# Extract the release year
df["year"] = df["track_album_release_date"].dt.year

# Define a function to map year ranges to decades
def map_decade(year):
    if pd.isna(year):
        return "Unknown"
    elif 1950 <= year < 1960:
        return "1950s"
    elif 1960 <= year < 1970:
        return "1960s"
    elif 1970 <= year < 1980:
        return "1970s"
    elif 1980 <= year < 1990:
        return "1980s"
    elif 1990 <= year < 2000:
        return "1990s"
    elif 2000 <= year < 2010:
        return "2000s"
    elif 2010 <= year < 2021:
        return "2010s"
    elif year >= 2022:
        return "2020s"
    else:
        return "Before 1970"

# Apply the function
df["period"] = df["year"].apply(map_decade)


In [18]:
df["period"].value_counts().reset_index()

Unnamed: 0,period,count
0,2010s,23999
1,2000s,4077
2,1990s,2310
3,1980s,1306
4,1970s,934
5,1960s,139
6,1950s,3


In [19]:
#Create new categorical feature popularity_categorized 
bins=[0,20,60,100]
cats=["hardly recognizable","famous","hit"]
df["popularity_categorized"]=pd.cut(df["popularity_2025"],bins=bins,labels=cats,include_lowest=True)
df["popularity_categorized"]

#check for the popularity_categorized distribution
df["popularity_categorized"].value_counts().reset_index()

Unnamed: 0,popularity_categorized,count
0,hardly recognizable,16966
1,famous,10583
2,hit,5219


In [20]:
categorical_cols=["track_name","track_artist","track_album_name"]
for col in categorical_cols:
    df[col]=df[col].str.lower()

In [21]:
# Remove hashtags and numbers, and replace "#name?" with empty string
def clean_text(s):
    if pd.isna(s):
        return s
    s = str(s)
    s = s.replace("#name?", "")  # remove exact "#name?"
    s = s.replace("#-.*+'/",  "")       # remove remaining #
    return s.strip()

df["track_name"] = df["track_name"].apply(clean_text)
df["track_album_name"] = df["track_album_name"].apply(clean_text)

# Check results
df[["track_name", "track_album_name"]].head(10)


Unnamed: 0,track_name,track_album_name
0,bedsitter,non stop erotic cabaret (deluxe edition)
1,tainted love,non stop erotic cabaret (deluxe edition)
2,der mussolini - 1998 - remaster,alles ist gut
3,wavelength - remastered,wavelength
4,top of the bill - live,tokyo tapes (50th anniversary deluxe edition)
5,surrender,heaven tonight
6,surrender,heaven tonight
7,surrender,heaven tonight
8,imaginary lover,champagne jam
9,i want you to want me,in color


In [22]:
# Replace empty strings with NaN
df["track_name"].replace("", pd.NA, inplace=True)
df["track_album_name"].replace("", pd.NA, inplace=True)

# Drop rows with NaN in either column
df.dropna(subset=["track_name", "track_album_name"], inplace=True)

# Check the result
df[["track_name", "track_album_name"]].head()
df.reset_index(drop=True,inplace=True)


In [23]:
print("number of rows before grouping:",len(df))
df_grouped = (
    df.groupby("track_name", as_index=False)
      .agg({
          "playlist_genre": lambda x: "; ".join(sorted(set(x))),
          "track_album_name": "first",
          "playlist_subgenre": lambda x: "; ".join(sorted(set(x))),
          "track_artist": "first",
          "duration_ms": "mean",
          "track_artist":"first",
          "popularity_2023":"mean",
          "popularity_2025":"mean",
          "track_album_release_date":"first",
          "danceability":"mean",
		  "energy":"mean",
          "key":"first",
          "loudness":"mean",
          "mode":"first",
          "speechiness":"mean",
          "acousticness":"mean",
          "instrumentalness":"mean",
          "liveness":"mean",
          "valence":"mean",
          "tempo":"mean",
          "duration_ms":"mean",
          "season":"first",
          "popularity_categorized":"first",
          "year":"first",
          "period":"first"
      })
)
print("number of rows after grouping:",len(df_grouped))
df_grouped.head()

number of rows before grouping: 32762
number of rows after grouping: 23052


Unnamed: 0,track_name,playlist_genre,track_album_name,playlist_subgenre,track_artist,duration_ms,popularity_2023,popularity_2025,track_album_release_date,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,season,popularity_categorized,year,period
0,"""i tried for years... nobody listened""",rap,war,gangster rap,iceberg black,150909.0,18.0,0.0,2018-09-22,0.914,0.408,10,-6.712,0,0.141,0.0268,0.00179,0.116,0.0944,140.026,Autumn,hardly recognizable,2018,2010s
1,"""this is seagull….""",pop,smother earth,electropop,the snake corps,238227.0,34.0,35.0,1990-01-01,0.516,0.58,9,-13.288,0,0.0295,2e-06,0.857,0.11,0.235,135.903,Winter,famous,1990,1990s
2,#1 stunna,rap,i got that work,hip hop; southern hip hop,big tymers,281960.0,24.0,25.5,2000-01-01,0.552,0.8405,8,-4.9725,1,0.2845,0.0163,0.003655,0.258,0.565,89.0435,Winter,famous,2000,2000s
3,#nakama,r&b,#nakama,hip pop,xlii,192094.0,26.0,6.0,2019-12-25,0.797,0.97,3,-3.204,1,0.0545,0.385,0.000157,0.318,0.568,108.041,Winter,hardly recognizable,2019,2010s
4,#natural,latin,#natural,latin hip hop,paty cantú,227013.0,50.0,33.0,2017-10-20,0.8,0.836,0,-3.535,0,0.0568,0.114,0.0,0.134,0.816,97.023,Autumn,famous,2017,2010s


In [24]:
artist_pop = df_grouped.groupby("track_artist")["popularity_2023"].mean()
df_grouped["artist_avg_pop"] = df_grouped["track_artist"].map(artist_pop)

df_grouped['release_age'] = 2025 - df_grouped['year']  # years since release
df_grouped["artist_song_count"] = df_grouped.groupby("track_artist")["track_name"].transform("count")

df_grouped["track_album_release_date"]=pd.to_datetime(df_grouped["track_album_release_date"])

In [25]:
df.to_csv("2023_songs_cleaned.csv",index=False)
df_grouped.to_csv("songs_grouped_by_song.csv",index=False)