## Deezer playlist to csv

In [1]:
import time
import requests
import pandas as pd


def fetch_with_retry(url, max_retries=5, delay=2):
    """
    Effectue une requête GET avec des tentatives de retry en cas de dépassement de quota.
    """
    for attempt in range(max_retries):
        response = requests.get(url)
        data = response.json()
        if "error" in data and data["error"].get("code") == 4:  # Quota limit exceeded
            print(f"Quota exceeded. Retry {attempt + 1}/{max_retries} in {delay}s...")
            time.sleep(delay)
        else:
            return data
    raise Exception(f"Failed to fetch data from {url} after {max_retries} retries.")

In [2]:
playlist_id_list = [
                    # Genre
                    "11164434904", #POJ
                    "11109920664", #POE
                    "11883027301", #POF
                    "11109921064", #POK
                    "11109919544", #JRK
                    "13877296541", #ERK
                    "11109919324", #RFR
                    "11109919084", #RUS
                    "11109929464", #JFK
                    "11109921684", #LOF
                    "11109921284", #MSC
                    "11034992782", #DEM

                    # Style
                    "11109918904", #?XD
                    "11109921884", #SLV
                    "11912452141", #CHV
                    "12126532871", #CTV
                    "11959053381", #HPV
                    "11917580341", #NRV
                    "13696246261", #FCV

                    # Theme
                    "11109922464", #CUD
                    "13877324381", #GMG
                    "11109924124", #ONE
                    "11109924684", #100
                    "12913702663", #ADD
                    "13301519863", #RCN
                    "11917626441", #SGM
                    "11109923824", #VCL
                    "11109923384", #LRB

                    "11109924384", #FAV
                    "11109924924", #JFA
                    "11537618704", #EFA
                    "13877385301", #FFA
                    ]
track = None
df_tracks = pd.DataFrame(columns=["id", "title", "album", "duration", "release_date", "rank", "bpm", "gain"])

for playlist_id in playlist_id_list:
    playlist = fetch_with_retry(f"https://api.deezer.com/playlist/{playlist_id}")
    playlist_name = f'in_{playlist["title"].replace(" ", "_")}'

    track_id_list = [playlist["tracks"]["data"][i]["id"] for i in range(len(playlist["tracks"]["data"]))]

    for track_id in track_id_list:
        if track_id not in df_tracks["id"].values:
            try:
                track = fetch_with_retry(f"https://api.deezer.com/track/{track_id}")
                artists = ([artist["name"] for artist in track["contributors"]])
                df_tracks = pd.concat([df_tracks, pd.DataFrame([{
                    "id": track["id"],
                    "title": track["title"],
                    "album": track["album"]["title"],
                    "duration": track["duration"],
                    "release_date": track["album"]["release_date"],
                    "rank": track["rank"],
                    "bpm": track["bpm"],
                    "gain": track["gain"],
                    f"{playlist_name}": True
                }])], ignore_index=True)
                for i in range(len(artists)):
                    df_tracks.loc[df_tracks.index[-1], f'artist_{i+1}'] = artists[i] if i < len(artists) else None

            except Exception as e:
                print(f"Failed to fetch track {track_id}: {e}")

        else:
            df_tracks.loc[df_tracks["id"] == track_id, f'{playlist_name}'] = True


artist_columns = [col for col in df_tracks.columns if col.startswith("artist")]
in_columns = [col for col in df_tracks.columns if col.startswith("in_")]
other_columns = [col for col in df_tracks.columns if col not in artist_columns and col not in in_columns]
df_tracks = df_tracks[other_columns + artist_columns + in_columns]

df_tracks.head()

  df_tracks = pd.concat([df_tracks, pd.DataFrame([{


Quota exceeded. Retry 1/5 in 2s...
Quota exceeded. Retry 1/5 in 2s...
Quota exceeded. Retry 1/5 in 2s...
Quota exceeded. Retry 1/5 in 2s...
Quota exceeded. Retry 1/5 in 2s...
Quota exceeded. Retry 1/5 in 2s...
Quota exceeded. Retry 1/5 in 2s...
Quota exceeded. Retry 1/5 in 2s...
Quota exceeded. Retry 1/5 in 2s...
Quota exceeded. Retry 1/5 in 2s...
Quota exceeded. Retry 1/5 in 2s...
Quota exceeded. Retry 1/5 in 2s...
Quota exceeded. Retry 1/5 in 2s...
Quota exceeded. Retry 1/5 in 2s...
Quota exceeded. Retry 1/5 in 2s...
Quota exceeded. Retry 1/5 in 2s...
Quota exceeded. Retry 1/5 in 2s...
Quota exceeded. Retry 1/5 in 2s...
Quota exceeded. Retry 1/5 in 2s...
Quota exceeded. Retry 1/5 in 2s...
Quota exceeded. Retry 1/5 in 2s...
Quota exceeded. Retry 1/5 in 2s...


Unnamed: 0,id,title,album,duration,release_date,rank,bpm,gain,artist_1,artist_2,...,in_CUD,in_ONE,in_ADD,in_RCN,in_SGM,in_VCL,in_LRB,in_FAV,in_JFA,in_EFA
0,347419311,Ai Wo Tsutaetaidatoka,Ai wo Tsutaetaidatoka,235,2017-05-03,310792,99.86,-7.5,aimyon,,...,,True,,,,,,,,
1,2274138747,ai no hana,ai no hana,241,2023-06-07,256356,0.0,-7.1,aimyon,,...,,,,,,,,,,
2,1425383492,wi(l)d-screen baroque,Gekijouban Shojokageki Revuestarlight Gekichuk...,260,2021-07-21,110931,0.0,-8.7,Daiba Nana(CV:Moeka Koizumi),,...,,,,,,,True,True,True,
3,135189966,Daze（Re Ver.）,Mekakucity M's 1 ～Mekakucity Actors Vocal & So...,233,2015-04-01,166410,197.82,-6.0,JIN,Maria,...,,,,,,,,True,True,
4,135189976,Kagerou Daze,Mekakucity M's 1 ～Mekakucity Actors Vocal & So...,243,2015-04-01,114657,192.3,-6.9,JIN,Taguchi Shoichi,...,,,,,,,,,,


In [3]:
duplicate_titles = df_tracks[df_tracks.duplicated(subset='title', keep=False)]
duplicate_titles

Unnamed: 0,id,title,album,duration,release_date,rank,bpm,gain,artist_1,artist_2,...,in_CUD,in_ONE,in_ADD,in_RCN,in_SGM,in_VCL,in_LRB,in_FAV,in_JFA,in_EFA
56,1400300352,Phoenix,TOKYO,248,2021-06-23,400378,0.0,-7.6,BURNOUT SYNDROMES,,...,,,,,,,,,,
94,2205268347,WORK,WORK,201,2023-04-01,21735,0.0,-9.2,ꉈꀧ꒒꒒ꁄꍈꍈꀧ꒦ꉈ ꉣꅔꎡꅔꁕꁄ,Sheena Ringo,...,,True,,,,,,,,
105,1100228952,Flamingo,STRAY SHEEP,196,2020-08-05,295767,0.0,-9.1,Kenshi Yonezu,,...,,,,,,,,True,True,
107,118072614,Flamingo,Flamingo,197,2014-09-23,59805,178.21,-7.5,Kero Kero Bonito,,...,,,,,,,,True,True,
196,1228437032,Nightmare,HOPE,223,2021-02-17,17958,0.0,-6.4,Seven Billion Dots,,...,,,,,,,,,,
250,1914090367,UNSTOPPABLE,R·I·O·T,230,2018-12-12,25364,0.0,-6.2,RAISE A SUILEN,,...,,,,,,,,,,
253,1786490627,UNSTOPPABLE,ERA,230,2020-08-19,104284,0.0,-6.2,RAISE A SUILEN,,...,,,,,,,,,,
280,1228409662,Crazy,PURE,190,2018-11-28,11678,0.0,-9.0,Taichi Mukai,,...,,,,,,,,,,
294,1100435222,STAY,eyes,204,2020-05-27,88683,0.0,-8.5,milet,,...,,,,,,,,,,
320,593087512,The Greatest Show,The Greatest Showman: Reimagined (Deluxe),302,2017-12-08,165528,157.8,-8.6,Hugh Jackman,Keala Settle,...,,,,,,,,,,


In [4]:
all_artists = df_tracks[artist_columns].stack().reset_index(level=1, drop=True).to_frame('artist')
all_artists['in_FAV'] = df_tracks.loc[all_artists.index, 'in_FAV'].values

artist_counts = all_artists['artist'].value_counts().to_frame('nombre_de_titres')
fav_counts = all_artists[all_artists['in_FAV'] == True]['artist'].value_counts().to_frame('nombre_de_favoris')

artist_summary = artist_counts.join(fav_counts, how='left').fillna(0)
artist_summary['coef'] = round((artist_summary['nombre_de_titres'] * 0.1) + (artist_summary['nombre_de_favoris'] * 0.25), 3)
artist_summary.reset_index(inplace=True)
artist_summary.rename(columns={'index': 'artiste'}, inplace=True)
artist_summary.sort_values(by='coef', ascending=False, inplace=True)
artist_summary

Unnamed: 0,artist,nombre_de_titres,nombre_de_favoris,coef
0,milet,44,13.0,7.65
1,League of Legends,24,9.0,4.65
2,MIYAVI,24,7.0,4.15
4,Shayfer James,14,6.0,2.90
5,NateWantsToBattle,14,5.0,2.65
...,...,...,...,...
627,Sheet Music Boss,1,0.0,0.10
626,Ari Pulkkinen,1,0.0,0.10
625,C418,1,0.0,0.10
624,Robert White,1,0.0,0.10


In [8]:
# Create a dictionary to map artist names to their order in artist_summary
artist_order = {artist: i for i, artist in enumerate(artist_summary['artist'])}

# Define a function to get the minimum order of artists in a track
def get_min_artist_order(row):
  artist_orders = [artist_order.get(row[artist], float('inf')) for artist in artist_columns if row[artist] in artist_order]
  return min(artist_orders) if artist_orders else float('inf')

# Apply the function to df_tracks to get the order for each track
df_tracks['artist_order'] = df_tracks.apply(get_min_artist_order, axis=1)

# Sort df_tracks by the artist_order
df_tracks_sorted = df_tracks.sort_values(by=['artist_order', 'album']).drop(columns='artist_order')

df_tracks_sorted.head()

Unnamed: 0,id,title,album,duration,release_date,rank,bpm,gain,artist_1,artist_2,...,in_CUD,in_ONE,in_ADD,in_RCN,in_SGM,in_VCL,in_LRB,in_FAV,in_JFA,in_EFA
120,2383115925,Living My Life,5am,225,2023-08-30,73926,0.0,-7.6,milet,,...,,,,,,,,,,
300,2383115935,Noel In July,5am,286,2023-08-30,88865,0.0,-9.8,milet,,...,,,,,,,,,,
301,2383115985,Hey Song,5am,211,2023-08-30,114814,0.0,-8.5,milet,,...,,,,,,,,True,True,
302,2383115995,Flare,5am,234,2023-08-30,51443,0.0,-7.9,milet,,...,,,,,,,,,,
303,2383116025,HELL CLUB,5am,230,2023-08-30,72351,0.0,-8.1,milet,,...,,,,,,,,,,
