# Algorithm for sort music songs 

## Libraries and dependencies

In [1]:
import pandas as pd

In [2]:
import musicbrainzngs

In [3]:
import re

## Data Ingestion

In [4]:
path = 'data/spotify_data.csv'

In [5]:
# read a csv 
df = pd.read_csv(path)

In [6]:
df

Unnamed: 0,id,name,artists,album,album_id,album_artists,album_release_date,duration_ms,popularity,explicit,...,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature
0,2iUXsYOEPhVqEBwsqP70rE,Youngblood,['5 Seconds of Summer'],Youngblood (Deluxe),2D0Hi3Jj6RFnpWDcSa0Otu,['5 Seconds of Summer'],2018-06-15,203417,82,False,...,7,-5.114,0,0.4630,0.016900,0.000000,0.1240,0.1520,120.274,4
1,2v5lLKdZG0PsXGWfvigk55,Rewind.. (But I Love You),['WHIPPED CREAM'],Someone You Can Count On,5wj4dMktjUCPq8BfsQCMR2,['WHIPPED CREAM'],2023-03-08,173609,60,False,...,6,-8.346,0,0.0418,0.024800,0.795000,0.0821,0.0381,130.034,4
2,5jA67K9o5mEW5NzjRCdAUg,Breath,['Elohim'],Breath,4ZjGSYAVcd7TbZfFNUFWM8,['Elohim'],2023-04-07,200339,51,False,...,0,-3.569,1,0.0589,0.045000,0.000733,0.1000,0.2740,118.039,4
3,4461Ozpndhv2AjNqe6d0Ic,Forever - Pauline Herr Rework,"['Alison Wonderland', 'Pauline Herr']",Loner (Remixes),0MOgYRLnYJNdSd8EsqT4dJ,['Alison Wonderland'],2023-03-16,170095,48,False,...,11,-6.846,0,0.0328,0.198000,0.000003,0.3490,0.4280,139.787,4
4,46bI9wmq6kDJJ3yAqTvkzH,BACK ONLINE,"['MEMBA', 'pluko', 'EVAN GIIA', 'Biicla']",BACK ONLINE,4Jo971sLoS2lO4bf9hKe2j,"['MEMBA', 'pluko', 'EVAN GIIA']",2023-02-20,217846,52,False,...,4,-5.787,0,0.0710,0.145000,0.025400,0.0914,0.4280,130.031,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
557,5yyEGZbHFcFAXo9V8J79Od,Slow Dive,['CLAVVS'],No Saviors (Extended),2zUsbft0MRoEpVvEGtnk4Z,['CLAVVS'],2020-02-25,231143,43,False,...,10,-6.101,0,0.0315,0.200000,0.000067,0.1220,0.6000,140.015,4
558,02qBlMtpcv3t4cKhFjilk5,Devils I Know,['CLAVVS'],No Saviors (Extended),2zUsbft0MRoEpVvEGtnk4Z,['CLAVVS'],2020-02-25,215092,28,False,...,6,-3.616,0,0.0398,0.124000,0.010500,0.1590,0.6320,156.113,4
559,5KhZNvQ9eXo53ZKeQE8G2b,All Your Friends,"['Icarus', 'Quelle T']",All Your Friends,3eKaMTFMWyFwKIzOKCdJEo,"['Icarus', 'Quelle T']",2020-10-16,225306,20,False,...,5,-6.865,0,0.0498,0.256000,0.169000,0.1980,0.3750,98.001,4
560,4Bif5TdVlzTiEff3hTwS5W,Enter the Dreamland,['Meresha'],Look How Far,4bHyxz0m1csBnNZD1Ta1Jt,['Meresha'],2020-09-25,239666,26,False,...,5,-4.240,0,0.0354,0.000421,0.006720,0.1590,0.3090,119.997,4


### Getting Track name and artist name from Spotify data

In [7]:
# get track name and artist name from Spotify data
track_name = df['name']
artist_name = df['artists']

# now define a list called tracks with the next format:
# tracks = [{'title': 'track name', 'artist': 'artist name'}, ...]
tracks = []
for i in range(len(track_name)):
    tracks.append({'title': track_name[i], 'artist': artist_name[i]})

## MusicBrainz API 

In [8]:
# get the musicbrainz id for each track
musicbrainzngs.set_useragent('Example music app', '0.1', 'http://example.com/music')

### Getting IDs from MusicBrainz API

In [9]:
# Get MusicBrainz IDs for each track
for track in tracks:
    result = musicbrainzngs.search_recordings(
        query=track["title"],
        artist=track["artist"],
        limit=1
    )
    #print(result)
    track["mbid"] = result["recording-list"][0]["id"]

In [10]:
track_ids = [track["mbid"] for track in tracks]
track_info_list = []

### Getting Genre data

In [11]:
track_info_list = []

for track in tracks:
    recording = musicbrainzngs.get_recording_by_id(
        track["mbid"],
        includes=["artists", "tags"]
    )["recording"]

    genres = []
    if "artist-credit" in recording:
        artist_credit = recording["artist-credit"]
        if isinstance(artist_credit, list) and len(artist_credit) > 0:
            artist = artist_credit[0].get("artist", {})
            if "tag-list" in artist:
                tags = artist["tag-list"]
                print(tags)
                genres = re.findall(r"'name': '(.*?)'}", str(tags))
                   
    track_info = {
        "title": recording["title"],
        "artist": recording.get("artist-credit-phrase", ""),
        "genres": genres   
    }
    track_info_list.append(track_info)


[{'count': '2', 'name': '2010s'}, {'count': '2', 'name': 'pop'}, {'count': '4', 'name': 'pop punk'}, {'count': '8', 'name': 'pop rock'}, {'count': '1', 'name': 'power pop'}, {'count': '3', 'name': 'teen pop'}]
[{'count': '1', 'name': 'edm'}, {'count': '1', 'name': 'electronic'}]
[{'count': '1', 'name': 'warp'}]
[{'count': '1', 'name': 'dance-pop'}, {'count': '1', 'name': 'electronic'}, {'count': '1', 'name': 'hip hop'}, {'count': '1', 'name': 'pop rap'}, {'count': '1', 'name': 'synthpop'}]
[{'count': '1', 'name': '2020s'}, {'count': '1', 'name': 'dance'}, {'count': '1', 'name': 'edm'}, {'count': '1', 'name': 'electro house'}, {'count': '1', 'name': 'electronic'}, {'count': '2', 'name': 'electronic dance'}, {'count': '1', 'name': 'future bass'}, {'count': '2', 'name': 'house'}]
[{'count': '1', 'name': 'hard rock'}, {'count': '1', 'name': 'heavy metal'}, {'count': '3', 'name': 'instrumental rock'}, {'count': '2', 'name': 'metal'}, {'count': '3', 'name': 'neo-classical metal'}, {'count': 

In [12]:
df_track_info = pd.DataFrame(track_info_list)

In [18]:
df_track_info

Unnamed: 0,title,artist,genres
0,Youngblood,5 Seconds of Summer,"'pop', 'pop punk', 'pop rock', 'power pop'"
1,Rewind.. (But I Love You),WHIPPED CREAM,
2,Breath...Breath,Dale Cornelius,
3,Dodgeball (Lightyear remix),Pauline Herr,
4,Back Online,DJ Outblast,
...,...,...,...
557,Slow Dive,CLAVVS,
558,Devils I Know,CLAVVS,
559,All Your Friends,One Man Army,
560,Enter the Dreamland,Meresha,


In [16]:
# from genres drop frist and alast chracter of each string 
# convert genres to string
df_track_info['genres'] = df_track_info['genres'].astype(str)
df_track_info['genres'] = df_track_info['genres'].str[1:-1]
df_track_info.head(10)

Unnamed: 0,title,artist,genres
0,Youngblood,5 Seconds of Summer,"'pop', 'pop punk', 'pop rock', 'power pop'"
1,Rewind.. (But I Love You),WHIPPED CREAM,
2,Breath...Breath,Dale Cornelius,
3,Dodgeball (Lightyear remix),Pauline Herr,
4,Back Online,DJ Outblast,
5,You and Me,K. Hand,
6,U‐Huh,Tkay Maidza,"'electronic', 'hip hop', 'pop rap'"
7,Thirst (Hex Cougar remix),Alison Wonderland,"'dance', 'edm', 'electro house', 'electronic',..."
8,falling,chris†††,
9,Missing U,Divine,


In [17]:
# count all the rows where genres is " " 
df_track_info['genres'].value_counts()

genres
                                                                                                                  379
'dubstep', 'electro house'                                                                                         26
'2020s', 'contemporary r&b', 'english', 'hip hop', 'pop rap'                                                        7
'dubstep', 'edm'                                                                                                    6
'electronic'                                                                                                        5
                                                                                                                 ... 
'electronic', 'electropop', 'future bass'                                                                           1
'indie pop', 'indietronica', 'nuno', 'synth-pop'                                                                    1
'edm', 'electro house', 'future house', 'house'  