# Music Popularity Analysis

<hr style="border:2px solid black">

## Notebook 04 - last.fm API

---

### Import libraries

In [1]:
import os
import sys
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests

In [2]:
def num_uniques(ser):
    """Return the number of distinct values in ``ser``.

    Parameters
    ----------
    ser : object exposing a ``unique()`` method (e.g. a pandas Series).

    Returns
    -------
    int or str
        ``len(ser.unique())`` when that can be computed, otherwise the
        sentinel string ``"Not unique check-able"`` (for example when the
        values are unhashable or ``ser`` has no ``unique`` method).
    """
    try:
        return len(ser.unique())
    except Exception:
        # Deliberately best-effort, but only for ordinary errors: a bare
        # ``except:`` would also swallow KeyboardInterrupt / SystemExit and
        # make a long-running summary impossible to interrupt.
        return "Not unique check-able"

In [3]:
def summarize_df(df):
    """Print a quick textual overview of a DataFrame.

    Emits, in order: the shape, the column dtypes, a rich ``head()`` preview,
    the ``describe()`` statistics, and one line per column with the number of
    unique values as reported by ``num_uniques``.
    """
    n_rows, n_cols = df.shape

    # Overall dimensions.
    print("======DATA SUMMARY======")
    print("{} rows by {} columns".format(n_rows, n_cols))

    # Column names with their dtypes.
    print("\n======COLUMNS======")
    print(df.dtypes)

    # First rows, rendered through the notebook's rich display.
    print("\n======PREVIEW======")
    display(df.head())

    # Descriptive statistics as plain text.
    print("\n======NUMERICAL COL SUMMARY======")
    print(df.describe())
    print("\n")

    # Cardinality of every column.
    for column in df:
        unique_count = num_uniques(df[column])
        print("{}: {} unique values".format(column, unique_count))

---
### Read in data

In [4]:
# Read mb_db_songs_sample.parquet into a DataFrame. The path is relative, so
# the file must be in the notebook's working directory.
mb_db_songs = pd.read_parquet('mb_db_songs_sample.parquet')

In [5]:
# Print shape, dtypes, a head() preview, describe() statistics and per-column
# unique counts for the loaded frame.
summarize_df(mb_db_songs)

90191 rows by 14 columns

track_id               int64
track_gid             object
recording_id           int64
medium_id              int64
track_name            object
recording_gid         object
isrc                  object
release_id             int64
release_gid           object
release_name          object
artist_credit_id       int64
barcode               object
artist_credit_name    object
date_year              int64
dtype: object



Unnamed: 0,track_id,track_gid,recording_id,medium_id,track_name,recording_gid,isrc,release_id,release_gid,release_name,artist_credit_id,barcode,artist_credit_name,date_year
5138944,30694977,f315f0e6-8b85-4af2-b18b-6015e552d422,3485200,2870106,What the World Needs Now Is Love (Dionne Sings...,e852fa84-c2be-4104-90db-8ef118566f28,USCMG9800120,2643114,56d792d8-2482-43d8-a552-8ab13566a106,She’s Back,16172,634164608521,Dionne Warwick,2019
6379854,28274156,fed2cf3e-2053-4282-ae8a-aca89ded4027,24794010,2626676,なんてこった,b51f01d5-fce5-44ae-ad0b-b32ac9a54e0b,JPVI01900906,2424086,5c8fd7dd-e054-44b3-b7d4-1b422b8c344a,TVアニメ「私に天使が舞い降りた！」サウンドコレクション,1821450,4580325327977,伊賀拓郎,2019
7784570,36445759,52519c42-73c9-468b-b44a-6ff5ad88879b,30688971,3506956,"La nemica d’amore fatta amante, serenata à 3: ...",5cc269f2-b522-4b44-b568-e3d3e3d3e002,FR1P70201730,3218908,3fd04337-6682-4cf8-8e7c-d5330c7c933c,La nemica d’amore fatta amante,3124554,3760014196119,"Bononcini; Adriana Fernández, Martín Oro, Furi...",2021
7674635,33863813,6efedc41-368f-461b-b83c-0fcd2b1b2165,28792342,3214362,Chill Your Eyes,a04503c7-72ff-4ffc-9538-9e79a95564ec,DEW872001575,2953727,bcb6d88c-288a-48e0-a367-385c5fbbc879,"Coffee Bar Lounge, Vol. 20",1,4251794433321,Various Artists,2020
6516875,29738294,e467ccc6-dcc1-461c-82e4-69a10dcc7f4b,25854890,2774306,Ouverture,7133e0a5-6c17-4478-92a5-124baf17e4b6,CAA4H1903001,2557577,56cd6377-bc3e-4f26-8300-98bc1f879263,De temps et de vents,1599253,3616402095765,Bodh’aktan,2019



           track_id  recording_id     medium_id    release_id  \
count  9.019100e+04  9.019100e+04  9.019100e+04  9.019100e+04   
mean   3.316650e+07  2.374400e+07  3.145906e+06  2.892086e+06   
std    2.983588e+06  9.124852e+06  3.241751e+05  2.928624e+05   
min    1.421060e+05  2.000000e+01  9.396420e+05  9.396420e+05   
25%    3.073723e+07  2.450280e+07  2.874212e+06  2.644973e+06   
50%    3.319871e+07  2.690493e+07  3.140600e+06  2.886407e+06   
75%    3.575578e+07  2.937638e+07  3.425700e+06  3.144711e+06   
max    3.808750e+07  3.184534e+07  3.700229e+06  3.395437e+06   

       artist_credit_id     date_year  
count      9.019100e+04  90191.000000  
mean       1.209567e+06   2019.971095  
std        1.172992e+06      0.933358  
min        1.000000e+00   2019.000000  
25%        5.638000e+03   2019.000000  
50%        9.341290e+05   2020.000000  
75%        2.405504e+06   2021.000000  
max        3.256603e+06   2121.000000  


track_id: 89285 unique values
track_gid: 89285 uniq

In [6]:
# The last.fm API key is a credential and must not be stored in the notebook.
# It is read from the LASTFM_API_KEY environment variable instead; set that
# variable before starting the kernel.
# NOTE: a key was previously hardcoded in this cell. It remains in the
# notebook's history and should be revoked and replaced.
api_key = os.environ.get('LASTFM_API_KEY', '').strip()
if not api_key:
    # Fail fast: with an empty key every lookup would come back without an
    # 'album' entry and the crawl would silently collect nothing.
    raise RuntimeError(
        'LASTFM_API_KEY is not set; export your last.fm API key in the '
        'environment before running this notebook.'
    )

In [7]:
# Distinct release_gid values from the song sample; each one is sent as the
# `mbid` parameter of the album lookup further down.
mbids = mb_db_songs['release_gid'].unique()

In [117]:
# Crawl state shared with the fetch loop below. Re-running this cell resets
# all progress, so only run it when starting a crawl from scratch.
# One row (list) per track found.
rel_name_artist = []
# Position in `mbids` of the album being processed; the loop starts from here.
j = 0
# Number of albums whose response contained a 'tracks' entry.
i = 0

In [None]:
# Initialize from file

In [124]:
_LASTFM_API_URL = 'https://ws.audioscrobbler.com/2.0/'
_REQUEST_TIMEOUT_SECS = 30   # without a timeout a stalled connection hangs forever
_MAX_BAD_BODY_RETRIES = 5    # non-JSON bodies are retried this many times, then skipped


def _fetch_album_info(mbid, api_key):
    """Return the decoded album.getInfo JSON payload for one release MBID.

    Network-level failures (connection errors, timeouts) are retried
    indefinitely with a linearly growing wait of 30, 60, 90, ... seconds.
    A response whose body is not valid JSON is retried up to
    ``_MAX_BAD_BODY_RETRIES`` times and then treated as "no data" so that a
    single bad response cannot abort or stall the whole crawl.

    Returns a dict; it is empty when no usable payload could be obtained.
    """
    query = {
        'method': 'album.getInfo',
        'api_key': api_key,
        'mbid': mbid,
        'format': 'json',
    }
    retries = 1
    bad_bodies = 0
    while True:
        try:
            # `params=` lets requests URL-encode the values.
            response = requests.get(_LASTFM_API_URL, params=query,
                                    timeout=_REQUEST_TIMEOUT_SECS)
        except requests.exceptions.RequestException as err:
            failure = err
        else:
            try:
                # Decode once; callers reuse the resulting dict.
                payload = response.json()
            except ValueError as err:
                bad_bodies += 1
                if bad_bodies > _MAX_BAD_BODY_RETRIES:
                    print(f'Skipping {mbid}: response was not valid JSON ({err!r})')
                    return {}
                failure = err
            else:
                return payload if isinstance(payload, dict) else {}

        wait = retries * 30
        print(f'Error! Waiting {wait} secs and re-trying...')
        print(f'    cause: {failure!r}')
        sys.stdout.flush()
        time.sleep(wait)
        retries += 1


def _album_track_rows(mbid, album):
    """Build one output row per track of an album payload that has 'tracks'."""
    tracks = album['tracks']['track']
    if isinstance(tracks, dict):
        # A single track arrives as a bare dict rather than a list.
        tracks = [tracks]
    rows = []
    for track in tracks:
        url_parts = track['url'].split('/')
        rows.append([mbid, track['name'], album['artist'], url_parts[-1],
                     url_parts[-3], track['url'], track['artist']['name']])
    return rows


# `j` always points at the next album to process, so this cell can be re-run
# after an interruption and continues where it stopped.
while j < len(mbids):
    mbid = mbids[j]

    album = _fetch_album_info(mbid, api_key).get('album')
    has_tracks = isinstance(album, dict) and 'tracks' in album
    album_rows = _album_track_rows(mbid, album) if has_tracks else []

    # Small random pause between requests.
    time.sleep(np.random.random()*0.1)

    # Record the album's rows and advance the counters together, after all
    # slow work is done, so an interrupted run does not append the same
    # album twice on resume.
    if has_tracks:
        i += 1
        rel_name_artist.extend(album_rows)
    if j % 100 == 0:
        print("{} albums searched, {} albums found, {} songs found".format(j, i, len(rel_name_artist)))
    j += 1

39700 albums searched, 33893 albums found, 461319 songs found
39800 albums searched, 33982 albums found, 462207 songs found
39900 albums searched, 34070 albums found, 463040 songs found
40000 albums searched, 34155 albums found, 463778 songs found
40100 albums searched, 34241 albums found, 464588 songs found
40200 albums searched, 34330 albums found, 465434 songs found
40300 albums searched, 34414 albums found, 466101 songs found
40400 albums searched, 34505 albums found, 466984 songs found
40500 albums searched, 34586 albums found, 467856 songs found
40600 albums searched, 34666 albums found, 468740 songs found
40700 albums searched, 34758 albums found, 469638 songs found
40800 albums searched, 34845 albums found, 470369 songs found
40900 albums searched, 34930 albums found, 471092 songs found
41000 albums searched, 35017 albums found, 471975 songs found
41100 albums searched, 35107 albums found, 472811 songs found
41200 albums searched, 35196 albums found, 473642 songs found
41300 al

In [125]:
# Write the collected track rows to rel_name_artist.csv in the working
# directory. Column labels are assigned by position to the 7-element rows
# appended by the fetch loop; to_csv's default also writes the integer index
# as an unnamed first column.
# NOTE(review): the 3rd element of each row is the album-level 'artist' field
# and the 7th is the track's own artist name, yet they are labelled
# 'artist_name' and 'album_artist_name' respectively. The labels look
# swapped; confirm against downstream consumers of the CSV before renaming.
pd.DataFrame(rel_name_artist, columns=['release_gid', 'track_name', 'artist_name', 'track_name_url', 'artist_name_url', 'url', 'album_artist_name']).to_csv('rel_name_artist.csv')