In [38]:
import requests
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
from datetime import date
import ipywidgets as widgets #https://towardsdatascience.com/bring-your-jupyter-notebook-to-life-with-interactive-widgets-bc12e03f0916

### Initialize
1. Get a temporary OAuth token from Spotify at this [link](https://developer.spotify.com/console/get-playlist/?playlist_id=37i9dQZEVXbLiRSasKsNU9&market=&fields=&additional_types=), then copy it into the `TOKEN` variable below. Make sure the token is surrounded by quotes ("").

In [39]:
# Bearer token sent in the Authorization header of every Spotify request below.
# NOTE(review): this is a hardcoded credential. Step 1 describes it as a temporary
# OAuth token, so it is presumably expired by now -- paste a fresh one before
# running, and avoid committing live tokens with the notebook.
TOKEN = "BQA5HS1uDbFQGCSvLuyXPeiJ7ndSRaeHQXmtMV4YbRYxsUFpuQbcuRevKf5sCuIsoJZ1FVPpF-YPVxhUVcNRJSrz3n1NM0GKMVtXSkgs-Ex5qLVlgY9_LvGjkNLd4XhkgVDtgKloAPt0HwL_UyrEFqYCYGsEKg9nxD3hcUTb"

### Album
2. Create the necessary helper functions (`extract_album`, `extract_album_len`, `extract_album_artists`) that will extract specific characteristics from the album each track is a part of.
3. Create `df_album` so that all the specified album attributes (defined within the function) can be extracted.

In [2]:
def extract_album(json, key):
    """Collect one album attribute from every playlist item.

    Args:
        json: sequence of playlist items, each holding a "track" mapping
            that in turn holds an "album" mapping.
        key: name of the album attribute to read.

    Returns:
        A list with one entry per item, in order; an entry is None when the
        album mapping has no such key.
    """
    values = []
    for item in json:
        album = item.get("track").get("album")
        values.append(album.get(key))
    return values
def extract_album_len(json, key):
    """Measure the length of one album attribute for every playlist item.

    Args:
        json: sequence of playlist items, each holding a "track" mapping
            that in turn holds an "album" mapping.
        key: name of a sized album attribute (e.g. a list).

    Returns:
        A list of lengths, one per item, in order.
    """
    lengths = []
    for item in json:
        album = item.get("track").get("album")
        lengths.append(len(album.get(key)))
    return lengths
def extract_album_artists(json, key="artists"):
    """List the album artists' names for every playlist item.

    Args:
        json: sequence of playlist items, each holding a "track" mapping
            that in turn holds an "album" mapping with an "artists" list.
        key: accepted for signature compatibility; the lookup always reads
            the "artists" entry regardless of this value.

    Returns:
        A list of lists: one inner list of artist names per item.
    """
    return [
        [credit.get("name") for credit in item.get("track").get("album").get("artists")]
        for item in json
    ]

def df_album(df, tracks):
    """Add the album-level columns for every playlist item to `df`.

    Writes "album, <key>" columns for the plain attributes, "album, len <key>"
    columns for the sized attributes, and an "album, artists" column holding
    the list of artist names.

    Args:
        df: DataFrame to extend; it is modified in place.
        tracks: sequence of playlist items passed to the extract_* helpers.

    Returns:
        The same DataFrame object, with the album columns added.
    """
    # Attributes copied through unchanged.
    for attribute in ("id", "name", "release_date", "total_tracks"):
        df["album, " + attribute] = extract_album(tracks, attribute)

    # Attributes where only the number of entries is kept.
    for attribute in ("artists", "available_markets"):
        df["album, len " + attribute] = extract_album_len(tracks, attribute)

    df["album, artists"] = extract_album_artists(tracks)
    return df

### Track
4. Create the necessary helper functions (`extract_track`, `extract_track_artists`) to extract specific characteristics about the track itself.
5. Create `df_tracks` that will call the above defined helper functions so that the relevant track information can be extracted.

In [3]:
def extract_track(json, key):
    """Collect one track attribute from every playlist item.

    Args:
        json: sequence of playlist items, each holding a "track" mapping.
        key: name of the track attribute to read.

    Returns:
        A list with one entry per item, in order; an entry is None when the
        track mapping has no such key.
    """
    values = []
    for item in json:
        values.append(item.get("track").get(key))
    return values
def extract_track_artists(json, key="artists"):
    """List the track artists' names for every playlist item.

    Args:
        json: sequence of playlist items, each holding a "track" mapping
            with an "artists" list.
        key: accepted for signature compatibility; the lookup always reads
            the "artists" entry regardless of this value.

    Returns:
        A list of lists: one inner list of artist names per item.
    """
    return [
        [credit.get("name") for credit in item.get("track").get("artists")]
        for item in json
    ]

def df_tracks(df, tracks):
    """Add the track-level columns for every playlist item to `df`.

    Writes one "track, <key>" column per plain attribute plus a
    "track, artists" column holding the list of artist names.

    Args:
        df: DataFrame to extend; it is modified in place.
        tracks: sequence of playlist items passed to the extract_* helpers.

    Returns:
        The same DataFrame object, with the track columns added.
    """
    for attribute in ("name", "popularity", "duration_ms", "explicit"):
        df["track, " + attribute] = extract_track(tracks, attribute)

    df["track, artists"] = extract_track_artists(tracks)
    return df
#df = df_tracks(df, top_tracks)

### Post-Processing
6. Create `date_process` to reformat the release_date into datetime (and not simply a string).
7. Create `date_today_diff`, the positive difference between today's date and the album release date.

In [4]:
def _parse_release_date(raw):
    """Parse a release-date string given at day, month or year precision.

    Missing month/day components default to 1 (e.g. "2004" -> 2004-01-01).
    Raises ValueError when the string matches none of the known layouts.
    """
    for layout in ('%Y-%m-%d', '%Y-%m', '%Y'):
        try:
            return datetime.strptime(raw, layout).date()
        except ValueError:
            continue
    raise ValueError("Unrecognized release date format: " + repr(raw))


def date_process(df):
    """Add a parsed copy of the album release date to `df`.

    Reads the string column "album, release_date" and writes the column
    "album, release_date, datetime" holding `datetime.date` objects.

    Besides full "YYYY-MM-DD" dates, year-only ("2004") and year-month
    ("1993-07") values are accepted, since some playlists contain albums
    whose release date is only known to that precision.

    Args:
        df: DataFrame with an "album, release_date" column; modified in place.

    Returns:
        The same DataFrame object, with the parsed column added.

    Raises:
        ValueError: if a release date matches none of the supported layouts.
    """
    # Assign a plain list so values are placed positionally; going through a
    # freshly indexed Series would misalign on frames with a non-default index.
    df["album, release_date, datetime"] = [
        _parse_release_date(raw) for raw in df["album, release_date"]
    ]
    return df

def date_today_diff(df):
    """Add the number of days between each album release and today.

    Reads "album, release_date, datetime" (date objects) and writes the
    integer column "Release/Today Day difference", computed as
    `today - release_date`; a release date in the future therefore yields a
    negative value.

    Args:
        df: DataFrame with the parsed release-date column; modified in place.

    Returns:
        The same DataFrame object, with the difference column added.
    """
    today = date.today()
    # Iterate over the values themselves rather than indexing with 0..n-1:
    # integer lookups on the column are label-based and fail on any frame
    # whose index is not the default range (e.g. after filtering rows).
    df["Release/Today Day difference"] = [
        (today - released).days for released in df["album, release_date, datetime"]
    ]
    return df

#df = date_process(df)
#df = date_today_diff(df)

### Visualization
8. Create the following functions (`hist`, `cat`, `dist`, `reg`, `pie`, `explicit`) to visualize the data

In [32]:
def hist(df, bins=8):
    """Plot a histogram of the release-to-today day differences.

    Args:
        df: DataFrame with a "Release/Today Day difference" column.
        bins: number of histogram bins.
    """
    day_gap = df[["Release/Today Day difference"]]
    day_gap.plot.hist(bins=bins)

    plt.title("The Difference between Release Date and Today")
    plt.xlabel("Days")
    plt.ylabel("Count")
    plt.show()
def cat(df, jitter=True):
    """Draw a categorical plot of track popularity per album artist count.

    Args:
        df: DataFrame with "album, len artists" and "track, popularity" columns.
        jitter: forwarded to seaborn's catplot.
    """
    plot_options = {
        "data": df,
        "x": "album, len artists",
        "y": "track, popularity",
        "jitter": jitter,
    }
    sns.catplot(**plot_options)

    plt.title("Correlation between Album Artist Count and Track Popularity (Jittered)")
    plt.xlabel("Album Artist Count")
    plt.ylabel("Track Popularity")
    plt.show()
def dist(df):
    """Plot the distribution of track popularity with mean and median markers.

    Args:
        df: DataFrame with a "track, popularity" column.
    """
    popularity = df["track, popularity"]
    sns.distplot(a=popularity, hist=True)

    # One vertical marker per summary statistic: (position, colour, legend label).
    markers = (
        (np.mean(popularity), "green", "mean"),
        (np.median(popularity), "red", "median"),
    )
    for position, colour, tag in markers:
        plt.axvline(position,
                    linestyle='solid',
                    linewidth=1,
                    color=colour,
                    label=tag)

    plt.title("Distribution of Track Popularities")
    plt.xlabel("Track Popularity")
    plt.ylabel("Frequency")
    plt.legend()
    plt.show()
def reg(df, jitter=0.2):
    """Scatter track popularity against album market count with a fitted line.

    Args:
        df: DataFrame with "album, len available_markets" and
            "track, popularity" columns.
        jitter: horizontal jitter forwarded to seaborn's regplot as `x_jitter`.
    """
    plot_options = {
        "data": df,
        "x": "album, len available_markets",
        "y": "track, popularity",
        "ci": None,  # no confidence band around the fit
        "x_jitter": jitter,
        "scatter_kws": {'alpha': 0.3},
    }
    sns.regplot(**plot_options)

    plt.title("Correlation between Album Market Availibility and Track Popularity (Jittered)")
    plt.xlabel("Number of Countries Album is Availible In")
    plt.ylabel("Track Popularity")
    plt.show()
def pie(df):
    """Plot how the playlist's tracks are distributed across albums.

    Albums whose track count meets a median-based threshold get their own
    slice; all remaining albums are merged into a single "OTHER" slice sized
    by the number of tracks they contribute, so the slices always add up to
    the total number of tracks. The album(s) with the highest track count are
    pulled out of the pie.

    Args:
        df: DataFrame with an "album, name" column (one row per track).

    Raises:
        ValueError: if `df` contains no album names to count.
    """
    album_counts = df["album, name"].value_counts()
    if album_counts.empty:
        raise ValueError("pie() needs at least one track to plot")

    #determine threshold
    median_count = np.median(album_counts)
    largest = album_counts.max()
    if largest > median_count:
        threshold = median_count + 1
    elif album_counts.min() < median_count:
        threshold = median_count - 1
    else:
        threshold = median_count

    #separate into those that fail/meet threshold
    meets_threshold = album_counts >= threshold
    slices = album_counts[meets_threshold]

    #determine exploding array: only the most frequent album(s) are offset
    explode = [1 / 7 if count == largest else 0.0 for count in slices]

    # Size OTHER by the tracks it represents (not by the number of albums),
    # and leave it out entirely when nothing falls below the threshold.
    other_tracks = int(album_counts[~meets_threshold].sum())
    if other_tracks > 0:
        # pd.concat replaces Series.append, which pandas 2.0 removed.
        slices = pd.concat([slices, pd.Series([other_tracks], index=["OTHER"])])
        explode.append(0.0)

    #plot the data
    plt.figure(figsize=(9, 7), dpi=80)
    plt.pie(slices.values,
            labels=slices.index,
            autopct='%1.1f%%',
            counterclock=False,
            startangle=90,
            pctdistance=0.85,
            explode=explode)
    plt.title("Album Frequency Among Tracks")
    plt.show()
def explicit(df):
    """Compare popularity distributions of explicit and non-explicit tracks.

    Draws one density curve and one mean marker per group on a shared figure.

    Args:
        df: DataFrame with "track, explicit" and "track, popularity" columns.
    """
    plt.figure(figsize=(7, 5), dpi=80)

    # (flag value, curve label, mean-line colour, mean-line label), drawn in order.
    groups = (
        (True, "Explicit", "blue", "Explicit Mean"),
        (False, "Not Explicit", "Red", "Non-Explicit Mean"),
    )
    for flag, curve_label, mean_colour, mean_label in groups:
        popularity = df[df["track, explicit"] == flag]["track, popularity"]
        sns.distplot(popularity, label=curve_label, hist=False)
        plt.axvline(np.mean(popularity),
                    linestyle='solid',
                    linewidth=1,
                    color=mean_colour,
                    label=mean_label)

    plt.title("Track Popularity Distribution on Explicit-ness")
    plt.xlabel("Track Popularity")
    plt.ylabel("Frequency")
    plt.legend()
    plt.show()

In [33]:
#sns.displot(df, x="track, popularity", hue="album, len artists")

### Main
9. Create `playlist_category_id`, a DICTIONARY that connects playlist names with their corresponding id
10. Create `playlist_main` to process the data. It returns a dictionary of relevant details (category, playlist url, playlist name, and number of followers) AND a dataframe with relevant details, post-process.
11. Create `playlist_visual` to visualize the data. It takes as an input the dataframe returned by `playlist_main`
12. Create `main`, which calls `playlist_main` and `playlist_visual`. When `return_df=True`, it returns the dataframe returned by `playlist_main`. NOTE: the token it takes as an input should be refreshed each time to guarantee it does not time out.

In [34]:
# Maps a playlist key (as shown in the dropdown) to its Spotify playlist id.
# Each key appears exactly once; repeated keys in a dict literal silently
# overwrite one another.
playlist_category_id = {
    "viral_50_global": "37i9dQZEVXbLiRSasKsNU9",
    "top_50_global": "37i9dQZEVXbMDoHDwVN2tF",

    "viral_50_usa": "37i9dQZEVXbKuaTI1Z1Afx",
    "top_50_usa": "37i9dQZEVXbLRQDuF5jeBp",

    "top_50_vietnam": "37i9dQZEVXbLdGSmz6xilI",

    "top_50_japan": "37i9dQZEVXbKXQ4mDTEBXq",

    "top_50_argentina": "37i9dQZEVXbMMy2roB9myp",

    "top_50_indonesia": "37i9dQZEVXbObFQZ3JLcXt",

    "top_50_india": "37i9dQZEVXbLZ52XmnySJg",

    "top_50_uk": "37i9dQZEVXbLnolsZ8PSNw",  #(has 2004)
    "top_50_spain": "37i9dQZEVXbNFJfN1Vw8d9",
    "top_50_turkey": "37i9dQZEVXbIVYVBNw9D5K",
    "top_50_france": "37i9dQZEVXbIPWwFssbupI",
    "top_50_taiwan": "37i9dQZEVXbMnZEatlMSiu",
    # NOTE(review): brazil and germany carry the same id as top_50_argentina,
    # which looks like a copy-paste slip -- confirm the real playlist ids.
    "top_50_brazil": "37i9dQZEVXbMMy2roB9myp",
    "top_50_germany": "37i9dQZEVXbMMy2roB9myp",
    "top_50_sweden": "37i9dQZEVXbLoATJ81JYXz",
    "top_50_netherlands": "37i9dQZEVXbKCF6dqVpDkS"

    #"top_50_australia": "37i9dQZEVXbJPcfkRz0wJ0", #(has 2004)
    #"top_50_mexico": "37i9dQZEVXbO3qyFxbkOE1", #has a weird 1993 year
    #"50s_party": "37i9dQZF1DWSwFS0Z6E1ep" #has YEAR-MO
}

def playlist_main(token, category="top_50_global"):
    """Download one playlist and build the processed track DataFrame.

    Args:
        token: OAuth bearer token sent in the Authorization header.
        category: key of `playlist_category_id` selecting the playlist.
            The default is "top_50_global"; the earlier default
            ("50_global_top") was not a registered key, so calling without
            a category could never succeed.

    Returns:
        A `(playlist_dict, df)` tuple. `playlist_dict` holds the category,
        playlist url, playlist name and follower count; `df` holds the album
        and track columns plus the parsed release date and its distance in
        days from today.

    Raises:
        ValueError: if `category` is not a key of `playlist_category_id`.
        requests.HTTPError: if the API answers with an error status
            (for example when the token has expired).
    """
    #determine id that corresponds to category
    if category not in playlist_category_id:
        raise ValueError(
            "Unknown playlist category " + repr(category)
            + "; expected one of: " + ", ".join(sorted(playlist_category_id))
        )
    id_ = playlist_category_id[category]

    print("The following visualizations will be about " + category + ".")

    #request the playlist
    resp = requests.get(
        url = 'https://api.spotify.com/v1/playlists/' + id_,
        headers = {
            'Authorization': 'Bearer '+ token
        },
        timeout = 30  # do not hang the notebook on a stalled connection
    )
    # Fail here with the HTTP status instead of later on a missing "tracks" key.
    resp.raise_for_status()
    r = resp.json()

    # Drop entries without a track payload: every extract_* helper calls
    # .get() on the "track" value and would crash on None.
    tracks = [item for item in r["tracks"]["items"] if item.get("track")]

    #create dataframes
    df = pd.DataFrame()
    df = df_album(df, tracks)
    df = df_tracks(df, tracks)

    #date post-processing
    df = date_process(df)
    df = date_today_diff(df)

    playlist_dict = {
        "category": category,
        "url": r.get("external_urls").get("spotify"),
        "name": r.get("name"),
        "followers": r.get("followers").get("total")
    }

    return playlist_dict, df

def playlist_visual(df):
    """Render the standard set of plots for a processed playlist DataFrame.

    Args:
        df: DataFrame as returned (second element) by `playlist_main`.
    """
    # `cat` is intentionally left out of the sequence.
    for draw in (hist, dist, explicit, reg, pie):
        draw(df)

In [35]:
def main(token, category, return_df = False):
    """Fetch a playlist, draw all of its plots, and optionally return the data.

    Args:
        token: OAuth bearer token forwarded to `playlist_main`.
        category: playlist key forwarded to `playlist_main`.
        return_df: when truthy, return the processed DataFrame.

    Returns:
        The processed DataFrame if `return_df` is truthy, otherwise None.
    """
    _details, frame = playlist_main(token, category)
    playlist_visual(frame)
    return frame if return_df else None

In [36]:
#CATEGORY = "top_50_turkey"
#df = main(TOKEN, CATEGORY, False)

# Scratch check: fetch one playlist directly and look at the first track's
# "explicit" flag.
# The playlist id is read from the `playlist_category_id` mapping defined
# earlier; re-declaring a shorter copy here would shadow the full mapping and
# shrink the dropdown built in the next cell.
category = "viral_50_global"
token=TOKEN
print(category)
id_ = playlist_category_id.get(category, "ERROR")

#request the playlist
resp = requests.get(
    url = 'https://api.spotify.com/v1/playlists/' + id_,
    headers = {
        'Authorization': 'Bearer '+ token
    }
)
r = resp.json()

tracks = r.get('tracks').get("items")

tracks[0].get("track").get("explicit")

In [37]:
# SOURCE: https://towardsdatascience.com/bring-your-jupyter-notebook-to-life-with-interactive-widgets-bc12e03f0916

def unique_sort_list(array):
    """Return the distinct values of `array` as an ascending list.

    Args:
        array: object exposing `.unique()` whose result has `.tolist()`
            (a pandas Series in this notebook).

    Returns:
        A new sorted list of the unique values.
    """
    return sorted(array.unique().tolist())

# Output area that the handler below clears and redraws into.
output = widgets.Output()

# One dropdown entry per playlist key, in alphabetical order.
dropdown = widgets.Dropdown(
    options = unique_sort_list(pd.Series(list(playlist_category_id)))
)

def dropdown_event(change):
    """Redraw all plots for the newly selected playlist key."""
    output.clear_output()
    with output:
        main(TOKEN, change.new, False)

# Only react to changes of the selected value.
dropdown.observe(dropdown_event, names='value')

display(dropdown)
display(output)

Dropdown(options=('top_50_argentina', 'top_50_brazil', 'top_50_france', 'top_50_germany', 'top_50_global', 'to…

Output()

# NOTE(review): refreshed hardcoded token -- replace with a current one before running.
TOKEN = 'BQDrbDT4kLhwctaiXijkqtYSh0-3T6MD03xre8sRq2EEzk-QipMkF0AefTKzt6fuQlIzharfpiY-SYa2bm77zcLYc4dUX2uL1h6VkCgaWScVokeAwe3exXp5hKm6iCEReUlz7sShaX7Rj0JYfHYLfaGlfBW4mItlVegJ90wx'
# playlist_main returns (playlist_dict, df): keep only the DataFrame for plotting.
# "top" is not a key of playlist_category_id; "top_50_global" is presumably the
# playlist that was meant.
_, df_50 = playlist_main(token = TOKEN, category="top_50_global")
playlist_visual(df_50)

# Scratch cell: fetch the viral-50 playlist directly and show its url, name
# and follower count.
# NOTE(review): hardcoded token -- replace with a current one before running.
TOKEN = 'BQBR0Uls74aPKmCyi8ww5jyRiVVDnR3mNrYrzI-MLMDsgqDzOPUrE6IupyXKUZM7Ni7qkm7b5w0T-jOfApmEBYKY0aK3JvZCcCneZ0ZoVOi15ZEBJgMOPDX8-qd5YFxgwkonWgCf6-ycOdTgVrEP3ELzXtMvez4KIYG49vnt'
BASE_URL = 'https://api.spotify.com/v1/'
headers = {'Authorization': 'Bearer '+ TOKEN}

# Not used by the request below.
console = 'artists'
_id = '2CIMQHirSU0MQqyYHq0eOx%2C57dN52uHvrHOxijzpIgu3E%2C1vCWHaC5f2uS3yhpwWbIA6'

# viral 50 playlist; use id 37i9dQZEVXbMDoHDwVN2tF instead for top 50
resp = requests.get(
    url = BASE_URL + 'playlists/37i9dQZEVXbLiRSasKsNU9',
    headers = headers
)
r = resp.json()
top_tracks = r.get('tracks').get("items")

url = r.get("external_urls").get("spotify")
name = r.get("name")
followers = r.get("followers").get("total")

url, name, followers

https://open.spotify.com/playlist/37i9dQZEVXbLiRSasKsNU9

https://developer.spotify.com/console/get-playlist/?playlist_id=37i9dQZEVXbLiRSasKsNU9&market=&fields=&additional_types=

For all the playlists:

https://open.spotify.com/search/top%2050/playlists