In [2]:
import requests
import datetime
import base64
import re
import pandas as pd
import traceback
import logging
import math
import os

In [3]:
from config import client_id, client_secret

In [4]:
client_creds = f"{client_id}:{client_secret}"
type(client_creds)

str

In [5]:
client_creds_b64 = base64.b64encode(client_creds.encode())
type(client_creds_b64)

bytes

In [6]:
# Endpoint, form payload and headers for requesting an access token with the
# client-credentials grant.
token_url = "https://accounts.spotify.com/api/token"
method = "POST"  # NOTE(review): not referenced by any other cell in this notebook
token_data = {
    "grant_type": "client_credentials"
}
token_headers = {
    # Basic auth value: the base64-encoded "client_id:client_secret" string
    "Authorization": f"Basic {client_creds_b64.decode('utf-8')}",
    "Content-Type": 'application/x-www-form-urlencoded'
}

In [7]:
# Request an app-level access token from the Spotify accounts service.
r = requests.post(token_url, data=token_data, headers=token_headers)

# Validate the response BEFORE reading the token, so a rejected request fails
# with the HTTP status and body instead of a KeyError on 'access_token'.
# Every 2xx status counts as success (range(200, 299) excluded 299).
valid_request = 200 <= r.status_code < 300
if not valid_request:
    raise RuntimeError(
        f"Spotify token request failed (HTTP {r.status_code}): {r.text}"
    )

# Stored as token_response rather than `json` so the variable is not mistaken
# for the standard-library module of the same name.
token_response = r.json()
token = token_response['access_token']

In [8]:
# Record when the access token expires. The token is short-lived (about 60
# minutes according to the original note); once it lapses, re-run the token
# cells above to generate a new one.
if valid_request:
    token_response_data = r.json()
    now = datetime.datetime.now()
    access_token = token_response_data['access_token']
    expires_in = token_response_data['expires_in']  # seconds
    expires = now + datetime.timedelta(seconds=expires_in)
    # Compare against the clock at evaluation time. The previous `expires < now`
    # reused the very timestamp `expires` was derived from, so it could never
    # be True for a positive lifetime.
    did_expire = expires < datetime.datetime.now()


def tokenHasExpired():
    """Return True when the current time is past the recorded token expiry.

    `did_expire` is only a snapshot taken when this cell ran; call this
    function later in the session to re-check against the current clock.
    Requires `expires` to have been set by a successful token request.
    """
    return expires < datetime.datetime.now()

In [9]:
def getRequest(url, token, timeout=30):
    """Send an authenticated GET request to a Spotify Web API URL.

    Build the URL for the resource you want (playlist, artist, album, user
    info, ...) before calling this.

    Parameters
    ----------
    url : str
        Full endpoint URL to query.
    token : str
        Access token placed in the ``Authorization: Bearer`` header.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).
        Without a timeout, ``requests`` can block indefinitely on a stalled
        connection and hang the notebook.

    Returns
    -------
    The response body decoded from JSON. The HTTP status is not checked
    here, so an error payload is returned to the caller as-is.
    """
    headers = {
        "Accept": "application/json",
        "Content-Type": "application/json",
        "Authorization": f"Bearer {token}",
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    return response.json()

In [10]:
def getArtistPlaylistURIs(playlists_json):
    """Collect the 'id' of every entry under ``playlists_json['items']``.

    Despite the function name, the values returned are the 'id' fields,
    listed in the same order as the items appear.
    """
    return [entry['id'] for entry in playlists_json['items']]

In [11]:
def getURI(uri_list):
    """Extract the 22-character Spotify id from each URI in ``uri_list``.

    Parameters
    ----------
    uri_list : iterable of str
        Either full URIs of the form ``spotify:<kind>:<id>`` where kind is
        album, playlist, artist or track, or values that already are a bare
        22-character id (returned unchanged).

    Returns
    -------
    list of str
        The ids, in input order.

    Raises
    ------
    ValueError
        If an entry is neither a supported URI nor a bare id. The message
        names the offending value, instead of the opaque AttributeError that
        a failed ``match`` used to cause.
    """
    uri_pattern = re.compile(r'(?:spotify:(?:album|playlist|artist|track):)(\w{22})')
    bare_id_pattern = re.compile(r'\w{22}')

    ids = []
    for item in uri_list:
        found = uri_pattern.match(item)
        if found:
            ids.append(found.group(1))
        elif bare_id_pattern.fullmatch(item):
            # Already an id (e.g. taken from an 'id' field) - keep it as-is.
            ids.append(item)
        else:
            raise ValueError(f"Not a recognised Spotify URI or id: {item!r}")
    return ids

In [12]:
def getSongInfo(playlist):
    """Flatten a playlist-tracks JSON response into a DataFrame of songs.

    Parameters
    ----------
    playlist : dict
        JSON data from a playlist query; every element of
        ``playlist['items']`` carries a ``'track'`` entry.

    Returns
    -------
    pandas.DataFrame
        One row per usable track with the columns ``band_name``,
        ``band_uri`` (the first listed artist's 'id'), ``song_name``,
        ``release_date`` (taken from the album), ``popularity`` and
        ``track_uri`` (the id extracted from the track's URI by getURI).

    Notes
    -----
    Items whose ``'track'`` is None are skipped. Previously a single such
    item raised "'NoneType' object is not subscriptable" and the whole
    playlist was lost to the caller's error handling.
    """
    tracks = [item['track'] for item in playlist['items'] if item['track'] is not None]

    data = {
        # Only the first listed artist of each track is recorded.
        "band_name": [track['artists'][0]['name'] for track in tracks],
        "band_uri": [track['artists'][0]['id'] for track in tracks],
        "song_name": [track['name'] for track in tracks],
        "release_date": [track['album']['release_date'] for track in tracks],
        "popularity": [track['popularity'] for track in tracks],
        "track_uri": getURI([track['uri'] for track in tracks]),
    }
    return pd.DataFrame(data)

In [13]:
def getPlaylists(user_id):
    """Return the playlist identifiers for one Spotify user.

    Issues a single playlists request (offset 0, limit 50), so no more than
    the first 50 playlists are covered, and hands the response to
    getArtistPlaylistURIs. Authenticates with the notebook-level ``token``.
    """
    playlists_endpoint = (
        f"https://api.spotify.com/v1/users/{user_id}/playlists?offset=0&limit=50"
    )
    response_json = getRequest(playlists_endpoint, token)
    return getArtistPlaylistURIs(response_json)

In [14]:
# For every user in the list, grab their playlists and then the track
# listing of each playlist.
# user_dict maps user id -> list of playlist-tracks JSON responses.
user_dict = {}
user_list = ['12121638402',"bcemcphilamy","1237116029","1246196398","atommyco"]
for user in user_list:
    # Identifiers of this user's playlists, as returned by getPlaylists
    playlists = getPlaylists(user)
    
    # One tracks response per playlist, in the same order as `playlists`
    track_data = []
    for playlist_id in playlists: 
        url = f'https://api.spotify.com/v1/playlists/{playlist_id}/tracks'
        # NOTE(review): `json` is a notebook-level name here; it would hide
        # the standard-library module if that were ever imported.
        json = getRequest(url, token)
        track_data.append(json)
    
    user_dict[user] = track_data

In [19]:
# The key is the user's username/id
# The value is a list of dictionaries, with each dictionary corresponding 
# to all the songs in a playlist
for i in user_list:
    print(f"There are {len(user_dict[i])} playlists for User {i}.")

There are 50 playlists for User 12121638402.
There are 16 playlists for User bcemcphilamy.
There are 20 playlists for User 1237116029.
There are 11 playlists for User 1246196398.
There are 50 playlists for User atommyco.


In [20]:
brigid = user_dict['bcemcphilamy'][:4]
for playlist in brigid:
    print(f"Number of songs is {len(playlist['items'])}")

Number of songs is 29
Number of songs is 19
Number of songs is 21
Number of songs is 19


In [21]:
brigid_playlists = user_dict['bcemcphilamy']
len(brigid_playlists)
len(brigid_playlists[0]['items'])

29

In [24]:
user_playlist = user_dict['12121638402']

def getPlaylistSongs(user_playlists, user):
    """Combine the songs of several playlists into one DataFrame for a user.

    Parameters
    ----------
    user_playlists : list of dict
        Playlist-tracks JSON responses, one per playlist.
    user : str
        Identifier written into the ``user`` column of every row.

    Returns
    -------
    pandas.DataFrame
        The getSongInfo rows of every playlist that could be parsed, stacked
        in order (each playlist keeps its own 0-based index), plus a
        ``user`` column. Empty, with the same columns, when no playlist
        could be parsed.
    """
    song_columns = [
        "band_name", "band_uri", "song_name",
        "release_date", "popularity", "track_uri",
    ]

    frames = []
    for position, playlist_json in enumerate(user_playlists):
        try:
            frames.append(getSongInfo(playlist_json))
        except TypeError as err:
            # Still best-effort: a playlist that cannot be parsed is skipped,
            # but it is logged rather than dropped silently.
            logging.warning(
                "Skipping playlist %d for user %s: %s", position, user, err
            )

    # Collect first and concatenate once. The old chain of df.append() calls
    # left `df` unbound whenever the first playlist failed (or the list was
    # empty), and DataFrame.append no longer exists in pandas 2.x.
    if frames:
        df = pd.concat(frames)
    else:
        df = pd.DataFrame(columns=song_columns)

    df['user'] = user
    return df

In [None]:
for user in user_list:
    user_playlist = user_dict[user]
    

In [25]:
# Look the playlists up by user id instead of relying on the notebook-level
# `user_playlist`: the loop cell above rebinds that name to the last user in
# user_list, which would label another user's songs with this id.
gabby_df = getPlaylistSongs(user_dict["12121638402"], "12121638402")
gabby_df

Unnamed: 0,band_name,band_uri,song_name,release_date,popularity,track_uri,user
0,Slow Pulp,2JFTRDi5v7JtqoouVe1z5D,In Too Deep,2021-09-15,49,76rOZ9F8s8mX9N3NhZBgxt,12121638402
1,Snail Mail,4QkSD9TRUnMtI8Fq1jXJJe,Valentine,2021-09-15,60,21brT2CZwKz8VPCf0nFxiP,12121638402
2,Coco,4NxTtZTeJqQ1n27i2qnPCe,Knots,2021-08-24,23,2uDlO65509ESfCy4s07pYT,12121638402
3,Kacey Musgraves,70kkdajctXSbqSMJbQO424,simple times,2021-09-10,68,1lNEXDlxVhsWaq2DLBUDgC,12121638402
4,Kacey Musgraves,70kkdajctXSbqSMJbQO424,breadwinner,2021-09-10,74,2stbjvpFujerfDUuUUPX7o,12121638402
...,...,...,...,...,...,...,...
95,Weezer,3jOstUTkEu2JkjvRdBA5Gu,El Scorcho,1996-09-24,0,1PB7gRWcvefzu7t3LJLUlf,12121638402
96,The Goo Goo Dolls,2sil8z5kiy4r76CRTXxBCA,Name - New Version,2007-11-06,37,6WrP9bvTeNLalVEtLCgyhm,12121638402
97,Green Day,7oPftvlwr6VrsViSDV7fJY,When I Come Around,1994-02-01,74,1Dr1fXbc2IxaK1Mu8P8Khz,12121638402
98,Fall Out Boy,4UXqAaa6dQYAk18Lv7PEgX,"Sugar, We're Goin Down",2005-01-01,0,4zjFqN9fXAw91GNgJOCYX6,12121638402


In [None]:
for user in user_list:
    # user_dict[user] is a list of dictionaries, one per playlist
    playlist_jsons = user_dict[user]

    # getSongInfo expects the JSON of a single playlist, so visit the list
    # one playlist at a time (the original cell had an unfinished `for`
    # statement here, which is a syntax error).
    # NOTE(review): `songs` only keeps the last playlist's frame; use
    # getPlaylistSongs if the combined result is what is wanted - confirm.
    for playlist_json in playlist_jsons:
        songs = getSongInfo(playlist_json)

In [None]:
# playlist_URIs_cleaned.remove("4W6yjf9ChXo9lHHVH9FERe")
# NOTE(review): `playlist_URIs_cleaned` and the starting `df` are not assigned
# at notebook level by any cell shown here - confirm where they come from.
for playlist_id in playlist_URIs_cleaned:

    url = f'https://api.spotify.com/v1/playlists/{playlist_id}/tracks'
    playlist_json = getRequest(url, token)

    try:
        # Parse inside the try: getSongInfo is the call that raises the
        # "'NoneType' object is not subscriptable" error reported below.
        new_df = getSongInfo(playlist_json)
        # pd.concat replaces DataFrame.append, which pandas 2.x removed.
        df = pd.concat([df, new_df])
    except (TypeError, KeyError):
        print("'NoneType' object is not subscriptable")

print(df)

In [None]:
print(df.duplicated().sum())

In [None]:
track_json = getTrackInfo(df)

In [None]:
track_json[21]['audio_features'][99]

In [None]:
len(track_json)

In [None]:
tracks = getTrackInfo(df)

In [None]:
tracks

In [None]:
def getArtistJSON(df):
    """Fetch artist JSON for every distinct ``band_uri`` value in ``df``.

    The distinct ids (first-seen order) are sent in comma-joined batches of
    up to 50 per request, and one decoded response is collected per batch.
    Authenticates with the notebook-level ``token``.
    """
    unique_ids = list(dict.fromkeys(df['band_uri']))

    batch_size = 50
    id_batches = [
        ",".join(unique_ids[start:start + batch_size])
        for start in range(0, len(unique_ids), batch_size)
    ]

    responses = []
    for id_batch in id_batches:
        endpoint = f"https://api.spotify.com/v1/artists?ids={id_batch}"
        responses.append(getRequest(endpoint, token))
    return responses

In [None]:
def artistDF(list_JSON):
    """Tabulate artist details from a list of artist-lookup responses.

    Each element of ``list_JSON`` must expose an ``'artists'`` list; every
    artist in it contributes one row holding its id (``band_uri``), genres,
    name (``names``) and follower total (``followers``).
    """
    ids, genre_lists, artist_names, follower_totals = [], [], [], []

    for response in list_JSON:
        for artist in response['artists']:
            genre_lists.append(artist['genres'])
            artist_names.append(artist['name'])
            ids.append(artist['id'])
            follower_totals.append(artist['followers']['total'])

    return pd.DataFrame({
        'band_uri': ids,
        'genres': genre_lists,
        'names': artist_names,
        'followers': follower_totals,
    })

In [None]:
artists_json = getArtistJSON(df)

In [None]:
artists_df = artistDF(artists_json)

In [None]:
artists_df

In [None]:
gabby_join = df.join(artists_df.set_index('band_uri'), on='band_uri')

In [None]:
def getTrackInfo(df):
    """Fetch audio features for every distinct ``track_uri`` value in ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have a ``track_uri`` column of track ids.

    Returns
    -------
    pandas.DataFrame
        One row per track that came back with features. ``track_uri`` holds
        the 'id' of the feature record; the remaining columns are named
        after the feature fields they are read from.

    Notes
    -----
    * The duration column is ``duration_ms``; it was previously emitted
      under the misspelled key ``duration_md``.
    * Entries of ``'audio_features'`` that are None are skipped instead of
      raising TypeError part-way through.
    * Authenticates with the notebook-level ``token``.
    """
    feature_fields = [
        'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
        'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
        'duration_ms', 'time_signature',
    ]

    # Distinct ids, first-seen order.
    unique_ids = list(dict.fromkeys(df['track_uri']))

    # Ids are sent in comma-joined batches of up to 100 per request.
    batch_size = 100
    track_json = []
    for start in range(0, len(unique_ids), batch_size):
        joined_ids = ",".join(unique_ids[start:start + batch_size])
        url = f"https://api.spotify.com/v1/audio-features?ids={joined_ids}"
        track_json.append(getRequest(url, token))

    # One output column per field, filled in lock-step so rows stay aligned.
    columns = {'track_uri': []}
    columns.update({field: [] for field in feature_fields})

    for response in track_json:
        for features in response['audio_features']:
            if features is None:
                continue
            columns['track_uri'].append(features['id'])
            for field in feature_fields:
                columns[field].append(features[field])

    return pd.DataFrame(columns)
    

In [None]:
tracks.head()

In [None]:
gabby_join = gabby_join.set_index('track_uri')

In [None]:
tracks = tracks.set_index('track_uri')

In [None]:
# merge() returns a new frame and leaves gabby_join untouched, so keep the
# result and describe that; describing gabby_join would omit every column
# contributed by `tracks`.
gabby_tracks = gabby_join.merge(tracks, on='track_uri', how='left')
gabby_tracks.describe()

In [None]:
def getUserPlaylist(user_id):
    """Build one DataFrame holding the songs from a user's playlists.

    Parameters
    ----------
    user_id : str
        Spotify user id whose playlists are fetched (single request with
        offset 0 and limit 50).

    Returns
    -------
    pandas.DataFrame
        The getSongInfo rows of every playlist that could be parsed, stacked
        in playlist order (each playlist keeps its own 0-based index).
        Empty, with the getSongInfo columns, when nothing could be loaded.

    Notes
    -----
    Prints the number of playlists found, and prints a message for each
    playlist that is skipped because it could not be parsed. Authenticates
    with the notebook-level ``token``.
    """
    song_columns = [
        "band_name", "band_uri", "song_name",
        "release_date", "popularity", "track_uri",
    ]

    # Configure the URL and grab the user's playlists
    url = f"https://api.spotify.com/v1/users/{user_id}/playlists?offset=0&limit=50"
    playlists = getRequest(url, token)

    # Read the playlist ids straight from the response. Going through
    # getArtistPlaylistURIs + getURI only works while the helper returns full
    # URIs, and this notebook defines that helper twice (once returning
    # 'id', once returning 'uri'), so the outcome depended on cell order.
    playlist_ids = [item['id'] for item in playlists['items']]
    print(len(playlist_ids))

    frames = []
    for playlist_id in playlist_ids:
        url = f'https://api.spotify.com/v1/playlists/{playlist_id}/tracks'
        playlist_json = getRequest(url, token)

        try:
            # getSongInfo is the call that raises on a malformed playlist,
            # so it has to sit inside the try for the handler to apply.
            frames.append(getSongInfo(playlist_json))
        except (TypeError, KeyError):
            print("'NoneType' object is not subscriptable")

    # A single concat replaces the chain of DataFrame.append calls (removed
    # in pandas 2.x) and also covers a user with no playlists, which used to
    # fail on playlist_URIs_cleaned[0].
    if not frames:
        return pd.DataFrame(columns=song_columns)
    return pd.concat(frames)

In [None]:
brigid_df = getUserPlaylist("bcemcphilamy")
brigid_artists_json = getArtistJSON(brigid_df)
brigid_artists_df = artistDF(brigid_artists_json)

In [None]:
brigid_join = brigid_df.join(brigid_artists_df.set_index('band_uri'), on='band_uri')
brigid_join

In [None]:
# Query the "top artists" endpoint for the current user.
# NOTE(review): /v1/me/... endpoints presumably require a user-authorized
# token rather than the client-credentials token created earlier - confirm.
# NOTE(review): the result is stored as `top_tracks` although the URL asks
# for artists.
url = f"https://api.spotify.com/v1/me/top/artists"
top_tracks = getRequest(url, token)

In [None]:
brigid_join.to_csv("spotify_brigid.csv", sep = ',')

In [None]:
brigid_join['band_name'].value_counts()

In [None]:
string = ['country world', 'rock', 'chicago indie']
test = [word.split(' ') for word in string]
test

for sublist in test: 
    print(sublist)

In [None]:
# Get most common overarching genres
# 257 songs don't have assigned genres
overaching_genres = ['country','rock','pop','electronic','edm_dance','rnb','hiphop','kpop','latin','soul','jazz',
                    'classical']
genres = df_join['genres']
genres.iloc[0]

test = pd.DataFrame(item for item in genres)
# test = pd.DataFrame(genres.tolist())
# test[12].value_counts()
test

In [None]:
top_genres = []
for col in test.columns:
    top_genres.append(test[col].value_counts().nlargest(20))

top_genres

In [None]:
genre = ['rock','rock','pop']
df_test = pd.DataFrame({
    'genre': genre
})
print(df_test['genre'].value_counts())

In [None]:
# # loop through all the lists in artists_json
# # then, loop through all of the artists within the list
# genres = artists_json[0]['artists'][0]['genres']
# name = artists_json[0]['artists'][0]['name']
# uri_id = artists_json[0]['artists'][0]['id']
# followers = artists_json[0]['artists'][0]['followers']['total']

In [None]:
# len(artists_json[17]['artists'])

In [None]:
# i = "5oOhM2DFWab8XhSdQiITry"
# url = f"https://api.spotify.com/v1/artists/{i}"
# artist_json = getRequest(url,token)
# genres = artist_json['genres']
# artist_json

In [None]:

def getArtistGenre(artist_id_list):
    """Placeholder for a per-artist genre lookup (not implemented yet).

    The original cell held only the ``def`` line, which is a syntax error
    and stops the notebook from running top to bottom. This stub keeps the
    name and signature and fails loudly if it is called.

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError("getArtistGenre has not been implemented yet")

In [None]:
# For each band_uri in the dataframe, grab artists' stats like the genre, popularity, etc. 

In [None]:
# get top 10 most frequent names
n = 10
df['band_name'].value_counts()[:n].index.tolist()

In [None]:
df['popularity'].mean()

In [None]:
# playlist_id = "4W6yjf9ChXo9lHHVH9FERe"
# url = f'https://api.spotify.com/v1/playlists/{playlist_id}/tracks'
# playlist_json = getRequest(url, token)
# df
# try:
#     df = getSongInfo(playlist_json)
# except Exception as e:
#     logging.error(traceback.format_exc())
#     # Logs the error appropriately. 


In [None]:
user_id = "bcemcphilamy"
url = f"https://api.spotify.com/v1/users/{user_id}/playlists"
brigids_playlists = getRequest(url,token)
brigids_playlists

In [None]:
# Madison Cunningham's playlists
user_id = "3cxtfy2f4gf5dqt194qfwgcx3"
url = f"https://api.spotify.com/v1/users/{user_id}/playlists"
madison_cunningham = getRequest(url,token)
print("Madison Cunningham has: ", len(madison_cunningham['items']), "playlists.")

In [None]:
# Madison Cunningham's songs in her playlist
def getArtistPlaylistURIs(playlists_json):
    """Collect the 'uri' of every entry under ``playlists_json['items']``.

    NOTE(review): this rebinds a name defined in an earlier cell, whose
    version collects 'id' instead. Once this cell has run, any function that
    calls getArtistPlaylistURIs (e.g. getPlaylists) receives full URIs.
    """
    return [entry['uri'] for entry in playlists_json['items']]

mc_list = getArtistPlaylistURIs(madison_cunningham)
mc_list

In [None]:
mc_list

In [None]:
# Raw string so that \w reaches the regex engine untouched; in a normal
# string literal "\w" is an invalid escape sequence that Python warns about.
regex = re.compile(r'(?:spotify:playlist:)(\w{22})')
# Keep only the entries that start with a playlist URI.
new_mc_list = list(filter(regex.match, mc_list))
new_mc_list

In [None]:
# Named sample_strings rather than `list`: rebinding the built-in breaks every
# later list(...) call in the notebook (getArtistJSON and getTrackInfo use it).
sample_strings = ["guru99 get", "guru99 give", "guru Selenium"]
for element in sample_strings:
    # Raw string so \w is passed to the regex engine as written.
    z = re.match(r"^\w+", element)
    # Test the match inside the loop so every element is reported, not just
    # the last one. The pattern has no capture groups, so print the matched
    # text itself (z.groups() would always be an empty tuple).
    if z:
        print(z.group())

In [None]:
new_mc_list

In [None]:
URIs = []
p = re.compile(r'(?:spotify:playlist:)(\w{22})')
for item in mc_list:
    m = p.match(item)
    uri = m.group(1)
    URIs.append(uri)
URIs

URIs = []
p = re.compile(r'(?:spotify:playlist:)(\w{22})')
URIs = [p.match(item).group(1) for item in mc_list]
# if m:
#     print('Match found: ', m.group(1))
# else:
#     print('No match')

In [None]:
URIs = [p.match(item).group(1) for item in mc_list]
URIs

In [None]:
import re

regex = r"(?:spotify:playlist:)(\w{22})"

test_str = ("spotify:playlist:5T4F9s5cizwJiI8EMNB3vv\n")

matches = re.finditer(regex, test_str, re.MULTILINE)

for matchNum, match in enumerate(matches, start=1):
    
    print ("Match {matchNum} was found at {start}-{end}: {match}".format(matchNum = matchNum, start = match.start(), end = match.end(), match = match.group()))
    
    for groupNum in range(0, len(match.groups())):
        groupNum = groupNum + 1
        
        print ("Group {groupNum} found at {start}-{end}: {group}".format(groupNum = groupNum, start = match.start(groupNum), end = match.end(groupNum), group = match.group(groupNum)))


In [None]:
h = re.compile(r"(?:spotify:playlist:)(\w{22})")
h.match('spotify:playlist:5T4F9s5cizwJiI8EMNB3vv').group()

In [None]:
user_id = "1237116029"
url = f"https://api.spotify.com/v1/users/{user_id}/playlists"
laurels_playlists = getRequest(url,token)

In [None]:
user_id = "1224941770"
url = f"https://api.spotify.com/v1/users/{user_id}/playlists"
snail_mails_playlists = getRequest(url,token)

In [None]:
user_id = "224sd2mespz4zevvqp4lyfxoq"
url = f"https://api.spotify.com/v1/users/{user_id}/playlists"
phoebe_playlists = getRequest(url,token)

In [None]:
playlist = phoebe_playlists['items'][0]['uri']

In [None]:
# Example of how to use the function 

# Step 1. Grab the URI/id from Spotify (whether it's for a track, playlist, artist, etc.)
spotify_URI = '1EI0B66miJj5Fl408B7E9H'

# Step 2. Use an f string to create the url for the request. 
# See Spotify Console's endpoint for each of the data types you can pull from Spotify
# Sample documentation is here: https://developer.spotify.com/console/get-artist-albums/

url = f"https://api.spotify.com/v1/artists/{spotify_URI}/albums"

courtney_marie_andrews = getRequest(url,token)
courtney_marie_andrews

In [None]:
# `courtney_marie_andrews` already holds the decoded JSON returned by
# getRequest in the previous cell; the old `albums.json()` call referenced an
# `albums` object that no cell in this notebook defines.
first_album = courtney_marie_andrews['items'][0]
print(first_album['name'])
print(first_album['release_date'])
print(first_album['total_tracks'])

In [None]:
# https://api.spotify.com/v1/audio-analysis/{id}

# Example - pulling data for Michael Jackson's Billie Jean song
id = "5ChkMS8OtdzJeqyybCc9R5"
url = f"https://api.spotify.com/v1/audio-analysis/{id}"
billie_jean_song = getRequest(url,token)
billie_jean_song.keys()

In [None]:
billie_jean_song

In [None]:
# The curl is used to query things in git bash/terminal/command line
# curl -X "GET" "https://api.spotify.com/v1/artists//albums"
# -H "Accept: application/json" 
# -H "Content-Type: application/json" 
# -H "Authorization: Bearer "