## Importing packages

In [None]:
import csv
import pandas as pd

## Uploading data

### List of songs in my library (requested from Spotify), containing:
* artist
* album
* track
* URI

In [None]:
# Load the library export into a DataFrame and print it for a quick look.
# NOTE: the path is absolute and machine-specific; adjust it before running elsewhere.
df = pd.read_csv(r'/Users/jass/Documents/projects/Spotify data/MyData/YourLibrary.csv')
print(df)

## Strip other characters to get only URI

In [None]:
# Take the 'uri' column on its own (a Series); the bare name on the last line displays it.
URIs = df.uri
URIs

In [None]:
# Remove the 'spotify:track:' text from every entry so only the rest of the URI is left.
URIs = URIs.str.replace('spotify:track:','')
URIs

In [None]:
# Print every stripped value, one per line, as a visual check.
for URI in URIs:
    print(URI)

In [None]:
# Show how many entries the Series holds.
URIs.shape

In [None]:
# Put the stripped values next to the original library columns (column-wise concat).
# NOTE(review): URIs was taken from df.uri, so it presumably still carries the name
# 'uri' and the result would have two columns with that label -- confirm this is intended.
mylibrary = pd.concat([df, URIs], join = 'outer', axis = 1)

In [None]:
# Save the combined table as CSV, leaving out the index column.
mylibrary.to_csv(r'/Users/jass/Documents/projects/Spotify data/MyData/mylibrary.csv', index = False)

## Code for the Spotify API

In [None]:
import requests
import datetime
from urllib.parse import urlencode
import base64

# These two values are needed to authenticate with the API.
client_id = '' # put the client id between the quotes
client_secret = '' # put the client secret between the quotes
# They are exchanged for an access token, which expires at some point.

In [None]:
class SpotifyAPI(object):
    """Small client for the Spotify Web API using the client-credentials grant.

    The client keeps the most recent access token and its expiry time, and
    requests a new token whenever none is stored or the stored one is past
    its expiry.
    """

    # Token state; the class-level defaults mean "no token yet".
    access_token = None
    access_token_expires = datetime.datetime.now()
    access_token_did_expire = True
    client_id = None
    client_secret = None
    token_url = "https://accounts.spotify.com/api/token"

    def __init__(self, client_id, client_secret, *args, **kwargs):
        """Store the credentials; no request is made here."""
        super().__init__(*args, **kwargs)
        self.client_id = client_id
        self.client_secret = client_secret

    def get_client_credentials(self):
        """Return ``client_id:client_secret`` as a base64 encoded string.

        Raises:
            Exception: if either credential is missing or empty.
        """
        client_id = self.client_id
        client_secret = self.client_secret
        # Empty strings are rejected too: they can never authenticate, and
        # failing here avoids a pointless round trip to the token endpoint.
        if not client_id or not client_secret:
            raise Exception("You must set client_id and client_secret")
        client_creds = f"{client_id}:{client_secret}"
        client_creds_b64 = base64.b64encode(client_creds.encode())
        return client_creds_b64.decode()

    def get_token_headers(self):
        """Return the Basic-auth header sent to the token endpoint."""
        client_creds_b64 = self.get_client_credentials()
        return {
            "Authorization": f"Basic {client_creds_b64}"
        }

    def get_token_data(self):
        """Return the form body sent to the token endpoint."""
        return {
            "grant_type": "client_credentials"
        }

    def perform_auth(self):
        """Request a new access token and store it with its expiry time.

        Returns:
            True once the token has been stored.

        Raises:
            Exception: if the token endpoint answers with a non-2xx status.
        """
        token_url = self.token_url
        token_data = self.get_token_data()
        token_headers = self.get_token_headers()
        r = requests.post(token_url, data=token_data, headers=token_headers)
        # range(200, 300) covers every 2xx status, including 299.
        if r.status_code not in range(200, 300):
            raise Exception("Could not authenticate client.")
        data = r.json()
        now = datetime.datetime.now()
        access_token = data['access_token']
        expires_in = data['expires_in']  # seconds
        expires = now + datetime.timedelta(seconds=expires_in)
        self.access_token = access_token
        self.access_token_expires = expires
        self.access_token_did_expire = expires < now
        return True

    def get_access_token(self):
        """Return a usable access token, authenticating first when needed.

        A new token is requested when none is stored or when the stored
        expiry time has passed. Authentication is attempted once per call,
        so a token that is already expired on arrival cannot cause endless
        re-authentication.
        """
        now = datetime.datetime.now()
        if self.access_token is None or self.access_token_expires < now:
            self.perform_auth()
        return self.access_token

    def get_resource_header(self):
        """Return the Bearer-auth header used for resource requests."""
        access_token = self.get_access_token()
        headers = {
            "Authorization": f"Bearer {access_token}"
        }
        return headers

    def get_resource(self, lookup_id, resource_type='albums', version='v1'):
        """GET ``/{version}/{resource_type}/{lookup_id}`` and return the decoded JSON.

        The status code is not checked, so the body of an error response is
        returned to the caller as-is.
        """
        endpoint = f"https://api.spotify.com/{version}/{resource_type}/{lookup_id}"
        headers = self.get_resource_header()
        r = requests.get(endpoint, headers=headers)
        return r.json()

    def get_album(self, _id):
        """Look up one album by id."""
        return self.get_resource(_id, resource_type='albums')

    def get_artist(self, _id):
        """Look up one artist by id."""
        return self.get_resource(_id, resource_type='artists')

    # not that useful
    def get_audio_analysis(self, _id):
        """Look up the audio analysis of one track by id."""
        return self.get_resource(_id, resource_type='audio-analysis')

    def get_audio_features(self, _id):
        """Look up the audio features of one track by id (one track per call)."""
        return self.get_resource(_id, resource_type='audio-features')

    def get_tracks(self, _id):
        """Look up one track by id."""
        return self.get_resource(_id, resource_type='tracks')

    def base_search(self, query_params):
        """Run a search request with an already URL-encoded query string.

        Prints the request URL. Returns the decoded JSON, or an empty dict
        when the response status is not 2xx.
        """
        headers = self.get_resource_header()
        endpoint = "https://api.spotify.com/v1/search"
        lookup_url = f"{endpoint}?{query_params}"
        print(lookup_url)
        r = requests.get(lookup_url, headers=headers)
        if r.status_code not in range(200, 300):
            return {}
        return r.json()

    def search(self, query=None, operator=None, operator_query=None, search_type='artist'):
        """Build a search query and pass it to ``base_search``.

        Args:
            query: a string, or a dict whose items are joined as
                ``key:value`` pairs separated by spaces.
            operator: optional ``"or"`` / ``"not"`` (any letter case); other
                values are ignored.
            operator_query: string appended after the operator; ignored
                unless it is a string.
            search_type: item type to search for; lower-cased before sending.

        Raises:
            Exception: if ``query`` is None.
        """
        if query is None:
            raise Exception("A query is required")
        if isinstance(query, dict):
            query = " ".join([f"{k}:{v}" for k, v in query.items()])
        if operator is not None and operator_query is not None:
            # .lower() must be called: comparing the bound method itself to
            # a string is always False, which left the operator unused.
            if operator.lower() in ("or", "not"):
                operator = operator.upper()
                if isinstance(operator_query, str):
                    query = f"{query} {operator} {operator_query}"
        query_params = urlencode({"q": query, "type": search_type.lower()})
        print(query_params)
        return self.base_search(query_params)

In [None]:
# Create the client with the credentials above; the constructor only stores them.
spotify = SpotifyAPI(client_id, client_secret)

## Get tracks

In [None]:
# Look up every track by its stripped URI (one request per track); no genre in the result though :(
all_URIs = [] # must be created outside the for loop, otherwise it would be reset on every iteration
for URI in URIs:
    all_URIs.append(spotify.get_tracks(URI))

In [None]:
# Display the collected responses.
all_URIs

In [None]:
# Turn the list of response dicts into a DataFrame and show its shape
all_URIs = pd.DataFrame.from_dict(all_URIs)
all_URIs.shape

In [None]:
# Save the track table as CSV, leaving out the index column
all_URIs.to_csv(r'/Users/jass/Documents/projects/Spotify data/MyData/track_data.csv', index = False)

## Get artist ID & match with track ID

In [None]:
# Some columns hold nested dictionaries again: 'albums' & 'artists'
artists = all_URIs.artists
artists.shape  # not the last expression in the cell, so nothing is displayed for this line
artists

In [None]:
# Collect the Spotify ID of the first listed artist of every track.
# Each entry of `artists` is expected to be the list of artist dicts from the
# track response. Reading the 'id' key replaces cutting the ID out of the
# stringified entry with str.strip (which strips a character set, not a
# prefix) and fixed slice offsets, which depended on the exact text layout.
artisturl_stripped = []
artistname_stripped = []  # never filled in this cell
for artist in artists:
    if isinstance(artist, list) and artist and isinstance(artist[0], dict):
        artisturl = artist[0].get('id') or ''
    else:
        # No artist list for this row (e.g. the lookup did not return a track).
        artisturl = ''
    artisturl_stripped.append(artisturl)

In [None]:
# Display the extracted artist IDs.
artisturl_stripped


### artist_id column

In [None]:
# Turn 'artisturl_stripped' into a one-column DataFrame named 'artist_id'
df_artisturl_stripped = pd.DataFrame(artisturl_stripped, columns = ['artist_id'])

### track_id column

In [None]:
# Take just the 'id' column of the track table as its own DataFrame
df_track_id = pd.DataFrame(all_URIs.id)

## Concatenate columns 'artist_id' & 'track_id'

In [None]:
# Place the artist IDs and track IDs side by side (column-wise concat), then display the result.
track_artists = pd.concat([df_artisturl_stripped, df_track_id], join = 'outer', axis = 1)
track_artists

### Remove duplicated artist to get a list of unique artists

In [None]:
print("The number of artists before removing duplicates : ")
print(len(track_artists))

# dict.fromkeys keeps the first occurrence of every ID in its original order,
# without re-scanning the result list for each row as a manual
# "if not in list: append" loop does.
artist_unique = list(dict.fromkeys(track_artists.artist_id))

# printing list size after removal
print("The number of artists after removing duplicates : ")
print(len(artist_unique))


### List of unique artists in the LIBRARY :)

In [None]:
# Turn the de-duplicated list into a one-column DataFrame ('artist_id') and display it.
# From here on artist_unique is a DataFrame, no longer a list.
artist_unique = pd.DataFrame(artist_unique, columns = ['artist_id'])
artist_unique

## To get list of unique artists in STREAMING HISTORY :)

### Remove duplicated artists to get a list of unique artists

In [None]:
# Load total_streaming_history into a DataFrame and print it
stream = pd.read_csv(r'/Users/jass/Documents/projects/Spotify data/MyData/total_streaming_history.csv')
print(stream)

In [None]:
print("The number of artists before removing duplicates : ")
print(len(stream))

# dict.fromkeys keeps the first occurrence of every name in its original
# order, without re-scanning the result list for each row as a manual
# "if not in list: append" loop does.
streamartist_unique = list(dict.fromkeys(stream.artist_name))

# printing list size after removal
print("The number of artists after removing duplicates : ")
print(len(streamartist_unique))

In [None]:
# Turn the de-duplicated names into a one-column DataFrame ('streamartist_name') and display it.
streamartist_unique = pd.DataFrame(streamartist_unique, columns = ['streamartist_name'])
streamartist_unique

In [None]:
# Get the names back as a plain Python list.
streamartist_unique_list = streamartist_unique['streamartist_name'].tolist()

In [None]:
# Display the list of unique streamed artist names.
streamartist_unique_list

In [None]:
# Slice a list into consecutive chunks
def splitartist(list_a, size):
    """Yield consecutive slices of ``list_a`` holding at most ``size`` items each.

    The chunk boundaries are computed from ``list_a`` itself, so the function
    works for any sequence passed in rather than depending on a global list.
    The last chunk is shorter when the length is not a multiple of ``size``;
    an empty input yields nothing.
    """
    for i in range(0, len(list_a), size):
        yield list_a[i:i + size]
# Print the artist names grouped into chunks of 20.
size = 20
print(list(splitartist(streamartist_unique_list, size)))

###### To get other information (such as artist_id and genres) for all streamed tracks, not just the tracks in the library, the 'search' API has to be used (20 items at a time)

In [None]:
# So far we only have artist names -> other info (artist_id, genres, etc.) has to come from the 'search' API
## HOWEVER, we can only search 20 items at a time
info_streamartist = []
# Loop over the names stored in the column. Looping over the DataFrame
# itself yields its column labels, so only the literal text
# 'streamartist_name' was ever searched. Missing names are skipped.
for streamartist in streamartist_unique['streamartist_name'].dropna():
    info_streamartist.append(spotify.search(query=streamartist, search_type="artist"))
    #info_streamartist.append(spotify.search({"artist": "Avril Lavigne"}, search_type="artist"))
info_streamartist

### Export track_artists to CSV (for 2,716 songs in mylibrary)

In [None]:
# Export the artist_id / track id pairs (artists repeat, tracks are unique)
# NOTE(review): this is the same file path the full track table (all_URIs) is
# written to earlier in the notebook, so that export gets overwritten -- confirm
# whether a separate file name was intended.
track_artists.to_csv(r'/Users/jass/Documents/projects/Spotify data/MyData/track_data.csv', index = False)


## Get Genre for each artist

In [None]:
# Print every unique artist ID. Once artist_unique has been turned into a
# DataFrame, looping over it directly yields its column labels, so the IDs
# are read from the 'artist_id' column; a plain list is looped as-is.
for artisturl in (artist_unique['artist_id'] if isinstance(artist_unique, pd.DataFrame) else artist_unique):
    print(artisturl)

### Get genres of artists in LIBRARY :)

In [None]:
all_genres = []  # must be created outside the for loop, otherwise it would be reset on every iteration
# Look up every unique artist. Once artist_unique has been turned into a
# DataFrame, looping over it directly yields its column labels (a single
# lookup for the text 'artist_id'), so the IDs are read from the 'artist_id'
# column; a plain list is looped as-is.
for artisturl in (artist_unique['artist_id'] if isinstance(artist_unique, pd.DataFrame) else artist_unique):
    all_genres.append(spotify.get_artist(artisturl))
all_genres

In [None]:
# Turn the list of artist responses into a DataFrame and save it as CSV without the index column.
all_genres = pd.DataFrame(all_genres)
all_genres.to_csv(r'/Users/jass/Documents/projects/Spotify data/MyData/all_genres.csv', index = False)

In [None]:
# Display the 'genres' column.
all_genres.genres

In [None]:
# Keep the 'genres' column as its own one-column DataFrame and display it
artist_genres = pd.DataFrame(all_genres.genres)
artist_genres

In [None]:
# Split genres into separate columns, each column label prefixed with 'genre_'
# (assumes every 'genres' entry is a list of genre names -- TODO confirm)
genre_split = artist_genres.genres.apply(pd.Series).add_prefix('genre_')

### Genre column

In [None]:
# Display the genre columns.
genre_split

### Unique artist column

In [None]:
# Display the unique artist IDs.
artist_unique

## Concatenate columns 'genre' & 'artist_id'

In [None]:
# Place the artist IDs and their genre columns side by side (column-wise concat), then display the result.
genres_uniqueartists = pd.concat([artist_unique, genre_split], join = 'outer', axis = 1)
genres_uniqueartists

In [None]:
# Unpivot genres for each artist (one artist has many genres).
# The genre_* columns are collected from the frame itself instead of listing
# genre_0..genre_11 by hand: a fixed list raises a KeyError when fewer genre
# columns exist and silently leaves out any beyond genre_11.
genres_uniqueartists_unpivot = pd.melt(
    genres_uniqueartists,
    id_vars='artist_id',
    value_vars=[col for col in genres_uniqueartists.columns if str(col).startswith('genre_')],
)

print(genres_uniqueartists_unpivot)

In [None]:
# Save the unpivoted artist/genre table as CSV, leaving out the index column.
genres_uniqueartists_unpivot.to_csv(r'/Users/jass/Documents/projects/Spotify data/MyData/genre_unpivot.csv', index = False)

## Get audio features

In [None]:
# Take the 'id' column of the track table.
track_ids = all_URIs.id

In [None]:
# Print every track ID, one per line, as a visual check.
for track_id in track_ids:
    print(track_id)

In [None]:
# Look up the audio features of every track (one request per track).
all_features = []
for track_id in track_ids: 
    all_features.append(spotify.get_audio_features(track_id))

In [None]:
# Turn the list of audio-feature responses into a DataFrame and save it as CSV without the index column.
all_features = pd.DataFrame(all_features)
all_features.to_csv(r'/Users/jass/Documents/projects/Spotify data/MyData/all_features.csv', index = False)