# SpotifySights

In [201]:
# Libraries needed for the project
import spotipy                                              # type: ignore
import requests
import pandas as pd
import plotly.express as px
from spotipy.oauth2 import SpotifyOAuth

Your Spotify API credentials are private, so they are not stored in this notebook: fill in your `client_id` and `client_secret` in `credentials.txt` before running the code. You can get these credentials from the [Spotify developer dashboard](https://developer.spotify.com/dashboard).

In [2]:
# Read the Spotify app credentials from `credentials.txt`.
# Lines of the form `client_id = "..."` and `client_secret = "..."` are picked up;
# every other line is ignored.

# Both stay None when the corresponding line is not found in the file
client_id = None
client_secret = None

with open('credentials.txt', 'r') as file:
    for line in file:
        # Split on the FIRST '=' only, so a value that itself contains '='
        # is kept whole instead of the line being silently skipped
        variable_name, separator, value = line.partition('=')
        if not separator:
            # No '=' at all: not a `name = value` line
            continue

        variable_name = variable_name.strip()
        # Drop surrounding whitespace first, then any surrounding double quotes
        value = value.strip().strip('"')

        if variable_name == 'client_id':
            client_id = value
        elif variable_name == 'client_secret':
            client_secret = value

In [3]:
# Function to build an authenticated Spotify API client
def spotify(client_id:str, client_secret:str, scope:str=None,
            redirect_uri:str="http://localhost:8888/callback"):
    """Return a `spotipy.Spotify` client that authenticates through `SpotifyOAuth`.

    Parameters
    ----------
    client_id, client_secret : str
        Credentials of the Spotify app, forwarded unchanged to `SpotifyOAuth`.
    scope : str, optional
        Scope string forwarded to `SpotifyOAuth`; `None` (the default) is passed
        through as-is.
    redirect_uri : str, optional
        Redirect URI forwarded to `SpotifyOAuth`. Defaults to the value this
        notebook has always used, so existing calls behave exactly as before.
        NOTE(review): presumably this must match a redirect URI registered for
        the app in the developer dashboard — confirm.

    Returns
    -------
    spotipy.Spotify
        Client object built with the OAuth manager above.
    """
    auth_manager = SpotifyOAuth(client_id=client_id,
                                client_secret=client_secret,
                                redirect_uri=redirect_uri,
                                scope=scope)
    return spotipy.Spotify(auth_manager=auth_manager)

Check that the cell below runs correctly. Token issues can occasionally come up, and this API can be a little complicated to work with. But don't worry: you can find all the help you need on either [stackoverflow](https://stackoverflow.com/questions/tagged/spotify) or [medium](https://medium.com/search?q=spotify+api+python) (the latter is the best in my opinion).

In [4]:
# Smoke test: build a client without an explicit scope and fetch the current user's profile
sp = spotify(client_id=client_id, client_secret=client_secret)
sp.me()

{'display_name': 'Angel Panda',
 'external_urls': {'spotify': 'https://open.spotify.com/user/12165545125'},
 'href': 'https://api.spotify.com/v1/users/12165545125',
 'id': '12165545125',
 'images': [{'url': 'https://scontent-ord5-1.xx.fbcdn.net/v/t39.30808-1/367459657_2338569392993162_3452082202277657577_n.jpg?stp=c6.0.50.50a_cp0_dst-jpg_p50x50&_nc_cat=106&ccb=1-7&_nc_sid=5f2048&_nc_ohc=wgMcNbiqeQQAX97Rebp&_nc_ht=scontent-ord5-1.xx&edm=AP4hL3IEAAAA&oh=00_AfDrjSXVQB0RiM8gfJX5cwiQ7FaIyEW8E4t5BiEWI4vi2Q&oe=655A9D7F',
   'height': 64,
   'width': 64},
  {'url': 'https://scontent-ord5-1.xx.fbcdn.net/v/t39.30808-1/367459657_2338569392993162_3452082202277657577_n.jpg?stp=c36.0.320.320a_dst-jpg_p320x320&_nc_cat=106&ccb=1-7&_nc_sid=5f2048&_nc_ohc=wgMcNbiqeQQAX97Rebp&_nc_ht=scontent-ord5-1.xx&edm=AP4hL3IEAAAA&oh=00_AfBTdMYcF2Dl4aA1NVGzWqFS1wgjHQrZz5GB-DGEQuwanA&oe=655A9D7F',
   'height': 300,
   'width': 300}],
 'type': 'user',
 'uri': 'spotify:user:12165545125',
 'followers': {'href': None, 'to

In [99]:
# Saved tracks of the current user.
# `current_user_saved_tracks()` is called with its defaults, so only the first page
# of results is fetched (presumably 20 tracks, spotipy's default limit — confirm).
scope = "user-library-read"

sp = spotify(client_id, client_secret, scope)

results = sp.current_user_saved_tracks()

# One record per saved track; only the first listed artist is kept
tracks_data = [
    {
        'Artist': item['track']['artists'][0]['name'],
        'Track': item['track']['name'],
    }
    for item in results['items']
]

# Explicit columns keep the expected schema even when no tracks come back,
# so downstream cells that reference 'Artist' / 'Track' still find them
tracks = pd.DataFrame(tracks_data, columns=['Artist', 'Track'])

In [238]:
# Treemap of the saved tracks, nested as root -> artist -> track
fig = px.treemap(
    tracks,
    path=[px.Constant('My Saved Tracks'), 'Artist', 'Track'],
)
fig.update_traces(
    root_color="#ffffff",
    insidetextfont={'size': 14},
    textposition='middle center',
)
fig.update_layout(
    colorway=['#f6b520', '#f0cc80', '#0c8c4c', '#3f8c44', '#eae9e5', '#424236'],
    margin={'t': 0, 'l': 0, 'r': 0, 'b': 0},
)
fig.show()

In [176]:
# Top artists of the current user, up to 50 per time range
scope = 'user-top-read'
ranges = ['short_term', 'medium_term', 'long_term']

sp = spotify(client_id, client_secret, scope)

# Collect one record per (time range, artist)
top_artists_data = []

# Iterate over `ranges` itself rather than a second copy of the same literal list
for sp_range in ranges:

    # Retrieve top artists for the specified time range
    results = sp.current_user_top_artists(time_range=sp_range, limit=50)

    # 'Genres' holds the list of genres exactly as returned for the artist
    top_artists_data.extend(
        {
            'Time_Range': sp_range,
            'Artist': item['name'],
            'Genres': item['genres'],
        }
        for item in results['items']
    )

# Explicit columns keep the schema stable even if the API returns no artists
top_artists = pd.DataFrame(top_artists_data, columns=['Time_Range', 'Artist', 'Genres'])

In [234]:
# Treemap of the first 10 artists of each time range, nested as root -> time range -> artist
fig = px.treemap(
    top_artists.groupby('Time_Range').head(10),
    path=[px.Constant('My Top Artists'), 'Time_Range', 'Artist'],
)
fig.update_traces(
    root_color="#BD8A3E",
    insidetextfont={'size': 14},
    textposition='middle center',
)
fig.update_layout(
    colorway=['#f6b520', '#f0cc80', '#0c8c4c', '#3f8c44', '#eae9e5', '#424236'],
    margin={'t': 0, 'l': 0, 'r': 0, 'b': 0},
)
fig.show()

In [241]:
# Bar chart of how often each genre appears across the top artists.
# Each artist carries a list of genres, so explode to one genre per row before counting.
# The column names are pinned explicitly ('index' = genre, 'Genres' = count) because
# `value_counts().to_frame().reset_index()` names its columns differently depending on
# the pandas version, which would break the `x='index'` / `y='Genres'` lookup below.
Genres = (
    top_artists['Genres']
    .explode()
    .value_counts()
    .rename_axis('index')
    .reset_index(name='Genres')
)
# `labels` only changes the axis titles; the data columns keep their names
fig = px.bar(Genres, x='index', y='Genres',
             labels={'index': 'Genre', 'Genres': 'Count'})
fig.update_layout(title="Top Genres")
fig.show()

In [20]:
# Top tracks of the current user, up to 50 per time range
scope = 'user-top-read'
ranges = ['short_term', 'medium_term', 'long_term']

sp = spotify(client_id, client_secret, scope)

# Collect one record per (time range, track)
top_tracks_data = []

# Iterate over `ranges` itself rather than a second copy of the same literal list
for sp_range in ranges:

    # Retrieve top tracks for the specified time range
    results = sp.current_user_top_tracks(time_range=sp_range, limit=50)

    # Only the first listed artist of each track is kept
    top_tracks_data.extend(
        {
            'Time Range': sp_range,
            'Track': item['name'],
            'Artist': item['artists'][0]['name'],
        }
        for item in results['items']
    )

# Explicit columns keep the schema stable even if the API returns no tracks
top_tracks = pd.DataFrame(top_tracks_data, columns=['Time Range', 'Track', 'Artist'])


In [235]:
# Treemap of the first 10 tracks of each time range, nested as root -> time range -> artist -> track
fig = px.treemap(
    top_tracks.groupby('Time Range').head(10),
    path=[px.Constant('My Top Tracks'), 'Time Range', 'Artist', 'Track'],
)
fig.update_traces(
    root_color="#BD8A3E",
    insidetextfont={'size': 14},
    textposition='middle center',
)
fig.update_layout(
    colorway=['#f6b520', '#f0cc80', '#0c8c4c', '#3f8c44', '#eae9e5', '#424236'],
    margin={'t': 0, 'l': 0, 'r': 0, 'b': 0},
)
fig.show()

In [202]:
# Helpers and entry point for turning a playlist id into a dataframe of its tracks and their audio features

# Seconds to wait on any single HTTP call before giving up
_SPOTIFY_TIMEOUT_S = 30

# Number of track ids sent per audio-features request
# NOTE(review): 100 is the documented maximum for the `ids` parameter — confirm against the API reference
_AUDIO_FEATURES_BATCH = 100


def _spotify_get_json(url, headers, params = None):
    """GET `url` with the given headers/query params and return the decoded JSON body.

    Raises whatever `raise_for_status()` raises when the response is an HTTP error.
    """
    response = requests.get(url, headers = headers, params = params, timeout = _SPOTIFY_TIMEOUT_S)
    response.raise_for_status()
    return response.json()


def call_playlist(playlist_id, cid, secret):
    """Return a dataframe with one row per playlist track plus its audio features.

    Parameters
    ----------
    playlist_id : str
        Spotify playlist id, inserted into the `playlists/<id>/tracks` URL.
    cid, secret : str
        Client id and client secret used for the client-credentials token request.

    Returns
    -------
    pandas.DataFrame
        Columns `track_name`, `track_id`, `track_artists` (list of artist names),
        `popularity`, `explicit`, followed by `id` and the audio-feature columns.
        Rows are in playlist order. Tracks without an id, or without returned
        audio features, keep empty values in the feature columns.

    Raises
    ------
    Whatever `raise_for_status()` raises if the token request or any API request
    returns an HTTP error status; `KeyError` if the token response carries no
    `access_token`.
    """
    # generating the access token for the spotify api (client-credentials flow)
    auth_url = 'https://accounts.spotify.com/api/token'
    auth_response = requests.post(auth_url,
                                  data = {'grant_type': 'client_credentials',
                                          'client_id': cid,
                                          'client_secret': secret},
                                  timeout = _SPOTIFY_TIMEOUT_S)
    # fail here with the HTTP error instead of a confusing KeyError further down
    auth_response.raise_for_status()
    access_token = auth_response.json()['access_token']

    # defining headers used for authenticating all api calls
    headers = {'Authorization': 'Bearer {token}'.format(token = access_token)}

    # setting the base url for the api call
    base_url = 'https://api.spotify.com/v1/'

    # calling the api to get the tracks on the playlist; the endpoint is paginated,
    # so keep following the `next` link until it is empty
    items = []
    page_url = base_url + 'playlists/' + playlist_id + '/tracks'
    while page_url:
        page = _spotify_get_json(page_url, headers)
        items.extend(page.get('items', []))
        page_url = page.get('next')

    # pulling the primary data out of each playlist item
    tracks = []
    for item in items:
        track = item.get('track')
        if not track:
            # an item can carry no track object at all; nothing to extract from it
            continue
        # keep every credited artist, not just the first one
        track_artists = [artist['name'] for artist in track['artists']]
        tracks.append([track['name'], track['id'], track_artists,
                       track['popularity'], track['explicit']])

    # creating the tracks dataframe
    tracks_columns = ['track_name', 'track_id', 'track_artists', 'popularity', 'explicit']
    tracks_df = pd.DataFrame(tracks, columns = tracks_columns)

    # ids to request features for: skip missing ids and de-duplicate (order preserved),
    # otherwise a track that appears twice in the playlist would be fetched twice and
    # multiply its rows in the merge below
    track_ids = list(dict.fromkeys(row[1] for row in tracks if row[1] is not None))

    # requesting the audio features in batches instead of one HTTP call per track
    features = []
    for start in range(0, len(track_ids), _AUDIO_FEATURES_BATCH):
        batch = track_ids[start:start + _AUDIO_FEATURES_BATCH]
        payload = _spotify_get_json(base_url + 'audio-features', headers,
                                    params = {'ids': ','.join(batch)})
        # drop empty entries so only real feature dicts reach the dataframe
        features.extend(entry for entry in payload.get('audio_features', []) if entry)

    # creating a df with select data values for each set of audio features
    features_columns = ['id', 'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
                        'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 'duration_ms', 'time_signature']
    features_df = pd.DataFrame(features, columns = features_columns)

    # joining the two dfs on the track id; every feature row comes from an id in tracks_df,
    # so a left join keeps the same rows as an outer join while preserving playlist order
    playlist_df = tracks_df.merge(features_df, how = 'left', left_on = 'track_id', right_on = 'id')

    # return playlist for viewing
    return playlist_df

In [203]:
# Example: build the tracks + audio-features dataframe for one playlist and preview it
playlist_id = '05wi7cd3ZWrVnxhfHk8SlF'

playlist = call_playlist(playlist_id, cid=client_id, secret=client_secret)
playlist.head()

Unnamed: 0,track_name,track_id,track_artists,popularity,explicit,id,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
0,Un Poquito,1nU4sOQqqhFUqS0TiRlD0V,"[Alejo, Young Miko]",71,True,1nU4sOQqqhFUqS0TiRlD0V,0.823,0.758,11,-5.027,0,0.0689,0.203,0.000102,0.0938,0.47,92.877,166050,4
1,Te Conoci Perriando,68UxtmtQa5wiUa0sbpPvgd,[Mora],72,True,68UxtmtQa5wiUa0sbpPvgd,0.478,0.701,11,-5.009,0,0.232,0.566,0.0,0.156,0.255,87.105,190107,4
2,FINA,3nNmRE0DxHC6ZaKkrpUumS,"[Bad Bunny, Young Miko]",93,True,3nNmRE0DxHC6ZaKkrpUumS,0.847,0.724,1,-5.41,1,0.298,0.0869,1e-06,0.118,0.396,94.601,216328,4
3,Suelta,5LitdykQszEnuzIrtGhwPs,"[Jay Wheeler, Mora]",71,False,5LitdykQszEnuzIrtGhwPs,0.859,0.553,9,-5.726,0,0.0456,0.628,0.00011,0.109,0.338,122.048,243934,4
4,Desenfocao',2CEf2gU5ZJamLXa3NrgAvA,[Rauw Alejandro],73,False,2CEf2gU5ZJamLXa3NrgAvA,0.706,0.649,8,-3.911,1,0.0353,0.152,0.0,0.161,0.271,139.976,170536,4


> I added this last function because I think it would be cool to analyze a playlist and try to see how effective it could be at a party. If we build a model on these audio features, we might be able to estimate how likely the music is to have a positive effect on people.

### References:
- spotipy-dev/spotipy: A light weight Python library for the Spotify Web API. (2023, October 31). GitHub. https://github.com/spotipy-dev/spotipy.
- Freidenburg, M. (2023, October 21). A Data Art Project: the Spotify July Top 50 | Medium. Medium; Medium. https://medium.com/@miafreidenburg/spotify-top-50-689a37c90b7c
