### Imports and Global Variables

In [1]:
'''
Imports + global variables
'''

import os

import spotipy
import spotipy.util as util
import spotipy.oauth2 as oauth2
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd

import config

# Spotify credentials are read from the local `config` module so that they
# are not written into the notebook itself.
username = config.CLIENT_USERNAME      # Spotify user whose playlists are read
client_id = config.CLIENT_ID           # application (client) id
client_secret = config.CLIENT_SECRET   # application (client) secret


### Logging in to Spotify with our client ID and client secret

In [2]:
def login_to_spotify(client_id, client_secret):
    '''
    This handles logging in to Spotify and returning a Spotipy object
    to be used to gather our data.

    The user is prompted for an OAuth token carrying the scopes listed in
    ``scope``. When a token is granted, the returned client is authenticated
    with it, so the requested scopes (e.g. ``playlist-read-private``) actually
    apply to later calls. If no token is granted, the function falls back to
    an app-only client built from the client credentials, which is what was
    always returned before.

    Parameters:
        client_id: the Spotify application (client) id.
        client_secret: the Spotify application (client) secret.

    Returns:
        A ``spotipy.Spotify`` client.

    Note: the user to authorise is taken from the module-level ``username``.
    '''

    # Single source of truth for the callback URL; it has to be identical in
    # the environment and in the token request.
    redirect_uri = 'http://localhost:8888/callback/'

    os.environ['SPOTIPY_CLIENT_ID'] = client_id
    os.environ['SPOTIPY_CLIENT_SECRET'] = client_secret
    os.environ['SPOTIPY_REDIRECT_URI'] = redirect_uri

    scope = 'user-library-read playlist-read-private user-top-read'

    token = util.prompt_for_user_token(
        username,
        scope,
        client_id=client_id,
        client_secret=client_secret,
        redirect_uri=redirect_uri,
    )

    if token:
        # Use the user token that was just requested; previously it was
        # discarded and the scopes above had no effect.
        # NOTE(review): a raw token is presumably not refreshed automatically;
        # re-run this cell if a long session starts failing with 401 errors.
        return spotipy.Spotify(auth=token)

    # No user token available: fall back to the app-only credentials flow.
    client_credentials_manager = SpotifyClientCredentials(
        client_id=client_id,
        client_secret=client_secret,
    )
    return spotipy.Spotify(client_credentials_manager=client_credentials_manager)


This is where the Spotipy client object is created and the user's playlist listing is downloaded.

In [3]:
# Build the shared Spotipy client that the helper functions below read as a
# global, then fetch the playlist listing of the configured user.
# NOTE(review): the listing is presumably a single page of results - confirm
# whether users with many playlists need the following pages as well.
SPOTIPY_OBJECT = login_to_spotify(client_id, client_secret)
playlists = SPOTIPY_OBJECT.user_playlists(username)


### Getting playlist data

In [4]:
def get_playlist_data(playlists, playlist_name):
    '''
    This function takes in a user's playlists and a playlist name,
    and downloads all song information for the given playlist name.

    Parameters:
        playlists: a paged playlist listing, i.e. a dict with an 'items'
            list of playlists (each with 'name' and 'id') and optionally a
            'next' entry pointing at the following page.
        playlist_name: exact name of the playlist to download.

    Returns:
        A list with every track item of the playlist, across all pages.

    Raises:
        ValueError: if no playlist with that name exists in the listing.

    Uses the module-level ``SPOTIPY_OBJECT`` and ``username``.
    '''

    playlist_id = None
    page = playlists

    # Search page by page and stop paging as soon as one page has a match.
    while page is not None and playlist_id is None:
        for playlist in page['items']:
            if playlist['name'] == playlist_name:
                # No break on purpose: when several playlists on the page
                # share the name, the last one wins, exactly as before.
                playlist_id = playlist['id']

        if playlist_id is None:
            page = SPOTIPY_OBJECT.next(page) if page.get('next') else None

    if playlist_id is None:
        # Previously this surfaced as an UnboundLocalError further down.
        raise ValueError(
            'No playlist named {!r} was found for this user'.format(playlist_name)
        )

    playlist_data = SPOTIPY_OBJECT.user_playlist(
        username,
        playlist_id,
    )

    playlist_tracks = playlist_data["tracks"]
    # Copy so that extending the result does not mutate the API response.
    playlist_songs = list(playlist_tracks["items"])

    # The first response only holds the first page of tracks; keep following
    # the 'next' link until the listing is exhausted.
    while playlist_tracks['next']:
        playlist_tracks = SPOTIPY_OBJECT.next(playlist_tracks)
        playlist_songs.extend(playlist_tracks['items'])

    return playlist_songs


### Getting our final dataframe from playlist data

In [5]:
def get_dataframe(playlist_data):
    '''
    Build a dataframe with one row per playlist track, combining the track's
    audio features with details about its first listed artist.

    Parameters:
        playlist_data: list of playlist items as returned by
            get_playlist_data; each item holds its track object under the
            'track' key.

    Returns:
        A pandas DataFrame with a 0..n-1 integer index and the columns
        track_id, artist_id, artist_name, artist_popularity,
        artist_followers, artist_genres (comma-joined) followed by the audio
        feature columns. Column dtypes are inferred by pandas from the data.

    Items without a track or without a track id are skipped. A track for
    which no audio features are returned gets missing values in the feature
    columns. An empty playlist gives an empty dataframe with the same columns
    and makes no API call.

    Uses the module-level ``SPOTIPY_OBJECT``.
    '''
    artist_columns = [
        'track_id',
        'artist_id',
        'artist_name',
        'artist_popularity',
        'artist_followers',
        'artist_genres',
    ]
    feature_columns = [
        'instrumentalness',
        'duration_ms',
        'time_signature',
        'acousticness',
        'speechiness',
        'energy',
        'loudness',
        'tempo',
        'key',
        'valence',
        'danceability',
        'liveness',
    ]

    # Entries without a usable track id cannot be looked up, so drop them.
    tracks = [
        item['track']
        for item in playlist_data
        if item.get('track') and item['track'].get('id')
    ]
    track_ids = [track['id'] for track in tracks]

    # The audio-features endpoint only accepts a limited number of ids per
    # request, so ask for them in batches instead of all at once.
    max_ids_per_request = 100
    track_features = {}
    for start in range(0, len(track_ids), max_ids_per_request):
        batch_ids = track_ids[start:start + max_ids_per_request]
        batch_features = SPOTIPY_OBJECT.audio_features(tracks=batch_ids)
        for track_id, features in zip(batch_ids, batch_features):
            # A missing feature set comes back as None; normalise it to an
            # empty dict so the lookups below yield missing values.
            track_features[track_id] = features or {}

    artist_info_by_id = {}  # each artist is fetched once, however often it appears
    rows = []

    for track in tracks:
        track_id = track['id']

        # The playlist item normally carries the track's artists already;
        # only look the track up again when it does not.
        artists = track.get('artists') or SPOTIPY_OBJECT.track(track_id)['artists']
        artist_id = artists[0]['id']

        if artist_id not in artist_info_by_id:
            artist_info_by_id[artist_id] = SPOTIPY_OBJECT.artist(artist_id)
        artist_info = artist_info_by_id[artist_id]

        features = track_features[track_id]

        row = [
            track_id,
            artist_id,
            artist_info['name'],
            artist_info['popularity'],
            artist_info['followers']['total'],
            ','.join(artist_info['genres']),
        ]
        row.extend(features.get(column) for column in feature_columns)
        rows.append(row)

    # Build the frame once from all rows instead of growing it row by row.
    return pd.DataFrame(rows, columns=artist_columns + feature_columns)

This is where the dataframe for the `amb-tech` playlist is actually built.

In [6]:
# Download every track item of the 'amb-tech' playlist, then turn the items
# into a dataframe with one row per track.
amb_tech_data = get_playlist_data(playlists, 'amb-tech')
amb_tech_df = get_dataframe(amb_tech_data)

# likes_playlist_data = get_playlist_data(playlists, 'likes')
# dislikes_playlist_data = get_playlist_data(playlists, 'dislikes')

# likes_df = get_dataframe(likes_playlist_data)
# dislikes_df = get_dataframe(dislikes_playlist_data)

In [7]:
# likes_df

In [8]:
# dislikes_df

In [9]:
# Last expression of the cell: shows the dataframe built above.
amb_tech_df

Unnamed: 0,track_id,artist_id,artist_name,artist_popularity,artist_followers,artist_genres,instrumentalness,duration_ms,time_signature,acousticness,speechiness,energy,loudness,tempo,key,valence,danceability,liveness
0,3yWl69NlEkqdGDUtLzDQl4,5I5iKldB6ajelADVOi4UKG,Dubiosity,15,344,,0.857,407412,4,0.228,0.0736,0.536,-13.778,125.036,1,0.0748,0.696,0.141
1,5zSGNCPUCQXsqzdYAggfUe,0o8ENM3dQlWJh4CkX0nGK7,Patrick Siech,12,644,swedish electronic,0.938,428750,4,0.0151,0.0561,0.736,-12.915,129.008,8,0.328,0.579,0.11
2,5RSLE68Xq2m6h7ggtVReSL,7yxi31szvlbwvKq9dYOmFI,Jon Hopkins,64,228244,"ambient,compositional ambient,electronic,fourt...",0.72,467816,4,0.00019,0.0416,0.942,-11.22,130.013,7,0.0333,0.595,0.318
3,50PtCIy3QV8JTfmXISe2I7,1EULJuDFWpZ9xg4YwtUGGt,Daniel Avery,47,45274,"chamber psych,electronic,float house,microhous...",0.0413,342853,4,0.414,0.0523,0.473,-10.708,119.988,9,0.737,0.776,0.0807
4,2EAwwq6Rq2KOE6sgth13ah,0o8ENM3dQlWJh4CkX0nGK7,Patrick Siech,12,644,swedish electronic,0.921,377209,4,0.0936,0.0511,0.646,-11.937,129.008,10,0.04,0.66,0.103
5,0WsROU8CJrMWBukK5IMs4y,0KqSULB80ft2H3aFg6kJmN,I Hate Models,40,19292,minimal dub,0.755,463700,4,0.0001,0.0602,0.916,-8.012,135.986,11,0.355,0.431,0.396
6,6JfhpeR95WeRE3xnXB1pPo,56HBXB2JoYhf04oMeko90l,Mark Broom,29,6278,"minimal dub,tech house,techno",0.925,307733,4,0.139,0.162,0.797,-8.467,131.998,9,0.205,0.884,0.109
7,1H5neWFEXm8YnlXGH3MhIW,3lN70MoiO9u6b95CsTeB1J,Pearson Sound,35,9607,"bass music,float house,future garage,outsider ...",0.936,391684,4,0.00219,0.064,0.878,-7.155,127.978,7,0.752,0.799,0.107
8,2Cq5ETe5Wit7ZMV7N1Ogxe,6WOuebFShfHzNcGkKkEYof,Kilner,0,133,,0.764,463800,4,0.0263,0.0892,0.64,-11.652,119.501,10,0.178,0.83,0.107
9,7gECnmpNOXBRinZCsTrDKm,73A3bLnfnz5BoQjb4gNCga,Bicep,59,96389,"deep house,electronic,float house,house",0.229,316602,4,0.0553,0.027,0.897,-5.96,123.017,1,0.399,0.589,0.105
