In [1]:
import json
import os

import pandas as pd
import spotipy
import spotipy.util as util
from spotipy.oauth2 import SpotifyClientCredentials

## Spotipy setup

- You need a client ID and client secret, which you can get from the [Spotify developer dashboard](https://developer.spotify.com/dashboard/login).
- You also need to set up your redirect URI. I just use Google; it doesn't really matter. You can do this by clicking on the green 'Edit Settings' button. Make sure to add the URI and save.
- The username is your user ID. To find it (at least on the desktop app), click on your name in the top right to get to your profile, click the '...' under your name, hover over 'Share' and click 'Copy Profile Link'. Paste the link somewhere to see it; it should look like `https://open.spotify.com/user/<id>`, where `<id>` is your user ID.

In [2]:
# Credentials are read from environment variables so that real secrets never
# have to be saved in (or committed with) the notebook. The literals are only
# fallbacks, used when the corresponding variable is not set.
cId = os.environ.get("SPOTIPY_CLIENT_ID", "xxx")
cSecret = os.environ.get("SPOTIPY_CLIENT_SECRET", "xxx")
redirectURI = os.environ.get("SPOTIPY_REDIRECT_URI", "https://google.com/")
username = os.environ.get("SPOTIPY_CLIENT_USERNAME", "xxx")

# App-level client built from the client id/secret only. `sp` is rebound below
# once a user token has been obtained.
client_credentials_manager = SpotifyClientCredentials(client_id=cId, client_secret=cSecret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

# Permissions requested from the user for the token.
scope = 'user-library-read user-read-currently-playing user-read-playback-state user-read-recently-played'
token = util.prompt_for_user_token(username, scope, cId, cSecret, redirectURI)

if token:
    # Replace the app-level client with one authorised by the user token.
    sp = spotipy.Spotify(auth=token)
else:
    # No token: `sp` stays the client-credentials client created above.
    print("Can't get token for", username)

## Importing some saved tracks from spotify to a csv file

This stores the user's saved tracks in a list named track_list, the artist info in artist_list and the album info in album_list. The links between them go into song_artist_list, song_album_list and album_artist_list.

"amount" is the number of songs we want, fetched "limit" songs per request. I'm getting 100 for now. I have well over 1000 songs saved and it worked fine using that amount. I haven't tested what happens when the user has fewer saved songs than the amount specified.

In [3]:
# Row accumulators for the entity tables (one dict per row).
track_list = []
artist_list = []
album_list = []

# Row accumulators for the relational (link) tables.
song_artist_list = []
song_album_list = []
album_artist_list = []

limit = 50    # tracks requested per API call (page size)
amount = 100  # total number of saved tracks to collect

for i in range(0, amount, limit):

    # Shrink the final page so that no more than `amount` tracks are collected
    # when `amount` is not a multiple of `limit`.
    page_size = min(limit, amount - i)
    saved_tracks = sp.current_user_saved_tracks(limit=page_size, offset=i)
    items = saved_tracks['items']

    for t in items:
        track = t['track']
        track_id = track['id']
        track_name = track['name']

        # A track can have several artists: record each artist and the
        # track -> artist link.
        for t_artist in track['artists']:
            artist_id = t_artist['id']

            artist_list.append({
                'artist_id': artist_id,
                'artist_name': t_artist['name']
            })
            song_artist_list.append({
                'track_id': track_id,
                'artist_id': artist_id
            })

        track_album = track['album']
        album_id = track_album['id']

        # Use the first listed image; fall back to None instead of raising
        # IndexError when the album has no images.
        images = track_album['images']
        album_img = images[0]['url'] if images else None

        # Album -> artist links. These are appended once per saved track, so
        # the same pair repeats when several saved tracks share an album.
        for a in track_album['artists']:
            album_artist_list.append({
                'album_id': album_id,
                'artist_id': a['id']
            })

        song_album_list.append({
            'track_id': track_id,
            'album_id': album_id
        })

        track_list.append({
            'track_id': track_id,
            'track_name': track_name,
        })

        album_list.append({
            'album_id': album_id,
            'album_name': track_album['name'],
            'album_img': album_img,
            'album_type': track_album['album_type'],
            'release_date': track_album['release_date'],
            'total_tracks': track_album['total_tracks']
        })

    # A short (or empty) page means the library has no more saved tracks, so
    # stop instead of issuing further requests.
    if len(items) < page_size:
        break

Converting the track list into a dataframe

In [4]:
# Songs table: one row per saved track, keyed by its track id.
track_df = pd.DataFrame(track_list).set_index('track_id')
track_df.head()  # show the first 5 rows

Unnamed: 0_level_0,track_name
track_id,Unnamed: 1_level_1
2RSOzvKUnfDDrb2nQPfpQU,Barbie Girl
3DZQ6mzUkAdHqZWzqxBKIK,Loveeeeeee Song
68mxSfFGEVRJFoq2UCaVsU,The Purge
2Y6MLh2iXf6PP2GzHPlLzb,Different Skies
6WmIyn2fx1PKQ0XDpYj4VR,Love It If We Made It


In [5]:
# Artists table: one row per distinct artist, keyed by artist id.
# De-duplicate on artist_id BEFORE it becomes the index: drop_duplicates()
# compares columns only, so calling it after set_index() would compare
# artist_name alone and merge different artists that share a name.
# Passing `columns` keeps the frame well-formed when artist_list is empty.
artist_df = (
    pd.DataFrame(artist_list, columns=['artist_id', 'artist_name'])
    .drop_duplicates(subset='artist_id', keep='first')
    .set_index('artist_id')
)

In [6]:
# Albums table: one row per distinct album, keyed by album id.
# De-duplicate on album_id BEFORE it becomes the index: drop_duplicates()
# compares columns only, so after set_index() the album id would be ignored
# and uniqueness of the key would not be guaranteed.
# Passing `columns` keeps the frame well-formed when album_list is empty.
album_df = (
    pd.DataFrame(
        album_list,
        columns=['album_id', 'album_name', 'album_img', 'album_type',
                 'release_date', 'total_tracks'],
    )
    .drop_duplicates(subset='album_id', keep='first')
    .set_index('album_id')
)

In [7]:
# the relational tables
song_artist_df = pd.DataFrame(song_artist_list)
song_album_df = pd.DataFrame(song_album_list)
album_artist_df = pd.DataFrame(album_artist_list)

# song_artist_df.head()
# song_album_df.head()
# album_artist_df.head()

Checking dataframe shapes

In [8]:
# Print the (rows, columns) shape of every table, one per line.
shape_checks = (
    track_df,         # expect 100 rows, 1 column (track_id is the index)
    artist_df,        # one row per distinct artist, so no more rows than song_artist_df
    album_df,         # expect less than or equal to 100 rows (tracks can share an album)
    song_artist_df,   # expect >= 100 rows because the same song can have multiple artists
    song_album_df,    # expect 100 rows (one album per song)
    album_artist_df,  # expect >= 100 rows because the same album can have multiple artists
)

for frame in shape_checks:
    print(frame.shape)

(100, 1)
(71, 1)
(66, 5)
(112, 2)
(100, 2)
(105, 2)


Saving the dataframes as csv files

In [9]:
# Output file name -> table to write. Entity tables come first, then the
# relational tables; files are written in this order.
csv_targets = {
    'songs.csv': track_df,
    'artists.csv': artist_df,
    'albums.csv': album_df,
    'song-artist.csv': song_artist_df,
    'song-album.csv': song_album_df,
    'album-artist.csv': album_artist_df,
}

for filename, frame in csv_targets.items():
    frame.to_csv(filename)

Check out the csv files!