In [None]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from spotipy import util
import pandas as pd
import numpy as np
import pymongo
from pymongo import MongoClient
import auth

In [None]:
# Open a MongoDB client; no arguments are passed, so the driver's default
# connection settings apply.
mongo = MongoClient()

# Handles for the project database and the collection of user documents.
db = mongo.get_database('project-05')
db_user = db.get_collection('Usernames')

In [None]:
# Spotify API settings are read from the local `auth` module.
client_id = auth.client_id
client_secret = auth.client_secret
username = auth.sp_username
scope = auth.scope
redirect_uri = auth.redirect_uri

# Start with a client-credentials client; it stays in place if no user token
# can be obtained below.
client_credentials_manager = SpotifyClientCredentials(
    client_id=client_id,
    client_secret=client_secret,
)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

# Ask for a user-scoped token.
# NOTE(review): newer spotipy releases appear to favour SpotifyOAuth over
# util.prompt_for_user_token — confirm against the installed version.
token = util.prompt_for_user_token(
    username=username,
    scope=scope,
    client_id=client_id,
    client_secret=client_secret,
    redirect_uri=redirect_uri,
)

if not token:
    print("Can't get token for", username)
else:
    # Replace the client-credentials client with one authorised by the token.
    sp = spotipy.Spotify(auth=token)
    print('Token acquired')

In [None]:
# Empty frame with the three columns (Artist, Song Title, Track ID) that the
# Spotify lookup results are collected into.
tracks_df = pd.DataFrame(columns=['Artist', 'Song Title', 'Track ID'])

In [None]:
# Look up a Spotify track ID for every entry in each user document's 'Tracks'
# list and add the matches to `tracks_df`.
#
# Matches are buffered in a plain list and merged into the frame once:
# `DataFrame.append` was removed in pandas 2.0, and copying the whole frame for
# every new row is quadratic in the number of tracks.
new_rows = []

try:
    # `no_cursor_timeout=True` stops the server from reaping an idle cursor, so
    # it must be closed explicitly; the `with` block does that even when a
    # Spotify call raises part-way through.
    with db_user.find(batch_size=50, limit=6000, no_cursor_timeout=True) as cursor:
        for document in cursor:
            for i, track in enumerate(document['Tracks']):
                try:
                    # assumes each entry is laid out as [song title, artist, ...]
                    # — TODO confirm against the code that writes 'Tracks'
                    song = track[0]
                    artist = track[1]

                    query = song + ' ' + artist

                    search = sp.search(q=query, limit=15, type='track')
                    items = search['tracks']['items']

                    # No hit (None or empty list): skip this track.
                    if not items:
                        continue

                    # Only the first search result is used.
                    new_rows.append([artist, song, items[0]['id']])

                except IndexError:
                    # Entries with fewer than two elements are reported and skipped.
                    print('There was an error at {}'.format(i))
                    print(track)
finally:
    # Merge whatever was collected, even after an interruption, so partial
    # progress is not lost.
    if new_rows:
        new_tracks = pd.DataFrame(new_rows, columns=tracks_df.columns)
        if tracks_df.empty:
            tracks_df = new_tracks
        else:
            tracks_df = pd.concat([tracks_df, new_tracks], ignore_index=True)

In [None]:
# Discard rows with no Track ID, then keep only the first row for each Track ID.
tracks_df = (
    tracks_df
    .dropna(subset=['Track ID'])
    .drop_duplicates(subset=['Track ID'], keep='first')
)

In [None]:
# Write the track table to a pickle file, relative to the current working
# directory.
tracks_df.to_pickle(path='./track_dump.pkl')