In [7]:
!pip install requests



In [8]:
import requests
import datetime
from urllib.parse import urlencode
import pprint
import base64
import sys
import spotipy
import spotipy.util as util

In [9]:
# add your details
client_id = ''
client_secret = '' 
redirect_uri = ''
#these are vab's developer account ids 
#plz dont hack his account 
#:)


In [10]:
#client for spotify api for getting general stuff such 
#as top tracks top artists etc and spotipy api for getting specific song data
class SpotifyAPI(object):
    access_token = None
    access_token_expires = datetime.datetime.now()
    access_token_did_expire = True
    client_id = None
    client_secret = None
    token_url = "https://accounts.spotify.com/api/token"
    scope ='user-library-read'
    
    def __init__(self, client_id, client_secret, redirect_uri, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.client_id = client_id
        self.client_secret = client_secret
        self.redirect_uri = redirect_uri

    def get_client_credentials(self):
        """
        Returns a base64 encoded string
        """
        client_id = self.client_id
        client_secret = self.client_secret
        if client_secret == None or client_id == None:
            raise Exception("You must set client_id and client_secret")
        client_creds = f"{client_id}:{client_secret}"
        client_creds_b64 = base64.b64encode(client_creds.encode())
        return client_creds_b64.decode()
    
    def get_token_headers(self):
        client_creds_b64 = self.get_client_credentials()
        return {
            "Authorization": f"Basic {client_creds_b64}"
        }
    
    def get_token_data(self):
        return {
            "grant_type": "client_credentials"
        } 
    
    def perform_auth(self):
        token_url = self.token_url
        token_data = self.get_token_data()
        token_headers = self.get_token_headers()
        r = requests.post(token_url, data=token_data, headers=token_headers)
        if r.status_code not in range(200, 299):
            raise Exception("Could not authenticate client.")
            # return False
        data = r.json()
        now = datetime.datetime.now()
        access_token = data['access_token']
        expires_in = data['expires_in'] # seconds
        expires = now + datetime.timedelta(seconds=expires_in)
        self.access_token = access_token
        self.access_token_expires = expires
        self.access_token_did_expire = expires < now
        return True
    
    
    def get_access_token(self):
        token = self.access_token
        expires = self.access_token_expires
        now = datetime.datetime.now()
        if expires < now:
            self.perform_auth()
            return self.get_access_token()
        elif token == None:
            self.perform_auth()
            return self.get_access_token() 
        return token
    
    def get_resource_header(self):
        access_token = self.get_access_token()
        headers = {
            "Authorization": f"Bearer {access_token}"
        }
        return headers
        
        
    def get_resource(self, lookup_id, resource_type='albums', version='v1', extras = ''):
        endpoint = f"https://api.spotify.com/{version}/{resource_type}/{lookup_id}{extras}"
        headers = self.get_resource_header()
        r = requests.get(endpoint, headers=headers)
        if r.status_code not in range(200, 299):
            return {}
        return r.json()
    
    #personal data
    def spotipy_auth(self):
        if len(sys.argv) > 1:
            username = sys.argv[1]
        else:
            print("Usage: %s username" % (sys.argv[0],))
            sys.exit()

        spotipy_token = util.prompt_for_user_token(username,
                                   scope=self.scope,
                                   client_id=self.client_id,
                                   client_secret= self.client_secret,
                                   redirect_uri=self.redirect_uri)
        return spotipy_token
    
    def get_my_tracks(self):
        spotipy_token = self.spotipy_auth()
        track_list = []
        if spotipy_token:
            sp = spotipy.Spotify(auth=spotipy_token)
            i = 0
            while i < 1000000:
                results = sp.current_user_saved_tracks(1, i)
                if len(results['items']) == 0:
                    break
                else:
                    for item in results['items']:
                        track = item['track']
                        track_list.append(track['name'] + ' - ' + track['artists'][0]['name'])
                i+=1
        else:
            print("Can't get token for", username)
        return track_list
    
    def top_tracks_current(self):
        spotipy_token = self.spotipy_auth()
        track_list = []
        if spotipy_token:
            sp = spotipy.Spotify(auth=spotipy_token)
            sp.trace = False
            results = sp.current_user_top_tracks(time_range='short_term', limit=50)
            for i, item in enumerate(results['items']):
                track_list.append(item['name'] + '-' + item['artists'][0]['name'])
        else:
            print("Can't get token for", username)
        return track_list
    
    
    def get_my_top_artists(self):
        spotipy_token = self.spotipy_auth()
        artists = []
        if spotipy_token:
            sp = spotipy.Spotify(auth=spotipy_token)
            sp.trace = False
            ranges = ['short_term', 'medium_term', 'long_term']
            for r in ranges:
                results = sp.current_user_top_artists(time_range=r, limit=10)
                for i, item in enumerate(results['items']):
                    artists.append(item['name'])

        else:
            print("Can't get token for", username)
        return set(artists)
    
    #get functions
    #albums
    def get_album(self, _id):
        return self.get_resource(_id, resource_type='albums')
    
    def get_album_tracks(self, _id):
        return self.get_resource(_id, resource_type='albums', extras = '/tracks')
    
    #artists
    def get_artist(self, _id):
        return self.get_resource(_id, resource_type='artists')
    
    def get_artist_albums(self, _id):
        return self.get_resource(_id, resource_type='artists', extras='/albums')
    
    def get_artist_top_tracks(self, _id):
        endpoint = f"https://api.spotify.com/v1/artists/{_id}/top-tracks?country=US"
        headers = self.get_resource_header()
        r = requests.get(endpoint, headers=headers)
        #if r.status_code not in range(200, 299):
            #return {}
        return r.json()
    
    def get_similar_artist(self, _id):
        return self.get_resource(_id, resource_type='artists', extras = '/related-artists')
    
    #tracks
    def get_audio_data(self, _id):
        return self.get_resource(_id, resource_type='audio-features')
    
    def get_audio_analysis(self, _id):
        return self.get_resource(_id, resource_type='audio-analysis')
    
    
    def new_releases(self):
        endpoint = "https://api.spotify.com/v1/browse/new-releases"
        headers = self.get_resource_header()
        r = requests.get(endpoint, headers=headers)
        if r.status_code not in range(200, 299):
            return {}
        return r.json()
    
   
    
    #search
    def base_search(self, query_params): # type
        headers = self.get_resource_header()
        endpoint = "https://api.spotify.com/v1/search"
        lookup_url = f"{endpoint}?{query_params}"
        r = requests.get(lookup_url, headers=headers)
        if r.status_code not in range(200, 299):  
            return {}
        return r.json()
    
    def search(self, query = None, operator = None, operator_query = None, search_type='artist'):
        if query == None:
            raise Exception("A query is required")
        if isinstance(query, dict):
            query = " ".join([f"{k}:{v}" for k,v in query.items()])
        if operator != None and operator_query != None:
            if operator.lower() == 'or' or operator.lower() == 'not':
                operator = operator.upper()
                if isinstance(operator_query, str):
                    query = f"{query} {operator} {operator_query}"
        query_params = urlencode({"q": query, "type": search_type.lower()})
        return self.base_search(query_params)

In [11]:
spotify = SpotifyAPI(client_id, client_secret, redirect_uri)
#instance of client

In [12]:
my_top_artists = list(spotify.get_my_top_artists())
my_top_artists

['iann dior',
 'Ed Sheeran',
 'Lil Uzi Vert',
 'Khalid',
 'Eminem',
 'Marshmello',
 'XXXTENTACION',
 'Juice WRLD',
 'A Boogie Wit da Hoodie',
 'Dua Lipa',
 'Halsey',
 'The Weeknd',
 'Loud Luxury',
 'YNW Melly',
 'Drake',
 'NF']

In [13]:
my_current_top_50 = spotify.top_tracks_current()
my_current_top_50

['Relationship (feat. Future)-Young Thug',
 'Eenie Meenie - Radio Version-Sean Kingston',
 'ily (i love you baby) (feat. Emilee)-Surf Mesa',
 'Foreplay-Jalen Santoy',
 'Without Me (with Juice WRLD)-Halsey',
 'Wassup (feat. Future)-Lil Uzi Vert',
 'Drinking Problem (feat. 27CLUB)-Arizona Zervas',
 'Sunny Days-Edward Maya',
 'Trap - Rompasso Remix-SAINt JHN',
 'These Days (feat. Jess Glynne, Macklemore & Dan Caplen)-Rudimental',
 'Feeling-Juice WRLD',
 'High On Life (feat. Bonn)-Martin Garrix',
 'Love To Go-Lost Frequencies',
 'Too Good-Drake',
 'bad vibes forever-XXXTENTACION',
 'Tinted Eyes (feat. blackbear & 24kGoldn)-DVBBS',
 'Paris-Cevith',
 'King Of My City-A Boogie Wit da Hoodie',
 'The Hills-The Weeknd',
 'R.O.D.-A Boogie Wit da Hoodie',
 'Might Not Give Up (feat. Young Thug)-A Boogie Wit da Hoodie',
 'Reply (feat. Lil Uzi Vert)-A Boogie Wit da Hoodie',
 'Skinny Love-Birdy',
 'Blame (feat. John Newman)-Calvin Harris',
 'Cold Feet-Loud Luxury',
 'Godzilla-Aftermath',
 'Mr. Saxobea

In [14]:
my_library = spotify.get_my_tracks()
my_library

['Shameless - iann dior',
 'Cinderella Story - A Boogie Wit da Hoodie',
 'Gummy - Loud Luxury',
 'Relationship (feat. Future) - Young Thug',
 'Close Friends (Lil Baby & Gunna) - Lil Baby',
 'GO (feat. Juice WRLD) - The Kid LAROI',
 'Addison Rae - The Kid LAROI',
 'Trap - Rompasso Remix - SAINt JHN',
 'VALENTINO - Imanbek Remix - 24kGoldn',
 'The Bigger Picture - Lil Baby',
 'Lalala - Ilkan Gunuc Remix - Y2K',
 'Otherside Of America - Meek Mill',
 'Alien - Dennis Lloyd',
 'Tomorrow (feat. 433) - Tiësto',
 'GO (feat. Juice WRLD) - The Kid LAROI',
 'You & I (feat. Tyler Sjöström & Bertrand Lacoste) - Sterkøl',
 'Paris - Cevith',
 'Godzilla - Aftermath',
 'Keeps Me High (WHOCARES Remix) - HADES',
 'I Love My Friends (And My Friends Love Me) - Steve Aoki',
 'Tinted Eyes (feat. blackbear & 24kGoldn) - DVBBS',
 'Kiss Me Hard - DJ Antoine vs Mad Mark 2k20 Mix - DJ Antoine',
 'Best Friend (feat. Tory Lanez) - A Boogie Wit da Hoodie',
 "F**kin' Problems (feat. Drake, 2 Chainz & Kendrick Lamar) -

The following cells are for data cleaning and data collecting so that it is easier to put into MongoDB

In [15]:
#takes in track and artist from my_library to put it in search method
def make_query(s):
    delimeter = ' - '
    delimeter_index = s.find(delimeter)
    track = s[0 : delimeter_index]
    artist = s[delimeter_index :]
    query_dict = {"track" : track, "artist" : artist}
    return query_dict

In [16]:

#function to take in query result and cleanly obtain basic ids and names
def basic_song_data(query):
    try:
        artist_ids.append(query['tracks']['items'][0]['artists'][0]['id'])
        artist_names.append(query['tracks']['items'][0]['artists'][0]['name'])
        song_ids.append(query['tracks']['items'][0]['id'])
        song_names.append(query['tracks']['items'][0]['name'])
        song_popularities.append(query['tracks']['items'][0]['popularity'])
        album_ids.append(query['tracks']['items'][0]['album']['id'])
        album_names.append(query['tracks']['items'][0]['album']['name'])
    except IndexError:
        artist_ids.append(None)
        artist_names.append(None)
        song_ids.append(None)
        song_names.append(None)
        song_popularities.append(None)
        album_ids.append(None)
        album_names.append(None)


In [18]:
#loops through my entire library, retrieves names and ids
artist_ids = []
artist_names = []
song_ids = []
song_names = []
song_popularities = []
album_ids = []
album_names = []

def clean_library():
    for song in my_library:
        search_query = make_query(song)
        resultset = spotify.search(search_query, search_type = "track")
        basic_song_data(resultset)
            
clean_library()

In [20]:
#gets rid of None values
while artist_ids.count(None) > 0:
    artist_ids.remove(None)
    artist_names.remove(None)
    song_ids.remove(None)
    song_names.remove(None)
    song_popularities.remove(None)
    album_ids.remove(None)
    album_names.remove(None)

In [21]:
#new library without queries that were faulting
new_library = []
for i in range (0, len(artist_names)):
    new_track = song_names[i] + ' - ' + artist_names[i]
    new_library.append(new_track)

In [22]:
#songs and artists
song_artist_album = {}
#maps track name and artist to all the things retrived above fro easy entry to document store
for i in range(0, len(song_ids)):
    lil_dicty = {}
    lil_dicty['artist_id'] = artist_ids[i]
    lil_dicty['artist_name'] = artist_names[i]
    lil_dicty['song_id'] = song_ids[i]
    lil_dicty['song_name'] = song_names[i]
    lil_dicty['song_popularity'] = song_popularities[i]
    lil_dicty['album_id'] = album_ids[i]
    lil_dicty['album_name'] = album_names[i]
    song_artist_album[new_library[i]] = lil_dicty

In [23]:
#get audio features
songdata_dict = {}

for i in range(0, len(song_ids)):
    songdata_dict[new_library[i]] = spotify.get_audio_data(song_ids[i])
    


In [24]:
songdata_dict

{'Shameless - iann dior': {'acousticness': 0.186,
  'analysis_url': 'https://api.spotify.com/v1/audio-analysis/4VZFXDtx8NnXWeJHOMzEan',
  'danceability': 0.709,
  'duration_ms': 106012,
  'energy': 0.76,
  'id': '4VZFXDtx8NnXWeJHOMzEan',
  'instrumentalness': 0,
  'key': 8,
  'liveness': 0.164,
  'loudness': -7.583,
  'mode': 1,
  'speechiness': 0.148,
  'tempo': 81.495,
  'time_signature': 4,
  'track_href': 'https://api.spotify.com/v1/tracks/4VZFXDtx8NnXWeJHOMzEan',
  'type': 'audio_features',
  'uri': 'spotify:track:4VZFXDtx8NnXWeJHOMzEan',
  'valence': 0.709},
 'Cinderella Story - A Boogie Wit da Hoodie': {'acousticness': 0.272,
  'analysis_url': 'https://api.spotify.com/v1/audio-analysis/18AIesWCXvIbXu8RqQw1E1',
  'danceability': 0.639,
  'duration_ms': 185455,
  'energy': 0.571,
  'id': '18AIesWCXvIbXu8RqQw1E1',
  'instrumentalness': 0,
  'key': 4,
  'liveness': 0.144,
  'loudness': -7.332,
  'mode': 1,
  'speechiness': 0.511,
  'tempo': 88.047,
  'time_signature': 4,
  'track_hr

now that we have data on my library, we need to get data for potential recommendations
start with related artists to my top artists

    

In [25]:
def get_artist_id(name):
    try:
        fid = spotify.search(name)['artists']['items'][0]['id']
    except IndexError:
        fid = ''
    return fid

In [26]:
def clean_related_artists(fid):
    related_artists = []
    try:
        
        art_rs = spotify.get_similar_artist(fid)
        for i in range(0, 5):
            related_artists.append(art_rs['artists'][i]['name'])
    except KeyError:
        related_artists.append('')
    return related_artists

def get_top_tracks(fid):
    top_tracks = []
    try:
        top_tracks_dirty = spotify.get_artist_top_tracks(fid)['tracks']
        for i in range(0, len(top_tracks_dirty)):
            top_tracks.append(top_tracks_dirty[i]['name'])
    except KeyError:
        top_tracks.append('')
    return top_tracks

In [27]:
#makes above thing into function

def get_artist_recs():
    potential_recommendations = []
    top_artist_top_tracks = []
    related_top_tracks = []
    for artist in my_top_artists:
        taid = get_artist_id(artist)
        tops = get_top_tracks(taid)
        for tr in tops:
            top_artist_top_tracks.append(tr + ' - ' + artist)
        rartists = clean_related_artists(taid)
        for rartist in rartists:
            raid = get_artist_id(rartist)
            rtops = get_top_tracks(raid)
            for rtr in rtops:
                related_top_tracks.append(rtr + ' - ' + rartist)
    potential_recommendations = set(top_artist_top_tracks + related_top_tracks)
    for i in list(potential_recommendations):
        if i in new_library:
            potential_recommendations.remove(i)
    return potential_recommendations

In [28]:
final_pot_recs = get_artist_recs()

In [29]:
#this is a list of all potential recommendations based on top artists and their related artists and their top tracks,
#that are not already in the library. I will get audio features for each track, and later run a classifier
final_pot_recs

{'12.38 (feat. 21 Savage, Ink & Kadhja Bonet) - Childish Gambino',
 '123 - Jess Glynne',
 '123 - Smokepurpp',
 '1999 - Troye Sivan',
 '20 Years Later - Yung Pinch',
 '2019 - Bazanji',
 "223's (feat. 9lokknine) - YNW Melly",
 '29 - Bankrol Hayden',
 '3005 - Childish Gambino',
 '40 Oz. - D12',
 '772 Love - YNW Melly',
 '8TEEN - Khalid',
 '@ MEH - Playboi Carti',
 'A Brand New Day (BTS World Original Soundtrack) [Pt. 2] - Zara Larsson',
 'A Kiss - Bad Meets Evil',
 'ARGENTINA - Gunna',
 'Above The Law - Bad Meets Evil',
 'Adam & Eve - Calboy',
 'Adorn - Miguel',
 "Ain't My Fault - Zara Larsson",
 "Ain't Thinkin Bout You (feat. Louisa) - KREAM",
 'Alexis - Joey Trap',
 'Ali Bomaye - The Game',
 'All For You - Years & Years',
 'All I Am - Jess Glynne',
 'All I Think About - Bad Meets Evil',
 'All In My Head (Flex) (feat. Fetty Wap) - Fifth Harmony',
 'All Me (feat. Keyshia Cole) - Kehlani',
 'All My Love - Sabrina Claudio',
 'All My Love - Wale',
 'All Night - Matoma',
 'All Star (with Lil 

In [30]:
#my next set of recommendations will be based on new releases

In [31]:
#gets an albums id based on query
def get_album_id(q):
    new_al = spotify.search(q, search_type = 'album')
    return_album_id = new_al['albums']['items'][0]['id']
    return return_album_id

In [32]:
#gets tracks cleanly from spotify albums
def clean_album_tracks(alid):
    clean_album_songs = []
    iter_al_tracks = spotify.get_album_tracks(alid)['items']
    for trackb in iter_al_tracks:
        clean_album_songs.append(trackb['name'] + " - " + trackb['artists'][0]['name'])
    return clean_album_songs

In [33]:
def potential_release_radar():
    potential_rr_tracks = []
    rr = spotify.new_releases()
    dirty_rr = rr['albums']['items']
    for i in range(0, len(dirty_rr)):
        potential_rr_tracks.append(clean_album_tracks(get_album_id(
            dirty_rr[i]["name"] + " " + dirty_rr[i]['artists'][0]['name'])))
    return potential_rr_tracks

In [34]:
#new tracks released that the classifier will be run on
prr = potential_release_radar()

In [35]:
prr_clean = []
for i in prr:
    prr_clean = prr_clean + i
for i in prr_clean:
    if i in new_library:
        prr_clean.remove(i)
prr_clean

['Make It Rain - Pop Smoke',
 'Intro - Chloe x Halle',
 'Forgive Me - Chloe x Halle',
 'Baby Girl - Chloe x Halle',
 'Do It - Chloe x Halle',
 'Tipsy - Chloe x Halle',
 'Ungodly Hour - Chloe x Halle',
 'Busy Boy - Chloe x Halle',
 'Catch Up (feat. Mike WiLL Made-It) - Chloe x Halle',
 'Overwhelmed - Chloe x Halle',
 'Lonely - Chloe x Halle',
 "Don't Make It Harder On Me - Chloe x Halle",
 'Wonder What She Thinks of Me - Chloe x Halle',
 'ROYL - Chloe x Halle',
 'GIRL - Maren Morris',
 'The Feels - Maren Morris',
 'All My Favorite People (feat. Brothers Osborne) - Maren Morris',
 'A Song for Everything - Maren Morris',
 'Common (feat. Brandi Carlile) - Maren Morris',
 'Flavor - Maren Morris',
 'Make Out With Me - Maren Morris',
 'Gold Love - Maren Morris',
 'Great Ones - Maren Morris',
 'RSVP - Maren Morris',
 'To Hell & Back - Maren Morris',
 'The Bones - Maren Morris',
 'Good Woman - Maren Morris',
 'Shade - Maren Morris',
 'Just for Now - Maren Morris',
 'Takes Two - Maren Morris',
 

In [36]:
#now before I get features I will group together all possible recommendations and put them in a new list
new_and_artist_recs = set(list(final_pot_recs) + prr_clean)

In [37]:
#get audio features and dump all this in a dictionary
potential_recs_songdata_dict = {}
for i in new_and_artist_recs:
    rqery = spotify.search(make_query(i), search_type = "track")
    try:
        songid = rqery['tracks']['items'][0]['id']
        audio_data = spotify.get_audio_data(songid)
        potential_recs_songdata_dict[i] = audio_data
    except:
        audio_data = {}

In [38]:
#remove missing data
missing_keys = []
for k, v in potential_recs_songdata_dict.items():
    if v == {}:
        missing_keys.append(k)
missing_keys

for k in missing_keys:
    del potential_recs_songdata_dict[k]

for k, v in potential_recs_songdata_dict.items():
    print(v['valence'])

0.396
0.672
0.705
0.0562
0.533
0.369
0.274
0.238
0.512
0.156
0.26
0.909
0.231
0.125
0.503
0.701
0.0399
0.477
0.321
0.0773
0.606
0.569
0.917
0.411
0.73
0.733
0.793
0.686
0.696
0.201
0.592
0.216
0.43
0.819
0.577
0.307
0.716
0.38
0.543
0.736
0.459
0.348
0.646
0.4
0.12
0.555
0.584
0.195
0.13
0.472
0.298
0.512
0.417
0.561
0.16
0.55
0.548
0.295
0.479
0.699
0.438
0.718
0.603
0.591
0.396
0.391
0.621
0.4
0.594
0.481
0.226
0.13
0.67
0.35
0.457
0.765
0.472
0.503
0.615
0.435
0.101
0.326
0.392
0.668
0.695
0.12
0.0363
0.801
0.759
0.686
0.574
0.273
0.418
0.613
0.662
0.6
0.5
0.794
0.548
0.515
0.333
0.718
0.709
0.528
0.771
0.494
0.559
0.164
0.191
0.634
0.417
0.36
0.208
0.195
0.338
0.245
0.078
0.264
0.191
0.62
0.437
0.805
0.67
0.697
0.674
0.69
0.661
0.339
0.289
0.0405
0.241
0.494
0.398
0.786
0.37
0.504
0.257
0.505
0.375
0.604
0.311
0.728
0.556
0.547
0.656
0.333
0.45
0.72
0.756
0.617
0.193
0.264
0.771
0.427
0.397
0.919
0.577
0.356
0.0815
0.333
0.811
0.363
0.878
0.475
0.659
0.758
0.34
0.588
0.254
0.486
0.

In [39]:
#adds song and artist as a key for easier queries in Mongo
for k, v in songdata_dict.items():
    v["Track"] = k
songdata_dict    

{'Shameless - iann dior': {'Track': 'Shameless - iann dior',
  'acousticness': 0.186,
  'analysis_url': 'https://api.spotify.com/v1/audio-analysis/4VZFXDtx8NnXWeJHOMzEan',
  'danceability': 0.709,
  'duration_ms': 106012,
  'energy': 0.76,
  'id': '4VZFXDtx8NnXWeJHOMzEan',
  'instrumentalness': 0,
  'key': 8,
  'liveness': 0.164,
  'loudness': -7.583,
  'mode': 1,
  'speechiness': 0.148,
  'tempo': 81.495,
  'time_signature': 4,
  'track_href': 'https://api.spotify.com/v1/tracks/4VZFXDtx8NnXWeJHOMzEan',
  'type': 'audio_features',
  'uri': 'spotify:track:4VZFXDtx8NnXWeJHOMzEan',
  'valence': 0.709},
 'Cinderella Story - A Boogie Wit da Hoodie': {'Track': 'Cinderella Story - A Boogie Wit da Hoodie',
  'acousticness': 0.272,
  'analysis_url': 'https://api.spotify.com/v1/audio-analysis/18AIesWCXvIbXu8RqQw1E1',
  'danceability': 0.639,
  'duration_ms': 185455,
  'energy': 0.571,
  'id': '18AIesWCXvIbXu8RqQw1E1',
  'instrumentalness': 0,
  'key': 4,
  'liveness': 0.144,
  'loudness': -7.332

In [40]:
for k, v in potential_recs_songdata_dict.items():
    v["Track"] = k
potential_recs_songdata_dict

{'123 - Jess Glynne': {'Track': '123 - Jess Glynne',
  'acousticness': 0.0901,
  'analysis_url': 'https://api.spotify.com/v1/audio-analysis/3gFgp6Ofp808Kc2qLQiN5e',
  'danceability': 0.707,
  'duration_ms': 190280,
  'energy': 0.674,
  'id': '3gFgp6Ofp808Kc2qLQiN5e',
  'instrumentalness': 0,
  'key': 1,
  'liveness': 0.115,
  'loudness': -5.578,
  'mode': 1,
  'speechiness': 0.177,
  'tempo': 95.02,
  'time_signature': 4,
  'track_href': 'https://api.spotify.com/v1/tracks/3gFgp6Ofp808Kc2qLQiN5e',
  'type': 'audio_features',
  'uri': 'spotify:track:3gFgp6Ofp808Kc2qLQiN5e',
  'valence': 0.928},
 '123 - Smokepurpp': {'Track': '123 - Smokepurpp',
  'acousticness': 0.196,
  'analysis_url': 'https://api.spotify.com/v1/audio-analysis/0MHPgpKYfTVml3qEXspnLQ',
  'danceability': 0.88,
  'duration_ms': 170000,
  'energy': 0.61,
  'id': '0MHPgpKYfTVml3qEXspnLQ',
  'instrumentalness': 0,
  'key': 9,
  'liveness': 0.105,
  'loudness': -5.257,
  'mode': 1,
  'speechiness': 0.402,
  'tempo': 135.993,


Now, we have a cleaned dataset to dump into mongo... this part took a while... now for the easy bit 

In [1]:
#now time to start dumping my personal data into MongoDB
import pymongo
from pymongo import MongoClient
client = MongoClient()

In [2]:
#makes spotify database
spotify_db = client.spotify

In [None]:
#database for data from my library
library_collection = spotify_db.library

In [None]:
library_vals_list = list(songdata_dict.values())

In [None]:
#inserts my library
library_collection.insert_many(library_vals_list)


In [None]:
#now time to insert all potential recommendations
#make a new collection for that to keep things separate
recommendation_collection = spotify_db.recs

In [None]:
recs_list = list(potential_recs_songdata_dict.values())

In [None]:
#dumps in collection
recommendation_collection.insert_many(recs_list)

In [48]:
#Now I need to gather data for music that i dislike
#found a kaggle dataset of hits and flops will use some of that
import pandas as pd
import numpy as np

hits2000 = pd.read_csv("dataset-of-00s.csv")
hits2010 = pd.read_csv("dataset-of-10s.csv")

In [49]:

#this dataset was found on kaggle where the target is whether or not a song was a hit 0 means flop so i will only get 
#the flopped songs


hits = hits2000.append(hits2010)
hits = hits[hits["target"]==0]
hits=hits.drop(columns=['chorus_hit', 'sections'])
#now one fo the limitations of this is that the song is good but have never heard it, i cant perfectly
#gather data for that, but i will filter out all songs that have an artist that i have in my library
for index,row in hits.iterrows():
    if row["artist"] in artist_names:
        hits.drop(index, inplace=True)

hits = hits.drop(columns=["target"])
hits["Track"] = hits["track"] + " - " + hits["artist"]
hits = hits.drop(columns=["track", "artist"])

hits

Unnamed: 0,uri,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,Track
2,spotify:track:6cHZf7RbxXCKwEkgAZT4mY,0.162,0.836,9,-3.009,1,0.0473,0.000111,0.00457,0.1740,0.3000,86.964,338893,4,Clouds Of Dementia - Candlemass
3,spotify:track:2IjBPp2vMeX7LggzRN3iSX,0.188,0.994,4,-3.745,1,0.1660,0.000007,0.07840,0.1920,0.3330,148.440,255667,4,"Heavy Metal, Raise Hell! - Zwartketterij"
5,spotify:track:5Z3nrC0JbJmXaOGiXTuNFk,0.726,0.837,11,-7.223,0,0.0965,0.373000,0.26800,0.1360,0.9690,135.347,192720,4,Dantzig Station - State Of Art
6,spotify:track:0iAdSLiQBIizTAiLUP7p5E,0.365,0.922,1,-2.644,1,0.0710,0.002850,0.00000,0.3210,0.2900,77.250,89427,4,Divorced - Blacklisted
9,spotify:track:3pjnCLIHbRczUjenWOEo56,0.647,0.324,7,-9.679,1,0.0377,0.354000,0.00000,0.1150,0.3440,124.213,314286,3,I Want to Know Your Plans - Say Anything
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6384,spotify:track:6Yk1qYbmPKdoqNrHYRFc9c,0.395,0.927,4,-4.418,1,0.1650,0.116000,0.00000,0.4410,0.9410,188.384,191905,3,Gimme D' Road - Carnival Mix - Calypso Rose
6386,spotify:track:6RSamYe6vPBz7mGHe5Qj1q,0.396,0.044,9,-25.088,0,0.0370,0.984000,0.94700,0.0802,0.0898,71.200,224636,5,Vast Changes - Regentum
6393,spotify:track:4t1TljQWJ6ZuoSY67zVvBI,0.172,0.358,9,-14.430,1,0.0342,0.886000,0.96600,0.3140,0.0361,72.272,150857,4,Lotus Flowers - Yolta
6396,spotify:track:4o9npmYHrOF1rUxxTVH8h4,0.600,0.177,7,-16.070,1,0.0561,0.989000,0.86800,0.1490,0.5600,120.030,213387,4,Stormy Weather - Oscar Peterson


In [None]:
#keep a list of Tracks for keys

# dump all of this in mongo as well
bad_songs_collection = spotify_db.bad_songs
#inserting the data
for row in hits.to_dict(orient="row"):
    bad_songs_collection.insert_one(row)

In [6]:
#will save hits in mongo as well
good_songs_collection=spotify_db.good_songs

hits2000 = pd.read_csv("dataset-of-00s.csv")
hits2010 = pd.read_csv("dataset-of-10s.csv")

hits = hits2000.append(hits2010)
hits = hits[hits["target"]==1]
hits=hits.drop(columns=['chorus_hit', 'sections'])
hits["Track"] = hits["track"] + " - " + hits["artist"]
hits = hits.drop(columns=["track", "artist"])

for row in hits.to_dict(orient="row"):
    good_songs_collection.insert_one(row)