# Music Recommender with Deep Learning

## Access Spotify API to collect Playlists and Songs to recommend music

----

## 1. Collect Data from Spotify API

### Authentication

In [None]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from spotipy import util
from spotipy.client import SpotifyException
from collections import Counter
from gensim.utils import tokenize
import time
import tqdm
import json

In [None]:
#SPOTIPY_CLIENT_ID='<client_id>'
#SPOTIPY_CLIENT_SECRET='<client_secret>'
#SPOTIPY_REDIRECT_URI='http://127.0.0.1:8000/callback'

In [None]:
# Build a client-credentials auth manager for the Spotify Web API.
# SPOTIPY_CLIENT_ID and SPOTIPY_CLIENT_SECRET must already exist as Python
# names; their assignments earlier in this notebook are commented out, so
# fill them in before running this cell.
auth_manager = SpotifyClientCredentials(client_id=SPOTIPY_CLIENT_ID, client_secret=SPOTIPY_CLIENT_SECRET,requests_timeout=100)
# `session` is the API client passed to find_playlists() and track_yielder().
session = spotipy.Spotify(auth_manager=auth_manager)

### Find Playlists

In [4]:
# Switch: when True, reuse the playlists cached on disk instead of crawling
# the Spotify API again.
load = True

if load:
    # The cache is a JSON file read into `playlists` in one go.
    with open('data/playlists.json') as cached:
        playlists = json.load(cached)

In [None]:
def find_playlists(session, w, max_count=5000):
    """Yield playlists matching the search term ``w``, page by page.

    Args:
        session: Spotify client exposing ``search()`` and ``next()``.
        w: Search query string.
        max_count: Stop after this many playlists have been yielded.

    Yields:
        The playlist objects found under ``res['playlists']['items']``.

    Raises:
        SpotifyException: If fetching a page fails three times in a row, or
            the API reports any error other than HTTP 404.
    """
    try:
        res = session.search(w, limit=50, type='playlist')
        while res:
            for playlist in res['playlists']['items']:
                yield playlist
                max_count -= 1
                if max_count == 0:
                    # A plain return ends the generator. Raising
                    # StopIteration here is turned into RuntimeError by
                    # PEP 479 (Python 3.7+).
                    return
            # Fetch the next page, retrying briefly on API errors.
            tries = 3
            while tries > 0:
                try:
                    res = session.next(res['playlists'])
                    tries = 0
                except SpotifyException:
                    tries -= 1
                    time.sleep(0.2)
                    if tries == 0:
                        raise
    except SpotifyException as e:
        # A 404 is treated as "no more results"; anything else propagates.
        if e.http_status == 404:
            return
        raise


# Smoke test: take only the first playlist returned for the query 'a' and
# stop iterating straight away.
for pl in find_playlists(session, 'a'):
    break

# Bare expression so the notebook displays the playlist that was fetched.
pl

In [None]:
# State shared by the playlist crawl loop.
word_counts = Counter({'a': 1})  # candidate search words, seeded with 'a'
playlists = {}                   # playlist id -> {'owner', 'name', 'id'}
words_seen = set()               # words that have already been searched
count = 0                        # playlists stored so far
dupes = 0                        # playlists returned more than once

In [None]:
# Snowball crawl: search the most frequent word not searched yet, store the
# playlists it returns, and feed the tokens of their names back into
# `word_counts` as future search words.
while len(playlists) < 100000:
    for word, _ in word_counts.most_common():
        if word in words_seen:
            continue
        words_seen.add(word)
        print('word>', word)
        try:
            for playlist in find_playlists(session, word):
                if not playlist:
                    # Skip empty entries instead of aborting the whole word.
                    continue
                if playlist['id'] in playlists:
                    dupes += 1
                elif playlist['name'] and playlist['owner']:
                    playlists[playlist['id']] = {
                      'owner': playlist['owner']['id'],
                      'name': playlist['name'],
                      'id': playlist['id'],
                    }
                    count += 1
                    for token in tokenize(playlist['name'], lowercase=True):
                        word_counts[token] += 1
            # One word handled: restart from a fresh most_common() ranking.
            break
        except Exception as exc:
            # Catch Exception rather than everything, so Ctrl-C still stops
            # the crawl; report the failure and try the next candidate word.
            print('Skip to next iteration', repr(exc))
            continue
    else:
        # The ranking was exhausted without a successful search. If no
        # unsearched word is left, stop instead of spinning forever.
        if words_seen.issuperset(word_counts):
            break

In [None]:
import json
import os

# Write to the same path the "load" cell reads ('data/playlists.json') so a
# saved crawl can be reloaded without moving the file by hand.
os.makedirs('data', exist_ok=True)
with open('data/playlists.json', 'w') as fp:
    json.dump(playlists, fp)

### Find the songs inside each playlist

In [5]:
def track_yielder(session, playlist):
    """Yield every track of ``playlist``, following pagination.

    Args:
        session: Spotify client exposing ``user_playlist_tracks()`` and
            ``next()``.
        playlist: Mapping with at least the keys ``'owner'`` and ``'id'``.

    Yields:
        The ``'track'`` object of each playlist item that has one.

    Raises:
        SpotifyException: If fetching the next page fails three times in a
            row with a non-4xx status.
    """
    res = session.user_playlist_tracks(playlist['owner'], playlist['id'],
              fields='items(track(id, name, artists(name, id), duration_ms)),next')
    while res:
        for track in res['items']:
            if track and track['track']:
                yield track['track']
        tries = 3
        while tries > 0:
            try:
                res = session.next(res)
            except SpotifyException as e:
                # Client errors (4xx) will not succeed on retry: stop quietly.
                if 400 <= e.http_status <= 499:
                    return
                tries -= 1
                time.sleep(1)
                if tries == 0:
                    raise
            else:
                # No further page (or an empty one) ends the generator. Use
                # return: raising StopIteration inside a generator becomes
                # RuntimeError under PEP 479 (Python 3.7+).
                if not res or not res.get('items'):
                    return
                tries = 0

### Create SQL Database


In [7]:
import os
import sqlite3

In [None]:
# Sets of ids used while filling the database.
tracks_seen, playlists_seen = set(), set()

# Always start from an empty database file.
if os.path.isfile('data/songs.db'):
    os.remove('data/songs.db')

conn = sqlite3.connect('data/songs.db')
c = conn.cursor()
# One row per song, plus an index on the song name.
for statement in (
    'CREATE TABLE songs (id text primary key, name text, artist text)',
    'CREATE INDEX name_idx on songs(name)',
):
    c.execute(statement)

In [None]:
# For every collected playlist: log it to playlists.ndjson, insert its
# not-yet-seen tracks into the songs table, and write its track ids as one
# space-separated line of songs_ids.txt.
with open('data/playlists.ndjson', 'w') as fout_playlists, \
        open('data/songs_ids.txt', 'w') as fout_song_ids:
    for playlist in tqdm.tqdm(playlists.values()):
        fout_playlists.write(json.dumps(playlist) + '\n')
        track_ids = []
        try:
            for track in track_yielder(session, playlist):
                track_id = track['id']
                if not track_id:
                    continue
                if track_id not in tracks_seen:
                    c.execute("INSERT INTO songs VALUES (?, ?, ?)",
                              (track_id, track['name'], track['artists'][0]['name']))
                    tracks_seen.add(track_id)
                track_ids.append(track_id)
        except Exception as exc:
            # Still best-effort (a failing playlist is skipped), but report
            # why, and let KeyboardInterrupt through so the run can be
            # stopped. tqdm.write prints without breaking the progress bar.
            tqdm.tqdm.write('Skipping playlist %s: %r' % (playlist['id'], exc))
            continue
        fout_song_ids.write(' '.join(track_ids) + '\n')
        conn.commit()
conn.commit()

In [8]:
# Write the track ids of each playlist as one space-separated line of
# songs_ids1.txt (the file the Word2Vec model is trained on).
track_ids = []
with open('data/songs_ids1.txt', 'w') as fout_song_ids:
    for playlist in tqdm.tqdm(playlists.values()):
        track_ids = []
        try:
            for track in track_yielder(session, playlist):
                if track['id']:
                    track_ids.append(track['id'])
        except Exception as exc:
            # Keep whatever was collected before the failure, but say why the
            # playlist was cut short; KeyboardInterrupt is no longer swallowed.
            tqdm.tqdm.write('Incomplete playlist %s: %r' % (playlist['id'], exc))
        # Flush right after each playlist. Writing at the start of the *next*
        # iteration would drop the ids of the last playlist.
        if track_ids:
            fout_song_ids.write(' '.join(track_ids) + '\n')

 17%|█▋        | 46396/273927 [3:31:28<8:35:07,  7.36it/s]  Max Retries reached
 28%|██▊       | 76124/273927 [5:50:02<24:03:25,  2.28it/s] HTTP Error for GET to https://api.spotify.com/v1/playlists/0kjVGPu8XqedRCdC6rXzYy/tracks returned 404 due to Not found.
 32%|███▏      | 86649/273927 [6:35:55<9:09:01,  5.69it/s]  HTTP Error for GET to https://api.spotify.com/v1/playlists/37i9dQZF1DZ06evO3jiHBu/tracks returned 404 due to Not found.
100%|██████████| 273927/273927 [20:14:29<00:00,  3.76it/s]    


## Modelling

In [9]:
from operator import itemgetter
import gensim

In [12]:
class WordSplitter(object):
    """Re-iterable view of a text file as lists of whitespace-split tokens.

    The file is opened anew on every iteration, so one instance can be
    iterated several times.
    """

    def __init__(self, filename):
        # Path of the text file to read; one token list is produced per line.
        self.filename = filename

    def __iter__(self):
        """Yield ``line.split()`` for each line of the file."""
        with open(self.filename) as handle:
            yield from map(str.split, handle)


# Each line of songs_ids1.txt holds the track ids of one playlist; WordSplitter
# feeds them to Word2Vec as token lists, so track ids play the role of words.
model_input = WordSplitter('data/songs_ids1.txt')
# min_count=4 is passed to gensim; per the gensim docs, tokens occurring fewer
# than min_count times are left out of the vocabulary.
model = gensim.models.Word2Vec(model_input, min_count=4)

In [13]:
# Save the trained model. The context manager closes (and flushes) the file
# handle, which the bare open() call left dangling.
with open('songs.word2vec', 'wb') as fout_model:
    model.save(fout_model)

In [52]:
# Module-level connection to the songs database; find_song() and
# suggest_songs() create their cursors from it.
conn = sqlite3.connect('data/songs.db')
def find_song(song_name, limit=5):
    """Look up songs whose name contains ``song_name``.

    Only songs present in the Word2Vec vocabulary are returned, ordered by
    their vocabulary count, highest first.

    Args:
        song_name: Substring to search for. SQL ``LIKE`` wildcards (``%``,
            ``_``) inside it keep their wildcard meaning.
        limit: Maximum number of rows to return.

    Returns:
        A list of ``(id, name, artist, count)`` tuples.
    """
    c = conn.cursor()
    # Bound parameter instead of string concatenation: names containing a
    # quote (e.g. "Stayin' Alive") no longer break the statement, and the
    # input cannot inject SQL.
    c.execute("SELECT * FROM songs WHERE UPPER(name) LIKE UPPER(?)",
              ('%' + song_name + '%',))
    vocab = model.wv.vocab
    res = sorted((row + (vocab[row[0]].count,)
                  for row in c.fetchall() if row[0] in vocab),
                 key=itemgetter(-1), reverse=True)
    return res[:limit]

# Demo lookup: print each matching row with its fields separated by spaces.
for t in find_song('Never Gonna Give you up'):
    print(*t)

4uLU6hMCjMI75M1A2tKUQC Never Gonna Give You Up Rick Astley 1862
7GhIk7Il098yCjg4BQjzvb Never Gonna Give You Up Rick Astley 632
6JEK0CvvjDjjMUBFoXShNZ Never Gonna Give You Up Rick Astley 505
0FutrWIUM5Mg3434asiwkp Never Gonna Give You Up Rick Astley 220
5fnDDcjcXKUvJ6iSnpiU0v Never Gonna Give You Up Mac Beez 107


In [53]:
def suggest_songs(song_id):
    """Return the songs the Word2Vec model ranks as most similar to ``song_id``.

    Args:
        song_id: Track id to find neighbours for.

    Returns:
        A list of ``(id, name, artist, similarity)`` tuples, most similar
        first. Neighbours with no row in the songs table are omitted.
    """
    c = conn.cursor()
    similar = dict(model.wv.most_similar([song_id]))
    # One "?" per id: the ids are bound as parameters rather than being
    # formatted into the SQL text.
    placeholders = ', '.join('?' for _ in similar)
    c.execute("SELECT * FROM songs WHERE id in (%s)" % placeholders,
              list(similar))
    return sorted((rec + (similar[rec[0]],) for rec in c.fetchall()),
                  key=itemgetter(-1),
                  reverse=True)


# Demo: print the suggestions for one track id, one row per line.
for t in suggest_songs('4uLU6hMCjMI75M1A2tKUQC'):
    print(*t)

3mRM4NM8iO7UBqrSigCQFH Stayin' Alive - From "Saturday Night Fever" Soundtrack Bee Gees 0.979710042476654
0QKfiqpEU4h9ycPSzIFwYe Maniac Michael Sembello 0.9597901105880737
0B9x2BRHqj3Qer7biM3pU3 You're The One That I Want - From “Grease” John Travolta 0.9564324021339417
2TxCwUlqaOH3TIyJqGgR91 Mamma Mia ABBA 0.9541833400726318
6ztstiyZL6FXzh4aG46ZPD Boogie Wonderland Earth, Wind & Fire 0.9533374905586243
7azo4rpSUh8nXgtonC6Pkq Thriller Michael Jackson 0.9514068961143494
1TfqLAPs4K3s2rJMoCokcS Sweet Dreams (Are Made of This) - Remastered Eurythmics 0.9480932354927063
5KgbyD2lQQlIupAaPjgiXg Night Fever - From "Saturday Night Fever" Soundtrack Bee Gees 0.9478886127471924
7DD1ojeTUwnW65g5QuZw7X I Will Survive - 1981 Re-recording Gloria Gaynor 0.9478068947792053
0ikz6tENMONtK6qGkOrU3c Wake Me Up Before You Go-Go Wham! 0.9474787712097168
