# Spotify Recommender Model

This model will be based on vectors created from the tracks' accoustic features.  Vectors from the features will be saved into a Gensim KeyedVectors object in order to utilize the api fucntions of Gensim.

In [1]:
# Basic Imports
import warnings;
warnings.filterwarnings('ignore')

import os
import sys
import pandas as pd
import numpy as np
import seaborn as sns
import time
import random
import matplotlib.pyplot as plt
%matplotlib inline


from gensim.models import Word2Vec
from gensim import utils
import gensim.models
from gensim.models import KeyedVectors

In [2]:
# For the Spotify Dataset
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Table, Column, Integer, String, Float, MetaData, and_, or_, func
from sqlalchemy import create_engine
import sqlite3
from sqlalchemy.orm import sessionmaker
from sqlalchemy import exc

sys.path.append('../../')
from spotify_api import get_spotify_data, get_tracks, get_artists, get_audiofeatures
from spotify_database import get_session, display_time
from spotify_utils import Table_Generator, List_Generator, pickle_load, pickle_save

In [3]:
# !pip install ipywidgets 
# !jupyter nbextension enable --py widgetsnbextension
# !jupyter labextension install @jupyter-widgets/jupyterlab-manager

# %%capture
from tqdm import tqdm_notebook as tqdm

In [4]:
data_path = '../../data/SpotifyDataSet'
db_path = '../../data/SpotifyDataSet/spotify_songs.db'

# Get sesion
session = get_session(db_path)
engine = create_engine('sqlite:///' + db_path)

# Get Songs class
Playlists = getattr(get_session, "Playlists")
Artists = getattr(get_session, "Artists")
Tracks = getattr(get_session, "Tracks")

In [5]:
# fetch all db tracks
db_tracks = display_time(session.query(Tracks).all)
session.close()

Time to Execute: 66.94 seconds


In [24]:
df_all_tracks = pd.DataFrame([x.__dict__ for x in db_tracks]).drop('_sa_instance_state', axis=1).set_index(['track_uri'])


In [25]:
df_all_tracks.head()

Unnamed: 0_level_0,acousticness,artist_uri,danceability,duration_ms,energy,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,time_signature,track_popularity,valence
track_uri,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
spotify:track:2d7LPtieXdIYzf7yHPooWd,0.974,spotify:artist:0MeLMJJcouYXCymQSHPn8g,0.467,242564,0.157,1e-06,11,0.0816,-9.649,1,0.0336,108.13,4,65,0.277
spotify:track:0y4TKcc7p2H6P0GJlt01EI,0.961,spotify:artist:7w0qj2HiAPIeUcoPogvOZ6,0.312,253933,0.207,0.00818,10,0.0773,-13.367,1,0.0347,93.778,4,36,0.278
spotify:track:6q4c1vPRZREh7nw3wG7Ixz,0.991,spotify:artist:32ogthv0BdaSMPml02X9YB,0.412,103920,0.159,0.772,9,0.083,-14.214,1,0.0278,85.462,4,54,0.0389
spotify:track:54KFQB6N4pn926IUUYZGzK,0.885,spotify:artist:32ogthv0BdaSMPml02X9YB,0.264,371320,0.122,0.349,9,0.094,-15.399,1,0.0349,148.658,4,72,0.0735
spotify:track:0NeJjNlprGfZpeX2LQuN6c,0.689,spotify:artist:3qnGvpP8Yth1AqSBMqON5x,0.658,238560,0.179,0.0,8,0.17,-10.866,1,0.0448,128.128,4,75,0.191


In [7]:
vector_features= [
    'acousticness',
    'danceability',
    'duration_ms',
    'energy',
    'instrumentalness',
    'key',
    'liveness',
    'loudness',
    'mode',
    'speechiness',
    'tempo',
    'time_signature',
    'valence'
]

In [None]:
def create_vector_frame(df_all_tracks:pd.DataFrame, vector_features:list)->pd.DataFrame:
    
    drop_cols = set(df_all_tracks.index) - set(vector_features)
    
    df = df_all_tracks.drop(drop_cols, axis=1)

In [33]:
accoustic_vectors = KeyedVectors(len(vector_features))
drop_cols = set(df_all_tracks.columns) - set(vector_features)
df = df_all_tracks.drop(drop_cols, axis=1)

In [34]:
df.head()

Unnamed: 0_level_0,acousticness,danceability,duration_ms,energy,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,time_signature,valence
track_uri,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
spotify:track:2d7LPtieXdIYzf7yHPooWd,0.974,0.467,242564,0.157,1e-06,11,0.0816,-9.649,1,0.0336,108.13,4,0.277
spotify:track:0y4TKcc7p2H6P0GJlt01EI,0.961,0.312,253933,0.207,0.00818,10,0.0773,-13.367,1,0.0347,93.778,4,0.278
spotify:track:6q4c1vPRZREh7nw3wG7Ixz,0.991,0.412,103920,0.159,0.772,9,0.083,-14.214,1,0.0278,85.462,4,0.0389
spotify:track:54KFQB6N4pn926IUUYZGzK,0.885,0.264,371320,0.122,0.349,9,0.094,-15.399,1,0.0349,148.658,4,0.0735
spotify:track:0NeJjNlprGfZpeX2LQuN6c,0.689,0.658,238560,0.179,0.0,8,0.17,-10.866,1,0.0448,128.128,4,0.191


In [35]:
# weights are the vectors for each track
weights = np.array(df)

# entities are the trackuris
entities = np.array(df.index)

# add the vectors to the dataset
accoustic_vectors.add(entities, weights)

'spotify:track:2d7LPtieXdIYzf7yHPooWd'

In [64]:
seed_uri = df.iloc[10001].name
playlist = np.array(accoustic_vectors.similar_by_word(seed_uri, topn=10, restrict_vocab=None))[:,0]
seed_track = get_tracks([seed_uri])[0]
sp_playlist = get_tracks(playlist)


In [65]:
print("Playlist Seed:")
print("\tArtist       : ", seed_track['artists'][0]['name'])
print("\tTrack        : ", seed_track['name'])
print("\tTrack Preview: ", seed_track['preview_url'] )
print()
for t in sp_playlist:
    print("Artist       : ", t['artists'][0]['name'])
    print("Track        : ", t['name'])
    print("Track Preview: ", t['preview_url'] )
    print()


Playlist Seed:
	Artist       :  Frank Sinatra
	Track        :  The Song Is You - Live At The Pyramids, Egypt / 1979
	Track Preview:  None

Artist       :  Shimmy Engel
Track        :  Nigun Carlebach
Track Preview:  https://p.scdn.co/mp3-preview/0a4a387a3698a0d095f8aeb47dda0af7baca4d47?cid=72413f75d4db4ec79c6caaf02523959e

Artist       :  El Cejas y Su Banda Fuego
Track        :  Fui Judicial Federal
Track Preview:  https://p.scdn.co/mp3-preview/41ffc9ddfed661e86e83a838e7f81ba8550c0619?cid=72413f75d4db4ec79c6caaf02523959e

Artist       :  Romantic Piano Song Masters
Track        :  Shake It Off (Piano Version)
Track Preview:  https://p.scdn.co/mp3-preview/84a7d35abaf2ddc5a405d10170ddb10aa794ce5a?cid=72413f75d4db4ec79c6caaf02523959e

Artist       :  NateWantsToBattle
Track        :  All I Want
Track Preview:  https://p.scdn.co/mp3-preview/5a4d5bab002cf8812d8b341259e949f66f7c9cc7?cid=72413f75d4db4ec79c6caaf02523959e

Artist       :  Cdot Honcho
Track        :  SKRT [Prod. By Yung Murk]
T

Setting credentials
token():INFO:   Getting initial token
token():INFO:   Token refreshed
