In [1]:
import featurizing
import encoding
import recommending
import playlist_creation

import pathlib
import spotipy
from spotipy.oauth2 import SpotifyOAuth
import api_setup
from sklearn.metrics.pairwise import cosine_similarity

from typing import List, Optional, Callable
import pandas as pd


# Parent of the current working directory.
# NOTE(review): this presumably resolves to the repository root because the
# notebook is run from a sub-directory of the repo — confirm.
REPO_ROOT = pathlib.Path.cwd().parent

# Columns (and their order) that dataframe_from_playlist selects for its output.
EXPECTED_COLUMN_ORDER = [
    'track_id',
    'artist_name',
    'track_name',
    'duration_ms',
    'danceability',
    'energy',
    'key',
    'loudness',
    'mode',
    'speechiness',
    'acousticness',
    'instrumentalness',
    'liveness',
    'valence',
    'tempo',
    'time_signature',
    'genres',
    'artist_popularity',
]

In [2]:
# API Auth
# Credentials are parsed from the "api-keys" path under REPO_ROOT; the requested
# scopes allow modifying both public and private playlists.
scope = ['playlist-modify-public', 'playlist-modify-private']
env_vars = api_setup.parse_api_kvs(REPO_ROOT / "api-keys")
sp = spotipy.Spotify(
    auth_manager=SpotifyOAuth(
        client_id=env_vars['client_id'],
        client_secret=env_vars['client_secret'],
        redirect_uri=env_vars['redirect_uri'],
        scope=scope,
    )
)

In [3]:
# Spotify URI of the playlist that the cells below turn into a feature vector.
playlist_uri = 'spotify:playlist:2uP6neXdGTbCZCDExKE6TI'

In [4]:
# Helpers that use the Spotify API to featurize songs and playlists
def split_into_sublists(input_list, chunk_size=50):
    """
    Split ``input_list`` into consecutive chunks of at most ``chunk_size`` items.

    Every element of the input appears in exactly one chunk, in the original
    order; only the final chunk may be shorter than ``chunk_size``.

    :param input_list: sequence to split (anything supporting ``len`` and slicing).
    :param chunk_size: maximum number of items per chunk; must be at least 1.
    :return: list of chunks; an empty input yields an empty list (no chunks).
    :raises ValueError: if ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        # A non-positive step can never make progress through the list.
        raise ValueError("chunk_size must be a positive integer")
    # Slicing past the end is safe, so the last chunk simply contains whatever
    # remains (including the final element).
    return [input_list[start:start + chunk_size]
            for start in range(0, len(input_list), chunk_size)]

def get_artist_from_track_uri(spotify: spotipy.Spotify, track_uri: str) -> dict:
    """
    Look up one track and its first-listed artist through the Spotify client.

    :param spotify: client used for the ``track`` and ``artist`` lookups.
    :param track_uri: track URI; only the part after the last ":" is used.
    :return: dict with the keys ``track_uri`` (the stripped identifier),
        ``artist_name``, ``artist_popularity``, ``artist_genres`` and ``track_name``.
    """
    track_id = track_uri.split(":")[-1]
    track = spotify.track(track_id)
    # Only the first artist listed on the track is looked up.
    first_artist = spotify.artist(track['artists'][0]['uri'])
    return dict(
        track_uri=track_id,
        artist_name=first_artist['name'],
        artist_popularity=first_artist['popularity'],
        artist_genres=first_artist['genres'],
        track_name=track['name'],
    )

def get_artists_from_track_uris(spotify: spotipy.Spotify, track_uris: List[str]) -> List[dict]:
    """
    Look up track and first-artist information for many tracks in batches.

    The identifiers are split with ``split_into_sublists`` and each batch is
    resolved with one ``tracks`` call and one ``artists`` call.

    :param spotify: client used for the ``tracks`` and ``artists`` lookups.
    :param track_uris: track URIs; only the part after the last ":" of each is used.
    :return: one dict per track with the keys ``track_uri`` (the track's ``id``),
        ``artist_name``, ``artist_popularity``, ``artist_genres`` and ``track_name``.
    """
    track_ids = [track_uri.split(":")[-1] for track_uri in track_uris]
    results = []
    for chunk in split_into_sublists(track_ids):
        if not chunk:
            # Nothing to look up; avoid sending a request with an empty id list.
            continue
        tracks_info = spotify.tracks(chunk)['tracks']
        # Only the first artist listed on each track is looked up.
        artist_ids = [track_info['artists'][0]['id'] for track_info in tracks_info]
        # The artists endpoint wraps its list in a payload dict, exactly like
        # the tracks endpoint; zipping the dict itself would iterate its keys.
        artists_info = spotify.artists(artist_ids)['artists']
        results.extend(
            {'track_uri': track_info['id'],
             'artist_name': artist_info['name'],
             'artist_popularity': artist_info['popularity'],
             'artist_genres': artist_info['genres'],
             'track_name': track_info['name']}
            for track_info, artist_info in zip(tracks_info, artists_info)
        )
    return results

def featurize_song_list(spotify: spotipy.Spotify, track_uris: List[str]) -> List[dict]:
    """
    Fetch audio features for a list of tracks, one batch at a time.

    :param spotify: client whose ``audio_features`` method is called per batch.
    :param track_uris: track identifiers, batched with ``split_into_sublists``.
    :return: flat list with the entries returned for every batch, in request order.
    """
    features = []
    for chunk in split_into_sublists(track_uris):
        if not chunk:
            # Nothing to look up; avoid sending a request with an empty id list.
            continue
        features.extend(spotify.audio_features(chunk))
    return features

def get_songs_from_playlist(spotify_client: spotipy.Spotify, playlist_uri: str) -> List[str]:
    """
    Return the identifiers of all tracks in a playlist.

    The playlist is read page by page so that playlists longer than a single
    response page are returned in full.

    :param spotify_client: client used to read the playlist items.
    :param playlist_uri: URI of the playlist to read.
    :return: for each playlist entry that has a track, the part of the track's
        URI after the last ":" (in playlist order).
    """
    track_ids = []
    page = spotify_client.playlist_items(playlist_uri)
    while page:
        for item in page['items']:
            track = item.get('track')
            if track is None:
                # Entries without track data cannot be featurized; skip them
                # instead of failing on the subscript below.
                continue
            track_ids.append(track['uri'].split(":")[-1])
        # ``next`` follows the paging link and returns None after the last page.
        page = spotify_client.next(page)
    return track_ids

def get_playlist_song_features(spotify: spotipy.Spotify, playlist_uri: str) -> List[dict]:
    """
    Fetch the audio features of every track in a playlist.

    :param spotify: client passed through to the playlist and feature lookups.
    :param playlist_uri: URI of the playlist to featurize.
    :return: the list produced by ``featurize_song_list`` for the playlist's tracks.
    """
    song_uris = get_songs_from_playlist(spotify, playlist_uri)
    return featurize_song_list(spotify, song_uris)

def dataframe_from_playlist(spotify: spotipy.Spotify, playlist_uri: str) -> pd.DataFrame:
    """
    Build a DataFrame with one row per playlist track, combining audio features
    with information about the track's first-listed artist.

    :param spotify: client passed through to the feature and artist lookups.
    :param playlist_uri: URI of the playlist to load.
    :return: DataFrame whose columns are exactly ``EXPECTED_COLUMN_ORDER``.
    """
    playlist_song_features = get_playlist_song_features(spotify, playlist_uri)
    playlist_song_uris = [features['uri'] for features in playlist_song_features]
    playlist_artist_info = get_artists_from_track_uris(spotify, playlist_song_uris)

    song_features_df = pd.DataFrame.from_records(playlist_song_features)
    # ``join(..., on='id')`` matches the caller's 'id' column against the INDEX
    # of the other frame, so the artist info must be indexed by track id.
    # Duplicates are dropped first so a track that occurs several times in the
    # playlist does not multiply rows in the join.
    artist_info_df = (
        pd.DataFrame.from_records(playlist_artist_info)
        .drop_duplicates(subset='track_uri')
        .set_index('track_uri')
    )

    song_features_df = song_features_df.join(artist_info_df, on='id')
    song_features_df = song_features_df.drop(columns=["type", "uri", "track_href", "analysis_url"])
    song_features_df = song_features_df.rename(mapper={"artist_genres": "genres", "id": "track_id"}, axis=1)
    # Select and order the columns as listed in EXPECTED_COLUMN_ORDER.
    song_features_df = song_features_df[EXPECTED_COLUMN_ORDER]
    return song_features_df

def get_playlist_vector_from_uri(spotify: spotipy.Spotify, playlist_uri: str, scalers_and_encoders: Optional[dict] = None) -> pd.Series:
    """
    Summarise a playlist as a single feature vector.

    The playlist's tracks are loaded, encoded, and then summed column-wise.

    :param spotify: client passed through to ``dataframe_from_playlist``.
    :param playlist_uri: URI of the playlist to summarise.
    :param scalers_and_encoders: transformers to encode with; when omitted (or
        empty), new ones are fitted on the playlist's own features.
    :return: the column-wise sum of the encoded track features.
        NOTE(review): annotated as a Series on the assumption that
        ``encoding.encode_dataframe_given_scalers`` returns a DataFrame — confirm.
    """
    playlist_features = dataframe_from_playlist(spotify, playlist_uri)
    if not scalers_and_encoders:
        # No transformers supplied: fit them on this playlist alone.
        scalers_and_encoders = encoding.fit_scalers_and_encoders(playlist_features)
    encoded_features = encoding.encode_dataframe_given_scalers(playlist_features, scalers_and_encoders)
    encoded_features = encoded_features.drop(columns=[*encoding.UNUSED_COLUMNS, 'track_id'])
    # Collapse all tracks into one vector by summing each column.
    return encoded_features.sum(axis=0, numeric_only=False)

def get_k_closest_songs(spotify: spotipy.Spotify,
                        playlist_uri: str,
                        training_data: pd.DataFrame,
                        training_transformers: dict,
                        k: int = 15,
                        comparison: Callable = cosine_similarity) -> List[str]:
    """
    Rank the rows of ``training_data`` by similarity to a playlist and return
    the index labels of the ``k`` best-scoring rows.

    :param spotify: client passed through to ``get_playlist_vector_from_uri``.
    :param playlist_uri: URI of the playlist to compare against.
    :param training_data: candidate songs; ``encoding.UNUSED_COLUMNS`` are
        dropped before scoring. NOTE(review): the returned values are this
        frame's index labels — presumably track URIs; confirm against how the
        frame is indexed.
    :param training_transformers: transformers used to encode the playlist.
    :param k: number of rows to return.
    :param comparison: callable invoked as ``comparison(playlist_row, candidates)``;
        higher scores are ranked first.
    :return: index labels of the ``k`` highest-scoring rows, best first.
    """
    candidates = training_data.drop(columns=encoding.UNUSED_COLUMNS)

    playlist_vector = get_playlist_vector_from_uri(spotify, playlist_uri, training_transformers)
    # Shape the playlist vector as a single row for the comparison function.
    playlist_row = playlist_vector.values.reshape(1, -1)
    scores = comparison(playlist_row, candidates[candidates.columns])
    candidates['similarity'] = scores.transpose()

    ranked = candidates.sort_values(by='similarity', ascending=False)
    top_k = ranked.head(k)
    return list(top_k.index)

In [5]:
# Read the song dataset from disk and encode it; the encoder call returns the
# encoded frame together with the fitted scalers/encoders.
training_dataset, training_scalers_and_encoders = encoding.encode_training_data(
    pd.read_csv(REPO_ROOT / 'data/song_data/final_song_dataset.csv')
)

  training_dataset = pd.read_csv(REPO_ROOT / 'data/song_data/final_song_dataset.csv')


In [6]:
# Build the playlist's feature vector. No scalers/encoders are passed here, so
# get_playlist_vector_from_uri fits new ones on the playlist's own features.
# NOTE(review): training_scalers_and_encoders is not used for this call —
# confirm that is intended.
playlist_feature_vector = get_playlist_vector_from_uri(sp, playlist_uri)

KeyboardInterrupt: 

In [None]:
# Display the playlist feature vector computed in the previous cell.
playlist_feature_vector