In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from IPython.display import clear_output

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials, SpotifyOAuth
from spotipy import util

from config import CLIENT_ID, CLIENT_SECRET, REDIRECT_URI

# Functions

In [None]:
def fetch_playlist_tracks(sp, playlistsid):
    """Return every track of a playlist as a DataFrame.

    The playlist is read page by page (100 items per request) until the
    response reports no ``next`` page.

    Parameters
    ----------
    sp : object
        Client exposing ``playlist_tracks(playlist_id, fields, limit, offset, market)``,
        e.g. a ``spotipy.Spotify`` instance.
    playlistsid : str
        Id of the playlist to read.

    Returns
    -------
    pandas.DataFrame
        Columns ``track_id``, ``track_name`` and ``track_artist`` (name of the
        first listed artist, or ``None`` when the track lists no artist).
        Playlist entries without a track object are skipped. The columns are
        present even when the playlist is empty.
    """
    page_size = 100
    offset = 0
    items = []

    while True:
        content = sp.playlist_tracks(playlistsid, fields=None, limit=page_size, offset=offset, market=None)
        items += content["items"]

        if content["next"] is None:
            break
        offset += page_size

    records = []
    for item in items:
        # An entry can carry no track object at all; indexing into it would raise.
        track = item.get("track") if item else None
        if track is None:
            continue
        artists = track.get("artists") or []
        records.append({
            "track_id": track["id"],
            "track_name": track["name"],
            "track_artist": artists[0]["name"] if artists else None,
        })

    # Build the frame once, with explicit columns so an empty playlist still
    # yields the expected schema.
    return pd.DataFrame(records, columns=["track_id", "track_name", "track_artist"])

In [None]:
def fetch_playlists(sp, username):
    """Return all playlists of a user as a DataFrame.

    ``user_playlists`` is paginated, so the playlists are read page by page
    (50 per request) until the response reports no ``next`` page.

    Parameters
    ----------
    sp : object
        Client exposing ``user_playlists(user, limit, offset)``, e.g. a
        ``spotipy.Spotify`` instance.
    username : str
        Id of the user whose playlists are listed.

    Returns
    -------
    pandas.DataFrame
        One row per playlist with columns ``id``, ``name`` and ``#tracks``.
    """
    page_size = 50
    offset = 0
    records = []

    while True:
        # Make the API request for the current page
        playlists = sp.user_playlists(username, limit=page_size, offset=offset)
        for playlist in playlists["items"]:
            records.append({
                "id": playlist["id"],
                "name": playlist["name"],
                "#tracks": playlist["tracks"]["total"],
            })

        if playlists.get("next") is None:
            break
        offset += page_size

    # Create the final df
    return pd.DataFrame(records, columns=["id", "name", "#tracks"])

In [None]:
def create_playlist(sp, username, playlist_name, playlist_description):
    """Create a playlist for ``username`` and return the id of the new playlist.

    ``sp`` must expose ``user_playlist_create``; the description is passed to
    it by keyword.
    """
    response = sp.user_playlist_create(
        username,
        playlist_name,
        description=playlist_description,
    )
    playlist_id = response["id"]
    return playlist_id

In [None]:
def fill_playlist(sp, username, playlist_id, playlist_tracks, batch_size=50):
    """Add tracks to a playlist in batches and return the API responses.

    Parameters
    ----------
    sp : object
        Client exposing ``user_playlist_add_tracks(user, playlist_id, tracks)``,
        e.g. a ``spotipy.Spotify`` instance.
    username : str
        Id of the playlist owner.
    playlist_id : str
        Id of the playlist to fill.
    playlist_tracks : pandas.DataFrame
        Table whose ``id`` column holds the track ids to add, in order.
    batch_size : int, optional
        Number of tracks sent per request (default 50).

    Returns
    -------
    list
        One response object per request, in request order. Empty when there
        are no tracks to add.

    Raises
    ------
    ValueError
        If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    # Plain list of ids: independent of the frame's index and cheap to slice.
    track_ids = list(playlist_tracks["id"])

    results = []
    for start in range(0, len(track_ids), batch_size):
        response = sp.user_playlist_add_tracks(
            username, playlist_id, tracks=track_ids[start:start + batch_size]
        )
        # Append the response itself; `+=` with a dict would add only its keys.
        results.append(response)
    return results

# Access

In [None]:
# Permissions requested from the user: read top tracks, read private playlists,
# modify public playlists and read the saved library.
scope = "user-top-read playlist-read-private playlist-modify-public user-library-read"

spotifyOAuth = SpotifyOAuth(client_id=CLIENT_ID, client_secret=CLIENT_SECRET, redirect_uri=REDIRECT_URI, scope=scope)
# Hand the auth manager to the client instead of reading a cached token by hand:
# get_cached_token() returns None when no token has been cached yet, which made
# token["access_token"] fail on a first run, and a raw token is never refreshed.
sp = spotipy.Spotify(auth_manager=spotifyOAuth)

In [None]:
# Id of the authenticated user; passed to the playlist calls further down.
user_id = sp.current_user()["id"]

# User top tracks

In [None]:
# The top-items endpoint documents a maximum of 50 items per request, so ask
# for that maximum rather than 100.
user_top_tracks = sp.current_user_top_tracks(limit=50, offset=0, time_range="medium_term")

In [None]:
# Collect one record per top track and build the frame in a single step.
# (Growing a frame with DataFrame.append inside the loop is quadratic, and the
# method no longer exists in pandas 2.x.)
top_tracks_records = []

for track in user_top_tracks["items"]:
    track_info = {
        "track_name": track["name"],
        "album": track["album"]["name"],
        "track_id": track["id"],
        "artist": track["artists"][0]["name"],  # first listed artist only
        "duration": track["duration_ms"],
        "popularity": track["popularity"],
    }
    top_tracks_records.append(track_info)

# Explicit columns keep the schema stable even when no top tracks are returned.
top_tracks = pd.DataFrame(
    top_tracks_records,
    columns=["track_name", "album", "track_id", "artist", "duration", "popularity"],
)
top_tracks.sort_values(by="popularity", ascending=False).head(10)

## Track features

In [None]:
# Audio features used as model inputs.
features_names = ["danceability", "energy", "valence", "tempo","loudness", "speechiness", "instrumentalness", "acousticness", "liveness"]

# Request features only when there is at least one id to ask for.
top_track_ids = top_tracks["track_id"].dropna().tolist()
top_tracks_features = (sp.audio_features(top_track_ids) or []) if top_track_ids else []

# The response may contain None for a track without audio features; skip those
# entries instead of failing on track[key].
top_tracks_features_records = [
    {key: track[key] for key in features_names + ["id"]}
    for track in top_tracks_features
    if track is not None
]

# Build the frame once; explicit columns keep the schema stable when empty.
top_tracks_features_df = pd.DataFrame(top_tracks_features_records, columns=features_names + ["id"])
top_tracks_features_df.head()

# User playlists

In [None]:
# Display the user's playlists (id, name and number of tracks).
fetch_playlists(sp, user_id)

# AI playlist

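The playlist is generated with a Random Forest regressor trained on the audio features of the user's top tracks: the model scores candidate tracks, and the highest-scoring ones are added to a new playlist.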

## Select candidate playlists and tracks

In [None]:
# user_playlists is paginated: walk every page so that playlists beyond the
# first page are also available as candidates.
user_playlists = []
playlists_offset = 0
while True:
    playlists_page = sp.user_playlists(user_id, limit=50, offset=playlists_offset)
    user_playlists += playlists_page["items"]
    if playlists_page["next"] is None:
        break
    playlists_offset += 50

In [None]:
# search_key selects the candidate playlists:
#   - a non-empty str keeps playlists whose name contains it;
#   - a non-empty list keeps playlists whose name is one of its elements;
#   - anything else selects nothing.
search_key = "Before"
#search_key = []

searched_playlists = []
if type(search_key) is str:
    if search_key:
        searched_playlists = [playlist for playlist in user_playlists if search_key in playlist["name"]]
elif type(search_key) is list:
    if search_key:
        searched_playlists = [playlist for playlist in user_playlists if playlist["name"] in search_key]

print("Founded {} playlists".format(len(searched_playlists)))

In [None]:
# One row per selected playlist, built in a single step. DataFrame.append in a
# loop is no longer available in pandas 2.x; explicit columns also keep "name"
# and "id" present when nothing matched.
searched_df = pd.DataFrame(
    [{"name": playlist["name"], "id": playlist["id"]} for playlist in searched_playlists],
    columns=["name", "id"],
)
searched_df

In [None]:
# Fetch the tracks of every selected playlist, then concatenate once
# (DataFrame.append in a loop is quadratic and was removed in pandas 2.x).
playlist_frames = [fetch_playlist_tracks(sp, playlist_id) for playlist_id in searched_df["id"]]

if playlist_frames:
    searched_tracks_df = pd.concat(playlist_frames, ignore_index=True)
else:
    # pd.concat rejects an empty list; fall back to an empty frame with the same columns.
    searched_tracks_df = pd.DataFrame(columns=["track_id", "track_name", "track_artist"])
searched_tracks_df.head()

In [None]:
index = 0
size = 100  # number of track ids sent per audio_features request

features_names = ["danceability", "energy", "valence", "tempo","loudness", "speechiness", "instrumentalness", "acousticness", "liveness"]

# Keep each track id once (first occurrence wins, as with the original lookup)
# and drop rows without an id, which cannot be sent to the API.
candidate_tracks = searched_tracks_df.dropna(subset=["track_id"]).drop_duplicates(subset="track_id")
# id -> name mapping replaces a full-frame scan for every single track.
track_name_by_id = dict(zip(candidate_tracks["track_id"], candidate_tracks["track_name"]))
candidate_ids = list(track_name_by_id)

tracks_features_records = []
while index < len(candidate_ids):
    tracks_features = sp.audio_features(candidate_ids[index:index + size]) or []
    for track in tracks_features:
        # The response may contain None for a track without audio features.
        if track is None:
            continue
        features_info = {key: track[key] for key in features_names + ["id"]}
        features_info["track_name"] = track_name_by_id.get(features_info["id"])
        tracks_features_records.append(features_info)
    index += size

# Build the frame once (DataFrame.append was removed in pandas 2.x); explicit
# columns keep the schema stable when there are no candidate tracks.
tracks_features_df = pd.DataFrame(tracks_features_records, columns=features_names + ["id", "track_name"])
tracks_features_df = tracks_features_df.drop_duplicates(subset="id")
tracks_features_df.head()

## Model Training

In [None]:
# Join each top track with its audio features (track_id on the left, id on the right).
top_tracks_X = top_tracks.merge(top_tracks_features_df, left_on="track_id", right_on="id")
# Target: the popularity of each top track.
top_tracks_y = top_tracks_X["popularity"]
# Remove the join key and the target from the feature table.
top_tracks_X = top_tracks_X.drop(columns=["track_id", "popularity"])

In [None]:
# Fit a random forest on the audio-feature columns only; the seed is fixed so
# repeated runs give the same model.
model = RandomForestRegressor(random_state=42).fit(
    top_tracks_X[features_names],
    top_tracks_y,
)

## Tracks popularity evaluation

In [None]:
# Score every candidate track with the trained model and store the prediction
# in a "popularity" column.
tracks_features_df = tracks_features_df.assign(
    popularity=model.predict(tracks_features_df[features_names])
)
tracks_features_df.sort_values(by="popularity", ascending=False).head()

## Playlist Creation

In [None]:
# Keep the 50 candidates with the highest predicted popularity.
top_tracks_searched = (
    tracks_features_df
    .sort_values(by="popularity", ascending=False)
    .iloc[:50]
)
top_tracks_searched.head()

In [None]:
# Name and description given to the new playlist.
playlist_name = "Lazy Playlist"
playlist_desc = "Playlist created with Python"

# Create the playlist for the current user and keep its id for the fill step.
new_playlist_id = create_playlist(sp, user_id, playlist_name, playlist_desc)

In [None]:
# Add the selected tracks (the "id" column of top_tracks_searched) to the new playlist.
fill_playlist(sp, user_id, new_playlist_id, top_tracks_searched)

In [None]:
# List the user's playlists again (first rows only) after the playlist creation.
fetch_playlists(sp, user_id).head()