### Preparing Data for Dashboard

This file prepares all the data to be used for the dashboard functionality.

In [172]:
import pandas as pd
import pickle
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

#### Explore Top Songs By Topic

In [173]:
# Load in the playlists dataset from csv file

df = pd.read_csv('../data/playlist_with_embeddings_dataset.csv')
df.head()

Unnamed: 0,Playlist ID,Playlist Name,Track Name,Artist Name,Album Name,Track URI,Lyrics,Preprocessed Lyrics,Preprocessed Playlist Name,Preprocessed Track Name,Preprocessed Album Name,Lyrics Embedding,Playlist Name Embedding,Dominant Topic,Dominant Topic Probability
0,30,Garage Rock,Take My Side,Will Butler,Policy,spotify:track:6v4zAuJTlszNdKrbbnEFu8,Where's the fire? Let it burn\nWhere're the ch...,"['fire', 'let', 'burn', 'child', 'child', 'got...","['garage', 'rock']","['take', 'side']",['policy'],[ 1.47042751e-01 1.87495738e-01 2.75591016e-...,[-6.4670e-01 9.8998e-01 -1.4379e-01 -3.0598e-...,Self-Empowerment + Confident,0.921849
1,30,Garage Rock,Everyday it Starts,Parquet Courts,Content Nausea,spotify:track:25JD35LDh7CEJ9gKUNruVj,Everyday it starts\nEveryday it starts\nEveryd...,"['everyday', 'start', 'everyday', 'start', 'ev...","['garage', 'rock']","['everyday', 'start']","['content', 'nausea']",[ 0.14214703 0.13033065 -0.1995055 -0.361942...,[-6.4670e-01 9.8998e-01 -1.4379e-01 -3.0598e-...,Life + Struggles,0.5017
2,30,Garage Rock,Content Nausea,Parquet Courts,Content Nausea,spotify:track:24bk2iKzr3VcymFnzIy3oS,"Content nausea, World War Four\nSeems like it ...","['content', 'nausea', 'world', 'war', 'four', ...","['garage', 'rock']","['content', 'nausea']","['content', 'nausea']",[ 2.36339107e-01 1.71193734e-01 3.91736887e-...,[-6.4670e-01 9.8998e-01 -1.4379e-01 -3.0598e-...,Self-Empowerment + Confident,0.893172
3,30,Garage Rock,Slide Machine,Parquet Courts,Content Nausea,spotify:track:7rjK8CDTtTn2KG9Zja7ETj,I've been down South where they use the slide ...,"['south', 'use', 'slide', 'machine', 'god', 'o...","['garage', 'rock']","['slide', 'machine']","['content', 'nausea']",[-0.04323885 -0.0324073 0.3804018 -0.931053...,[-6.4670e-01 9.8998e-01 -1.4379e-01 -3.0598e-...,Relationships,0.98926
4,30,Garage Rock,Pretty Machines,Parquet Courts,Content Nausea,spotify:track:1zPn4tfkWfowVG3ROo1wUY,"Ah, moonlight\nIt's hard to believe it\nAnd it...","['moonlight', 'hard', 'believe', 'harder', 'ne...","['garage', 'rock']","['pretty', 'machine']","['content', 'nausea']",[ 0.11442823 0.0718515 0.0574954 -0.275225...,[-6.4670e-01 9.8998e-01 -1.4379e-01 -3.0598e-...,Self-Empowerment + Confident,0.749053


From text_processing.ipynb, we got the track names and artists of the top 5 tracks for each topic (see code below). Let's save them into a dataframe.
Note: Sadly we have to manually re-access these from the dataframe, because did not save track URIs when we originally ran this with our LDA model.

* Topic 1: Life + Personal Struggles
* Topic 2: Relationships
* Topic 3: Self-Empowerment
* Topic 4: Nostalgic Reflection on Life

In [174]:
topics = ["Life + Personal Struggles", "Relationships", "Self-Empowerment", "Nostalgic Reflection on Life"]

topic1_titles = ["Fake Love", "Every Single Thing", "Big Green Tractor", "Fallen", "Bed"]
topic1_artists = ["Drake", "HOMESHAKE", "Jason Aldean", "Mýa", "J. Holiday"]

topic2_titles = ["Just The Two Of Us (feat. Bill Withers)", "Kush & Corinthians", "Eres", "Super Rich Kids", "Make It Wit Chu"]
topic2_artists = ["Grover Washington, Jr.", "Kendrick Lamar", "Café Tacvba", "Frank Ocean", "Queens of the Stone Age"]

topic3_titles = ["Forrest Gump", "Redefined (feat. Melanie Fontana & Novaspace) [Club Mix]", "Tennis Court", "I'm Still Standing - From \"Sing\" Original Motion Picture Soundtrack", "Lost"]
topic3_artists = ["Frank Ocean", "tyDi", "Lorde", "Taron Egerton", "Frank Ocean"]

topic4_titles = ["Nights", "Brokenhearted", "7 Years", "Jungle", "Not Today"]
topic4_artists = ["Frank Ocean", "Kalin and Myles", "Lukas Graham", "Drake", "Twenty One Pilots"]

In [175]:
# Create dictionary of topic name (key) and list of top 5 track URIs (values)

topic_track_uris = {}

# Define the topics and their corresponding titles and artists
topics_data = {
    "Life + Personal Struggles": (topic1_titles, topic1_artists),
    "Relationships": (topic2_titles, topic2_artists),
    "Self-Empowerment": (topic3_titles, topic3_artists),
    "Nostalgic Reflection on Life": (topic4_titles, topic4_artists)
}

# Iterate over each topic and its corresponding titles and artists
for topic, (titles, artists) in topics_data.items():
    track_uris = []
    
    # Iterate over each title and artist in the current topic
    for title, artist in zip(titles, artists):
        # Search for matching rows in the df based on track name and artist name
        matching_rows = df[(df['Track Name'] == title) & (df['Artist Name'] == artist)]
        
        # Retrieve the track URI from the matching rows
        if not matching_rows.empty:
            track_uri = matching_rows.iloc[0]['Track URI']
            track_uris.append(track_uri)
    
    # Store the top 5 track URIs for the current topic
    topic_track_uris[topic] = track_uris[:5]
    
# Now topic_track_uris contains the top 5 track URIs for each topic

In [176]:
topic_track_uris

{'Life + Personal Struggles': ['spotify:track:343YBumqHu19cGoGARUTsd',
  'spotify:track:7xSIYgB6KVCQNmKVVFwSkM',
  'spotify:track:1jvUrVUMm1U2XfXX534fRP',
  'spotify:track:59D1ABymMZNQLxBHYsfaq0',
  'spotify:track:6TlRNJaezOdzdECnQeRuMM'],
 'Relationships': ['spotify:track:1ko2lVN0vKGUl9zrU0qSlT',
  'spotify:track:705u4LfXcTBuDOB8UCy0A7',
  'spotify:track:3sxokptpTvk4rGdU9XeRUV',
  'spotify:track:0725YWm6Z0TpZ6wrNk64Eb',
  'spotify:track:6GyDY0yE47rfk8pcuKhioh'],
 'Self-Empowerment': ['spotify:track:4YZbVct8l9MnAVIROnLQdx',
  'spotify:track:3kkCsfOj7sjRWzUPS1Iod3',
  'spotify:track:74fpNVAJrKGQuqHETi4WSt',
  'spotify:track:0mb7btREdC3wuIUmuVRgWn',
  'spotify:track:4L7jMAP8UcIe309yQmkdcO'],
 'Nostalgic Reflection on Life': ['spotify:track:7eqoqGkKwgOaWNNHx90uEZ',
  'spotify:track:3fAMVfl5A50F0CR0ry8YSe',
  'spotify:track:7129iqBafaphfc3WPCGC0L',
  'spotify:track:7JXZq0JgG2zTrSOAgY8VMC',
  'spotify:track:7qxjGHW485TL8ciwkHD5MK']}

#### Get Subset of Playlists for Playlist Song Recommendations

In [177]:
# Get all the tracks in the first 8 unique playlist IDs

unique_ids = df.drop_duplicates(subset=['Playlist ID'])
unique_ids.head(15)

Unnamed: 0,Playlist ID,Playlist Name,Track Name,Artist Name,Album Name,Track URI,Lyrics,Preprocessed Lyrics,Preprocessed Playlist Name,Preprocessed Track Name,Preprocessed Album Name,Lyrics Embedding,Playlist Name Embedding,Dominant Topic,Dominant Topic Probability
0,30,Garage Rock,Take My Side,Will Butler,Policy,spotify:track:6v4zAuJTlszNdKrbbnEFu8,Where's the fire? Let it burn\nWhere're the ch...,"['fire', 'let', 'burn', 'child', 'child', 'got...","['garage', 'rock']","['take', 'side']",['policy'],[ 1.47042751e-01 1.87495738e-01 2.75591016e-...,[-6.4670e-01 9.8998e-01 -1.4379e-01 -3.0598e-...,Self-Empowerment + Confident,0.921849
132,41,Belters,Lay Me Down,Sam Smith,In The Lonely Hour,spotify:track:64GRDrL1efgXclrhVCeuA0,"Yes, I do, I believe\nThat one day I will be w...","['yes', 'believe', 'one', 'day', 'right', 'rig...",['belters'],['lay'],"['lonely', 'hour']",[ 2.8818098e-01 1.7539494e-01 6.5428846e-02 ...,,Self-Empowerment + Confident,0.990174
281,42,Road Trippin',Sorry,Justin Bieber,Purpose,spotify:track:69bp2EbF7Q2rqc5N3ylezZ,You gotta go and get angry at all of my honest...,"['gotta', 'go', 'get', 'angry', 'honesty', 'kn...","['road', 'trippin']",['sorry'],['purpose'],[ 0.19337267 -0.03710859 0.2024993 -0.281451...,[-0.388 0.93128496 0.701795 -0.140929...,Self-Empowerment + Confident,0.993481
428,55,Tennessee,Tennessee Whiskey,Chris Stapleton,Traveller,spotify:track:3fqwjXwUGN6vbzIwvyFMhx,Used to spend my nights out in a bar room\nLiq...,"['used', 'spend', 'night', 'bar', 'room', 'liq...",['tennessee'],"['tennessee', 'whiskey']",['traveller'],[-0.12505525 0.3721859 -0.4946104 -0.255244...,,Relationships,0.988425
573,68,xx,"Win Some, Lose Some",Big Sean,Dark Sky Paradise,spotify:track:0NBWNdTlcz0GAYY1lXRk1Y,"You win some and lose some, I heard that my wh...","['win', 'lose', 'heard', 'whole', 'life', 'hea...",['xx'],"['win', 'lose']","['dark', 'sky', 'paradise']",[ 0.00896502 0.07703428 0.02889826 -0.211491...,,Life + Struggles,0.670105
710,72,spanish jams,Atrévete-Te-Te,Calle 13,Calle 13 (Explicit Version),spotify:track:1q8NdCAQ9QUjpYiqzdd3mv,"¡Ah!\n¡Cumbia!\n\nAtrévete-te-te, salte del cl...","['cumbia', 'atrévete', 'te', 'te', 'salte', 'd...","['spanish', 'jam']","['atrévete', 'te', 'te']","['calle', '13', 'explicit', 'version']",[-0.104058 0.5621034 -0.46676672 0.563982...,[-0.63882 0.22677 0.04753 1.0836 -0.29...,Nostalgic Reflection on Life,0.996594
847,95,roadtrippin,Toes,Zac Brown Band,The Foundation,spotify:track:5kjyiH6but1t2UDXq15aeS,"I got my toes in the water, ass in the sand\nN...","['got', 'toe', 'water', 'sand', 'worry', 'worl...",['roadtrippin'],['toe'],['foundation'],[ 6.2010817e-02 2.6247647e-01 -7.2774865e-02 ...,,Self-Empowerment + Confident,0.423685
979,97,2012,Poison & Wine,The Civil Wars,Barton Hollow,spotify:track:0Z9TbLIhMAXa8BSOiY6um6,You only know what I want you to\n\nI know eve...,"['know', 'want', 'know', 'everything', 'want',...",['2012'],"['poison', 'wine']","['barton', 'hollow']",[ 0.16211595 0.333444 -0.28920245 -0.396273...,,Self-Empowerment + Confident,0.957199
1121,103,latin,Atrévete-Te-Te,Calle 13,Calle 13 (Explicit Version),spotify:track:1q8NdCAQ9QUjpYiqzdd3mv,"¡Ah!\n¡Cumbia!\n\nAtrévete-te-te, salte del cl...","['cumbia', 'atrévete', 'te', 'te', 'salte', 'd...",['latin'],"['atrévete', 'te', 'te']","['calle', '13', 'explicit', 'version']",[-0.104058 0.5621034 -0.46676672 0.563982...,,Nostalgic Reflection on Life,0.996594
1252,115,beach,Semi-Charmed Life,Third Eye Blind,Third Eye Blind,spotify:track:42et6fnHCw1HIPSrdPprMl,"I'm packed and I'm holdin'\nI'm smilin', she's...","['packed', 'holdin', 'smilin', 'livin', 'golde...",['beach'],"['semi', 'charmed', 'life']","['third', 'eye', 'blind']",[ 0.12684102 0.0335091 0.08912975 -0.323625...,[ 0.19833 0.76981 -0.88967 0.97886 -1.17...,Self-Empowerment + Confident,0.654565


In [178]:
# Choose the playlist IDs that are informative
keep_ids = [42, 115, 123, 124, 134]
# # Select all rows corresponding to these playlist IDs

# Get all the tracks in the first 8 unique playlist IDs
dash_df = df.loc[df['Playlist ID'].isin(keep_ids)]


print("Number of rows:", len(dash_df))

Number of rows: 713


In [179]:
# Get unique track URIs and names from this subset of data
dash_tracks = dash_df[['Track URI', 'Track Name']].drop_duplicates()

#### Run Predictions for Subset of Data

In [180]:
# DATA PREPARATION --> from content_recommender_system.ipynb

# Prepare data ------------
dash_df['Lyrics Embedding'] = dash_df['Lyrics Embedding'].apply(lambda x: np.fromstring(x[1:-1], sep=' ') if isinstance(x, str) else np.nan)
    
keep_cols = ['Playlist Name', 'Track Name', 'Artist Name', 'Album Name', 'Track URI', 'Lyrics Embedding']

unique_tracks_df = dash_df.drop_duplicates(subset='Track URI')[keep_cols].reset_index(drop=True)

# Add 'index' column, just in case original indices are lost if the df is grouped, etc.
unique_tracks_df['Index'] = range(len(unique_tracks_df))


# Create cosine similarity matrix ------------

# Get nparray of all lyric_embeddings
# Drop rows with missing values in the 'lyrics_embedding' col; there should only be one such col
unique_tracks_df = unique_tracks_df.dropna(subset=['Lyrics Embedding'])
unique_tracks_df['Index'] = range(len(unique_tracks_df))
dash_df = dash_df.dropna(subset=['Lyrics Embedding'])

# Convert the 'lyrics_embedding' col to a NumPy array
vectors = unique_tracks_df['Lyrics Embedding'].to_numpy()
vectors_2d = np.stack(vectors)

sim_matrix = cosine_similarity(vectors_2d)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dash_df['Lyrics Embedding'] = dash_df['Lyrics Embedding'].apply(lambda x: np.fromstring(x[1:-1], sep=' ') if isinstance(x, str) else np.nan)


In [181]:
# HELPER FUNCTIONS --> from content_recommender_system.ipynb

# HELPER Functions: index_to_uri and uri_to_index
# Use the unique_tracks_df to convert between the two (will be useful for when finding cosine similarity via index, but need to map those results to a specific track)

def index_to_uri(index):
    uri = unique_tracks_df.loc[unique_tracks_df['Index'] == index, 'Track URI'].values[0]
    return uri


def uri_to_index(uri):
    index = unique_tracks_df.loc[unique_tracks_df['Track URI'] == uri, 'Index'].values[0]
    return index


# HELPER Function: get_uris_in_playlist
# Given a playlist id, get the set of all unique tracks in that playlist

def get_uris_in_playlist(playlist_id):
    playlist = df[df['Playlist ID'] == playlist_id]
    return set(playlist['Track URI'])


# HELPER Function: get_topk_similar(i, k)
# Given a track_uri 'i', checks cosine similarity matrix to return (track_uri, similarity) of top k most similar lyrics

def get_topk_index_sim(uri, k):

    # Get index to search similarity matrix with
    index = uri_to_index(uri)

    # Extract similarities for that index, then sort
    sims = sim_matrix[index]
    top_k_indices = np.argsort(sims)[::-1][:k+1]  # k+1 since we don't want the get the similarity '1' between this track and itself
    top_k_sims = sims[top_k_indices]

    return [[index, sim] for index, sim in zip(top_k_indices[1:], top_k_sims[1:])]  # skipping the first elem since it will be same as the current track!


# HELPER Function: estimate_rating
# Use the formula described above to estimate ratings of tracks the playlist has not seen before

def estimate_rating(uri, topk, uris_in_plist):
    num = 0
    denom = 0
    for index, sim in topk:
        denom += sim

        uri = index_to_uri(index)
        if uri in uris_in_plist:  # if similar track is in playlist, multiply it by the 'rating' (we will set as 10 to weight it much higher than other similar tracks not in the playlist)
            num += (sim * 5)
        else:
            num += sim
    return num / denom


# HELPER Function: provide_recs
# Given a dataframe with estimated recs for new tracks, will cleanly provide recommendations

def get_recs(ratings_df, k):
    # Sort ratings_df and retain only top k
    ratings_df = ratings_df.sort_values(by='Estimated Rating', ascending=False)
    top_k_ratings_df = ratings_df.head(k)

    # Iterate through top k tracks and return df with relevant info
    rows = []
    for i, row in top_k_ratings_df.iterrows():
        uri = row['Track URI']
        rating = row['Estimated Rating']

        # Find row in original df that matches with track_uri
        match = unique_tracks_df.loc[unique_tracks_df['Track URI'] == uri]

        # Extract fields
        tr_name = match['Track Name'].values[0]
        tr_uri = match['Track URI'].values[0]
        art_name = match['Artist Name'].values[0]
        alb_name = match['Album Name'].values[0]
        tr_uri = match['Track URI'].values[0]

        # Create new row for official recommendation
        new_row = {'Track Name': tr_name, 'Artist Name': art_name, 'Album Name': alb_name, 'Track URI': tr_uri, 'Recommendation Score': rating}

        rows.append(new_row)

    return pd.DataFrame(rows)

In [182]:
# RECOMMENDER SYSTEM --> from content_recommender_system.ipynb

def content_rec_topk_tracks(playlist_id, topk=10):

    # Get set of all unique tracks in playlist_id
    uris_in_plist = get_uris_in_playlist(playlist_id)

    # Create df with estimated ratings for all unique tracks not in the playlist
    rows = []

    # Iterate through all unique track_uris and calculate rating
    unique_track_uris = dash_df['Track URI'].unique().tolist()
    for i in unique_track_uris:
        if i not in uris_in_plist:  # potential rec
            similar = get_topk_index_sim(i, topk)
            est_rating = estimate_rating(i, similar, uris_in_plist)

            # Add to estimated ratings df
            row = {'Track URI': i, 'Estimated Rating': est_rating}
            rows.append(row)

    # Create dataframe with recommendation info
    ratings_df = pd.DataFrame(rows)

    # Return df with recommendations
    return get_recs(ratings_df, topk)


In [194]:
# Get recs for all 5 playlists

all_playlist_recs = pd.DataFrame(columns=['Playlist Name', 'Recommendations'])

for pid in keep_ids:
    pname = dash_df.loc[dash_df['Playlist ID'] == pid, 'Playlist Name'].iloc[0]
    recs = content_rec_topk_tracks(pid, topk=10)
    # Append the playlist ID and recommendations to the DataFrame
    all_playlist_recs = all_playlist_recs.append({'Playlist Name': pname, 'Recommendations': recs}, ignore_index=True)

In [195]:
all_playlist_recs

Unnamed: 0,Playlist Name,Recommendations
0,Road Trippin',Track Name Artist...
1,beach,Trac...
2,w o r k o u t,Track Name ...
3,party music,Track Name Arti...
4,Rap/Pop,Track Name Artist Name \ 0 ...


In [196]:
# For each playlist id, save a list of track URIs that are the top recommendations

all_playlist_recs_dict = {}
for index, row in all_playlist_recs.iterrows():
    pname = row['Playlist Name']
    
    # Get the recommendations DataFrame for the current playlist ID
    recs_df = row['Recommendations']
    
    # Extract track URIs from the recommendations DataFrame and store them in a list
    track_uris = recs_df['Track URI'].tolist()
    
    # Store the list of track URIs for the current playlist ID in the dictionary
    all_playlist_recs_dict[pname] = track_uris


In [197]:
# Sanity check the data 
all_playlist_recs_dict.keys()

dict_keys(["Road Trippin'", 'beach', 'w o r k o u t', 'party music', 'Rap/Pop'])

In [198]:
all_playlist_recs_dict["Road Trippin'"]

['spotify:track:6yIMatK8OCXC3GuIlwHxuM',
 'spotify:track:3U3Ff8ZvL6oEu1r98gCCWW',
 'spotify:track:1SLikaDhWhhhnLJC58bkFI',
 'spotify:track:3l9tlu58MWDpHPAQEOUQAn',
 'spotify:track:6lanRgr6wXibZr8KgzXxBl',
 'spotify:track:2KQGs6oyLQwfqSZ5Zs0eje',
 'spotify:track:4X6qvY7lDpNxq9QokS5F0u',
 'spotify:track:5rX6C5QVvvZB7XckETNych',
 'spotify:track:5Yi6mwvMHnnh241uGjcJYy',
 'spotify:track:6MjljecHzHelUDismyKkba']

#### Pickle the final data needed for dashboard

In [199]:
# Dictionary of top 5 track URIs for each topic
topic_track_uris
with open('topic_track_uris.pkl', 'wb') as f:
    pickle.dump(topic_track_uris, f)

In [189]:
topic_track_uris

{'Life + Personal Struggles': ['spotify:track:343YBumqHu19cGoGARUTsd',
  'spotify:track:7xSIYgB6KVCQNmKVVFwSkM',
  'spotify:track:1jvUrVUMm1U2XfXX534fRP',
  'spotify:track:59D1ABymMZNQLxBHYsfaq0',
  'spotify:track:6TlRNJaezOdzdECnQeRuMM'],
 'Relationships': ['spotify:track:1ko2lVN0vKGUl9zrU0qSlT',
  'spotify:track:705u4LfXcTBuDOB8UCy0A7',
  'spotify:track:3sxokptpTvk4rGdU9XeRUV',
  'spotify:track:0725YWm6Z0TpZ6wrNk64Eb',
  'spotify:track:6GyDY0yE47rfk8pcuKhioh'],
 'Self-Empowerment': ['spotify:track:4YZbVct8l9MnAVIROnLQdx',
  'spotify:track:3kkCsfOj7sjRWzUPS1Iod3',
  'spotify:track:74fpNVAJrKGQuqHETi4WSt',
  'spotify:track:0mb7btREdC3wuIUmuVRgWn',
  'spotify:track:4L7jMAP8UcIe309yQmkdcO'],
 'Nostalgic Reflection on Life': ['spotify:track:7eqoqGkKwgOaWNNHx90uEZ',
  'spotify:track:3fAMVfl5A50F0CR0ry8YSe',
  'spotify:track:7129iqBafaphfc3WPCGC0L',
  'spotify:track:7JXZq0JgG2zTrSOAgY8VMC',
  'spotify:track:7qxjGHW485TL8ciwkHD5MK']}

In [200]:
# Dictionary of unique songs in subset of data
all_tracks_dict = {}

for index, row in unique_tracks_df.iterrows():
    key = f"{row['Track Name']} by {row['Artist Name']}"
    value = row['Track URI']
    all_tracks_dict[key] = value
sorted_dict = dict(sorted(all_tracks_dict.items(), key=lambda x: x[0]))

with open('all_tracks_dict.pkl', 'wb') as f:
    pickle.dump(sorted_dict, f)

In [191]:
# Dictionary of track to playlist it's in
track_to_playlist_dict = {}

for index, row in unique_tracks_df.iterrows():
    key = f"{row['Track Name']} by {row['Artist Name']}"
    value = row["Playlist Name"]
    track_to_playlist_dict[key] = value

with open('track_to_playlist_dict.pkl', 'wb') as f:
    pickle.dump(track_to_playlist_dict, f)

In [192]:
# Dataframe of 5 playlist names and their songs
playlists_songs_df = dash_df[['Playlist Name', 'Track Name', 'Artist Name', 'Album Name']]

playlists_songs_df.to_pickle('playlists_songs_df.pkl')

In [202]:
# Dictionary of playlist IDs and their top 10 recs
all_playlist_recs_dict
with open('all_playlist_recs.pkl', 'wb') as f:
    pickle.dump(all_playlist_recs_dict, f)

In [203]:
all_playlist_recs_dict


{"Road Trippin'": ['spotify:track:6yIMatK8OCXC3GuIlwHxuM',
  'spotify:track:3U3Ff8ZvL6oEu1r98gCCWW',
  'spotify:track:1SLikaDhWhhhnLJC58bkFI',
  'spotify:track:3l9tlu58MWDpHPAQEOUQAn',
  'spotify:track:6lanRgr6wXibZr8KgzXxBl',
  'spotify:track:2KQGs6oyLQwfqSZ5Zs0eje',
  'spotify:track:4X6qvY7lDpNxq9QokS5F0u',
  'spotify:track:5rX6C5QVvvZB7XckETNych',
  'spotify:track:5Yi6mwvMHnnh241uGjcJYy',
  'spotify:track:6MjljecHzHelUDismyKkba'],
 'beach': ['spotify:track:2ENexcMEMsYk0rVJigVD3i',
  'spotify:track:3VCWhe7AtBrkhour9t6dq6',
  'spotify:track:5LOr27rz2IKQ48HIfKullw',
  'spotify:track:5CtI0qwDJkDQGwXD1H1cLb',
  'spotify:track:6uCmU6ldcsVpLAKNCojVg8',
  'spotify:track:2p07VcUwRZ5sru3mJ0JogS',
  'spotify:track:16pwlVsypm4aDKMhXdOuXg',
  'spotify:track:1dlvJVzqW8BKVQauN8d3o5',
  'spotify:track:22L7bfCiAkJo5xGSQgmiIO',
  'spotify:track:6M6lsQB4OhqL41eld29PeC'],
 'w o r k o u t': ['spotify:track:0SGojwNP2zUrgDyeoBQxYO',
  'spotify:track:4vtF1P0X44pSXeJCwJkxUY',
  'spotify:track:6Ymvlzom4TQeoK