In [1]:
import json
import pandas as pd
import numpy as np
import os
import seaborn as sns
import re
from collections import *
from tqdm import tqdm
import matplotlib.pyplot as plt
import plotly.express as px
from scipy.sparse import csr_matrix, vstack, lil_matrix
import time
import random
import webbrowser
import pickle, gzip, joblib, shelve
import tkinter as tk
from tkinter import ttk, font
import threading, time
from itertools import islice, combinations


In [2]:
directory_path = 'data/raw'
filenames = sorted(os.listdir(directory_path))
print(f"{len(filenames) * 1000} playlists")

1000000 playlists


In [3]:
# looking at only the first 30,000 playlists
fullpaths = [directory_path + '/' + f for f in filenames][0:5]

In [4]:
min_tracks_per_playlist = 5
max_tracks_per_playlist = 150
min_albums_per_playlist = 2
min_artists_per_playlist = 2

In [10]:
song_relationships = {}

t = 0
iteration_times = []

for idx, path in enumerate(fullpaths):
    start_time = time.time()
    if idx % 10 == 0 and idx > 0:
        print(f"processed {idx-10}-{idx} - time taken {t:.2f}")
        iteration_times.append(t)
        t = 0
    with open(path) as f:
        mpd_slice = json.load(f)
    playlists_data = mpd_slice['playlists']

    for idx, playlist in enumerate(playlists_data):

        ## TO DO: remove redundnat info in song_name
        songs = set(track['track_uri'] for track in playlist['tracks'])
        t_per_p = len(songs)
        albums = set(track['album_uri'] for track in playlist['tracks'])
        alb_per_p = len(albums)
        artists = set(track['artist_uri'] for track in playlist['tracks'])
        art_per_p = len(artists)

        if (min_tracks_per_playlist >= t_per_p) or \
             (t_per_p >= max_tracks_per_playlist) or \
                (min_albums_per_playlist >= alb_per_p) or \
                    (min_artists_per_playlist >= art_per_p):
            continue
        
        temp_relationships = {}

        for song in songs:
            if song not in temp_relationships:
                temp_relationships[song] = {}
            
            for related_song in songs:
                if related_song != song:  # avoid self-relationship
                    if related_song in temp_relationships[song]:
                        temp_relationships[song][related_song] += 1
                    else:
                        temp_relationships[song][related_song] = 1

        for song, relations in temp_relationships.items():
            if song not in song_relationships:
                song_relationships[song] = dict(relations)
            else:
                for related_song, count in relations.items():
                    if related_song in song_relationships[song]:
                        song_relationships[song][related_song] += count
                    else:
                        song_relationships[song][related_song] = count

    end_time = time.time()
    t += end_time - start_time

# song_relationships = {key: list(value.items()) for key, value in tqdm(song_relationships.items())}
    
print(f'{len(song_relationships)} songs processed')

# save_path = os.path.join('song_data', 'song_relationships.gz')

# with gzip.open(save_path, 'wb') as f:
#     pickle.dump(song_relationships, f, protocol = pickle.HIGHEST_PROTOCOL)
# print(f"'song_relationships' saved to {save_path}")

83746 songs processed


In [7]:
idx

4

In [12]:
song_relationships2 = {}
iteration_times = []

for idx, path in enumerate(fullpaths):
    start_time = time.time()

    with open(path) as f:
        mpd_slice = json.load(f)

    playlists_data = mpd_slice['playlists']

    for playlist in playlists_data:
        for track in playlist['tracks']:
            song_uri = track['track_uri'].split(':')[-1]
            song_name = track['track_name']
            album_name = track['album_name']
            artist_name = track['artist_name']
            song_data_map[song_uri] = {'song_name': song_name, 'album_name': album_name, 'artist_name': artist_name}

        songs = [track['track_uri'].split(':')[-1] for track in playlist['tracks']]
        t_per_p = len(songs)
        albums = set(track['album_uri'].split(':')[-1] for track in playlist['tracks'])
        alb_per_p = len(albums)
        artists = set(track['artist_uri'].split(':')[-1] for track in playlist['tracks'])
        art_per_p = len(artists)

        if (min_tracks_per_playlist >= t_per_p) or \
             (t_per_p >= max_tracks_per_playlist) or \
                (min_albums_per_playlist >= alb_per_p) or \
                    (min_artists_per_playlist >= art_per_p):
            continue

        # Compute song-to-song relationships for the current playlist
        song_pairs = combinations(songs, 2)
        pair_counts = Counter(song_pairs)
        
        for (song1, song2), count in pair_counts.items():
            song_relationships2.setdefault(song1, {}).setdefault(song2, 1)
            song_relationships2[song1][song2] += count
            song_relationships2.setdefault(song2, {}).setdefault(song1, 1)
            song_relationships2[song2][song1] += count

    end_time = time.time()
    iteration_time = end_time - start_time
    iteration_times.append(iteration_time)

    if idx % 10 == 0 and idx > 0:
        t = np.sum(iteration_times[-10:])
        print(f"processed {idx}-{idx + 10} - time taken {t:.2f}")

print(f'{len(song_relationships2)} songs processed')


82799 songs processed


In [13]:
pair_counts

Counter({('spotify:track:6Lp1guODRjYW2PuMbKi9KQ',
          'spotify:track:4dZOhqCIzHt5rIlqrbGxtY'): 1,
         ('spotify:track:6Lp1guODRjYW2PuMbKi9KQ',
          'spotify:track:1Ub2UfhHdGUvzj5ic8Y2iW'): 1,
         ('spotify:track:6Lp1guODRjYW2PuMbKi9KQ',
          'spotify:track:3Kfjnc7XuaOELBqCIRy5ps'): 1,
         ('spotify:track:6Lp1guODRjYW2PuMbKi9KQ',
          'spotify:track:7fzS6dcLKvwdWcm3YUOonp'): 1,
         ('spotify:track:6Lp1guODRjYW2PuMbKi9KQ',
          'spotify:track:6pFJGaqEW7VVc3mQl0Wy6O'): 1,
         ('spotify:track:6Lp1guODRjYW2PuMbKi9KQ',
          'spotify:track:0Wza5vVl1eLx9x85WQXudD'): 1,
         ('spotify:track:6Lp1guODRjYW2PuMbKi9KQ',
          'spotify:track:2ugTo5HTikOTzYtmSUi2RY'): 1,
         ('spotify:track:6Lp1guODRjYW2PuMbKi9KQ',
          'spotify:track:4ckkOOSM4LmBGeEfj2vbgj'): 1,
         ('spotify:track:6Lp1guODRjYW2PuMbKi9KQ',
          'spotify:track:1cgG82O5BLN5UGRYmjQ4CP'): 1,
         ('spotify:track:6Lp1guODRjYW2PuMbKi9KQ',
          'spo

In [8]:
0 % 10

0

In [25]:
len(song_relationships)

82799

In [None]:
# to load song_relationships from storage

save_path = os.path.join('song_data', 'song_relationships.gz')

with gzip.open(save_path, 'rb') as f:
    song_relationships = pickle.load(f)

In [None]:
song_data_map = {}
for idx, path in enumerate(fullpaths):
    if idx % 10 == 0 and idx > 0:
        print(f"Processed {idx-10}-{idx}")
    with open(path) as f:
        mpd_slice = json.load(f)
    playlists_data = mpd_slice['playlists']
    for playlist in playlists_data:
        for track in playlist['tracks']:
            song_uri = track['track_uri']
            song_name = track['track_name']
            album_name = track['album_name']
            artist_name = track['artist_name']
            if song_uri in song_relationships:
                song_data_map[song_uri] = {'song_name': song_name, 'album_name': album_name, 'artist_name': artist_name}

save_path = os.path.join('song_data', 'song_data_map.gz')

with gzip.open(save_path, 'wb') as f:
    pickle.dump(song_data_map, f)
print(f"'song_data_map' saved to {save_path}")

In [None]:
# to load song_data_map from storage

save_path = os.path.join('song_data', 'song_data_map.gz')

with gzip.open(save_path, 'rb') as f:
    song_data_map = pickle.load(f)

In [None]:
max_connections = 0
song_with_most_connections = None

for song, connections in song_relationships.items():
    num_connections = len(connections)
    if num_connections > max_connections:
        max_connections = num_connections
        song_with_most_connections = song

print("Song with the most connections:", song_data_map[song_with_most_connections])
print("Number of connections:", max_connections)


In [None]:
song_indices = {song_uri: idx for idx, song_uri in enumerate(song_relationships.keys())}
num_songs = len(song_relationships)

In [None]:
# def process_chunk(chunk):
#     row_indices = []
#     col_indices = []
#     data = []

#     for song_uri, relationships in chunk.items():
#         row_idx = song_indices[song_uri]
#         for related_song_uri, count in relationships:
#             col_idx = song_indices[related_song_uri]
            
#             # Only consider the upper triangle of the matrix
#             if row_idx <= col_idx:
#                 row_indices.append(row_idx)
#                 col_indices.append(col_idx)
#                 data.append(count)
                
#                 # If it's not on the diagonal, add the symmetric entry
#                 if row_idx != col_idx:
#                     row_indices.append(col_idx)
#                     col_indices.append(row_idx)
#                     data.append(count)

#     return csr_matrix((data, (row_indices, col_indices)), shape=(num_songs, num_songs), dtype=np.int32)

# def slice_dict(d, start, end):
#     return dict(islice(d.items(), start, end))

# chunk_size = 20000
# interim_matrices = []

# for start_index in tqdm(range(0, num_songs, chunk_size)):
#     end_index = min(start_index + chunk_size, num_songs)
#     current_chunk = slice_dict(song_relationships, start_index, end_index)

#     interim_matrix = process_chunk(current_chunk)
#     interim_path = os.path.join('song_data', f'interim_matrix_{start_index//chunk_size}.gz')
    
#     with gzip.open(interim_path, 'wb') as f:
#         pickle.dump(interim_matrix, f)
    
#     interim_matrices.append(interim_path)
#     print(f"Processed chunk {start_index//chunk_size + 1} and saved to {interim_path}")

# # Combining interim results to get the final matrix
# final_matrix = sum((pickle.load(gzip.open(path, 'rb')) for path in interim_matrices))

# # Saving final matrix
# final_path = os.path.join('song_data', 'final_matrix.gz')
# with gzip.open(final_path, 'wb') as f:
#     pickle.dump(final_matrix, f)

# print(f"Final matrix saved to {final_path}")

In [None]:
full_matrix = csr_matrix((num_songs, num_songs), dtype=np.int32)

def slice_dict(d, start, end):
    return dict(islice(d.items(), start, end))

def update_matrix(chunk):
    global full_matrix
    
    data = []
    row_indices = []                
    col_indices = []
                                      
    for song_uri, relationships in chunk.items():
        row_idx = song_indices[song_uri]
        for related_song_uri, count in relationships:
            col_idx = song_indices[related_song_uri]
            
            # Only consider the upper triangle of the matrix
            if row_idx <= col_idx:
                data.append(count)
                row_indices.append(row_idx)
                col_indices.append(col_idx)
                
                # If it's not on the diagonal, add the symmetric entry
                if row_idx != col_idx:
                    data.append(count)
                    row_indices.append(col_idx)
                    col_indices.append(row_idx)

    # Create a temporary csr_matrix
    temp_matrix = csr_matrix((data, (row_indices, col_indices)), shape=(num_songs, num_songs), dtype=np.int32)
    
    # Update the full_matrix in-place using the temp_matrix
    full_matrix += temp_matrix

chunk_size = 25000

for start_index in range(0, num_songs, chunk_size):
    end_index = min(start_index + chunk_size, num_songs)
    current_relationships = slice_dict(song_relationships, start_index, end_index)
    update_matrix(current_relationships)
    print(f"Processed chunk {start_index//chunk_size + 1}/{num_songs//chunk_size}")

# Now, the full_matrix is your final cooccurrence_matrix
cooccurrence_matrix = full_matrix

save_path = os.path.join('song_data', 'cooccurrence_matrix.gz')
with gzip.open(save_path, 'wb') as f:
    pickle.dump(cooccurrence_matrix, f)
print(f"'cooccurrence_matrix' saved to {save_path}")

In [None]:
# to load cooccurrence_matrix from storage

save_path = os.path.join('song_data', 'cooccurrence_matrix.gz')

with gzip.open(save_path, 'rb') as f:
    cooccurrence_matrix = pickle.load(f)

In [None]:
# n = 10

# popularity = np.sum(transition_matrix, axis=0)
# top_n_indices = np.argsort(popularity.A1)[-n:]

# init = np.random.rand(1, num_songs)
# init = init / np.sum(init)
# probs = [init]
# p = csr_matrix(init)

# damping = True
# if damping:
#     damping_factor = 0.85
#     random_jump_vector = csr_matrix(np.ones(num_songs)/num_songs)
#     for i in tqdm(range(30)):
#         p = damping_factor * np.dot(p, transition_matrix) + (1 - damping_factor) * random_jump_vector
#         probs.append(p)
# else:
#     for i in tqdm(range(30)):
#         p = np.dot(p, transition_matrix)
#         probs.append(p)


# plot_data = []
# for i in top_n_indices:
#     song_name = song_data_map[list(song_relationships)[i]]["song_name"]
#     for step_num, step in enumerate(probs):
#         plot_data.append({'Iteration': step_num, 'Probability': step[0, i], 'Song': song_name})

# df = pd.DataFrame(plot_data)
# fig = px.line(df, x='Iteration', y='Probability', color='Song', title='Convergence of Most Popular Songs')
# fig.show()

In [None]:
# A = transition_matrix.copy()
# def user_playlist_vector(playlist_songs, song_indices, num_songs):
#     user_vector = np.zeros(num_songs)
#     for song in playlist_songs:
#         user_vector[song_indices[song]] = 1/len(playlist_songs)
#     return csr_matrix(user_vector)

# def get_recommendation_vector(user_vector, transition_matrix, steps, damping = True, damping_factor = 0.0):
#     p = user_vector
#     if damping:
#         random_jump_vector = csr_matrix(np.ones(num_songs) / num_songs)
#         for _ in range(steps):
#             p = damping_factor * np.dot(p, transition_matrix) + (1 - damping_factor) * user_vector
#     else:
#         for _ in range(steps):
#             p = np.dot(p, transition_matrix)
#     return p.toarray()[0]

# def top_n_recommendations(recommendation_vector, song_data_map, n):
#     top_indices = np.argsort(recommendation_vector)[-n:]
#     top_songs = [song_data_map[list(song_relationships)[i]] for i in top_indices]
#     return top_songs

# def recommend_songs(user_playlist, song_indices, transition_matrix, song_data_map, steps, n=10):
#     user_vector = user_playlist_vector(user_playlist, song_indices, len(song_indices))
#     recommendation_vector = get_recommendation_vector(user_vector, transition_matrix, steps)
#     return top_n_recommendations(recommendation_vector, song_data_map, n)

# top_n_songs = [list(song_relationships)[i] for i in top_n_indices]
# playlist = random.sample(list(song_relationships), 3) #+ top_n_songs

# for i, j in zip(playlist, [song_data_map[x] for x in playlist]):
#     print(i, j)

# recommended_songs = recommend_songs(playlist, song_indices, A, song_data_map, 10, n=5)
# for song in recommended_songs:
#     print(f"Song: {song['song_name']}\nAlbum: {song['album_name']}\nArtist: {song['artist_name']}\n{'-'*40}")    

In [None]:
total_occurrences = np.sum(cooccurrence_matrix)
p_i = np.sum(cooccurrence_matrix, axis=1) / total_occurrences
p_i = np.asarray(p_i).flatten()
p_ij = cooccurrence_matrix / total_occurrences

pmi_data = []
row_indices = []
col_indices = []
k = 1

for i in tqdm(range(len(p_ij.indptr) - 1)):
    for data_idx in range(p_ij.indptr[i], p_ij.indptr[i + 1]):
        j = p_ij.indices[data_idx]

        # Only consider the upper triangle of the matrix
        if i <= j:
            if p_ij.data[data_idx] > 0:  # Avoid log(0)
                original_pmi = np.log2(p_ij.data[data_idx] / (p_i[i] * p_i[j]))
                pmi_score = original_pmi - (-(k - 1) * np.log2(p_ij.data[data_idx]))
                
                # store value for upper triangle
                pmi_data.append(pmi_score)
                row_indices.append(i)
                col_indices.append(j)

                # If not on the diagonal, store the mirrored value for the lower triangle
                if i != j:
                    pmi_data.append(pmi_score)
                    row_indices.append(j)
                    col_indices.append(i)

pmi_matrix = csr_matrix((pmi_data, (row_indices, col_indices)), shape=cooccurrence_matrix.shape, dtype=np.float32)

save_path = os.path.join('song_data', 'pmi_matrix.gz')
with gzip.open(save_path, 'wb') as f:
    pickle.dump(pmi_matrix, f)
print(f"'pmi_matrix' saved to {save_path}")

In [None]:
# total_occurrences = np.sum(cooccurrence_matrix)
# p_i = np.sum(cooccurrence_matrix, axis=1) / total_occurrences
# p_i = np.asarray(p_i).flatten()  # Convert matrix to 1D array
# p_ij = cooccurrence_matrix / total_occurrences

# pmi_data = np.array(p_ij.data)
# k = 1

# for i in tqdm(range(len(p_ij.indptr) - 1)):
#     for data_idx in range(p_ij.indptr[i], p_ij.indptr[i + 1]):
#         j = p_ij.indices[data_idx]
#         if pmi_data[data_idx] > 0:  # Avoid log(0)
#             original_pmi = np.log2(pmi_data[data_idx] / (p_i[i] * p_i[j]))
#             pmi_data[data_idx] = original_pmi - (-(k - 1) * np.log2(pmi_data[data_idx]))
#         # else:
#         #     pmi_data[data_idx] = 0  # Or any other default value for log(0)

# pmi_matrix = csr_matrix((pmi_data, p_ij.indices, p_ij.indptr), shape=cooccurrence_matrix.shape, dtype=np.float32)

# save_path = os.path.join('song_data', 'pmi_matrix.gz')
# with gzip.open(save_path, 'wb') as f:
#     pickle.dump(pmi_matrix, f)
# print(f"'pmi_matrix' saved to {save_path}")

In [None]:
# to load pmi_matrix from storage

save_path = os.path.join('song_data', 'pmi_matrix.gz')

with gzip.open(save_path, 'rb') as f:
    pmi_matrix = pickle.load(f)

In [None]:
def user_playlist_vector(playlist_songs, song_indices, num_songs):
    user_vector = np.zeros(num_songs)
    for song in playlist_songs:
        user_vector[song_indices[song]] = 1
    return csr_matrix(user_vector)

def compute_scores(user_vector, pmi_matrix):
    scores = user_vector.dot(pmi_matrix)
    return scores.toarray()[0]

def get_top_recommendations(scores, song_data_map, n=10):
    top_indices = np.argsort(scores)[-n:]
    top_songs = [song_data_map[list(song_relationships)[i]] for i in top_indices]
    return top_songs

def recommend_songs_pmi(user_playlist, song_indices, pmi_matrix, song_data_map, n=10):
    user_vector = user_playlist_vector(user_playlist, song_indices, num_songs)
    user_vector_csr = csr_matrix(user_vector)
    scores = compute_scores(user_vector_csr, pmi_matrix)
    return get_top_recommendations(scores, song_data_map, n)

In [None]:
class SongRecommendationApp(tk.Tk):
    def __init__(self, song_data_map):
        super().__init__()

        default_font = font.nametofont("TkDefaultFont")
        default_font.configure(family="Courier")

        self.song_data_map = song_data_map
        self.uri_map = {self.format_song_display(song_info): uri for uri, song_info in song_data_map.items()}
        self.playlist_data = []  # Store song data for sorting

        # Filter Frame
        self.filter_frame = ttk.Frame(self)
        self.filter_frame.pack(pady=10)

        # Label and Entry for Song
        self.song_label = ttk.Label(self.filter_frame, text="Song")
        self.song_label.grid(row=0, column=0, padx=5)
        self.song_entry = ttk.Entry(self.filter_frame)
        self.song_entry.grid(row=1, column=0, padx=5)

        # Label and Entry for Artist
        self.artist_label = ttk.Label(self.filter_frame, text="Artist")
        self.artist_label.grid(row=0, column=1, padx=5)
        self.artist_entry = ttk.Entry(self.filter_frame)
        self.artist_entry.grid(row=1, column=1, padx=5)

        # Label and Entry for Album
        self.album_label = ttk.Label(self.filter_frame, text="Album")
        self.album_label.grid(row=0, column=2, padx=5)
        self.album_entry = ttk.Entry(self.filter_frame)
        self.album_entry.grid(row=1, column=2, padx=5)

        # Debounce logic
        self.last_time = time.time()

        self.search_button = ttk.Button(self.filter_frame, text="Search", command=self.display_search_results)
        self.search_button.grid(row=2, columnspan=3, pady=10)

        width = 200
        # Songs Listbox
        self.songs_listbox = tk.Listbox(self, selectmode=tk.SINGLE, width=width, font=("Courier", 10))
        self.songs_listbox.pack(pady=10)

        # Drag & Drop functionality
        self.songs_listbox.bind('<<ListboxSelect>>', self.add_to_playlist)

        # Playlist Listbox
        self.playlist_listbox = tk.Listbox(self, bg="lightblue", selectmode=tk.SINGLE, width=width, font=("Courier", 10))
        self.playlist_listbox.pack(pady=10)

        # Number of recommendations
        self.n_label = ttk.Label(self, text="Number of Recommendations:")
        self.n_label.pack(pady=5)
        self.n_entry = ttk.Entry(self)
        self.n_entry.pack(pady=5)

        # Button to generate recommendations
        self.btn_recommend = ttk.Button(self, text="Generate Recommendations", command=self.generate_recommendations)
        self.btn_recommend.pack(pady=10)

        # Recommendations Listbox
        self.recommendations_listbox = tk.Listbox(self, bg="lightgreen", selectmode=tk.SINGLE, width=width, font=("Courier", 10))
        self.recommendations_listbox.pack(pady=10)

        self.recommendations_listbox.bind('<Double-Button-1>', self.open_in_spotify)

        self.btn_refresh = ttk.Button(self, text="Refresh", command=self.refresh)
        self.btn_refresh.pack(pady=10)
    
    def refresh(self):
        # Clear all fields
        self.song_entry.delete(0, tk.END)
        self.artist_entry.delete(0, tk.END)
        self.album_entry.delete(0, tk.END)
        self.n_entry.delete(0, tk.END)
        
        # Clear listboxes
        self.songs_listbox.delete(0, tk.END)
        self.playlist_listbox.delete(0, tk.END)
        self.recommendations_listbox.delete(0, tk.END)

    def open_in_spotify(self, event):
            selected_index = self.recommendations_listbox.curselection()
            if selected_index:
                selected_song = self.recommendations_listbox.get(selected_index)
                song_uri = self.uri_map[selected_song]
                uri = song_uri.split(':')[-1]
                webbrowser.open(f"https://open.spotify.com/track/{uri}")
                
    def display_search_results(self):
        song_query = self.song_entry.get().lower()
        artist_query = self.artist_entry.get().lower()
        album_query = self.album_entry.get().lower()

        self.songs_listbox.delete(0, tk.END)
        results = []  # Store the filtered results first

        for uri, song_info in self.song_data_map.items():
            if song_query in song_info['song_name'].lower() and artist_query in song_info['artist_name'].lower() and album_query in song_info['album_name'].lower():
                display_name = self.format_song_display(song_info)
                results.append(display_name)

        # Sort by album name
        results.sort(key=lambda x: self.song_data_map[self.uri_map[x]]['album_name'])

        # Display the sorted results
        for display_name in results:
            self.songs_listbox.insert(tk.END, display_name)

        if len(results) > 300:  # If you want to limit the displayed results
            self.songs_listbox.delete(301, tk.END)

    def format_song_display(self, song_info):
        formatted_str = "{:<65}{:<35}{:<35}"
        f_string = formatted_str.format(song_info['song_name'], song_info['artist_name'], song_info['album_name'])
        return f_string

    def add_to_playlist(self, event):
        selected_index = self.songs_listbox.curselection()
        if selected_index:  # This checks if there's any selection at all
            selected_song = self.songs_listbox.get(selected_index)
            if selected_song not in self.playlist_listbox.get(0, tk.END):  # Prevent duplicates
                self.playlist_listbox.insert(tk.END, selected_song)

    def generate_recommendations(self):
        playlist_display_names = list(self.playlist_listbox.get(0, tk.END))
        playlist_uris = [self.uri_map[display_name] for display_name in playlist_display_names]  # Extract URIs

        n = int(self.n_entry.get())
        recommended_songs = recommend_songs_pmi(playlist_uris, song_indices, pmi_matrix, song_data_map, n)

        self.recommendations_listbox.delete(0, tk.END)
        for song in recommended_songs:
            formatted_song = self.format_song_display(song)
            self.recommendations_listbox.insert(tk.END, formatted_song)

if __name__ == "__main__":
    app = SongRecommendationApp(song_data_map)
    app.mainloop()


In [None]:
playlist = random.sample(list(song_relationships), 1)

for i, j in zip(playlist, [song_data_map[x] for x in playlist]):
    print(f"Song: {j['song_name']}\nAlbum: {j['album_name']}\nArtist: {j['artist_name']}\n{'-'*40}")

print(f'\n{"-"*60}')
recommended_songs = recommend_songs_pmi(playlist, song_indices, pmi_matrix, song_data_map, n=8)
for song in recommended_songs:
    print(f"Song: {song['song_name']}\nAlbum: {song['album_name']}\nArtist: {song['artist_name']}\n{'-'*40}")    