# Model 2, Version 2 - Creating a Ranked List from a Given Playlist Title and Commonly Shared Songs

In [32]:
#Imports all necessary dependencies
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sqlite3
import json

def id_from_uri(uri: str):
    """Return the ID portion of a colon-separated URI string.

    The ID is the third colon-delimited field, for example:
    URI: 'spotify:artist:012345...'
    ID: '012345...'
    """
    fields = uri.split(':')
    return fields[2]
def get_songs_title_from_playlist_slice(slice_json, playListName):
    """Collect the song IDs of every playlist in a slice whose name matches.

    Parameters
    ----------
    slice_json : dict
        Parsed slice JSON. It must have a 'playlists' list whose entries each
        carry a 'name' string and a 'tracks' list of dicts with a 'track_uri'.
    playListName : str
        Playlist title to look for. The comparison is case-insensitive
        (both sides are lower-cased).

    Returns
    -------
    list
        Song IDs (as produced by id_from_uri) of all tracks in all matching
        playlists, in the order they appear in the slice. A song that occurs
        in several matching playlists is listed once per occurrence. The list
        is empty when no playlist matches.
    """
    #Lower-cases the wanted title once instead of once per playlist
    wantedName = playListName.lower()
    listOfSongIDs = list()
    #Walks the playlists actually present in the slice rather than assuming
    #there are exactly 1000 of them, so shorter slices no longer raise IndexError
    for playlist_json in slice_json['playlists']:
        #Matches on the playlist title WILL CHANGE THIS ONCE WE HAVE PLAYLIST IDS
        if playlist_json['name'].lower() == wantedName:
            listOfSongIDs.extend(id_from_uri(track['track_uri']) for track in playlist_json['tracks'])
    return listOfSongIDs
        
#Initializes the slice bounds used to build each file name
slice_lower = 0 #Lower bound that appears in the current slice's file name
slice_upper = 999 #Upper bound that appears in the current slice's file name
listOfSongIDs = list() #Song IDs from every playlist titled "Top Hits"; a song is listed once per occurrence
#Visits mpd.slice.0-999.json through mpd.slice.999000-999999.json, one file per iteration
while(slice_upper <= 999999):
    fileName = f"../../data/playlist/mpd.slice.{slice_lower}-{slice_upper}.json" #File name of the slice being analyzed
    #Decodes the file explicitly as UTF-8 (the encoding the JSON specification requires);
    #without it open() falls back to the platform locale encoding, which can fail
    #or garble non-ASCII text on systems whose default is not UTF-8
    with open(fileName, encoding="utf-8") as testSlice:
        slice_json = json.load(testSlice)
    listOfSongIDs.extend(get_songs_title_from_playlist_slice(slice_json, playListName="Top Hits")) #Adds the song IDs of this slice's matching playlists
    slice_lower += 1000 #Moves the lower bound to the next slice file
    slice_upper += 1000 #Moves the upper bound to the next slice file
print(len(listOfSongIDs))


11231


Now that we have obtained the list of song IDs from the relevant playlists, we create a ranked list of the top K song IDs relevant to the playlist title. Songs are ranked by the number of times they appear in these playlists.

In [33]:
songDict = {} #Maps each song ID to the number of times it appears in listOfSongIDs.
#Tallies the occurrences; a song ID seen for the first time starts counting from 0.
for songID in listOfSongIDs:
    songDict[songID] = songDict.get(songID, 0) + 1
#Turns the tallies into (song ID, count) tuples ordered from most to fewest occurrences.
#The sort is stable, so songs with equal counts keep the order in which they were first seen.
rankedsongsList = sorted(songDict.items(), key=lambda pair: pair[1], reverse=True)
print(rankedsongsList)

[('7BKLCZ1jbUBVqRi2FVlTVw', 42), ('69bp2EbF7Q2rqc5N3ylezZ', 39), ('6DNtNfH8hXkqOX1sjqmI7p', 38), ('0QsvXIfqM0zZoerQfsI9lm', 37), ('4pdPtRcBmOSQDlJ3Fk945m', 37), ('6O6M7pJLABmfBRoGZMu76Y', 33), ('3hB5DgAiMAQ4DzYbsMq1IT', 33), ('4Hf7WnR761jpxPr5D46Bcd', 33), ('494OU6M7NOf4ICYb4zWCf5', 31), ('3CRDbSIZ4r5MsZ0YwxuEkn', 31), ('1xznGGDReH1oQq0xzbwXa3', 31), ('4tCtwWceOPWzenK2HAIJSb', 31), ('3pzjHKrQSvXGHQ98dx18HI', 30), ('1WP1r7fuvRqZRnUaTi2I1Q', 30), ('6JV2JOEocMgcZxYSZelKcc', 30), ('4Z3qm2aWGTJuJKnv3EWwyD', 29), ('2aFiaMXmWsM3Vj72F9ksBl', 29), ('4gbVRS8gloEluzf0GzDOFc', 29), ('6Knv6wdA0luoMUuuoYi2i1', 29), ('5aAx2yezTd8zXrkmtKl66Z', 29), ('68EMU2RD1ECNeOeJ5qAXCV', 28), ('6875MeXyCW0wLyT72Eetmo', 28), ('6hmhG1b4LEyNuashVvuIAo', 27), ('3DXncPQOG4VBw3QHh3S817', 27), ('7129iqBafaphfc3WPCGC0L', 27), ('7vRriwrloYVaoAe3a9wJHe', 27), ('6b8Be6ljOzmkOmFslEb23P', 27), ('6i0V12jOa3mr6uu4WYhUBr', 26), ('6WhzFzROw3aq3rPWjgYlxr', 26), ('27SdWb2rFzO6GWiYDBTD9j', 25), ('2GyA33q5rti5IxkMQemRDH', 25), ('7yHED