In [1]:
import numpy as np
import collections

In [2]:
class Apriori:
    """Level-wise (Apriori-style) frequent-itemset miner over playlists.

    Each playlist is treated as a transaction (a set of songs). ``fit`` finds
    every group of songs whose support -- the fraction of playlists containing
    all songs of the group -- is at least ``min_support``.

    Parameters
    ----------
    data : mapping or 0-d NumPy object array wrapping a mapping
        Maps a playlist key to an iterable of hashable songs. Only the values
        are used.
    min_support : float
        Minimum fraction of playlists an itemset must appear in to be kept.
    min_confidence : float
        Stored on the instance; not used by any method of this class yet.
    min_lift : float
        Stored on the instance; not used by any method of this class yet.
    """

    def __init__(self,
                 data,
                 min_support=0.01,
                 min_confidence=0.01,
                 min_lift=0.01):
        self.data = data
        # Each threshold is stored under its own name (min_support used to be
        # overwritten with min_confidence).
        self.min_support = min_support
        self.min_confidence = min_confidence
        self.min_lift = min_lift

    def _support(self, count):
        """Return ``count`` expressed as a fraction of all playlists."""
        return count / len(self.playlists)

    def prepare_data(self):
        """Build ``self.playlists`` and ``self.songs_counter`` from ``self.data``.

        ``self.playlists`` becomes a list of song sets, so a song repeated
        inside one playlist is counted once. ``self.songs_counter`` maps each
        song to the number of playlists that contain it.
        """
        # A dict saved with np.save comes back as a 0-d object array whose
        # .item() is the dict; plain mappings have no .item and are used as is.
        raw = self.data.item() if hasattr(self.data, "item") else self.data
        self.playlists = [set(playlist) for playlist in raw.values()]
        self.songs_counter = collections.Counter(
            song for playlist in self.playlists for song in playlist)

    def calculate_song_frequency(self):
        """Index, for every song, the positions of the playlists containing it.

        The result is stored in ``self.songs_in_playlists`` as a
        ``defaultdict(set)`` mapping song -> set of indices into
        ``self.playlists``.
        """
        songs_in_playlists = collections.defaultdict(set)
        for index, playlist in enumerate(self.playlists):
            for song in playlist:
                songs_in_playlists[song].add(index)
        self.songs_in_playlists = songs_in_playlists

    def generate_L_1(self):
        """Select the frequent single songs.

        Sets ``self.L_1_counter`` (song -> playlist count, for songs meeting
        ``min_support``) and ``self.L_1`` (the same songs as one-element sets).
        """
        self.L_1_counter = {
            song: times
            for song, times in self.songs_counter.items()
            if self._support(times) >= self.min_support
        }
        self.L_1 = [{song} for song in self.L_1_counter]

    def generate_new_candidates(self, current_itemsets, k):
        """Return the size-``k`` candidates built from ``current_itemsets``.

        Every itemset in ``current_itemsets`` is extended with every frequent
        single song from ``self.L_1``; only unions of exactly ``k`` songs are
        kept, which drops the cases where the song was already a member.

        Returns
        -------
        set of frozenset
        """
        C_k = set()
        for candidate in current_itemsets:
            base = frozenset(candidate)
            for song in self.L_1:
                new_candidate = base.union(song)
                if len(new_candidate) == k:
                    C_k.add(new_candidate)
        return C_k

    def prune_itemsets(self, C_k):
        """Count each candidate's playlists and keep the frequent ones.

        A candidate's count is the size of the intersection of the playlist
        index sets of its songs (see ``calculate_song_frequency``).

        Returns
        -------
        dict
            Maps each candidate meeting ``min_support`` to its playlist count.
        """
        C_k_counter = {}
        for candidate in C_k:
            playlists_inter = [
                self.songs_in_playlists[song] for song in candidate
            ]
            C_k_counter[candidate] = len(set.intersection(*playlists_inter))

        L_k_counter = {
            subset: times
            for subset, times in C_k_counter.items()
            if self._support(times) >= self.min_support
        }
        return L_k_counter

    def fit(self):
        """Mine frequent itemsets of size >= 2 and print each level's counts.

        ``self.frequent_itemsets`` receives one entry per level, starting at
        k = 2 (frequent single songs stay in ``self.L_1``). The loop stops
        after the first level with no frequent itemsets; that empty level is
        still appended and printed.
        """
        self.prepare_data()
        self.calculate_song_frequency()
        self.generate_L_1()

        self.frequent_itemsets = []
        k = 2
        current = self.L_1
        while len(current) != 0:
            C_k = self.generate_new_candidates(current, k)
            L_k_counter = self.prune_itemsets(C_k)
            L_k = L_k_counter.keys()
            self.frequent_itemsets.append(L_k)
            print("K = {}: {}".format(k, L_k_counter))
            k += 1
            current = L_k

In [3]:
# The file is consumed through `.item()` in Apriori.prepare_data, so it
# presumably holds a pickled dict inside a 0-d object array. NumPy >= 1.16.3
# refuses to unpickle object arrays unless allow_pickle=True is passed.
# NOTE: unpickling can execute arbitrary code -- only load files you trust.
spotify_data = np.load("spotify.npy", allow_pickle=True)

In [4]:
# Instantiate the miner on the loaded playlists, every threshold set to 0.01.
apriori = Apriori(
    data=spotify_data,
    min_support=0.01,
    min_confidence=0.01,
    min_lift=0.01,
)

In [5]:
# Run the level-wise search; fit() prints the itemset counts for each k it
# evaluates, starting at k = 2.
apriori.fit()

K = 2: {frozenset({'iSpy (feat. Lil Yachty)', 'Congratulations'}): 162, frozenset({'XO TOUR Llif3', 'Drowning (feat. Kodak Black)'}): 124, frozenset({'Mask Off', 'HUMBLE.'}): 204, frozenset({'Mask Off', 'Broccoli (feat. Lil Yachty)'}): 104, frozenset({'XO TOUR Llif3', 'DNA.'}): 118, frozenset({'Bad and Boujee (feat. Lil Uzi Vert)', 'Swang'}): 103, frozenset({'Caroline', 'Bad and Boujee (feat. Lil Uzi Vert)'}): 153, frozenset({'XO TOUR Llif3', 'Unforgettable'}): 103, frozenset({'XO TOUR Llif3', 'Mask Off'}): 163, frozenset({'X (feat. Future)', 'Bad and Boujee (feat. Lil Uzi Vert)'}): 109, frozenset({'Closer', 'Shape of You'}): 111, frozenset({'goosebumps', 'Congratulations'}): 155, frozenset({'Starving', 'Closer'}): 112, frozenset({'Bad and Boujee (feat. Lil Uzi Vert)', 'Broccoli (feat. Lil Yachty)'}): 155, frozenset({'Panda', 'One Dance'}): 128, frozenset({'Closer', 'One Dance'}): 121, frozenset({'Bounce Back', 'Black Beatles'}): 114, frozenset({'goosebumps', 'No Problem (feat. Lil Way

K = 3: {frozenset({'Mask Off', 'DNA.', 'HUMBLE.'}): 100, frozenset({'Bounce Back', 'Bad and Boujee (feat. Lil Uzi Vert)', 'Broccoli (feat. Lil Yachty)'}): 100, frozenset({'XO TOUR Llif3', 'goosebumps', 'Mask Off'}): 101, frozenset({'Mask Off', 'Congratulations', 'HUMBLE.'}): 121, frozenset({'goosebumps', 'Mask Off', 'HUMBLE.'}): 110, frozenset({'Bad and Boujee (feat. Lil Uzi Vert)', 'Mask Off', 'HUMBLE.'}): 110, frozenset({'XO TOUR Llif3', 'goosebumps', 'HUMBLE.'}): 111, frozenset({'XO TOUR Llif3', 'DNA.', 'HUMBLE.'}): 102, frozenset({'XO TOUR Llif3', 'Tunnel Vision', 'HUMBLE.'}): 102, frozenset({'goosebumps', 'Congratulations', 'HUMBLE.'}): 112, frozenset({'iSpy (feat. Lil Yachty)', 'Congratulations', 'HUMBLE.'}): 109, frozenset({'XO TOUR Llif3', 'Slippery (feat. Gucci Mane)', 'HUMBLE.'}): 101, frozenset({'XO TOUR Llif3', 'Congratulations', 'HUMBLE.'}): 128, frozenset({'XO TOUR Llif3', 'Mask Off', 'HUMBLE.'}): 131, frozenset({'XO TOUR Llif3', 'goosebumps', 'Congratulations'}): 104, fr