In [1]:
import numpy as np
import collections

In [2]:
class Apriori:
    """Level-wise frequent-itemset miner over playlists of songs.

    Each playlist is treated as a transaction (a set of songs). ``fit``
    finds, for k = 2, 3, ..., every set of k songs that appears together in
    at least ``min_support`` (a fraction) of all playlists.

    Parameters
    ----------
    data :
        The playlists. Either an object whose ``.item()`` returns a mapping
        (e.g. the 0-d object array produced by ``np.load`` on a pickled
        dict), a plain mapping ``name -> iterable of songs``, or an iterable
        of playlists.
    min_support : float
        Minimum fraction of playlists an itemset must occur in.
    min_confidence : float
        Stored on the instance; not used by the methods of this class.
    min_lift : float
        Stored on the instance; not used by the methods of this class.
    """

    def __init__(self,
                 data,
                 min_support=0.01,
                 min_confidence=0.01,
                 min_lift=0.01):
        self.data = data
        # Each threshold is stored under its own name (min_support used to be
        # silently overwritten with min_confidence).
        self.min_support = min_support
        self.min_confidence = min_confidence
        self.min_lift = min_lift

    def prepare_data(self):
        """Build ``self.playlists`` (list of song sets) and ``self.songs_counter``.

        Reads from ``self.data`` rather than from a notebook-level global, so
        the instance works with whatever dataset it was constructed with.
        """
        raw = self.data
        # A 0-d container (such as the array returned by np.load for a pickled
        # dict) is unwrapped with .item(); anything else is used as-is.
        if hasattr(raw, "item") and getattr(raw, "ndim", 0) == 0:
            raw = raw.item()
        playlists = raw.values() if hasattr(raw, "values") else raw

        # Converting to sets removes duplicate songs inside one playlist, so
        # the counter below counts playlists-per-song, not occurrences.
        self.playlists = [set(playlist) for playlist in playlists]
        self.songs_counter = collections.Counter(
            song for playlist in self.playlists for song in playlist)

    def calculate_song_frequency(self):
        """Build the inverted index ``song -> set of playlist indices``."""
        songs_in_playlists = collections.defaultdict(set)
        for index, playlist in enumerate(self.playlists):
            for song in playlist:
                songs_in_playlists[song].add(index)
        self.songs_in_playlists = songs_in_playlists

    def generate_L_1(self):
        """Select the single songs whose support reaches ``min_support``.

        Sets ``self.L_1_counter`` (song -> playlist count) and ``self.L_1``
        (list of one-element sets).
        """
        n_playlists = len(self.playlists)
        self.L_1_counter = {
            song: times
            for song, times in self.songs_counter.items()
            if times / n_playlists >= self.min_support
        }
        self.L_1 = [{song} for song in self.L_1_counter]

    def generate_new_candidates(self, current_itemsets, k):
        """Return the set of size-``k`` candidates built from ``current_itemsets``.

        Every current itemset is united with every frequent single song; only
        unions containing exactly ``k`` songs are kept. Candidates are
        frozensets so that they can be stored in a set / used as dict keys.
        """
        C_k = set()
        for candidate in current_itemsets:
            base = frozenset(candidate)  # hoisted: invariant over the inner loop
            for song in self.L_1:
                new_candidate = base.union(song)
                if len(new_candidate) == k:
                    C_k.add(new_candidate)
        return C_k

    def prune_itemsets(self, C_k):
        """Count each candidate's support and drop those below ``min_support``.

        The support count of a candidate is the number of playlists that
        contain all of its songs, obtained by intersecting the per-song
        playlist-index sets. Returns a dict ``candidate -> count``.
        """
        n_playlists = len(self.playlists)
        L_k_counter = {}
        for candidate in C_k:
            playlists_inter = [self.songs_in_playlists[song] for song in candidate]
            times = len(set.intersection(*playlists_inter))
            if times / n_playlists >= self.min_support:
                L_k_counter[candidate] = times
        return L_k_counter

    def fit(self):
        """Run the full pipeline and populate ``self.frequent_itemsets``.

        ``self.frequent_itemsets[i]`` is the list of frequent itemsets of size
        ``i + 2``; frequent single songs are available as ``self.L_1``. The
        loop stops after the first size that yields no frequent itemset, and
        that final empty level is appended as well. The surviving itemsets and
        their counts are printed for every size.
        """
        self.prepare_data()
        self.calculate_song_frequency()
        self.generate_L_1()

        self.frequent_itemsets = []
        k = 2
        current = self.L_1
        while current:
            C_k = self.generate_new_candidates(current, k)
            L_k_counter = self.prune_itemsets(C_k)
            # Materialise the keys so the stored level is a plain list rather
            # than a live view onto a local dict.
            L_k = list(L_k_counter)
            self.frequent_itemsets.append(L_k)
            print("K = {}: {}".format(k, L_k_counter))
            k += 1
            current = L_k

In [3]:
# The file is consumed via `.item().values()`, i.e. it holds a pickled dict
# wrapped in a 0-d object array. NumPy >= 1.16.3 refuses to unpickle object
# arrays unless allow_pickle=True is passed explicitly.
# NOTE: unpickling can execute arbitrary code -- only load trusted files.
spotify_data = np.load("spotify.npy", allow_pickle=True)

In [4]:
# Configure the miner on the loaded playlists; all three thresholds are 1%.
apriori = Apriori(
    data=spotify_data,
    min_support=0.01,
    min_confidence=0.01,
    min_lift=0.01,
)

In [5]:
# Run the whole mining pipeline. For every itemset size K (starting at 2),
# fit() prints the itemsets that met the support threshold together with the
# number of playlists containing them.
apriori.fit()

K = 2: {frozenset({'Swang', 'HUMBLE.'}): 113, frozenset({'Slippery (feat. Gucci Mane)', 'XO TOUR Llif3'}): 132, frozenset({'Broccoli (feat. Lil Yachty)', 'Fake Love'}): 110, frozenset({'Closer', 'Cold Water (feat. Justin Bieber & MØ)'}): 145, frozenset({'Mask Off', 'Swang'}): 112, frozenset({'Bad and Boujee (feat. Lil Uzi Vert)', 'Broccoli (feat. Lil Yachty)'}): 155, frozenset({'Bad and Boujee (feat. Lil Uzi Vert)', 'Mask Off'}): 151, frozenset({'T-Shirt', 'Bounce Back'}): 102, frozenset({'HUMBLE.', '1-800-273-8255'}): 107, frozenset({'679 (feat. Remy Boyz)', 'Trap Queen'}): 122, frozenset({'HUMBLE.', 'Caroline'}): 130, frozenset({'Slippery (feat. Gucci Mane)', 'Mask Off'}): 126, frozenset({'You Was Right', 'Money Longer'}): 117, frozenset({'Closer', 'Starving'}): 112, frozenset({'Butterfly Effect', 'Bank Account'}): 111, frozenset({'Mask Off', 'Fake Love'}): 100, frozenset({'Closer', 'One Dance'}): 121, frozenset({'goosebumps', 'pick up the phone'}): 103, frozenset({'Broccoli (feat. L

K = 3: {frozenset({'Bad and Boujee (feat. Lil Uzi Vert)', 'Mask Off', 'HUMBLE.'}): 110, frozenset({'goosebumps', 'Mask Off', 'HUMBLE.'}): 110, frozenset({'goosebumps', 'HUMBLE.', 'XO TOUR Llif3'}): 111, frozenset({'goosebumps', 'XO TOUR Llif3', 'Congratulations'}): 104, frozenset({'Mask Off', 'Bounce Back', 'HUMBLE.'}): 101, frozenset({'Bad and Boujee (feat. Lil Uzi Vert)', 'Broccoli (feat. Lil Yachty)', 'Bounce Back'}): 100, frozenset({'Mask Off', 'HUMBLE.', 'iSpy (feat. Lil Yachty)'}): 101, frozenset({'DNA.', 'HUMBLE.', 'XO TOUR Llif3'}): 102, frozenset({'goosebumps', 'HUMBLE.', 'Congratulations'}): 112, frozenset({'DNA.', 'Mask Off', 'HUMBLE.'}): 100, frozenset({'HUMBLE.', 'XO TOUR Llif3', 'Congratulations'}): 128, frozenset({'Mask Off', 'XO TOUR Llif3', 'Congratulations'}): 107, frozenset({'HUMBLE.', 'Congratulations', 'iSpy (feat. Lil Yachty)'}): 109, frozenset({'Bad and Boujee (feat. Lil Uzi Vert)', 'HUMBLE.', 'Congratulations'}): 101, frozenset({'goosebumps', 'Mask Off', 'XO TOU