In [1]:
import numpy as np
import collections
import itertools

In [2]:
class Apriori:
    """Level-wise frequent-itemset miner over playlists, with rule generation.

    Parameters
    ----------
    data : mapping or numpy.ndarray
        Either a mapping whose values are iterables of songs (one iterable per
        playlist), or a 0-d NumPy object array wrapping such a mapping (what
        ``np.load`` returns for a pickled dict).
    min_support : float
        Minimum fraction of playlists an itemset must appear in to be kept.
    min_confidence : float
        Minimum confidence a rule must reach to be kept by ``generate``.
    min_lift : float
        Minimum lift a rule must reach to be kept by ``generate``.

    Attributes set by ``fit``
    -------------------------
    playlists : list of set
    songs_counter : collections.Counter (song -> number of playlists)
    songs_in_playlists : dict (song -> set of playlist indices)
    L_1_counter, L_1 : frequent single songs and their counts
    frequent_itemsets : list of frozenset, sizes 2 and up (singletons are
        kept separately in ``L_1``)
    """

    def __init__(self,
                 data,
                 min_support=0.01,
                 min_confidence=0.01,
                 min_lift=0.01):
        self.data = data
        # Each threshold is stored under its own name; previously the support
        # threshold was overwritten by the confidence value.
        self.min_support = min_support
        self.min_confidence = min_confidence
        self.min_lift = min_lift

    def prepare_data(self):
        """Build ``self.playlists`` (deduplicated) and ``self.songs_counter``."""
        # A pickled dict loaded through np.load arrives wrapped in a 0-d object
        # array; unwrap it. A plain mapping is used as-is.
        if isinstance(self.data, np.ndarray):
            raw = self.data.item()
        else:
            raw = self.data
        # Playlists become sets so a song repeated inside one playlist counts once.
        self.playlists = [set(playlist) for playlist in raw.values()]
        self.songs_counter = collections.Counter(
            song for playlist in self.playlists for song in playlist)

    def get_songs_appearences(self):
        """Index, for every song, the set of playlist positions containing it."""
        songs_in_playlists = collections.defaultdict(set)
        for index, playlist in enumerate(self.playlists):
            for song in playlist:
                songs_in_playlists[song].add(index)
        self.songs_in_playlists = songs_in_playlists

    def generate_L_1(self):
        """Keep the single songs whose support reaches ``min_support``."""
        total = len(self.playlists)
        self.L_1_counter = {
            song: times
            for song, times in self.songs_counter.items()
            if times / total >= self.min_support
        }
        self.L_1 = [{song} for song in self.L_1_counter]

    def generate_new_candidates(self, current_itemsets, k):
        """Join pairs of (k-1)-itemsets and return the unions of size ``k``.

        Returns a set of frozensets. Unordered pairs are enough: the union is
        symmetric, and an itemset joined with itself never grows to size ``k``.
        """
        C_k = set()
        for left, right in itertools.combinations(current_itemsets, 2):
            new_candidate = frozenset(left).union(right)
            if len(new_candidate) == k:
                C_k.add(new_candidate)
        return C_k

    def calculate_subset_count(self, subset):
        """Return how many playlists contain every song in ``subset``.

        An empty ``subset`` is contained in every playlist, so the total number
        of playlists is returned. Unknown songs yield a count of 0.
        """
        # .get avoids inserting empty entries into the defaultdict on lookup.
        memberships = [
            self.songs_in_playlists.get(song, set()) for song in subset
        ]
        if not memberships:
            return len(self.playlists)
        return len(set.intersection(*memberships))

    def prune_itemsets(self, C_k):
        """Count each candidate and keep those reaching ``min_support``.

        Returns a dict mapping each surviving itemset to its playlist count.
        """
        total = len(self.playlists)
        L_k_counter = {}
        for candidate in C_k:
            times = self.calculate_subset_count(candidate)
            if times / total >= self.min_support:
                L_k_counter[candidate] = times
        return L_k_counter

    def fit(self, verbose=True):
        """Mine all frequent itemsets of size >= 2 into ``self.frequent_itemsets``.

        Parameters
        ----------
        verbose : bool
            When true (the default), print the frequent itemsets found at each
            size ``k``, sorted by descending count.
        """
        self.prepare_data()
        self.get_songs_appearences()
        self.generate_L_1()

        self.frequent_itemsets = []
        k = 2
        current = self.L_1
        # Grow itemsets one element at a time until a level yields nothing.
        while len(current) != 0:
            C_k = self.generate_new_candidates(current, k)
            L_k_counter = self.prune_itemsets(C_k)
            L_k = list(L_k_counter)
            self.frequent_itemsets.extend(L_k)
            if verbose:
                print("K = {}: {}".format(
                    k, sorted(
                        L_k_counter.items(), key=lambda x: x[1], reverse=True)))
            k += 1
            current = L_k

    def generate(self):
        """Derive association rules from ``self.frequent_itemsets``.

        Every non-empty proper subset of a frequent itemset is tried as the
        antecedent, with the remaining songs as the consequent. A rule is kept
        when its confidence reaches ``min_confidence`` and its lift reaches
        ``min_lift``.

        Returns
        -------
        list of dict
            One dict per rule with keys ``"antecedent"``, ``"consequent"``
            (frozensets), ``"support"``, ``"confidence"`` and ``"lift"``
            (floats). The same list is stored in ``self.rules``.
        """
        total = len(self.playlists)
        rules = []
        for itemset in self.frequent_itemsets:
            itemset = frozenset(itemset)
            itemset_count = self.calculate_subset_count(itemset)
            for size in range(1, len(itemset)):
                for combo in itertools.combinations(itemset, size):
                    antecedent = frozenset(combo)
                    consequent = itemset - antecedent
                    antecedent_count = self.calculate_subset_count(antecedent)
                    consequent_count = self.calculate_subset_count(consequent)
                    # Zero counts are only reachable with min_support == 0;
                    # confidence and lift are undefined there, so skip.
                    if antecedent_count == 0 or consequent_count == 0:
                        continue
                    confidence = itemset_count / antecedent_count
                    lift = confidence / (consequent_count / total)
                    if (confidence >= self.min_confidence
                            and lift >= self.min_lift):
                        rules.append({
                            "antecedent": antecedent,
                            "consequent": consequent,
                            "support": itemset_count / total,
                            "confidence": confidence,
                            "lift": lift,
                        })
        self.rules = rules
        return rules

In [3]:
# The file stores a pickled Python object (it is unwrapped with `.item()` in
# `Apriori.prepare_data`), and NumPy >= 1.16.3 refuses to load pickled data
# unless `allow_pickle=True` is passed. Unpickling can execute arbitrary code,
# so only load files from a trusted source.
spotify_data = np.load("spotify.npy", allow_pickle=True)

In [4]:
# Build the miner with 0.01 thresholds for support, confidence and lift.
apriori = Apriori(
    data=spotify_data,
    min_support=0.01,
    min_confidence=0.01,
    min_lift=0.01,
)

In [5]:
# Run the level-wise search; prints the frequent itemsets found at each size k.
apriori.fit()

K = 2: [(frozenset({'Congratulations', 'HUMBLE.'}), 214), (frozenset({'XO TOUR Llif3', 'HUMBLE.'}), 204), (frozenset({'HUMBLE.', 'Mask Off'}), 204), (frozenset({'DNA.', 'HUMBLE.'}), 190), (frozenset({'XO TOUR Llif3', 'Congratulations'}), 179), (frozenset({'Broccoli (feat. Lil Yachty)', 'Caroline'}), 172), (frozenset({'Bounce Back', 'Bad and Boujee (feat. Lil Uzi Vert)'}), 169), (frozenset({'goosebumps', 'HUMBLE.'}), 167), (frozenset({'Bad and Boujee (feat. Lil Uzi Vert)', 'HUMBLE.'}), 167), (frozenset({'XO TOUR Llif3', 'Mask Off'}), 163), (frozenset({'Congratulations', 'iSpy (feat. Lil Yachty)'}), 162), (frozenset({'Congratulations', 'Mask Off'}), 162), (frozenset({'Closer', 'Let Me Love You'}), 159), (frozenset({'No Problem (feat. Lil Wayne & 2 Chainz)', 'Broccoli (feat. Lil Yachty)'}), 158), (frozenset({'Bounce Back', 'HUMBLE.'}), 156), (frozenset({'Congratulations', 'goosebumps'}), 155), (frozenset({'iSpy (feat. Lil Yachty)', 'HUMBLE.'}), 155), (frozenset({'Broccoli (feat. Lil Yacht

In [6]:
# Show a bounded preview rather than the whole list, which can be very long.
apriori.frequent_itemsets[:20]

[frozenset({'Broccoli (feat. Lil Yachty)',
            'No Problem (feat. Lil Wayne & 2 Chainz)'}),
 frozenset({'Congratulations', 'goosebumps'}),
 frozenset({'679 (feat. Remy Boyz)', 'Trap Queen'}),
 frozenset({'Bad and Boujee (feat. Lil Uzi Vert)', 'Caroline'}),
 frozenset({'T-Shirt', 'Tunnel Vision'}),
 frozenset({'goosebumps', 'iSpy (feat. Lil Yachty)'}),
 frozenset({'HUMBLE.', 'goosebumps'}),
 frozenset({'Swang', 'XO TOUR Llif3'}),
 frozenset({'Bad and Boujee (feat. Lil Uzi Vert)', 'Bounce Back'}),
 frozenset({'Caroline', 'Mask Off'}),
 frozenset({'Congratulations', 'rockstar'}),
 frozenset({'HUMBLE.', 'Unforgettable'}),
 frozenset({'Black Beatles', 'Fake Love'}),
 frozenset({'Mask Off', 'Slippery (feat. Gucci Mane)'}),
 frozenset({'Bad and Boujee (feat. Lil Uzi Vert)', 'Swang'}),
 frozenset({'Broccoli (feat. Lil Yachty)', 'Closer'}),
 frozenset({'Broccoli (feat. Lil Yachty)', 'iSpy (feat. Lil Yachty)'}),
 frozenset({'Bank Account', 'Magnolia'}),
 frozenset({'Congratulations', 'iS