# Cargar datos

In [1]:
import numpy as np
import pandas as pd
import collections
import itertools

In [2]:
# Load the playlists stored in "spotify.npy".
# The file holds a pickled Python object (it is unwrapped with .item() below),
# and NumPy >= 1.16.3 refuses to load such files unless allow_pickle=True is
# passed explicitly.
# NOTE(review): unpickling can execute arbitrary code -- only load this file
# when it comes from a trusted source.
spotify_data = np.load("spotify.npy", allow_pickle=True)
# .item() unwraps the 0-d array into the stored mapping; only its values are kept.
spotify_playlists = list(spotify_data.item().values())
# Each playlist becomes a set, so a song repeated inside a playlist counts once.
spotify_playlists = [set(playlist) for playlist in spotify_playlists]
# One entry per (playlist, song) pair.
songs_flatten = [item for sublist in spotify_playlists for item in sublist]
# song -> number of playlists containing it, in value_counts() order
# (most frequent first).
total_songs_count = pd.Series(songs_flatten).value_counts().to_dict()

In [3]:
# Minimum support: fraction of the playlists a song or itemset must appear in
# to be kept (it is multiplied by playlists_length to get a count cutoff).
min_support = 0.04
# Number of playlists in the dataset.
playlists_length = len(spotify_playlists)
# Bare expression so the notebook displays the value as the cell output.
playlists_length

10000

In [4]:
# Two-way lookup between songs and compact integer ids.
# Ids are assigned in the iteration order of total_songs_count.
songs_id_dict = dict(enumerate(total_songs_count))
# Inverse mapping: song -> id.
songs_song_name_dict = {name: idx for idx, name in songs_id_dict.items()}

In [5]:
# Keep only the songs whose playlist count reaches the minimum support.
# The comparison is inclusive (>=) so that single songs are filtered with the
# same rule that prune() and the itemset search use for their cutoffs.
min_song_count = min_support * playlists_length
filtered_songs = {
    song: count
    for song, count in total_songs_count.items()
    if count >= min_song_count
}
# Same counts, keyed by integer song id instead of by song.
filtered_songs_id = {
    songs_song_name_dict[song]: count for song, count in filtered_songs.items()
}
# Bare expression so the notebook displays the mapping as the cell output.
filtered_songs_id

{0: 723, 1: 465, 2: 454, 3: 424, 4: 414, 5: 411, 6: 403}

In [6]:
def count_occurences(songs_subset, full_transactions):
    """Count the transactions that contain every item of ``songs_subset``.

    Args:
        songs_subset: Iterable of hashable items (the candidate itemset).
        full_transactions: Iterable of transactions, each an iterable of
            hashable items.

    Returns:
        int: Number of transactions of which ``songs_subset`` is a subset.
    """
    # Build the candidate set once instead of once per transaction.
    wanted = set(songs_subset)
    # Plain subset, not proper subset: a transaction made up of exactly the
    # candidate items still contains the candidate and must be counted.
    return sum(1 for playlist in full_transactions if wanted.issubset(playlist))

In [7]:
def prune(counter, threshold, itemsets_list=None):
    """Return the itemsets in ``counter`` whose count reaches the cutoff.

    The cutoff is ``ceil(len(itemsets_list) * threshold)``.

    Args:
        counter: Mapping from itemset to its occurrence count.
        threshold: Factor applied to ``len(itemsets_list)`` to obtain the
            minimum count an itemset needs in order to be kept.
        itemsets_list: Sized collection whose length scales ``threshold``.
            The body used to read this name before assigning it, so every
            call raised ``UnboundLocalError``; it is now an explicit, optional
            parameter.
            NOTE(review): presumably this is the collection of transactions
            the counts were taken over -- confirm against the intended caller.

    Returns:
        list: Keys of ``counter`` (in its iteration order) whose count is
        greater than or equal to the cutoff.

    Raises:
        ValueError: If ``itemsets_list`` is not supplied.
    """
    if itemsets_list is None:
        raise ValueError(
            "prune() needs itemsets_list to turn the threshold into a count"
        )
    # Only the size of the collection matters for the cutoff.
    min_count = np.ceil(len(itemsets_list) * threshold)
    return [itemset for itemset, count in counter.items() if count >= min_count]

In [8]:
# Re-encode every playlist as a set of integer song ids.
spotify_playlists_id = [
    {songs_song_name_dict[track] for track in tracks}
    for tracks in spotify_playlists
]

In [12]:
# Level-wise (Apriori-style) search for frequent itemsets of size >= 2.
# Each pass builds every combination of `count_len` candidate songs, keeps the
# ones that reach the minimum support, and moves on to the next size. The
# search stops at the first size for which no combination survives.
min_count = min_support * playlists_length
# Ids of the songs that passed the single-song filter, in their original order.
candidate_items = list(filtered_songs_id)
# itemset size -> list of surviving itemsets (tuples of song ids).
frequent_itemsets = {}
converge_flag = False
count_len = 2
while not converge_flag:
    # Candidate itemsets of the current size.
    transactions = list(itertools.combinations(candidate_items, count_len))
    print(len(transactions))
    survivors = [
        itemset
        for itemset in transactions
        if count_occurences(itemset, spotify_playlists_id) >= min_count
    ]
    if not survivors:
        # Nothing of this size is frequent, so no larger itemset can be either.
        converge_flag = True
    else:
        frequent_itemsets[count_len] = survivors
        # Only songs that occur in a surviving itemset can be part of a larger
        # frequent itemset; shrinking the pool also guarantees termination once
        # count_len exceeds the number of remaining songs.
        surviving_items = set(itertools.chain.from_iterable(survivors))
        candidate_items = [
            item for item in candidate_items if item in surviving_items
        ]
        count_len += 1

21
[(0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), (1, 2), (1, 3), (1, 4), (1, 5), (1, 6), (2, 3), (2, 4), (2, 5), (2, 6), (3, 4), (3, 5), (3, 6), (4, 5), (4, 6), (5, 6)]
