In [1]:
import pandas as pd
# import thinkplot
import numpy as np
import re
# from tqdm import tqdm
# from reporter import Reporter
import random
import time
import pickle

Import the dictionary of ASIN IDs as keys to product titles as values:

In [2]:
file_name = "ID_map-11-18"
picklefile = open(file_name,'rb')
ID_map = pickle.load(picklefile)

# Function for getting the name based on an ASIN ID:
def get_product_name(ASIN):
    if ASIN in ID_map:
        return ID_map[ASIN]
    else:
        print("There was an error with %s", ASIN)

Import adjacency matrix saved as copurchases pickle file

In [4]:
# open the file for writing
file_name = "copurchases-11-20"
picklefile = open(file_name,'rb')
copurchases = pickle.load(picklefile)

Class for Reporter object that stores cliques, and can print them

In [19]:
class Reporter(object):
    def __init__(self, name):
        self.name = name
        self.cnt = 0
        self.cliques = []

    def inc_count(self):
        self.cnt += 1

    def record(self, clique):
        self.cliques.append(clique)

    def sort_cliques(self):
        self.cliques.sort(key=len, reverse=True)

    def print_max(self, n):
        # print the n largest cliques
        self.sort_cliques()
        print(self.name)
        print('%d recursive calls' % self.cnt)
        for i in range(n):
            clique = self.cliques[i]
            products = []
            for ASIN in clique:
                products.append(get_product_name(ASIN))
            print('%d: %s' % (i, products))
        print()

    def print_report(self):
        print(self.name)
        print('%d recursive calls' % self.cnt)
        for i, clique in enumerate(self.cliques):
            products = []
            for ASIN in clique:
                products.append(get_product_name(ASIN))
            print('%d: %s' % (i, products))
        print()
        
    def get_cliques(self):
        self.sort_cliques()
        return self.cliques


Implement naive Bron–Kerbosch algorithm

In [20]:
def bronKerbosch1(clique, candidates, excluded, reporter):
    '''Naive Bron–Kerbosch algorithm'''
    reporter.inc_count()
    if not candidates and not excluded:
        if len(clique) >= 3:
            reporter.record(clique)
        return
    
    for v in list(candidates):
        new_candidates = candidates.intersection(copurchases[v])
        new_excluded = candidates.intersection(copurchases[v])
        bronKerbosch1(clique+[v], new_candidates, new_excluded, reporter)
        candidates.remove(v)
        excluded.add(v)

In [21]:
start = time.time()
report1 = Reporter('## %s' % bronKerbosch1.__doc__)
bronKerbosch1([], set(copurchases.keys()), set(), report1)
end = time.time()
print('Naive method:', end - start)

Naive method: 3.9603872299194336


In [22]:
cliques = report1.get_cliques()

In [27]:
print(len([x for x in cliques if len(x) > 6]))

0


There are no maximal cliques that are bigger than 6

In [28]:
report1.print_max(10)

## Naive Bron–Kerbosch algorithm
1523133 recursive calls
0: ['Masks of Black Africa (African Art Art of Illustration)', "A Collector's Guide to African Sculpture (Schiffer Book for Collectors)", 'African Masks: The Barbier-Mueller Collection', 'The Tribal Arts of Africa', 'History of Art in Africa (Trade Version)', 'Africa: The Art of a Continent']
1: ['The Voice of Reason : Essays in Objectivist Thought (The Ayn Rand Library, Vol V)', 'The Romantic Manifesto', 'Virtue of Selfishness: A New Concept of Egoism', 'Philosophy: Who Needs It', 'Capitalism: The Unknown Ideal', 'For the New Intellectual']
2: ['Sweet Forgiveness', 'Takin My Time', 'Streetlights', 'Give It Up', 'Bonnie Raitt', 'Home Plate']
3: ['Introducing...Rubén González', 'Chanchullo', 'Buena Vista Social Club Presents Ibrahim Ferrer', 'A Toda Cuba Le Gusta', 'Buena Vista Social Club Presents Omara Portuondo', 'Buena Vista Social Club']
4: ['The Lion, the Witch and the Wardrobe', 'The Horse and His Boy', 'The Silver Chair', 

Implement Bron–Kerbosch algorithm with a pivot. Randomly pick a vertex u from `candidates` or `excluded`. The maximal clique must include either u or one of its non-neighbors.

In [9]:
def bronKerbosch2(clique, candidates, excluded, reporter):
    '''Bron–Kerbosch algorithm with pivot'''
    reporter.inc_count()
    if not candidates and not excluded:
        if len(clique) >= 3:
            reporter.record(clique)
        return
    u = pick_pivot(candidates) or pick_pivot(excluded)
    # only consider u or its non neighbors
    for v in list(candidates.difference(copurchases[u])):
        new_candidates = candidates.intersection(copurchases[v])
        new_excluded = candidates.intersection(copurchases[v])
        bronKerbosch2(clique+[v], new_candidates, new_excluded, reporter)
        candidates.remove(v)
        excluded.add(v)
        
def pick_pivot(nodes):
    if nodes:
        return random.sample(nodes, 1)[0]

In [10]:
start = time.time()
report2 = Reporter('## %s' % bronKerbosch2.__doc__)
bronKerbosch2([], set(copurchases.keys()), set(), report2)
end = time.time()
print('Pivot method:', end - start)

Pivot method: 6.0161213874816895


In [11]:
report2.print_max(10)

## Bron–Kerbosch algorithm with pivot
1268104 recursive calls
0: ['Masks of Black Africa (African Art Art of Illustration)', "A Collector's Guide to African Sculpture (Schiffer Book for Collectors)", 'The Tribal Arts of Africa', 'Africa: The Art of a Continent', 'African Masks: The Barbier-Mueller Collection', 'History of Art in Africa (Trade Version)']
1: ['Currents of Death', "Electromagnetic Fields: A Consumer's Guide to the Issues and How to Protect Ourselves", "Cell Phones: Invisible Hazards in the Wireless Age: An Insider's Alarming Discoveries about Cancer and Genetic Damage", 'Cell Towers: Wireless Convenience? or Environmental Hazard?', 'The Body Electric: Electromagnetism and the Foundation of Life', 'Cross Currents: The Promise of Electromedicine, the Perils of Electropollution']
2: ['The Lion, the Witch and the Wardrobe', 'The Last Battle', 'The Silver Chair', 'The Horse and His Boy', "The Magician's Nephew (rack) (Narnia)", 'Prince Caspian (rack) : The Return to Narnia (Na

Implement Bron–Kerbosch algorithm with a pivot and degeneracy ordering. Degeneracy ordering is the ordering of vertices such that each vertex has d or fewer neighbors that come later in the ordering. Select vertex of minimum degree among remaining vertices.

In [37]:
from collections import deque 
from collections import defaultdict

def bronKerbosch3(clique, candidates, excluded, reporter):
    '''Bron–Kerbosch algorithm with pivot and degeneracy ordering'''
    reporter.inc_count()
    if not candidates and not excluded:
        if len(clique) >= 3:
            reporter.record(clique)
        return
    for v in list(degeneracy_order2(candidates)):
        new_candidates = candidates.intersection(copurchases[v])
        new_excluded = candidates.intersection(copurchases[v])
        bronKerbosch2(clique.append(v), new_candidates, new_excluded, reporter)
        candidates.remove(v)
        excluded.add(v)
        
def degeneracy_order(nodes):
    deg = {}
    for node in nodes:
        deg[node] = len(copurchases[node])
    
    output = []
    while deg:
        # find min degree
        i, d = min(deg.items(), key=lambda pair:pair[1])
        output.append(i)
        del deg[i]
        for v in copurchases[i]:
            if v in deg:
                deg[v] -= 1
    return output

def degeneracy_order2(nodes):
    ordering = []
    ordering_set = set()
    degrees = defaultdict(lambda : 0)
    degen = defaultdict(list)
    max_deg = -1
    for v in nodes:
        deg = len(copurchases[v])
        degen[deg].append(v)
        degrees[v] = deg
        if deg > max_deg:
            max_deg = deg
    
    while True:
        i = 0
        while i <= max_deg:
            if len(degen[i]) != 0:
                break
            i += 1
        else:
            break
        v = degen[i].pop()
        ordering.append(v)
        ordering_set.add(v)
        for w in copurchases[v]:
            if w not in ordering_set and w in nodes:
                deg = degrees[w]
                if deg > 0:
                    degen[deg].remove(w)
                    degrees[w] -= 1
                    degen[deg - 1].append(w)
    ordering.reverse()
    return ordering

In [38]:
start = time.time()
res = degeneracy_order2(set(copurchases.keys()))
print(res[:10])
end = time.time()
print('Time to get degeneracy ordering:', end - start)

['1564773531', '1931514720', '1584230088', '094211034X', '0810962160', '1563088320', '1560324325', '0141303158', '0864425384', 'B000005Z0M']
Time to get degeneracy ordering: 3017.6201479434967


In [42]:
start = time.time()
report3 = Reporter('## %s' % bronKerbosch3.__doc__)
bronKerbosch3([], set(copurchases.keys()), set(), report3)
end = time.time()
print('Pivot method with degeneracy ordering:', end - start)

TypeError: object of type 'NoneType' has no len()

In [None]:
report3.print_max(10)