In [10]:
import pandas as pd
# import thinkplot
import numpy as np
import re
# from tqdm import tqdm
# from reporter import Reporter
import random
import time
import pickle

Import the dictionary of ASIN IDs as keys to product titles as values:

In [11]:
file_name = "ID_map-11-18"
picklefile = open(file_name,'rb')
ID_map = pickle.load(picklefile)

# Function for getting the name based on an ASIN ID:
def get_product_name(ASIN):
    if ASIN in ID_map:
        return ID_map[ASIN]
    else:
        print("There was an error with %s", ASIN)

Import adjacency matrix saved as copurchases pickle file

In [12]:
# open the file for writing
file_name = "copurchases-11-20"
picklefile = open(file_name,'rb')
copurchases = pickle.load(picklefile)

Class for Reporter object that stores cliques, and can print them

In [13]:
class Reporter(object):
    def __init__(self, name):
        self.name = name
        self.cnt = 0
        self.cliques = []

    def inc_count(self):
        self.cnt += 1

    def record(self, clique):
        self.cliques.append(clique)

    def sort_cliques(self):
        self.cliques.sort(key=len, reverse=True)

    def print_max(self, n):
        # print the n largest cliques
        self.sort_cliques()
        print(self.name)
        print('%d recursive calls' % self.cnt)
        for i in range(n):
            clique = self.cliques[i]
            products = []
            for ASIN in clique:
                products.append(get_product_name(ASIN))
            print('%d: %s' % (i, products))
        print()

    def print_report(self):
        print(self.name)
        print('%d recursive calls' % self.cnt)
        for i, clique in enumerate(self.cliques):
            products = []
            for ASIN in clique:
                products.append(get_product_name(ASIN))
            print('%d: %s' % (i, products))
        print()


Implement naive Bron–Kerbosch algorithm

In [14]:
def bronKerbosch1(clique, candidates, excluded, reporter):
    '''Naive Bron–Kerbosch algorithm'''
    reporter.inc_count()
    if not candidates and not excluded:
        if len(clique) >= 3:
            reporter.record(clique)
        return
    
    for v in list(candidates):
        new_candidates = candidates.intersection(copurchases[v])
        new_excluded = candidates.intersection(copurchases[v])
        bronKerbosch1(clique+[v], new_candidates, new_excluded, reporter)
        candidates.remove(v)
        excluded.add(v)

In [15]:
start = time.time()
report1 = Reporter('## %s' % bronKerbosch1.__doc__)
bronKerbosch1([], set(copurchases.keys()), set(), report1)
end = time.time()
print('Naive method:', end - start)

Naive method: 5.654755353927612


In [16]:
report1.print_max(10)

## Naive Bron–Kerbosch algorithm
1523792 recursive calls
0: ['The Rainy Day Adventure (Little Tiger and Friends)', 'Time for Bed, Little Tiger: Lift the Flap', 'Hide and Seek, Little Tiger! (Little Tiger Lift-the-Flap)', "Little Tiger's Big Surprise!", 'Wake Up, Little Tiger: Lift the Flap', 'Bathtime, Little Tiger! (Little Tiger Lift-the-Flap)']
1: ['Scary Stories Audio CD Collection', 'More Scary Stories To Tell In The Dark', 'Ghosts!: Ghostly Tales from Folklore (An I Can Read Book, Level 2)', 'Scary Stories to Tell in the Dark 25th Anniversary Edition : Collected from American Folklore (Scary Stories)', 'In a Dark, Dark Room and Other Scary Stories', 'Scary Stories 3 : More Tales to Chill Your Bones (Scary Stories)']
2: ['More Than Singing: Discovering Music in Preschool and Kindergarten', 'More Than Counting: Whole Math Activities for Preschool and Kindergarten', 'More Than Painting: Exploring the Wonders of Art in Preschool and Kindergarten', 'More Than Magnets: Exploring the Won

Implement Bron–Kerbosch algorithm with a pivot. Randomly pick a vertex u from `candidates` or `excluded`. The maximal clique must include either u or one of its non-neighbors.

In [42]:
def bronKerbosch2(clique, candidates, excluded, reporter):
    '''Bron–Kerbosch algorithm with pivot'''
    reporter.inc_count()
    if not candidates and not excluded:
        if len(clique) >= 3:
            reporter.record(clique)
        return
    u = pick_pivot(candidates) or pick_pivot(excluded)
    # only consider u or its non neighbors
    for v in list(candidates.difference(copurchases[u])):
        new_candidates = candidates.intersection(copurchases[v])
        new_excluded = candidates.intersection(copurchases[v])
        bronKerbosch2(clique+[v], new_candidates, new_excluded, reporter)
        candidates.remove(v)
        excluded.add(v)
        
def pick_pivot(nodes):
    if nodes:
        return random.sample(nodes, 1)[0]

In [43]:
start = time.time()
report2 = Reporter('## %s' % bronKerbosch2.__doc__)
bronKerbosch2([], set(copurchases.keys()), set(), report2)
end = time.time()
print('Pivot method:', end - start)

Pivot method: 4.596645832061768


In [44]:
report2.print_max(10)

## Bron–Kerbosch algorithm with pivot
1270896 recursive calls
0: ['A Basic Guide To Importing', 'Export/Import Procedures and Documentation (Export/Import Procedures & Documentation)', 'Importing Into the United States: : A Guide for Commercial Importers', 'Building an Import/Export Business, 3rd Edition', 'Import/Export: How to Get Started in International Trade', "Start Your Own Import/Export Business (Entrepreneur Magazine's Start Ups)"]
1: ["Dr. Jensen's Guide to Body Chemistry & Nutrition", "Dr. Jensen's Juicing Therapy : Nature's Way to Better Health and a Longer Life", "Dr. Jensen's Guide to Diet and Detoxification : Healthy Secrets from Around the World", "Dr. Jensen's Nutrition Handbook : A Daily Regimen for Healthy Living", 'Foods That Heal', "Dr. Jensen's Guide to Better Bowel Care: A Complete Program for Tissue Cleansing Through Bowel Management"]
2: ['Hollywood', 'Factotum', 'Notes of a Dirty Old Man', 'Post Office', 'Women', 'Ham on Rye']
3: ['Creative Companion: How to F

Implement Bron–Kerbosch algorithm with a pivot and degeneracy ordering. Degeneracy ordering is the ordering of vertices such that each vertex has d or fewer neighbors that come later in the ordering. Select vertex of minimum degree among remaining vertices.

In [35]:
def bronKerbosch3(clique, candidates, excluded, reporter):
    '''Bron–Kerbosch algorithm with pivot and degeneracy ordering'''
    reporter.inc_count()
    if not candidates and not excluded:
        if len(clique) >= 3:
            reporter.record(clique)
        return
    for v in list(degeneracy_order(candidates)):
        new_candidates = candidates.intersection(copurchases[v])
        new_excluded = candidates.intersection(copurchases[v])
        bronKerbosch2(clique.append(v), new_candidates, new_excluded, reporter)
        candidates.remove(v)
        excluded.add(v)
        
def degeneracy_order(nodes):
    deg = {}
    for node in graph:
        deg[node] = len(graph[node])
    
    while deg:
        # find min degree
        i, d = min(deg.items(), key=lambda pair:pair[1])
        yield i
        del deg[i]
        for v in graph[i]:
            if v in deg:
                deg[v] -= 1

In [36]:
report3 = Reporter('## %s' % bronKerbosch3.__doc__)
bronKerbosch3([], set(copurchases.keys()), set(), report3)

KeyboardInterrupt: 

In [None]:
report3.print_max(10)