In [1]:
import pandas as pd
# import thinkplot
import numpy as np
import re
# from tqdm import tqdm
# from reporter import Reporter
import random
import time
import pickle

Import the dictionary of ASIN IDs as keys to product titles as values:

In [2]:
file_name = "ID_map-11-18"
picklefile = open(file_name,'rb')
ID_map = pickle.load(picklefile)

# Function for getting the name based on an ASIN ID:
def get_product_name(ASIN):
    if ASIN in ID_map:
        return ID_map[ASIN]
    else:
        print("There was an error with %s", ASIN)

Import adjacency matrix saved as copurchases pickle file

In [3]:
# open the file for writing
file_name = "copurchases-11-18"
picklefile = open(file_name,'rb')
copurchases = pickle.load(picklefile)

Class for Reporter object that stores cliques, and can print them

In [4]:
class Reporter(object):
    def __init__(self, name):
        self.name = name
        self.cnt = 0
        self.cliques = []

    def inc_count(self):
        self.cnt += 1

    def record(self, clique):
        self.cliques.append(clique)

    def sort_cliques(self):
        self.cliques.sort(key=len, reverse=True)

    def print_max(self, n):
        # print the n largest cliques
        self.sort_cliques()
        print(self.name)
        print('%d recursive calls' % self.cnt)
        for i in range(n):
            clique = self.cliques[i]
            products = []
            for ASIN in clique:
                products.append(get_product_name(ASIN))
            print('%d: %s' % (i, products))
        print()

    def print_report(self):
        print(self.name)
        print('%d recursive calls' % self.cnt)
        for i, clique in enumerate(self.cliques):
            products = []
            for ASIN in clique:
                products.append(get_product_name(ASIN))
            print('%d: %s' % (i, products))
        print()


Implement naive Bron–Kerbosch algorithm

In [5]:
def bronKerbosch1(clique, candidates, excluded, reporter):
    '''Naive Bron–Kerbosch algorithm'''
    reporter.inc_count()
    if not candidates and not excluded:
        if len(clique) >= 3:
            reporter.record(clique)
        return
    
    for v in list(candidates):
        new_candidates = candidates.intersection(copurchases[v])
        new_excluded = candidates.intersection(copurchases[v])
        bronKerbosch1(clique+[v], new_candidates, new_excluded, reporter)
        candidates.remove(v)
        excluded.add(v)

In [6]:
start = time.time()
report1 = Reporter('## %s' % bronKerbosch1.__doc__)
bronKerbosch1([], set(copurchases.keys()), set(), report1)
end = time.time()
print('Naive method:', end - start)

Naive method: 3.9209046363830566


In [7]:
report1.print_max(10)

## Naive Bron–Kerbosch algorithm
1525748 recursive calls
0: ['Until You', 'Whitney, My Love', 'Once and Always', 'A Kingdom of Dreams', 'Something Wonderful', 'Almost Heaven']
1: ['The Return of Santiago', 'Tales of the Galactic Midway', 'Tales of the Velvet Comet', 'The Outpost', 'Birthright: The Book of Man', 'Santiago: A Myth of the Far Future']
2: ["Greater Works: Experiencing God's Power", 'Smith Wigglesworth on Healing', 'Smith Wigglesworth on the Holy Spirit', 'Wigglesworth on the Anointing', 'Ever Increasing Faith', 'Faith That Prevails']
3: ['The Unbreakable Code', 'Winds of Freedom: The Story of the Navajo Code Talkers of World War II', 'Warriors: Navajo Code Talkers', 'Unsung Heroes of World War II: The Story of the Navajo Code Talkers', 'Navajo Weapon: The Navajo Code Talkers (Native American Culture)', 'Navajo Code Talkers']
4: ['The Book of Words (Sefer Shel Devarim): Talking Spiritual Life, Living Spiritual Talk', 'Honey from the Rock: An Introduction to Jewish Mysticism

Implement Bron–Kerbosch algorithm with a pivot. Randomly pick a vertex u from `candidates` or `excluded`. The maximal clique must include either u or one of its non-neighbors.

In [8]:
def bronKerbosch2(clique, candidates, excluded, reporter):
    '''Bron–Kerbosch algorithm with pivot'''
    reporter.inc_count()
    if not candidates and not excluded:
        if len(clique) >= 3:
            reporter.record(clique)
        return
    u = pick_pivot(candidates) or pick_pivot(excluded)
    # only consider u or its non neighbors
    for v in list(candidates.difference(copurchases[u])):
        new_candidates = candidates.intersection(copurchases[v])
        new_excluded = candidates.intersection(copurchases[v])
        bronKerbosch2(clique+[v], new_candidates, new_excluded, reporter)
        candidates.remove(v)
        excluded.add(v)
        
def pick_pivot(nodes):
    if nodes:
        return random.sample(nodes, 1)[0]

In [9]:
start = time.time()
report2 = Reporter('## %s' % bronKerbosch2.__doc__)
bronKerbosch2([], set(copurchases.keys()), set(), report2)
end = time.time()
print('Pivot method:', end - start)

Pivot method: 5.460216522216797


In [44]:
report2.print_max(10)

## Bron–Kerbosch algorithm with pivot
1270896 recursive calls
0: ['A Basic Guide To Importing', 'Export/Import Procedures and Documentation (Export/Import Procedures & Documentation)', 'Importing Into the United States: : A Guide for Commercial Importers', 'Building an Import/Export Business, 3rd Edition', 'Import/Export: How to Get Started in International Trade', "Start Your Own Import/Export Business (Entrepreneur Magazine's Start Ups)"]
1: ["Dr. Jensen's Guide to Body Chemistry & Nutrition", "Dr. Jensen's Juicing Therapy : Nature's Way to Better Health and a Longer Life", "Dr. Jensen's Guide to Diet and Detoxification : Healthy Secrets from Around the World", "Dr. Jensen's Nutrition Handbook : A Daily Regimen for Healthy Living", 'Foods That Heal', "Dr. Jensen's Guide to Better Bowel Care: A Complete Program for Tissue Cleansing Through Bowel Management"]
2: ['Hollywood', 'Factotum', 'Notes of a Dirty Old Man', 'Post Office', 'Women', 'Ham on Rye']
3: ['Creative Companion: How to F

Implement Bron–Kerbosch algorithm with a pivot and degeneracy ordering. Degeneracy ordering is the ordering of vertices such that each vertex has d or fewer neighbors that come later in the ordering. Select vertex of minimum degree among remaining vertices.

In [54]:
from collections import deque 

def bronKerbosch3(clique, candidates, excluded, reporter):
    '''Bron–Kerbosch algorithm with pivot and degeneracy ordering'''
    reporter.inc_count()
    if not candidates and not excluded:
        if len(clique) >= 3:
            reporter.record(clique)
        return
    for v in list(degeneracy_order(candidates)):
        new_candidates = candidates.intersection(copurchases[v])
        new_excluded = candidates.intersection(copurchases[v])
        bronKerbosch2(clique.append(v), new_candidates, new_excluded, reporter)
        candidates.remove(v)
        excluded.add(v)
        
def degeneracy_order(nodes):
    deg = {}
    for node in nodes:
        deg[node] = len(copurchases[node])
    
    output = []
    while deg:
        # find min degree
        i, d = min(deg.items(), key=lambda pair:pair[1])
        output.append(i)
        del deg[i]
        for v in copurchases[i]:
            if v in deg:
                deg[v] -= 1
    return output

In [57]:
# start = time.time()
# res = degeneracy_order(set(copurchases.keys()))
# print(res[:10])
# end = time.time()
# print('Time to get degeneracy ordering:', end - start)

In [56]:
# start = time.time()
# report3 = Reporter('## %s' % bronKerbosch3.__doc__)
# bronKerbosch3([], set(copurchases.keys()), set(), report3)
# end = time.time()
# print('Pivot method with degeneracy ordering:', end - start)

In [None]:
report3.print_max(10)

In [60]:
from collections import defaultdict

def find_cliques(graph):
    p = set(graph.keys())
    r = set()
    x = set()
    cliques = []
    for v in degeneracy_ordering(graph):
        neighs = graph[v]
        find_cliques_pivot(graph, r.union([v]), p.intersection(neighs), x.intersection(neighs), cliques)
        p.remove(v)
        x.add(v)
    return sorted(cliques, lambda x: len(x))

def find_cliques_pivot(graph, r, p, x, cliques):
    if len(p) == 0 and len(x) == 0:
        cliques.append(r)
    else:
        u = iter(p.union(x)).next()
        for v in p.difference(graph[u]):
            neighs = graph[v]
            find_cliques_pivot(graph, r.union([v]), p.intersection(neighs), x.intersection(neighs), cliques)
            p.remove(v)
            x.add(v)

def degeneracy_ordering(graph):
    ordering = []
    ordering_set = set()
    degrees = defaultdict(lambda : 0)
    degen = defaultdict(list)
    max_deg = -1
    for v in graph:
        deg = len(graph[v])
        degen[deg].append(v)
        degrees[v] = deg
        if deg > max_deg:
            max_deg = deg

    while True:
        i = 0
        while i <= max_deg:
            if len(degen[i]) != 0:
                break
            i += 1
        else:
            break
        v = degen[i].pop()
        ordering.append(v)
        ordering_set.add(v)
        for w in graph[v]:
            if w not in ordering_set:
                deg = degrees[w]
                degen[deg].remove(w)
                if deg > 0:
                    degrees[w] -= 1
                    degen[deg - 1].append(w)

    ordering.reverse()
    return ordering

In [61]:
start = time.time()
res = find_cliques(copurchases)
print('Pivot method with degeneracy ordering:', end - start)
end = time.time()

ValueError: list.remove(x): x not in list