In [1]:
from pprint import pprint
from pymongo import MongoClient
import pyfpgrowth
import time

# Connect to MongoDB; MongoClient() is called with no arguments, so the driver's default connection settings apply.
client = MongoClient()
# Handle to the `test` database; the query helpers in this notebook read its `business` and `review` collections.
db = client.test

### Set the constants: the minimum support for frequent patterns, the minimum confidence for rule generation, the minimum star rating a review needs for its business to enter the itemset, and the city to run the algorithm on.

In [2]:
# Support threshold passed to find_patterns, which forwards it to pyfpgrowth.find_frequent_patterns.
minimum_support = 5
# Confidence threshold passed to find_rules, which forwards it to pyfpgrowth.generate_association_rules.
minimum_confidence = 0.9
# A review needs at least this many stars for its business to be included in the itemset.
minimum_stars = 3
# City whose businesses are analysed (matched against the `city` field of business documents).
city='Pittsburgh'

In [3]:
def create_business_list_with_weights_by_stars(reviews_cur, min_stars):
    '''
    Collect the ids of businesses that received at least one review rated
    `min_stars` or higher.

        reviews_cur : reviews for the businesses of the selected city; each
                      review is read through its 'stars' and 'business_id' keys.
        type : cursor (any iterable of review documents)

        min_stars : minimum star rating (inclusive) a review needs for its
                    business to be added to the itemset.
        type : number

    returns: dict mapping every qualifying business_id to the weight 1, in
             order of first appearance.
             NOTE(review): the weight is always 1, i.e. reviews are not
             counted per business -- confirm this is intended.
    '''
    # Lazily pick the business id of each review that meets the threshold.
    qualifying_ids = (
        doc['business_id'] for doc in reviews_cur if doc['stars'] >= min_stars
    )
    # fromkeys de-duplicates the ids while keeping first-seen order.
    return dict.fromkeys(qualifying_ids, 1)

In [4]:
def get_category_list_by_business_ids(business_id_list):
    '''
    Query the `business` collection for the given business ids.

    Inputs:
            business_id_list : business ids to match with `$in`.
            type : list

    Returns:
        cursor over the matching business documents
    '''
    id_filter = {'$in': business_id_list}
    return db.business.find({'business_id': id_filter})


def create_weighted_categories_list(busness_ids_with_weights):
    '''
    Create the categories list that is fed to the frequent-itemset miner.

    Inputs : busness_ids_with_weights -- dict mapping business_id to an
             integer weight; a business's category list is repeated `weight`
             times in the result.
    return : itemset list (one entry per repetition; each entry is the
             business's `categories` value).

    Businesses returned by the query whose id is not in the input dict, or
    whose `categories` value is missing/None, contribute nothing.
    '''
    categories_fp_input = []
    business_cur = get_category_list_by_business_ids(list(busness_ids_with_weights))

    for business in business_cur:
        business_id = business['business_id']
        # Dict membership is O(1); scanning a list of ids for every business
        # made this loop quadratic.
        if business_id not in busness_ids_with_weights:
            continue

        categories = business.get('categories')
        # A business without categories has nothing to contribute, and a None
        # transaction cannot be iterated by the pattern miner.
        if categories is None:
            continue

        # Repeat the same category list `weight` times.
        categories_fp_input.extend([categories] * busness_ids_with_weights[business_id])

    return categories_fp_input

In [5]:
def create_frequent_itemset(reviews_cursor, min_stars):
    '''
    Create the itemset used for mining patterns and rules.

    Filters the reviews by `min_stars` into a business-id/weight mapping and
    expands that mapping into the list of category lists.
    '''
    return create_weighted_categories_list(
        create_business_list_with_weights_by_stars(reviews_cursor, min_stars)
    )


def find_patterns(itemset, min_support):
    '''
    Create patterns from the itemset using the minimum support.

    Thin wrapper: forwards both arguments unchanged to
    pyfpgrowth.find_frequent_patterns and returns its result.
    '''
    frequent_patterns = pyfpgrowth.find_frequent_patterns(itemset, min_support)
    return frequent_patterns


def find_rules(patterns, min_confidence):
    '''
    Find association rules from the generated patterns.

    Thin wrapper: forwards both arguments unchanged to
    pyfpgrowth.generate_association_rules and returns its result.
    '''
    association_rules = pyfpgrowth.generate_association_rules(patterns, min_confidence)
    return association_rules

In [6]:
def get_reviews_cursor_by_city(business_ids):
    '''
    Return a cursor over the reviews whose business_id is in `business_ids`.

    Despite the name, the filter is the list of business ids, not a city;
    callers pass the ids of the businesses of the city they are interested in.
    '''
    id_filter = {'$in': business_ids}
    return db.review.find({'business_id': id_filter})


def get_business_by_city(city):
    '''
    Return a cursor over the business documents of the given city.

    input- city: name of the city to get all businesses for (matched exactly
                 against the `city` field).
    '''
    city_query = {'city': city}
    return db.business.find(city_query)

# Kept for backward compatibility with any cell that still reads this name;
# get_reviews_cursor_for_city no longer consumes it.
business_cursor = get_business_by_city(city)

def get_reviews_cursor_for_city(city_name=None):
    '''
    Return a cursor over all reviews of the businesses located in a city.

    city_name : city to look up; defaults to the notebook-level `city`
                constant when omitted.

    A fresh business query is issued on every call. The previous version
    iterated the shared module-level `business_cursor`, which is exhausted
    after the first pass, so a second call silently searched reviews for an
    empty id list.
    '''
    target_city = city if city_name is None else city_name
    business_id_list = [
        business['business_id'] for business in get_business_by_city(target_city)
    ]
    return get_reviews_cursor_by_city(business_id_list)


In [7]:
# Fetch the reviews of the configured city's businesses, then turn the ones
# rated at least `minimum_stars` into the list of category lists to mine.
reviews_for_city = get_reviews_cursor_for_city()
itemset = create_frequent_itemset(reviews_for_city, minimum_stars)

In [8]:
# Mine frequent category patterns from the itemset using the configured support threshold.
patterns = find_patterns(itemset, minimum_support)

In [9]:
# Derive association rules from the mined patterns using the configured confidence threshold.
rules = find_rules(patterns, minimum_confidence)

### Print suggestions of business domains from the rules.

In [10]:
# Map each selected rule's antecedent (a tuple of categories) to the string
# form of the categories the rule suggests alongside it.
item_together_counts = {}
for antecedent, rule in rules.items():
    # NOTE(review): `rule` is presumably pyfpgrowth's (consequent, confidence)
    # pair, so len(rule) is 2 and this keeps antecedents with more than two
    # categories; it does not compare against the consequent's size. The
    # comparison is left unchanged to preserve the printed output -- confirm
    # the intent.
    if len(antecedent) > len(rule):
        # Keys of `rules` are unique, so each antecedent is stored exactly once.
        # The old "already present" branch could never run, and would have
        # raised AttributeError by calling .extend on a str.
        item_together_counts[tuple(antecedent)] = str(rule[0])

for items, should_include in item_together_counts.items():
    print(str(items) + " ---> " + should_include)

('Guitar Stores', 'Local Services', 'Musical Instrument Services') ---> ('Musical Instruments & Teachers', 'Shopping')
('Guitar Stores', 'Local Services', 'Musical Instruments & Teachers') ---> ('Musical Instrument Services', 'Shopping')
('Guitar Stores', 'Musical Instrument Services', 'Musical Instruments & Teachers') ---> ('Local Services', 'Shopping')
('Local Services', 'Musical Instrument Services', 'Musical Instruments & Teachers') ---> ('Shopping',)
('Guitar Stores', 'Musical Instrument Services', 'Shopping') ---> ('Local Services', 'Musical Instruments & Teachers')
('Guitar Stores', 'Musical Instruments & Teachers', 'Shopping') ---> ('Local Services', 'Musical Instrument Services')
('Musical Instrument Services', 'Musical Instruments & Teachers', 'Shopping') ---> ('Local Services',)
('Guitar Stores', 'Local Services', 'Shopping') ---> ('Musical Instrument Services', 'Musical Instruments & Teachers')
('Local Services', 'Musical Instruments & Teachers', 'Shopping') ---> ('Musical 

('Arts & Entertainment', 'Education', 'Shopping', 'Specialty Schools') ---> ('Art Schools',)
('Art Schools', 'Education', 'Shopping') ---> ('Specialty Schools',)
('Art Schools', 'Shopping', 'Specialty Schools') ---> ('Education',)
('Local Services', 'Post Offices', 'Public Services & Government') ---> ('Shipping Centers',)
('Local Services', 'Post Offices', 'Shipping Centers') ---> ('Public Services & Government',)
('Local Services', 'Public Services & Government', 'Shipping Centers') ---> ('Post Offices',)
('Post Offices', 'Public Services & Government', 'Shipping Centers') ---> ('Local Services',)
('Home Services', 'Real Estate Agents', 'Real Estate Services') ---> ('Real Estate',)
('Real Estate', 'Real Estate Agents', 'Real Estate Services') ---> ('Home Services',)
('Jewelry', 'Local Services', 'Watches') ---> ('Shopping',)
('Local Services', 'Shopping', 'Watches') ---> ('Jewelry',)
('Computers', 'IT Services & Computer Repair', 'Local Services') ---> ('Shopping',)
('Computers', 'IT

('Barbers', 'Beauty & Spas', 'Hair Removal') ---> ('Hair Salons',)
('Barbers', 'Hair Removal', 'Hair Salons') ---> ('Beauty & Spas',)
('Day Spas', 'Hair Removal', 'Hair Salons') ---> ('Beauty & Spas',)
('Day Spas', 'Hair Removal', 'Hair Salons', 'Nail Salons') ---> ('Beauty & Spas',)
('Day Spas', 'Hair Salons', 'Nail Salons') ---> ('Beauty & Spas',)
('Bars', 'Nightlife', 'Sushi Bars') ---> ('Restaurants',)
('Bars', 'Restaurants', 'Sushi Bars') ---> ('Nightlife',)
('Food', 'Sports Bars', 'Wine & Spirits') ---> ('Bars', 'Beer', 'Nightlife')
('Nightlife', 'Sports Bars', 'Wine & Spirits') ---> ('Bars', 'Beer', 'Food')
('Bars', 'Sports Bars', 'Wine & Spirits') ---> ('Beer', 'Food', 'Nightlife')
('Bars', 'Food', 'Sports Bars', 'Wine & Spirits') ---> ('Beer', 'Nightlife')
('Bars', 'Nightlife', 'Sports Bars', 'Wine & Spirits') ---> ('Beer', 'Food')
('Food', 'Nightlife', 'Sports Bars', 'Wine & Spirits') ---> ('Bars', 'Beer')
('Beer', 'Food', 'Sports Bars') ---> ('Bars', 'Nightlife', 'Wine & Spi