In [1]:
import networkx as nx
import random
import pandas as pd

In [4]:
import warnings
warnings.filterwarnings('ignore')

In [5]:
def create_graph():
    # Path to the GraphML file
    # GraphML is a file created from the Neo4j database.
    graphml_file_path = 'neo4jdata.graphml'
    return nx.read_graphml(graphml_file_path)

def find_node_by_attribute(G, attr, value):
    # Function to find a node by userID or categoryID
    # This is used due to the ID differences that arise when transferring from graphDB to NetworkX.
    for node, data in G.nodes(data=True):
        if data.get(attr) == value:
            return node
    return None


In [6]:
G = create_graph()

In [7]:
def category_recommender(user_id, key_category_id):
    # Step 1: Find the keySupercategory of the keyCategory
    key_category_node = find_node_by_attribute(G, 'categoryID', key_category_id)
    key_supercategory_node = None
    for u, v, d in G.edges(key_category_node, data=True):
        if d.get('label') == 'CATEGORY_BELONGS_TO':
            key_supercategory_node = v
            break

    if not key_supercategory_node:
        raise ValueError("Key supercategory not found")
    
    # Step 2: Find similar users
    target_user_node = find_node_by_attribute(G, 'userID', str(user_id))
    # Similar user가 없는 경우에는 이를 빈 list로 해줘야 한다. 
    # 이렇게 하지 않으면 모든 edges를 다 가져오기 때문에 문제가 생긴다.
    if target_user_node:
        similar_users = [v for u, v in G.edges(target_user_node) if G[u][v].get('label') == 'SIMILAR']
    else:
        similar_users = []
    
    # Initialize recommended_categories
    recommended_categories = {}

    # If there are similar users, proceed with collaborative filtering
    if similar_users:
        for su in similar_users:
            for u, v, d in G.edges(su, data=True):
                if d.get('label') == 'PREFERENCE':
                    if any(d.get('label') == 'CATEGORY_BELONGS_TO' for _, _, d in G.edges(v, data=True)):
                        category_id = G.nodes[v].get('categoryID')
                        recommended_categories[category_id] = recommended_categories.get(category_id, 0) + 1

        # Sort and get top 2 categories
        sorted_categories = sorted(recommended_categories, key=recommended_categories.get, reverse=True)[:2]

    else:
        # If no similar users, pick 2 random categories from the same supercategory
        potential_categories = [u for u, v, d in G.edges(key_category_node, data=True) 
                                if d.get('label') == 'CATEGORY_BELONGS_TO' and G.nodes[u].get('categoryID') != key_category_id]
        # no duplicates
        potential_categories = list(set(potential_categories))

        # Randomly select up to 2 categories
        selected_nodes = random.sample(potential_categories, 2) 
        sorted_categories = [G.nodes[v].get('categoryID') for v in selected_nodes]

    return [int(x) for x in sorted_categories[:2]]

In [12]:
def item_recommender(user_id, category_id):
    # Step 1: Find similar users and their rated items
    target_user_node = find_node_by_attribute(G, 'userID', str(user_id))
    if target_user_node:
        similar_users = [v for u, v in G.edges(target_user_node) if G[u][v].get('label') == 'SIMILAR']
    else:
        similar_users = []
    item_scores = {}

    # items that target_user already bought
    target_user_items = [v for _, v, d in G.edges(target_user_node, data=True) if d.get('label') == 'RATED']
    for su in similar_users:
        for _, item, data in G.edges(su, data=True):
            if data.get('label') == 'RATED' and item not in target_user_items:
                item_data = G.nodes[item]
                # 하나의 item이 동시에 여러 category에 속할 수 있으므로 (['공통']의 경우), 그것을 반영합니다
                category_ids = [G.nodes[v].get('categoryID') for u, v, d in G.edges(item, data=True) if d.get('label') == 'BELONGS_TO']
            if str(category_id) in category_ids:
                # item 평점을 가져옵니다. 이로 우선순위를 부여합니다.
                rating = data.get('Rating', 0)
                item_scores[item] = item_scores.get(item, 0) + float(rating)

    # Step 2: item을 rating을 기준으로 높은 것을 추천해줍니다.
    top_items = sorted(item_scores, key=item_scores.get, reverse=True)[:4]
    
    # Step 3: Cold Start Problem
    # similar user가 없는 경우, 또는 추천하는 item이 부족한 경우, 자체적인 score를 계산하여 추천을 합니다.
    # 동일 category 내에서 추천을 해줍니다.
    # Calculate the number of recommendations already found
    num_of_recommendations = len(top_items)
    num_of_needed_recommendations = 4 - num_of_recommendations

    # Cold Start Problem Handling
    if num_of_needed_recommendations > 0:
         # Find the node that represents the specified category
        category_node = find_node_by_attribute(G, 'categoryID', str(category_id))
    
        # Find all item nodes connected to this category node and not in top_items
        all_items_in_category = []
        if category_node:
            for u, v, d in G.edges(data=True):
                #print("Edge from", u, "to", v, "with data:", d)
                if d.get('label') == 'BELONGS_TO' and v == category_node and u not in top_items:
                    all_items_in_category.append(u)
    
        # Calculate a score for each additional item based on NumOfReviews and Rating
        cold_start_scores = {item: float(G.nodes[item].get('NumOfReviews', 0)) * float(G.nodes[item].get('Rating', 0)) for item in all_items_in_category}
        # Select additional items based on the score
        additional_items = sorted(cold_start_scores, key=cold_start_scores.get, reverse=True)[:num_of_needed_recommendations]
        top_items.extend(additional_items)
    
    # Convert node IDs to itemIDs
    recommended_itemIDs = [(G.nodes[item].get('itemID')) for item in top_items]

    return recommended_itemIDs

In [13]:
def random_recommender(tab2_cat, tab3_cat, video_subject):
    # Find all supercategories belonging to the given metacategory
    # Find the metacategory node
    metacategory_node = None
    for node, data in G.nodes(data=True):
        if data.get('metacategoryID') == str(video_subject):
            metacategory_node = node
    
    # Find all supercategories under this metacategory
    supercategories = [u for u, v, d in G.edges(data=True) if d.get('label') == 'SUPERCATEGORY_BELONGS_TO' and v == metacategory_node]

    # Find all categories within these supercategories, excluding tab2_cat and tab3_cat
    potential_categories = []
    for sc in supercategories:
        categories = [u for u, v, d in G.edges(data=True) if d.get('label') == 'CATEGORY_BELONGS_TO' and v == sc]
        potential_categories.extend(categories)

    # Filter categories and randomly select one
    potential_categories = [cat for cat in potential_categories if G.nodes[cat].get('categoryID') not in [tab2_cat, tab3_cat]]
    tab4_cat = random.choice(potential_categories) if potential_categories else None
    
    # Randomly recommend items from the selected category
    tab4_items = []
    if tab4_cat:
        items_in_category = [u for u, v, d in G.edges(data=True) if d.get('label') == 'BELONGS_TO' and v == str(tab4_cat)]
        tab4_items = random.sample(items_in_category, min(len(items_in_category), 4))

    # Convert node IDs to itemIDs
    tab4_item_ids = [int(G.nodes[item].get('itemID')) for item in tab4_items]

    return int(G.nodes[tab4_cat].get('categoryID')), tab4_item_ids

In [16]:
userID = 10
key_category = 7
video_subject = 0

In [21]:
#tab2, 3 카테고리 추천
tab2_cat, tab3_cat = category_recommender(userID, key_category)
print(tab3_cat)

#tab2, 3 item 추천
tab2_item = item_recommender(userID, tab2_cat)
tab3_item = item_recommender(userID, tab3_cat)
print(tab2_item, tab3_item)

#tab4 카테고리, item 추천
tab4_cat, tab4_item = random_recommender(tab2_cat, tab3_cat, video_subject)

#최종 결과: list 형태
# [[tab2_cat, tab3_cat, tab4_cat], tab2_item, tab3_item, tab4_item]
category_list = [tab2_cat, tab3_cat, tab4_cat]
item_list = [tab2_item, tab3_item, tab4_item]
result = [category_list] + item_list


509024
['39290', '47515', '39409', '20251'] ['52126', '52377', None, None]


In [62]:
def item_recommender(user_id, category_id):
    # Step 1: Find similar users and their rated items
    target_user_node = find_node_by_attribute(G, 'userID', str(user_id))
    if target_user_node:
        similar_users = [v for u, v in G.edges(target_user_node) if G[u][v].get('label') == 'SIMILAR']
    else:
        similar_users = []
    item_scores = {}

    print(similar_users)
    # items that target_user already bought
    target_user_items = [v for u, v, d in G.edges(target_user_node, data=True) if d.get('label') == 'RATED']
    for su in similar_users:
        print(type(su))
        for u, item, data in G.edges(data=True):
            if data.get('label') == 'RATED' and item not in target_user_items:
                item_data = G.nodes[item]
                # 하나의 item이 동시에 여러 category에 속할 수 있으므로 (['공통']의 경우), 그것을 반영합니다
                category_ids = [G.nodes[v].get('categoryID') for u, v, d in G.edges(item, data=True) if d.get('label') == 'BELONGS_TO']
                if str(category_id) in category_ids:
                    # item 평점을 가져옵니다. 이로 우선순위를 부여합니다.
                    rating = data.get('Rating', 0)
                    item_scores[item] = item_scores.get(item, 0) + float(rating)
        print(item_scores)
        print('category_ids:')
        print(category_ids)
    # Step 2: item을 rating을 기준으로 높은 것을 추천해줍니다.
    top_items = sorted(item_scores, key=item_scores.get, reverse=True)[:4]
    print(top_items)
    # Step 3: Cold Start Problem
    # similar user가 없는 경우, 또는 추천하는 item이 부족한 경우, 자체적인 score를 계산하여 추천을 합니다.
    # 동일 category 내에서 추천을 해줍니다.
    # Calculate the number of recommendations already found
    num_of_recommendations = len(top_items)
    num_of_needed_recommendations = 4 - num_of_recommendations
    print(num_of_needed_recommendations)

    # Cold Start Problem Handling
    if num_of_needed_recommendations > 0:
         # Find the node that represents the specified category
        category_node = find_node_by_attribute(G, 'categoryID', str(category_id))
    
        # Find all item nodes connected to this category node and not in top_items
        all_items_in_category = []
        if category_node:
            for u, v, d in G.edges(data=True):
                #print("Edge from", u, "to", v, "with data:", d)
                if d.get('label') == 'BELONGS_TO' and v == category_node and u not in top_items:
                    all_items_in_category.append(u)
    
        # Calculate a score for each additional item based on NumOfReviews and Rating
        cold_start_scores = {item: float(G.nodes[item].get('NumOfReviews', 0)) * float(G.nodes[item].get('Rating', 0)) for item in all_items_in_category}
        # Select additional items based on the score
        additional_items = sorted(cold_start_scores, key=cold_start_scores.get, reverse=True)[:num_of_needed_recommendations]
        top_items.extend(additional_items)
    
    # Convert node IDs to itemIDs
    recommended_itemIDs = [(G.nodes[item].get('itemID')) for item in top_items]

    return recommended_itemIDs

In [61]:
item_recommender(10, 509024)

['n842', 'n527', 'n562', 'n546']
<class 'str'>


UnboundLocalError: local variable 'category_ids' referenced before assignment

In [51]:
for item in ['n49666', 'n54012', 'n158', 'n90']:
    print(G.nodes[item])

{'labels': ':Item', 'Rating': '5.0', 'NumOfReviews': '106', 'ImgFileName': '2cb8e6936d81ffa18fea3d1a9525ba59dfcd996a9838a9dc7aee8d9ec672', 'itemID': '52126', 'itemName': '무배 NOW 나우 스몰브리드 시니어 5.4kg/애견사료 강아지사료', 'Category': '509024', 'Price': '72640'}
{'labels': ':Item', 'Rating': '5.0', 'NumOfReviews': '51', 'ImgFileName': 'e106fa0022dc59c66794e958ee8cf5c9ae92ad859b8cb4c9dfc2643acd11', 'itemID': '52377', 'itemName': '힐스 어덜트 라지브리드 강아지사료', 'Category': '509024', 'Price': '78080'}
{'labels': ':User', 'Gender': 'female', 'userID': '78', 'userName': '김미경', 'Age': '35'}
{'labels': ':User', 'Gender': 'female', 'userID': '10', 'userName': '김순자', 'Age': '27'}


In [None]:
if data.get('label') == 'RATED' and item not in target_user_items and u == su:
                item_data = G.nodes[item]