# 1. 计算用户相似度

In [1]:
import math

def UserSimilarity(train):
    """Compute pairwise cosine similarity between users from their item sets.

    For users u and v the similarity is |N(u) & N(v)| / sqrt(|N(u)| * |N(v)|),
    where N(x) is the set of distinct items user x interacted with.

    Args:
        train: Mapping user -> iterable of items, e.g. {A: [a, b, d], B: [a, c]}.

    Returns:
        Nested dict W where W[u][v] is the similarity between u and v for every
        ordered pair of distinct users (pairs with no overlap are stored as 0.0).
    """
    # Build each user's item set once instead of once per pair, and measure the
    # norm on the same de-duplicated set that the intersection is taken over.
    item_sets = {user: set(items) for user, items in train.items()}

    W = dict()
    for u, items_u in item_sets.items():
        W[u] = dict()
        for v, items_v in item_sets.items():
            if u == v:
                continue
            norm = math.sqrt(len(items_u) * len(items_v))
            # A user with no items has no overlap with anyone: report 0.0
            # rather than dividing by zero.
            W[u][v] = len(items_u & items_v) / norm if norm else 0.0
    return W
        

In [2]:
# Toy dataset: maps each user ID to the list of item IDs that user interacted with.
train = {"A":["a", "b", "d"],
        "B":["a", "c"],
        "C":["b", "e"],
        "D":["c", "d", "e"]}

In [3]:
# Show the user-user similarity table for the toy dataset.
UserSimilarity(train)

{'A': {'B': 0.4082482904638631,
  'C': 0.4082482904638631,
  'D': 0.3333333333333333},
 'B': {'A': 0.4082482904638631, 'C': 0.0, 'D': 0.4082482904638631},
 'C': {'A': 0.4082482904638631, 'B': 0.0, 'D': 0.4082482904638631},
 'D': {'A': 0.3333333333333333,
  'B': 0.4082482904638631,
  'C': 0.4082482904638631}}

## 改进-1: 建立物品-用户倒排表,只计算有共同物品的用户对


In [4]:
from collections import defaultdict

def UserSimilarity(train):
    """Compute user-user cosine similarity through an item -> users inverted index.

    Only pairs of users that share at least one item get an entry, so pairs
    without overlap are absent from the result instead of being stored as 0.

    Args:
        train: Mapping user -> iterable of items.

    Returns:
        Nested dict W with W[u][v] = co-occurrence(u, v) / sqrt(N[u] * N[v]),
        where N[x] is the number of distinct items of user x.

    Side effects:
        Prints the inverted index and the co-occurrence matrix.
    """
    # Inverted index: item -> set of users who interacted with it.
    item_users = dict()
    for user, items in train.items():
        for item in items:
            item_users.setdefault(item, set()).add(user)
    print("item_users: ", item_users)

    cooccurrence = defaultdict(int)
    item_count = defaultdict(int)
    # Every item contributes one co-occurrence to each ordered pair of its users.
    for users in item_users.values():
        for u in users:
            item_count[u] += 1
            row = cooccurrence.setdefault(u, defaultdict(int))
            for v in users:
                if v != u:
                    row[v] += 1
    print("\nco-occurrence matrix: ", cooccurrence)

    # Normalise the co-occurrence counts into cosine similarities.
    return {
        u: {
            v: shared / math.sqrt(item_count[u] * item_count[v])
            for v, shared in row.items()
        }
        for u, row in cooccurrence.items()
    }

In [5]:
# Re-run on the toy dataset; this version also prints its intermediate tables.
UserSimilarity(train)

item_users:  {'a': {'A', 'B'}, 'b': {'C', 'A'}, 'd': {'D', 'A'}, 'c': {'D', 'B'}, 'e': {'D', 'C'}}

co-occurrence matrix:  defaultdict(<class 'int'>, {'A': defaultdict(<class 'int'>, {'B': 1, 'C': 1, 'D': 1}), 'B': defaultdict(<class 'int'>, {'A': 1, 'D': 1}), 'C': defaultdict(<class 'int'>, {'A': 1, 'D': 1}), 'D': defaultdict(<class 'int'>, {'A': 1, 'B': 1, 'C': 1})})


{'A': {'B': 0.4082482904638631,
  'C': 0.4082482904638631,
  'D': 0.3333333333333333},
 'B': {'A': 0.4082482904638631, 'D': 0.4082482904638631},
 'C': {'A': 0.4082482904638631, 'D': 0.4082482904638631},
 'D': {'A': 0.3333333333333333,
  'B': 0.4082482904638631,
  'C': 0.4082482904638631}}

## 改进-2: 对热门物品的处理 (IIF)

共同物品越热门,对用户相似度的贡献越小:物品 $i$ 的每次共现按 $1/\log(1+|N(i)|)$ 加权,其中 $|N(i)|$ 是与物品 $i$ 有过交互的用户数。

In [6]:
from collections import defaultdict

def UserSimilarity(train):
    """Compute user-user similarity with an inverse-item-frequency (IIF) penalty.

    Works like the inverted-index cosine similarity, except that each shared
    item adds 1 / log(1 + number of users of that item) to the pair's
    co-occurrence score instead of 1, so widely shared items count for less.

    Args:
        train: Mapping user -> iterable of items.

    Returns:
        Nested dict W with W[u][v] = weighted co-occurrence(u, v) /
        sqrt(N[u] * N[v]), where N[x] is the number of distinct items of user x.
        Pairs that share no item have no entry.

    Side effects:
        Prints the inverted index and the weighted co-occurrence matrix.
    """
    # Inverted index: item -> set of users who interacted with it.
    item_users = dict()
    for user, items in train.items():
        for item in items:
            item_users.setdefault(item, set()).add(user)
    print("item_users: ", item_users)

    cooccurrence = defaultdict(int)
    item_count = defaultdict(int)
    for users in item_users.values():
        # The penalty depends only on the item, so compute it once per item.
        weight = 1 / math.log(1 + len(users))
        for u in users:
            item_count[u] += 1
            row = cooccurrence.setdefault(u, defaultdict(int))
            for v in users:
                if v != u:
                    row[v] += weight
    print("\nco-occurrence matrix: ", cooccurrence)

    # Normalise the weighted co-occurrence scores.
    return {
        u: {
            v: shared / math.sqrt(item_count[u] * item_count[v])
            for v, shared in row.items()
        }
        for u, row in cooccurrence.items()
    }

In [7]:
# Re-run on the toy dataset with the popularity-penalised co-occurrence weights.
UserSimilarity(train)

item_users:  {'a': {'A', 'B'}, 'b': {'C', 'A'}, 'd': {'D', 'A'}, 'c': {'D', 'B'}, 'e': {'D', 'C'}}

co-occurrence matrix:  defaultdict(<class 'int'>, {'A': defaultdict(<class 'int'>, {'B': 0.9102392266268373, 'C': 0.9102392266268373, 'D': 0.9102392266268373}), 'B': defaultdict(<class 'int'>, {'A': 0.9102392266268373, 'D': 0.9102392266268373}), 'C': defaultdict(<class 'int'>, {'A': 0.9102392266268373, 'D': 0.9102392266268373}), 'D': defaultdict(<class 'int'>, {'A': 0.9102392266268373, 'B': 0.9102392266268373, 'C': 0.9102392266268373})})


{'A': {'B': 0.37160360818355515,
  'C': 0.37160360818355515,
  'D': 0.3034130755422791},
 'B': {'A': 0.37160360818355515, 'D': 0.37160360818355515},
 'C': {'A': 0.37160360818355515, 'D': 0.37160360818355515},
 'D': {'A': 0.3034130755422791,
  'B': 0.37160360818355515,
  'C': 0.37160360818355515}}

# 2. 如何推荐物品给用户

In [8]:
def recommend(user, train, W, K):
    """Score items the user has not seen, using the K most similar users.

    Each candidate item i gets the score sum(W[user][v] * r_vi) over the K
    nearest neighbours v that interacted with i.

    Args:
        user: Key of the target user in ``train``.
        train: Mapping user -> interacted items. A value may be an iterable of
            items (implicit feedback, each interaction counts as 1.0) or a
            dict mapping item -> rating.
        W: Mapping user -> {other user: similarity}, as returned by
            ``UserSimilarity``.
        K: Number of nearest neighbours to consider; values <= 0 select none.

    Returns:
        dict mapping each candidate item to its accumulated score. Items the
        target user already interacted with are excluded. Empty when the user
        has no neighbours in ``W``.

    Raises:
        KeyError: If ``user`` (or one of the selected neighbours) is missing
            from ``train``.
    """
    rank = dict()
    # A set gives O(1) membership tests; for dict input this is the key set.
    interacted_items = set(train[user])

    # A user who shares no item with anyone may have no row in W at all.
    neighbours = sorted(
        W.get(user, {}).items(), key=lambda pair: pair[1], reverse=True
    )[:max(K, 0)]

    for v, wuv in neighbours:
        v_items = train[v]
        if isinstance(v_items, dict):
            ratings = v_items.items()
        else:
            # Implicit feedback: every distinct item has unit weight.
            ratings = ((i, 1.0) for i in dict.fromkeys(v_items))
        for i, rvi in ratings:
            if i in interacted_items:
                continue
            rank[i] = rank.get(i, 0.0) + wuv * rvi
    return rank