# 1.1 UserCF

In [83]:
import math

def UserSimilarity(train):
    """Compute pairwise cosine similarity between users (brute force).

    Every ordered pair of distinct users is compared, so the cost grows with
    the square of the number of users.

    Args:
        train: Mapping of user -> iterable of items,
            e.g. ``{"A": ["a", "b", "d"], "B": ["a", "c"]}``.

    Returns:
        Nested dict ``W`` where ``W[u][v]`` is
        ``|items(u) & items(v)| / sqrt(|items(u)| * |items(v)|)`` for every
        ``u != v``. Pairs in which either user has no items get ``0.0``.
    """
    # Build each user's item set once instead of once per pair. Using the set
    # size (not the raw list length) keeps the denominator consistent with the
    # set intersection in the numerator when a list contains duplicates.
    item_sets = {user: set(items) for user, items in train.items()}

    W = dict()
    for u, items_u in item_sets.items():
        W[u] = dict()
        for v, items_v in item_sets.items():
            if u == v:
                continue
            norm = math.sqrt(len(items_u) * len(items_v))
            # norm is 0 when either user has no items; define similarity as 0
            # rather than raising ZeroDivisionError.
            W[u][v] = len(items_u & items_v) / norm if norm else 0.0
    return W
        

In [84]:
# Toy interaction data: user -> list of item ids, in the shape that
# UserSimilarity expects for its `train` argument.
train = dict(
    A=["a", "b", "d"],
    B=["a", "c"],
    C=["b", "e"],
    D=["c", "d", "e"],
)

In [85]:
# Compute the similarity of every user pair in the toy data; the returned
# nested dict is the cell's displayed result.
UserSimilarity(train)

{'A': {'B': 0.4082482904638631,
  'C': 0.4082482904638631,
  'D': 0.3333333333333333},
 'B': {'A': 0.4082482904638631, 'C': 0.0, 'D': 0.4082482904638631},
 'C': {'A': 0.4082482904638631, 'B': 0.0, 'D': 0.4082482904638631},
 'D': {'A': 0.3333333333333333,
  'B': 0.4082482904638631,
  'C': 0.4082482904638631}}

## 改进版：利用物品-用户倒排表，只计算有共同物品的用户对


In [90]:
from collections import defaultdict

def UserSimilarity(train, verbose=True):
    """Compute user-user cosine similarity via an item -> users inverted index.

    Only user pairs that share at least one item are visited, so pairs with no
    overlap are simply absent from the result instead of being stored as 0.

    Args:
        train: Mapping of user -> iterable of items,
            e.g. ``{"A": ["a", "b", "d"], "B": ["a", "c"]}``.
        verbose: When true (the default, matching the previous behaviour),
            print the inverted index and the co-occurrence matrix.

    Returns:
        Dict ``W`` with one entry per user in ``train``. ``W[u][v]`` is
        ``C[u][v] / sqrt(N[u] * N[v])`` for each ``v`` sharing an item with
        ``u``, where ``C[u][v]`` is the number of shared items and ``N[u]`` the
        number of distinct items of ``u``. Users without any co-occurring user
        (including users with no items) map to an empty dict.
    """
    # Build the inverted index: item -> set of users who have that item.
    item_users = dict()
    for u, items in train.items():
        for i in items:
            item_users.setdefault(i, set()).add(u)
    if verbose:
        print("item_users: ", item_users)

    # Build the co-occurrence matrix C and the per-user item counts N.
    # The outer default factory of C is never relied on (rows are created
    # explicitly below); the type is kept so the printed form is unchanged.
    C = defaultdict(int)
    N = defaultdict(int)
    for users in item_users.values():
        for u in users:
            N[u] += 1
            if u not in C:
                C[u] = defaultdict(int)
            for v in users:
                if u != v:
                    C[u][v] += 1
    if verbose:
        print("\nco-occurrence matrix: ", C)

    # Calculate cosine similarity. Seeding W from `train` guarantees that every
    # user has an entry (no KeyError for users without items) and makes the key
    # order follow `train` rather than set iteration order.
    W = {u: dict() for u in train}
    for u, related_users in C.items():
        for v, cuv in related_users.items():
            W[u][v] = cuv / math.sqrt(N[u] * N[v])

    return W

In [91]:
# Run the inverted-index version on the same toy data. Besides returning the
# similarity dict, this call prints the inverted index and the co-occurrence
# matrix.
UserSimilarity(train)

item_users:  {'a': {'B', 'A'}, 'b': {'C', 'A'}, 'd': {'A', 'D'}, 'c': {'B', 'D'}, 'e': {'C', 'D'}}

co-occurrence matrix:  defaultdict(<class 'int'>, {'B': defaultdict(<class 'int'>, {'A': 1, 'D': 1}), 'A': defaultdict(<class 'int'>, {'B': 1, 'C': 1, 'D': 1}), 'C': defaultdict(<class 'int'>, {'A': 1, 'D': 1}), 'D': defaultdict(<class 'int'>, {'A': 1, 'B': 1, 'C': 1})})


{'B': {'A': 0.4082482904638631, 'D': 0.4082482904638631},
 'A': {'B': 0.4082482904638631,
  'C': 0.4082482904638631,
  'D': 0.3333333333333333},
 'C': {'A': 0.4082482904638631, 'D': 0.4082482904638631},
 'D': {'A': 0.3333333333333333,
  'B': 0.4082482904638631,
  'C': 0.4082482904638631}}