##### 📌先说UserCF
步骤：对于某一个用户a，根据`u2u_sim`矩阵找到和用户a相似的前K个用户，将k个用户的点击历史作为用户a的召回结果【`rank[item1] += u2u_sim_score`】。根据召回的每个物品的`u2u_sim_score`排序【`sorted(rank,key = lambda x : x[1])`】，然后取前N个即可。
- 所以我们需要获得**用户相似性矩阵**，怎么获得呢？
    - 用户共现
    - 余弦相似度
    
    解释：【用户共现】是指计算和用户A点击过相同物品的用户有哪些；当用户A和用户B都点击过item1，那么`co_metrix['A']['B'] += 1`;共现矩阵的形式是`{'A'：{'B':1,'C':4,'D':3}}`;【余弦相似度】是为了解决活跃用户的问题；如果用户B过度活跃，点击了很多商品，那么我们需要削弱它的影响。

In [20]:
from collections import defaultdict
import math

def u2u_sim(user_items):
    # 用户行为倒排：每个物品有哪些用户点击过
    items_user = defaultdict(set)
    for user,items in user_items.items():
        for item in items:
            items_user[item].add(user)
    
    # 计算共现
    co_matrix = defaultdict(lambda:defaultdict(int))
    count = defaultdict(int)
    for item,user in items_user.items():
        for u in user:
            count[u] += 1
            for v in user:
                if v==u:
                    continue
                else:
                    co_matrix[u][v] += 1
    
    u2u_sim_matrix = defaultdict(dict)

    for u,related_user in co_matrix.items():
        for v,score in related_user.items():
            sim = score/math.sqrt(count[u]*count[v])
            u2u_sim_matrix[u][v] = sim
            
    return u2u_sim_matrix


def recommend(user,user_items,k=2,N=5):
    u2u_sim_matrix = u2u_sim(user_items)
    user_click_items = user_items[user]
    
    rank = defaultdict(float)
    for user,score in sorted(u2u_sim_matrix[user].items(),key=lambda x:x[1],reverse=True)[:k]:
        for item in user_items[user]:
            if item not in user_click_items:
                rank[item] += (score)
            
    return sorted(rank.items(),key= lambda x:x[1],reverse=True)[:N]
    
if __name__ == '__main__':
    user_items = {
    'A':['item1','item2','item3','item4','item5'],
    'B':['item13','item2','item23','item4','item50'],
    'C':['item19','item12','item3D','item41','item85'],
    'D':['item15','item6Q','item23','item47','item85'],
    'F':['item1','item2','item3DA','item414','item54'],
    'E':['item19','item12','item3D','item45','item50'],
    'Q':['item19']
}
    result = recommend("A",user_items)
    print("\nRecommendations for user A:")
    for item, score in result:
        print(f"{item}: {score:.4f}")


Recommendations for user A:
item3DA: 0.4000
item414: 0.4000
item54: 0.4000
item13: 0.4000
item23: 0.4000


##### 📌ItemCF
步骤：对于某个用户a所点击过的商品{'item1','item2','item3'}，根据`i2i_sim`矩阵找到和每个物品最相似的几个商品。
- 所以我们需要获得**物品相似性矩阵**，怎么获得呢？
    - 物品共现
    - 余弦相似度

In [18]:
#ItemCF
from collections import defaultdict
import math

def calculate_item_similarity(user_item):
    # 用户行为倒排：每个物品有哪些用户点击过
    item_users = defaultdict(set)
    for user, items in user_item.items():
        for item in items:
            item_users[item].add(user)

    # 物品共现矩阵
    co_matrix = defaultdict(lambda: defaultdict(int))
    item_count = defaultdict(int)

    for item, users in item_users.items():
        item_count[item] = len(users)
        for u in users:
            for related_item in user_item[u]:
                if related_item == item:
                    continue
                co_matrix[item][related_item] += 1

    # 计算相似度矩阵
    item_similarity = defaultdict(dict)
    for item, related_items in co_matrix.items():
        for other_item, co_count in related_items.items():
            sim = co_count / math.sqrt(item_count[item] * item_count[other_item])
            item_similarity[item][other_item] = sim

    return item_similarity

def recommend(user, user_item, item_similarity, K=3, N=5):
    rank = defaultdict(float)
    interacted_items = set(user_item.get(user, []))

    for item in interacted_items:
        for related_item, sim in sorted(item_similarity[item].items(), key=lambda x: x[1], reverse=True)[:K]:
            if related_item in interacted_items:
                continue
            rank[related_item] += sim

    return sorted(rank.items(), key=lambda x: x[1], reverse=True)[:N]

if __name__ == '__main__':
    user_items = {
        'A':['item1','item2','item3','item4','item5'],
        'B':['item13','item2','item23','item4','item50'],
        'C':['item19','item12','item3D','item41','item85'],
        'D':['item15','item6Q','item23','item47','item85'],
        'F':['item1','item2','item3DA','item414','item54'],
        'E':['item19','item12','item3D','item45','item50'],
        'Q':['item19']
    }

    item_similarity = calculate_item_similarity(user_items)

    print("\nRecommendations for user A:")
    recs = recommend('A', user_items, item_similarity)
    for item, score in recs:
        print(f"{item}: {score:.4f}")



Recommendations for user A:
item3DA: 1.2845
item414: 0.7071
item13: 0.7071


##### 📌AUC手撕

![image.png](微信图片_20250525001232.png)

In [6]:
def auc(labels,scores):
    samples = list(zip(labels,scores))
    rank = [(label,score) for label,score in sorted(samples,key=lambda x:x[1])]
    idx = [i+1 for i in range(len(rank)) if rank[i][0] == 1]
    
    #获得正、负样本个数
    pos_num = sum(labels)
    neg_num = len(labels) - sum(labels)
    
    return (sum(idx) - (pos_num*(pos_num+1))/2) / (pos_num*neg_num)


In [7]:
import numpy as np
y_true = np.array([0,0,1,1])
y_scores = np.array([0.1,0.4,0.35,0.8])
result = auc(y_true,y_scores)
result

0.75