In [14]:
import numpy as np
import pandas as pd
import random
import math

import operator

In [3]:
#k-fold交叉验证

def SplitData(data, M, k, seed):
    """Split (user, item) records into train and test sets for M-fold evaluation.

    Each record is assigned a pseudo-random fold in ``0 .. M-1``. Records whose
    fold equals ``k`` go to the test set, all others to the train set. Calling
    this with the same ``seed`` for every ``k`` in ``range(M)`` therefore yields
    M disjoint test sets that together cover ``data``.

    Args:
        data: iterable of ``(user, item)`` pairs.
        M: number of folds; must be >= 1 (``random.randrange`` raises
            ``ValueError`` otherwise).
        k: index of the fold to hold out, in ``0 .. M-1``.
        seed: seed for the ``random`` module, so the split is reproducible.

    Returns:
        ``(train, test)``: two lists of ``[user, item]`` pairs.

    Note:
        Reseeds the module-level ``random`` generator as a side effect.
    """
    random.seed(seed)

    train, test = [], []
    for user, item in data:
        # randrange(M) draws from 0..M-1. The previous randint(0, M) is
        # inclusive on both ends, which created an (M+1)-th fold that no
        # k in 0..M-1 ever selected as test data.
        if random.randrange(M) == k:
            test.append([user, item])
        else:
            train.append([user, item])

    return train, test


In [5]:
def Recall(train, test, N):
    """Recall of top-N recommendations over all users in ``train``.

    Computed as (number of recommended items that are in the user's test set)
    divided by (total number of test items), summed over users.

    Relies on ``GetRecommendation(user, N)``, which is not defined in this
    cell; it is expected to return an iterable of ``(item, score)`` pairs.

    Args:
        train: mapping user -> that user's training items.
        test: mapping user -> container of that user's held-out items.
        N: number of recommendations requested per user.

    Returns:
        Recall as a float, or ``0.0`` when there are no held-out items at all.
    """
    hit = 0
    all_ = 0

    for user in train.keys():
        # A train user may have no held-out items; treat that as an empty
        # test set instead of raising KeyError.
        tu = test[user] if user in test else ()
        rank = GetRecommendation(user, N)

        hit += sum(1 for item, pui in rank if item in tu)
        all_ += len(tu)

    # Nothing to recall: avoid ZeroDivisionError.
    if all_ == 0:
        return 0.0

    return hit / (all_ * 1.0)

def Precision(train, test, N):
    """Precision of top-N recommendations over all users in ``train``.

    Computed as (number of recommended items that are in the user's test set)
    divided by (``N`` times the number of train users).

    Relies on ``GetRecommendation(user, N)``, which is not defined in this
    cell; it is expected to return an iterable of ``(item, score)`` pairs.

    Args:
        train: mapping user -> that user's training items.
        test: mapping user -> container of that user's held-out items.
        N: number of recommendations requested per user.

    Returns:
        Precision as a float, or ``0.0`` when the denominator is zero
        (no users, or ``N == 0``).
    """
    hit = 0
    all_ = 0

    for user in train.keys():
        # A train user may have no held-out items; treat that as an empty
        # test set instead of raising KeyError.
        tu = test[user] if user in test else ()
        rank = GetRecommendation(user, N)

        hit += sum(1 for item, pui in rank if item in tu)

        # The accumulator is `all_`. The previous `all += N` rebound the
        # built-in `all` as a local and raised UnboundLocalError.
        all_ += N

    if all_ == 0:
        return 0.0

    return hit / (all_ * 1.0)



In [6]:
#覆盖率 越高 越能把长尾商品推荐给用户
def Coverage(train, test, N):
    """Catalogue coverage of top-N recommendations.

    Computed as (number of distinct items recommended to any train user)
    divided by (number of distinct items appearing in ``train``).

    Relies on ``GetRecommendation(user, N)``, which is not defined in this
    cell; it is expected to return an iterable of ``(item, score)`` pairs.

    Args:
        train: mapping user -> container of that user's training items.
        test: not used by this metric.
        N: number of recommendations requested per user.

    Returns:
        Coverage as a float, or ``0.0`` when ``train`` contains no items.
    """
    recommend_items = set()
    all_items = set()

    for user in train.keys():
        # Iterating the per-user container yields its items (dict keys).
        all_items.update(train[user])
        recommend_items.update(item for item, pui in GetRecommendation(user, N))

    # Empty catalogue: avoid ZeroDivisionError.
    if not all_items:
        return 0.0

    return len(recommend_items) / (len(all_items) * 1.0)


In [8]:
#新颖度
def Popularity(train, test, N):
    """Mean log-popularity of the recommended items.

    An item's popularity is the number of train users who have it. The result
    is the average of ``log(1 + popularity)`` over every recommendation made
    to every train user.

    Relies on ``GetRecommendation(user, N)``, which is not defined in this
    cell; it is expected to return an iterable of ``(item, score)`` pairs.

    Args:
        train: mapping user -> container of that user's training items.
        test: not used by this metric.
        N: number of recommendations requested per user.

    Returns:
        Mean log-popularity as a float, or ``0.0`` when nothing is recommended.
    """
    # Count how many users have each item.
    item_popularity = dict()
    for user, items in train.items():
        for item in items:
            item_popularity[item] = item_popularity.get(item, 0) + 1

    ret = 0.0
    n = 0
    for user in train.keys():
        for item, pui in GetRecommendation(user, N):
            # An item unseen in train has popularity 0, so log(1 + 0) == 0.
            ret += math.log(1 + item_popularity.get(item, 0))
            n += 1

    # No recommendations at all: avoid ZeroDivisionError.
    if n == 0:
        return 0.0

    return ret / (n * 1.0)

In [12]:
#余弦相似度
def UserSimilarity_(train):
    """Pairwise cosine similarity between users, computed by brute force.

    For every ordered pair of distinct users ``(u, v)``:
    ``W[u][v] = |items(u) & items(v)| / sqrt(|items(u)| * |items(v)|)``.

    Args:
        train: mapping user -> container of that user's items (a set, or a
            dict keyed by item).

    Returns:
        Nested dict ``W`` with an entry ``W[u][v]`` for every pair ``u != v``.
        The similarity is ``0.0`` when either user has no items.
    """
    # Build each user's item set once. set(...) accepts both sets and dicts,
    # whereas `dict & dict` is not defined.
    item_sets = {u: set(items) for u, items in train.items()}

    W = dict()
    for u, items_u in item_sets.items():
        # The inner dict must exist before W[u][v] can be assigned.
        W[u] = dict()
        for v, items_v in item_sets.items():
            if u == v:
                continue

            norm = math.sqrt(len(items_u) * len(items_v))
            # A user with no items gives a zero norm; define similarity as 0.
            W[u][v] = len(items_u & items_v) / norm if norm else 0.0

    return W

In [13]:
def UserSimilarity(train):
    """Cosine similarity between users, computed through an item->users index.

    Only pairs of users that share at least one item are visited, so the
    result is sparse: ``W[u][v]`` exists only when ``u`` and ``v`` have an
    item in common, and a user with no such neighbour has no entry in ``W``.

    Args:
        train: mapping user -> container of that user's items (iterating it
            must yield the items, e.g. a dict keyed by item).

    Returns:
        Nested dict ``W`` with ``W[u][v] = C[u][v] / sqrt(N[u] * N[v])``, where
        ``C[u][v]`` is the number of shared items and ``N[u]`` is the number
        of items of ``u``.
    """
    # Build the inverted index: item -> set of users who have it.
    item_user = dict()
    for u, items in train.items():
        for i in items:
            item_user.setdefault(i, set()).add(u)

    C = dict()  # C[u][v]: number of items shared by u and v
    N = dict()  # N[u]: number of items of u

    for i, users in item_user.items():
        for u in users:
            N[u] = N.get(u, 0) + 1

            for v in users:
                if u == v:
                    continue

                # Create the row / cell on first use instead of raising KeyError.
                row = C.setdefault(u, dict())
                row[v] = row.get(v, 0) + 1
                # Alternative that penalises popular items:
                # row[v] = row.get(v, 0) + 1 / math.log(1 + len(users))

    W = dict()
    for u, related_users in C.items():
        W[u] = {
            v: cuv / math.sqrt(N[u] * N[v])
            for v, cuv in related_users.items()
        }

    return W

In [15]:
def Recommend(user, train, W, K=None):
    """Score unseen items for ``user`` from the K most similar users.

    For each of the ``K`` users most similar to ``user`` (by ``W[user][v]``),
    every item of that neighbour which ``user`` does not already have gets
    ``similarity * rating`` added to its score.

    Args:
        user: the user to recommend for; must be a key of ``train``.
        train: mapping user -> dict of ``item -> rating``.
        W: nested dict of user similarities, ``W[u][v]``.
        K: number of nearest neighbours to use. When omitted, a module-level
            ``K`` is used if one is defined (the original code read ``K`` from
            the enclosing scope); otherwise all neighbours are used.

    Returns:
        Dict ``item -> score``; empty when ``user`` has no neighbours in ``W``.
    """
    if K is None:
        # Keep the original contract of reading a notebook-level K, if any.
        # A None result makes the slice below keep every neighbour.
        K = globals().get("K")

    interacted_items = train[user]

    # W may be sparse, so a user without neighbours simply gets no candidates.
    neighbours = sorted(
        W.get(user, {}).items(),
        key=operator.itemgetter(1),
        reverse=True,
    )[0:K]

    rank = dict()
    for v, wuv in neighbours:
        for i, rvi in train[v].items():
            if i in interacted_items:
                continue

            # Start from 0.0 on first sight of an item instead of raising KeyError.
            rank[i] = rank.get(i, 0.0) + wuv * rvi

    return rank