In [1]:
import pandas as pd
import numpy as np
import math
from collections import defaultdict

from surprise import Dataset
from surprise import Reader
from surprise import SVD

In [2]:
# Load the training action log (HDF5, key 'df', read-only) and the four raw
# jdata_* CSV tables. The CSVs are read in the same order as they are named.
jdata_action_train = pd.read_hdf('./data/action_train.h5', key='df', mode='r')
jdata_comment, jdata_product, jdata_shop, jdata_user = (
    pd.read_csv(csv_path, sep=',')
    for csv_path in (
        './raw/jdata_comment.csv',
        './raw/jdata_product.csv',
        './raw/jdata_shop.csv',
        './raw/jdata_user.csv',
    )
)

In [3]:
# Add the product columns (including cate and brand) to the action data.
# A left join keeps every action row, even when its sku_id has no match in
# the product table.
df = pd.merge(jdata_action_train, jdata_product, how='left', on='sku_id')
# Cast brand and cate to int32 one column at a time; errors='ignore' hands the
# column back unchanged if the cast cannot be performed.
for _col in ('brand', 'cate'):
    df[[_col]] = df[[_col]].astype('int32', errors='ignore')

In [4]:
# Popularity of each brand and cate: number of action rows per value,
# stored as plain {value: count} dicts.
cnt_brand = df['brand'].value_counts().to_dict()
cnt_cate = df['cate'].value_counts().to_dict()

In [None]:
# Keep only users with at least 5 action rows so the user-item matrix is less
# sparse. Note: the count covers every row with a non-null sku_id, whatever
# the action type, not just purchases.
data_user = (
    df.groupby('user_id')['sku_id']
    .count()
    .sort_values(ascending=False)
    .rename('item_cnts')
    .reset_index()
)
# Build the filtered frame: inner-join on the retained users, order the rows
# by action_time, then keep only the columns used downstream.
tmp = data_user.loc[data_user['item_cnts'] >= 5]
df_new = (
    df.merge(tmp, how='inner', on='user_id')
    .sort_values(by='action_time')
    [['user_id', 'sku_id', 'brand', 'cate', 'type']]
)

In [None]:
# Give every action a rating weight keyed by its type code; a type code that
# is not in the table maps to NaN.
df_new['score'] = df_new['type'].map({
    1: 0.1,
    2: 0.5,
    3: 0.2,
    4: 0.3,
    5: 0.4,
})
# Final training frame: one row per (user_id, brand) holding the summed score.
df_train = df_new.groupby(['user_id', 'brand'])['score'].sum().reset_index()
# Cast brand to int32; errors='ignore' leaves the column as-is if the cast fails.
df_train[['brand']] = df_train[['brand']].astype('int32', errors='ignore')

In [None]:
# Score binning: turn the summed score into an ordinal rating on a 1-5 scale.
#
# qcut is asked for 10 quantile bins, but duplicates='drop' merges identical
# bin edges, so the number of bins that survive depends on the data. Passing a
# fixed list of 5 labels raises ValueError whenever that number is not exactly
# 5, so take integer bin codes (labels=False) and rescale them onto 1..5.
score_codes, score_edges = pd.qcut(
    df_train['score'], 10, labels=False, retbins=True, duplicates='drop'
)
n_score_bins = len(score_edges) - 1
# When exactly 5 bins survive this is the identity mapping (codes 0..4 become
# ratings 1..5); otherwise the codes are squeezed or spread proportionally so
# every rating still falls inside 1..5. The column is now integer-valued
# rather than categorical.
df_train['score'] = score_codes * 5 // n_score_bins + 1

In [None]:
# Wrap the (user_id, brand, score) frame for Surprise; ratings are declared to
# lie on a 1-5 scale.
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(df=df_train, reader=reader)
# Use every rating for training; no hold-out split is made in this cell.
trainset = data.build_full_trainset()

In [None]:
# Fit a 100-factor SVD on the full trainset. SVD's initialisation is random,
# so pin random_state to make the fit (and every metric computed from it)
# reproducible across kernel restarts.
model = SVD(n_factors=100, random_state=42, verbose=True)
model.fit(trainset)

In [None]:
def Recommend_svd(user, k, n_items=12000):
    """Return the ``k`` candidate ids with the highest predicted rating.

    Every integer id in ``range(n_items)`` is scored for ``user`` with the
    module-level ``model`` (``model.predict(user, id).est``) and the ids of
    the ``k`` largest estimates are returned.

    Args:
        user: User id handed to ``model.predict``.
        k: Number of ids to return. Values ``<= 0`` yield an empty set;
            values ``>= n_items`` yield every candidate id.
        n_items: Number of candidate ids to score, i.e. ids ``0`` to
            ``n_items - 1``. Defaults to 12000, the value previously
            hard-coded here (presumably an upper bound on the brand ids;
            confirm against the data).

    Returns:
        set: Up to ``k`` integer ids, in no particular order.
    """
    # Guard first: with k == 0 the slice [-0:] would select the whole array
    # instead of nothing.
    if k <= 0 or n_items <= 0:
        return set()
    # argpartition rejects a k larger than the array, and the answer is
    # trivially "everything" anyway.
    if k >= n_items:
        return set(range(n_items))

    est = [model.predict(user, item).est for item in range(n_items)]
    # Partial sort: the last k positions hold the indices of the k largest
    # estimates (unordered among themselves).
    top_k = np.argpartition(est, -k)[-k:]
    return set(top_k)

In [None]:
# Evaluate the recommender with the project-local Metrics helper.
from Metrics import metrics
mtc = metrics()
# Passes the recommender callable and the value 3 to PrecisonRecall; presumably
# 3 is the k handed to Recommend_svd(user, k) -- confirm in the Metrics module.
mtc.PrecisonRecall(Recommend_svd, 3)