顾觐皓:第10次作业02(使用python2操作)
使用graphlab对于音乐数据构建推荐系统
import graphlab
# Send GraphLab Canvas visualisations to the notebook ("ipynb") target.
graphlab.canvas.set_target('ipynb')

# Source file: tab-separated, no header row.
train_file = 'D:/data/data/10000.txt'

# SFrame is the main tabular data structure used to pull data in from
# external sources. verbose=False keeps the parser from printing detailed
# progress output while the file is read.
sf = graphlab.SFrame.read_csv(
    train_file,
    delimiter='\t',
    header=False,
    verbose=False)

# With header=False the columns arrive as X1, X2, X3; give them the names
# the recommender calls below refer to.
sf = sf.rename({
    'X1': 'user_id',
    'X2': 'music_id',
    'X3': 'rating'})
分割测试集和训练集
# 80% of the rows go to train_set and the rest to test_set; the fixed seed
# makes the random split repeatable.
train_set, test_set = sf.random_split(fraction=0.8, seed=1)
流行度
# Popularity recommender trained on the training split, using 'rating'
# as the target column.
popularity_model = graphlab.popularity_recommender.create(
    train_set,
    user_id='user_id',
    item_id='music_id',
    target='rating')
物品相似度(基于物品的协同过滤,使用余弦相似度)
# Item-similarity recommender trained on the training split; similarity
# between items is measured with cosine similarity over the 'rating' target.
item_sim_model = graphlab.item_similarity_recommender.create(
    train_set,
    user_id='user_id',
    item_id='music_id',
    target='rating',
    similarity_type='cosine')
矩阵分解
# NOTE: despite the "_machine_" in the variable name, the model built here
# is a factorization_recommender. The name is kept because later code
# refers to it.
factorization_machine_model = (
    graphlab.recommender.factorization_recommender.create(
        train_set,
        user_id='user_id',
        item_id='music_id',
        target='rating'))

# Bare expression: presumably a notebook cell echo showing the number of
# training rows; it has no visible effect when run as a plain script.
len(train_set)
比较模型
# Evaluate the three models side by side on the held-out split, sampling
# half of the users. skip_set=train_set is forwarded to the evaluation call
# (presumably so items already seen in training are not recommended --
# TODO confirm against the GraphLab documentation).
result = graphlab.recommender.util.compare_models(
    test_set,
    [popularity_model, item_sim_model, factorization_machine_model],
    skip_set=train_set,
    user_sample=0.5)

# K: how many recommendations to produce per user.
K = 10

# users: up to 100 distinct user ids taken from the full data set.
users = graphlab.SArray(
    sf['user_id'].unique().head(100))
推荐
# Ask the item-similarity model for the top-K items for each selected user.
recs = item_sim_model.recommend(k=K, users=users)

# Bare expression: shows the first rows of the recommendations when run as
# a notebook cell.
recs.head()