/
SVDPP.java
190 lines (173 loc) · 7.27 KB
/
SVDPP.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
package com.sy.zhihai.model;
import java.util.Map;
import java.util.Scanner;
import java.util.Map.Entry;
import javolution.util.FastList;
import javolution.util.FastMap;
/**
* 融合了BiasLFM以及用户的历史评分行为
* 结合了邻域和LFM
*
* @author yan.shi
* @date: 日期:2017-4-27 时间:下午1:50:33
*/
public class SVDPP extends AbsMF {
private FastMap<String, Double> BU = null;//user的偏置项
private FastMap<String, Double> BI = null;//item的偏置项
private FastMap<String, Double[]> Y = null;
private double sumMean = 0.0;//全体评分的平均
public SVDPP() {
}
public static void main(String[] args) {
String dataPath = "resultData.txt";
SVDPP svdpp = new SVDPP();
svdpp.loadData(dataPath);
svdpp.initParam(30, 0.02, 0.01, 50);
svdpp.train();
System.out.println("Input userID...");
Scanner in = new Scanner(System.in);
while (true) {
String userID = in.nextLine();
FastList<RecommendedItem> recommendedItems = svdpp.calRecSingleUser(userID, 50);
svdpp.displayRecoItem(userID, recommendedItems);
System.out.println("Input userID...");
}
}
/**
* 初始化F,α,λ,max_iter,U,I,BU,BI,Y
*
* @param F 隐因子数目
* @param α 学习速率
* @param λ 正则化参数,以防过拟合
* @param max_iter 迭代次数
*/
@Override
public void initParam(int F, double α, double λ, int max_iter) {
System.out.println("init U,I,BU,BI...");
this.F = F;
this.α = α;
this.λ = λ;
this.max_iter = max_iter;
this.U = new FastMap<String, Double[]>();
this.I = new FastMap<String, Double[]>();
this.BU = new FastMap<String, Double>();
this.BI = new FastMap<String, Double>();
this.Y = new FastMap<String, Double[]>();
int itemCount = 0;//所有user对有过行为的item总数
Double[] randomUValue = null;
Double[] randomIValue = null;
Double[] randomYValue = null;
//对U,I,Y矩阵随机初始化
for (Entry<String, FastMap<String, Double>> entry : ratingData.entrySet()) {
String userID = entry.getKey();
this.BU.put(userID, 0.0);
itemCount += entry.getValue().size();
randomUValue = new Double[F];
for (int i = 0; i < F; i++) {
double rand = Math.random() / Math.sqrt(F);//随机数填充初始化矩阵,并和1/sqrt(F)成正比
randomUValue[i] = rand;
}
U.put(userID, randomUValue);
for (Entry<String, Double> entryItem : entry.getValue().entrySet()) {
this.sumMean += entryItem.getValue();
String itemID = entryItem.getKey();
this.BI.put(itemID, 0.0);
if (I.containsKey(itemID))
continue;
randomIValue = new Double[F];
randomYValue = new Double[F];
for (int i = 0; i < F; i++) {
double randI = Math.random() / Math.sqrt(F);
randomIValue[i] = randI;
double randY = Math.random() / Math.sqrt(F);
randomYValue[i] = randY;
}
I.put(itemID, randomIValue);
Y.put(itemID, randomYValue);
}
}
this.sumMean /= itemCount;
}
/**
* 随机梯度下降训练U,I,BU,BI,Y
*/
@Override
public void train() {
System.out.println("training U,I,BU,BI,Y...");
for (int step = 0; step < this.max_iter; step++) {
System.out.println("第" + (step + 1) + "次迭代...");
for (Entry<String, FastMap<String, Double>> entry : this.ratingData.entrySet()) {
double[] z_Item = new double[this.F];//此用户历史数据的隐偏好之和
for (String item : entry.getValue().keySet()) {
Double[] Y_Item = this.Y.get(item);
for (int f = 0; f < this.F; f++) {
z_Item[f] += Y_Item[f];
}
}
double itemLength_Sqrt = 1.0 / Math.sqrt(1.0 * entry.getValue().size());
double[] s = new double[this.F];
String userID = entry.getKey();
for (Entry<String, Double> itemRatingEntry : entry.getValue().entrySet()) {
String itemID = itemRatingEntry.getKey();
double pui = this.predictRating(userID, itemID);
double err = itemRatingEntry.getValue() - pui;//根据当前参数计算误差(真实值-预测值)
double bu = this.BU.get(userID);
bu += this.α * (err - this.λ * bu);
this.BU.put(userID, bu);
double bi = this.BI.get(itemID);
bi += this.α * (err - this.λ * bi);
this.BI.put(itemID, bi);
Double[] userValue = this.U.get(userID);
Double[] itemValue = this.I.get(itemID);
for (int i = 0; i < this.F; i++) {
s[i] += itemValue[i] * err;
double us = userValue[i];
double it = itemValue[i];
us += this.α * (err * it - this.λ * us);//后一项是来防止过拟合的正则化项,λ需要根据具体应用场景反复实验得到。损失函数的优化使用随机梯度下降算法
it += this.α * (err * (us + z_Item[i] * itemLength_Sqrt) - this.λ * it);
userValue[i] = us;
itemValue[i] = it;
}
}
for (String item : entry.getValue().keySet()) {
Double[] Y_Item = this.Y.get(item);
for (int f = 0; f < this.F; f++) {
double y = Y_Item[f];
y += this.α * (s[f] * itemLength_Sqrt - this.λ * y);
Y_Item[f] = y;
}
}
}
this.α *= 0.9;//每次迭代步长要逐步缩小
}
}
/**
* userID对itemID的评分
* U每行表示该用户对各个隐因子的偏好程度
* I每列表示该物品在各个隐患因子中的概率分布
* rating=(U+Y/sqrt(sum(ui)))*I+sumMean+BU+BI
*
* @param userID
* @param itemID
* @return
*/
@Override
public double predictRating(String userID, String itemID) {
double[] z_Item = new double[this.F];
Map<String, Double> ratingItem = this.ratingData.get(userID);
for (String item : ratingItem.keySet()) {
Double[] Y_Item = this.Y.get(item);
for (int f = 0; f < this.F; f++)
z_Item[f] += Y_Item[f];
}
double p = 0.0;
Double[] userValue = this.U.get(userID);
Double[] itemValue = this.I.get(itemID);
for (int i = 0; i < this.F; i++) {
double rating = userValue[i] + z_Item[i] / Math.sqrt(1.0 * ratingItem.size());
p += rating * itemValue[i];
}
p += this.BU.get(userID) + this.BI.get(itemID) + this.sumMean;
return p;
}
}