/
Keywords.pm
492 lines (396 loc) · 16.3 KB
/
Keywords.pm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
# This code was forked from the LiveJournal project owned and operated
# by Live Journal, Inc. The code has been modified and expanded by
# Dreamwidth Studios, LLC. These files were originally licensed under
# the terms of the license supplied by Live Journal, Inc, which can
# currently be found at:
#
# http://code.livejournal.org/trac/livejournal/browser/trunk/LICENSE-LiveJournal.txt
#
# In accordance with the original license, this code and all its
# modifications are provided under the GNU General Public License.
# A copy of that license can be found in the LICENSE file included as
# part of this distribution.
use strict;
use LJ::Global::Constants;
package LJ;
# this function takes an intid and returns the associated keyword/intcount.
sub get_interest {
my $intid = $_[0];
return undef unless $intid && $intid =~ /^\d+$/;
my ( $int, $intcount );
my $memkey = [$intid, "introw:$intid"];
my $cached = LJ::MemCache::get( $memkey );
# memcache row is of form [$intid, $int, $intcount];
if ( $cached && ref $cached eq 'ARRAY' ) {
( $intid, $int, $intcount ) = @$cached;
} else {
my $dbr = LJ::get_db_reader();
( $int ) =
$dbr->selectrow_array( "SELECT keyword FROM sitekeywords WHERE kwid=?",
undef, $intid );
die $dbr->errstr if $dbr->err;
( $intcount ) =
$dbr->selectrow_array( "SELECT intcount FROM interests WHERE intid=?",
undef, $intid );
die $dbr->errstr if $dbr->err;
LJ::MemCache::set( $memkey, [$intid, $int, $intcount], 3600*12 );
}
return wantarray() ? ($int, $intcount) : $int;
}
# name: LJ::get_sitekeyword_id
# des: Get the id for a global keyword.
# args: keyword, autovivify?
# des-keyword: A string keyword to get the id of.
# returns: Returns a kwid into [dbtable[sitekeywords]].
# If the keyword doesn't exist, it is automatically created for you.
# des-autovivify: If present and 1, automatically create keyword.
# If present and 0, do not automatically create the keyword.
# If not present, default behavior is the old
# style -- yes, do automatically create the keyword.
#
sub get_sitekeyword_id {
my ( $kw, $autovivify, %opts ) = @_;
$autovivify = 1 unless defined $autovivify;
# setup the keyword for use
return 0 unless defined $kw && $kw =~ /\S/;
$kw = LJ::text_trim( $kw, LJ::BMAX_SITEKEYWORD, LJ::CMAX_SITEKEYWORD );
$kw = LJ::utf8_lc( $kw ) unless $opts{allowmixedcase};
# get the keyword and insert it if necessary
my $dbr = LJ::get_db_reader();
my $kwid = $dbr->selectrow_array( "SELECT kwid FROM sitekeywords WHERE keyword=?", undef, $kw );
$kwid = defined $kwid ? $kwid + 0 : 0;
if ( $autovivify && ! $kwid ) {
# create a new keyword
$kwid = LJ::alloc_global_counter( 'K' );
return undef unless $kwid;
# attempt to insert the keyword
my $dbh = LJ::get_db_writer();
my $rv = $dbh->do( "INSERT IGNORE INTO sitekeywords (kwid, keyword) VALUES (?, ?)", undef, $kwid, $kw );
return undef if $dbh->err;
# at this point, if $rv is 0, the keyword is already there so try again
unless ( $rv ) {
$kwid = $dbh->selectrow_array( "SELECT kwid FROM sitekeywords WHERE keyword=?", undef, $kw );
$kwid = defined $kwid ? $kwid + 0 : 0;
return undef if $dbh->err;
}
}
return $kwid;
}
sub interest_string_to_list {
my $intstr = $_[0];
return unless defined $intstr;
$intstr =~ s/^\s+//; # strip leading space
$intstr =~ s/\s+$//; # strip trailing space
$intstr =~ s/\n/,/g; # newlines become commas
$intstr =~ s/\s+/ /g; # strip duplicate spaces from the interest
# final list is ,-sep
return grep { length } split (/\s*,\s*/, $intstr);
}
# This function takes a list of intids and returns the list of uids
# for accounts interested in ALL of the given interests.
#
# Args: arrayref of intids, hashref of opts
# Returns: array of uids
#
# Valid opts: nousers => 1, nocomms => 1
#
sub users_with_all_ints {
my ( $ints, $opts ) = @_;
$opts ||= {};
return unless defined $ints && ref $ints eq 'ARRAY';
my @intids = grep /^\d+$/, @$ints; # numeric only
return unless @intids;
my @tables;
push @tables, 'userinterests' unless $opts->{nousers};
push @tables, 'comminterests' unless $opts->{nocomms};
return unless @tables;
# allow restricting to user's circle
my $cids;
if ( $opts->{circle} && ( my $u = LJ::get_remote() ) ) {
my @circle = ( $u->circle_userids, $u->member_of_userids, $u->id );
$cids = join ',', @circle;
}
my $dbr = LJ::get_db_reader();
my $qs = join ',', map { '?' } @intids;
my @uids;
foreach ( @tables ) {
my $q = "SELECT userid FROM $_ WHERE intid IN ($qs)";
$q .= " AND userid IN ($cids)" if $cids;
my $uref = $dbr->selectall_arrayref( $q, undef, @intids );
die $dbr->errstr if $dbr->err;
# Count the number of times the uid appears.
# If it's the same as the number of interests, it has all of them.
my %ucount;
$ucount{ $_->[0] }++ foreach @$uref;
push @uids, grep { $ucount{$_} == scalar @intids } keys %ucount;
}
return @uids;
}
sub validate_interest_list {
my $interrors = ref $_[0] eq "ARRAY" ? shift : [];
my @ints = @_;
my @valid_ints = ();
foreach my $int (@ints) {
$int = lc($int); # FIXME: use utf8?
$int =~ s/^i like //; # *sigh*
next unless $int;
# Specific interest failures
my ($bytes,$chars) = LJ::text_length($int);
my $error_string = '';
if ($int =~ /[\<\>]/) {
$int = LJ::ehtml($int);
$error_string .= '.invalid';
} else {
$error_string .= '.bytes' if $bytes > LJ::BMAX_SITEKEYWORD;
$error_string .= '.chars' if $chars > LJ::CMAX_SITEKEYWORD;
}
if ($error_string) {
$error_string = "error.interest$error_string";
push @$interrors, [ $error_string,
{ int => $int,
bytes => $bytes,
bytes_max => LJ::BMAX_SITEKEYWORD,
chars => $chars,
chars_max => LJ::CMAX_SITEKEYWORD
}
];
next;
}
push @valid_ints, $int;
}
return @valid_ints;
}
# end package LJ functions; begin user object methods.
package LJ::User;
# $opts is optional, with keys:
# forceids => 1 : don't use memcache for loading the intids
# forceints => 1 : don't use memcache for loading the interest rows
# justids => 1 : return arrayref of intids only, not names/counts
# returns otherwise an arrayref of interest rows, sorted by interest name
#
sub get_interests {
my ( $u, $opts ) = @_;
$opts ||= {};
return undef unless LJ::isu( $u );
# first check request cache inside $u
if ( my $ints = $u->{_cache_interests} ) {
return [ map { $_->[0] } @$ints ] if $opts->{justids};
return $ints;
}
my $uid = $u->userid;
my $uitable = $u->is_community ? 'comminterests' : 'userinterests';
# load the ids
my $mk_ids = [$uid, "intids:$uid"];
my $ids;
$ids = LJ::MemCache::get($mk_ids) unless $opts->{forceids};
unless ( $ids && ref $ids eq "ARRAY" ) {
$ids = [];
my $dbh = LJ::get_db_writer();
my $sth = $dbh->prepare( "SELECT intid FROM $uitable WHERE userid=?" );
$sth->execute( $uid );
push @$ids, $_ while ($_) = $sth->fetchrow_array;
LJ::MemCache::add( $mk_ids, $ids, 3600*12 );
}
# FIXME: set a 'justids' $u cache key in this case, then only return that
# later if 'justids' is requested? probably not worth it.
return $ids if $opts->{justids};
# load interest rows
my %need;
$need{$_} = 1 foreach @$ids;
my @ret;
unless ( $opts->{forceints} ) {
if ( my $mc = LJ::MemCache::get_multi( map { [$_, "introw:$_"] } @$ids ) ) {
while ( my ($k, $v) = each %$mc ) {
next unless $k =~ /^introw:(\d+)/;
delete $need{$1};
push @ret, $v;
}
}
}
if ( %need ) {
my $ids = join( ",", map { $_ + 0 } keys %need );
my $dbr = LJ::get_db_reader();
my $ints = $dbr->selectall_hashref( "SELECT kwid, keyword FROM sitekeywords ".
"WHERE kwid IN ($ids)", 'kwid' );
my $counts = $dbr->selectall_hashref( "SELECT intid, intcount FROM interests ".
"WHERE intid IN ($ids)", 'intid' );
my $memc_store = 0;
foreach my $intid ( keys %$ints ) {
my $int = $ints->{$intid}->{keyword};
my $count = $counts->{$intid}->{intcount} + 0;
my $aref = [$intid, $int, $count];
# minimize latency... only store 25 into memcache at a time
# (too bad we don't have set_multi.... hmmmm)
if ( $memc_store++ < 25 ) {
# if the count is fairly high, keep item in memcache longer,
# since count's not so important.
my $expire = $count < 10 ? 3600*12 : 3600*48;
LJ::MemCache::add( [$intid, "introw:$intid"], $aref, $expire );
}
push @ret, $aref;
}
}
@ret = sort { $a->[1] cmp $b->[1] } @ret;
return $u->{_cache_interests} = \@ret;
}
sub interest_count {
my $u = shift;
return undef unless LJ::isu( $u );
# FIXME: fall back to SELECT COUNT(*) if not cached already?
return scalar @{ $u->get_interests( { justids => 1 } ) };
}
sub interest_list {
my $u = shift;
return undef unless LJ::isu( $u );
return map { $_->[1] } @{ $u->get_interests() };
}
sub interest_update {
my ( $u, %opts ) = @_;
return undef unless LJ::isu( $u );
my ( $add, $del ) = ( $opts{add}, $opts{del} );
return 1 unless $add || $del; # nothing to do
my $lock;
unless ( $opts{has_lock} ) {
while ( 1 ) {
$lock = LJ::locker()->trylock( 'interests:' . $u->userid );
last if $lock;
# pause for 0.0-0.3 seconds to shuffle things up. generally good behavior
# when you're contending for locks.
select undef, undef, undef, rand() * 0.3;
}
}
my %wanted_add = map { $_ => 1 } @$add;
my %wanted_del = map { $_ => 1 } @$del;
my %cur_ids = map { $_ => 1 } @{ $u->get_interests( { justids => 1, forceids => 1 } ) };
# track if we made changes to refresh memcache later.
my $did_mod = 0;
# community interests go in a different table than user interests,
# though the schemas are the same so we can run the same queries on them
my $uitable = $u->is_community ? 'comminterests' : 'userinterests';
my $uid = $u->userid;
my $dbh = LJ::get_db_writer() or die LJ::Lang::ml( "error.nodb" );
my @filtered_del = grep { delete $cur_ids{$_} && ! $wanted_add{$_} } @$del;
if ( $del && @filtered_del ) {
$did_mod = 1;
my $intid_in = join ',', map { $dbh->quote( $_ ) } @filtered_del;
$dbh->do( "DELETE FROM $uitable WHERE userid=$uid AND intid IN ($intid_in)" );
die $dbh->errstr if $dbh->err;
$dbh->do( "UPDATE interests SET intcount=intcount-1 WHERE intid IN ($intid_in)" );
die $dbh->errstr if $dbh->err;
}
my @filtered_add = grep { ! $cur_ids{$_}++ && ! $wanted_del{$_} } @$add;
if ( $add && @filtered_add ) {
# assume we've already checked maxinterests
$did_mod = 1;
my $intid_in = join ',', map { $dbh->quote( $_ ) } @filtered_add;
my $sqlp = join ',', map { "(?,?)" } @filtered_add;
$dbh->do( "REPLACE INTO $uitable (userid, intid) VALUES $sqlp",
undef, map { ( $uid, $_ ) } @filtered_add );
die $dbh->errstr if $dbh->err;
# set a zero intcount for any new ints
$dbh->do( "INSERT IGNORE INTO interests (intid, intcount) VALUES $sqlp",
undef, map { ( $_, 0 ) } @filtered_add );
die $dbh->errstr if $dbh->err;
# now do the increment for all ints
$dbh->do( "UPDATE interests SET intcount=intcount+1 WHERE intid IN ($intid_in)" );
die $dbh->errstr if $dbh->err;
}
# do migrations to clean up userinterests vs comminterests conflicts
# also clears memcache and object cache for intids if needed
$u->lazy_interests_cleanup( $did_mod );
return 1;
}
# return hashref with intname => intid
sub interests {
my ( $u, $opts ) = @_;
return undef unless LJ::isu( $u );
delete $opts->{justids} if $opts && ref $opts;
my $uints = $u->get_interests( $opts );
my %interests;
foreach my $int (@$uints) {
$interests{$int->[1]} = $int->[0]; # $interests{name} = intid
}
return \%interests;
}
sub lazy_interests_cleanup {
my ( $u, $expire ) = @_;
my $dbh = LJ::get_db_writer();
if ($u->is_community) {
$dbh->do("INSERT IGNORE INTO comminterests SELECT * FROM userinterests WHERE userid=?", undef, $u->id);
$dbh->do("DELETE FROM userinterests WHERE userid=?", undef, $u->id);
} else {
$dbh->do("INSERT IGNORE INTO userinterests SELECT * FROM comminterests WHERE userid=?", undef, $u->id);
$dbh->do("DELETE FROM comminterests WHERE userid=?", undef, $u->id);
}
# don't expire memcache unless requested
return 1 unless $expire;
LJ::memcache_kill( $u, "intids" );
$u->{_cache_interests} = undef;
return 1;
}
# des: Change a user's interests.
# des-new: listref of new interests
# returns: 1 on success, undef on failure
sub set_interests {
my ($u, $new) = @_;
$u = LJ::want_user( $u ) or return undef;
return undef unless ref $new eq 'ARRAY';
my $lock;
while ( 1 ) {
$lock = LJ::locker()->trylock( 'interests:' . $u->userid );
last if $lock;
# pause for 0.0-0.3 seconds to shuffle things up. generally good behavior
# when you're contending for locks.
select undef, undef, undef, rand() * 0.3;
}
my $old = $u->interests( { forceids => 1 } );
my %int_add = ();
my %int_del = %$old; # assume deleting everything, unless in @$new
my @valid_ints = LJ::validate_interest_list( @$new );
foreach my $int ( @valid_ints ) {
$int_add{$int} = 1 unless $old->{$int};
delete $int_del{$int};
}
### do we have new interests to add?
my @new_intids = (); ## existing IDs we'll add for this user
foreach my $int ( keys %int_add ) {
my $intid = LJ::get_sitekeyword_id( $int );
push @new_intids, $intid if $intid;
}
# Note this does NOT check against maxinterests, do that in the caller.
$u->interest_update( add => \@new_intids, del => [ values %int_del ], has_lock => 1 );
LJ::Hooks::run_hooks("set_interests", $u, \%int_del, \@new_intids); # interest => intid
return 1;
}
# arguments: hashref of submitted form and list of user's previous intids
# returns: hashref with number of ints added (or toomany) and deleted
sub sync_interests {
my ( $u, $args, @intids ) = @_;
warn "sync_interests: invalid arguments" and return undef
unless LJ::isu( $u ) and ref $args eq "HASH";
@intids = grep /^\d+$/, @intids; # numeric
my %uint = reverse %{ $u->interests }; # intid => interest
my $rv = {};
my ( @todel, @toadd );
foreach my $intid ( @intids ) {
next unless $intid > 0; # prevent adding zero or negative intid
push @todel, $intid if $uint{$intid} && ! $args->{"int_$intid"};
push @toadd, $intid if ! $uint{$intid} && $args->{"int_$intid"};
}
my $addcount = scalar( @toadd );
my $delcount = scalar( @todel );
if ( $addcount ) {
my $intcount = scalar( keys %uint ) + $addcount - $delcount;
my $maxinterests = $u->count_max_interests;
if ( $intcount > $maxinterests ) {
# let the user know they're over the limit
$rv->{toomany} = $maxinterests;
@toadd = (); # deletion still OK
}
}
$u->interest_update( add => \@toadd, del => \@todel );
$rv->{added} = $addcount;
$rv->{deleted} = $delcount;
return $rv;
}
1;