Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
branch: gh-pages
Fetching contributors…

Cannot retrieve contributors at this time

843 lines (711 sloc) 26.958 kB
<!DOCTYPE html>
<html lang='en'>
<head>
<!-- Encoding is declared first so it is known before the title text is parsed -->
<meta charset='utf-8' />
<title>Recommendation Engines with Redis and Ruby</title>
<!-- Fixed 1024px layout width for the deck; user zoom is deliberately left enabled -->
<meta content='width=1024' name='viewport' />
<!-- deck.js's core css -->
<link href="deck.js/core/deck.core.css" rel="stylesheet" type="text/css"/>
<!-- deck.js extension CSS files -->
<link href="deck.js/extensions/codemirror/deck.codemirror.css" rel="stylesheet" type="text/css"/>
<link href="deck.js/extensions/goto/deck.goto.css" rel="stylesheet" type="text/css"/>
<link href="deck.js/extensions/hash/deck.hash.css" rel="stylesheet" type="text/css"/>
<link href="deck.js/extensions/menu/deck.menu.css" rel="stylesheet" type="text/css"/>
<link href="deck.js/extensions/navigation/deck.navigation.css" rel="stylesheet" type="text/css"/>
<link href="deck.js/extensions/scale/deck.scale.css" rel="stylesheet" type="text/css"/>
<link href="deck.js/extensions/status/deck.status.css" rel="stylesheet" type="text/css"/>
<!-- all css in the css dir: Keydown CSS, your custom CSS, and themes from deck.js -->
<link href="css/keydown.css" rel="stylesheet" type="text/css"/>
<link href="css/default.css" rel="stylesheet" type="text/css"/>
<link href="css/horizontal-slide.css" rel="stylesheet" type="text/css"/>
<link href="css/rerr.css" rel="stylesheet" type="text/css"/>
<link href="css/swiss.css" rel="stylesheet" type="text/css"/>
<!-- Modernizr (provided for legacy browsers) -->
<script src="deck.js/support/modernizr.custom.js" type="text/javascript"></script>
</head>
<body class='deck-container keydown'>
<section class='middle slide'>
<div class='spacer top'></div>
<div class='content'>
<h1>Evan Light</h1>
<h2><a href="http://twitter.com/elight">@elight</a></h2>
<h2><a href="mailto:evan.light@rackspace.com">evan.light@rackspace.com</a></h2>
</div>
<div class='spacer bottom'></div>
</section>
<section class='slide'>
<div class='spacer top'></div>
<div class='content'>
<h1>Recommendation Engines</h1>
<h4>&nbsp;</h4>
<h1>with Redis and Ruby</h1>
</div>
<div class='spacer bottom'></div>
</section>
<section class='full-background middle ruby_dcamp slide'>
<div class='spacer top'></div>
<div class='content'>
</div>
<div class='spacer bottom'></div>
</section>
<section class='full-background rackspace slide'>
<div class='spacer top'></div>
<div class='content'>
</div>
<div class='spacer bottom'></div>
</section>
<section class='slide'>
<div class='spacer top'></div>
<div class='content'>
<h1>Agenda</h1>
<h2>Context</h2>
<h2>Solution</h2>
<h2>Redis-related tangents</h2>
<h2>Lessons learned</h2>
</div>
<div class='spacer bottom'></div>
</section>
<section class='slide top-left'>
<div class='spacer top'></div>
<div class='content'>
<h1>Context</h1>
<h2>&nbsp;</h2>
<h2>Soccer social network</h2>
<h2>Display popular &amp; relevant Posts</h2>
<h2>Near real-time</h2>
</div>
<div class='spacer bottom'></div>
</section>
<section class='slide top-left'>
<div class='spacer top'></div>
<div class='content'>
<h1>What the client asked for</h1>
<h2>A recommendation engine!</h2>
</div>
<div class='spacer bottom'></div>
</section>
<section class='slide'>
<div class='spacer top'></div>
<div class='content'>
<h1>Suggests possible choices for the user</h1>
<h5>&nbsp;</h5>
<h1>based on previous choices by the user.</h1>
</div>
<div class='spacer bottom'></div>
</section>
<section class='slide'>
<div class='spacer top'></div>
<div class='content'>
<h1>Approximation</h1>
</div>
<div class='spacer bottom'></div>
</section>
<section class='full-background i_have_no_idea_what_im_doing slide top-left'>
<div class='spacer top'></div>
<div class='content'>
<h1>Statistics and I</h1>
</div>
<div class='spacer bottom'></div>
</section>
<section class='slide'>
<div class='spacer top'></div>
<div class='content'>
<h1>Why not statistical methods?</h1>
</div>
<div class='spacer bottom'></div>
</section>
<section class='slide'>
<div class='spacer top'></div>
<div class='content'>
<h1>Why Ruby?</h1>
</div>
<div class='spacer bottom'></div>
</section>
<section class='slide'>
<div class='spacer top'></div>
<div class='content'>
<h1>The nouns</h1>
<h2>User</h2>
<h2>Post</h2>
<h2>Comment</h2>
<h2><strong>Team</strong></h2>
<h2><strong>Player</strong></h2>
</div>
<div class='spacer bottom'></div>
</section>
<section class='slide'>
<div class='spacer top'></div>
<div class='content'>
<h1>The verbs</h1>
</div>
<div class='spacer bottom'></div>
</section>
<section class='slide'>
<div class='spacer top'></div>
<div class='content'>
<h1>Submitting a Post</h1>
<h2>Polymorphic tags</h2>
<h3>Teams</h3>
<h3>Players</h3>
</div>
<div class='spacer bottom'></div>
</section>
<section class='slide'>
<div class='spacer top'></div>
<div class='content'>
<h1>Commenting on a Post</h1>
<h2>Tag (by citation) other Users</h2>
</div>
<div class='spacer bottom'></div>
</section>
<section class='slide'>
<div class='spacer top'></div>
<div class='content'>
<h1>Other verbs</h1>
<h2>Favoriting a Team or Player</h2>
<h2>Liking a Post</h2>
</div>
<div class='spacer bottom'></div>
</section>
<section class='domain full-background slide top-left'>
<div class='spacer top'></div>
<div class='content'>
<h1>Given this...</h1>
</div>
<div class='spacer bottom'></div>
</section>
<section class='full-background slide top-left user-tag'>
<div class='spacer top'></div>
<div class='content'>
<h1>... and maybe this...</h1>
</div>
<div class='spacer bottom'></div>
</section>
<section class='full-background scored-user-posts slide top-left'>
<div class='spacer top'></div>
<div class='content'>
<h1>... we want <em>this</em>!</h1>
</div>
<div class='spacer bottom'></div>
</section>
<section class='full-background slide top-left user-tag'>
<div class='spacer top'></div>
<div class='content'>
<h1>That "maybe"</h1>
</div>
<div class='spacer bottom'></div>
</section>
<section class='slide'>
<div class='spacer top'></div>
<div class='content'>
<h1>We start with ActiveRecord...</h1>
<h4>&nbsp;</h4>
<!-- alt text is inferred from the file name (troll.jpg); adjust it if the picture shows something else -->
<h2><img src="images/troll.jpg" alt="Troll face"/></h2>
</div>
<div class='spacer bottom'></div>
</section>
<section class='slide'>
<div class='spacer top'></div>
<div class='content'>
<h1>Ok, ActiveRecord::Observer</h1>
</div>
<div class='spacer bottom'></div>
</section>
<section class='slide'>
<div class='spacer top'></div>
<div class='content'>
<h1>Callbacks</h1>
<h2>Post / Comment / Favorite / Like</h2>
<h3>create/destroy</h3>
</div>
<div class='spacer bottom'></div>
</section>
<section class='slide'>
<div class='spacer top'></div>
<div class='content'>
<h1>We want two kinds of Posts</h1>
<h2>Popular</h2>
<h2>Relevant</h2>
</div>
<div class='spacer bottom'></div>
</section>
<section class='slide'>
<div class='spacer top'></div>
<div class='content'>
<h1>Popularity</h1>
<h2>Comments</h2>
<h2>Likes</h2>
</div>
<div class='spacer bottom'></div>
</section>
<section class='slide'>
<div class='spacer top'></div>
<div class='content'>
<h1>Measuring User interest</h1>
<h2><strong>Favoriting</strong> DC United</h2>
<h2><strong>Submitting</strong> a Post tagged with DC United</h2>
<h2><strong>Liking</strong> ...</h2>
<h2><strong>Commenting</strong> on ...</h2>
<h2>Being <strong>mentioned</strong> in a Comment on ...</h2>
</div>
<div class='spacer bottom'></div>
</section>
<section class='dc_united full-background slide top-left'>
<div class='spacer top'></div>
<div class='content'>
<h1>Ok, so I used to follow DC United...</h1>
</div>
<div class='spacer bottom'></div>
</section>
<section class='slide'>
<div class='spacer top'></div>
<div class='content'>
<h1>Relevance (cont'd)</h1>
<h2>Given any individual expression of interest</h2>
<h2>For each associated Tag</h2>
<h2>For each User interested in Tag</h2>
<h2>(Re)score the User's interest in Tag</h2>
</div>
<div class='spacer bottom'></div>
</section>
<section class='bottom-left full-background im_so_sorry slide'>
<div class='spacer top'></div>
<div class='content'>
<h1><span style="font-size: 1.5em">O(n<sup>2</sup>)</span></h1>
</div>
<div class='spacer bottom'></div>
</section>
<section class='full-background kobayashi-maru slide'>
<div class='spacer top'></div>
<div class='content'>
</div>
<div class='spacer bottom'></div>
</section>
<section class='slide'>
<div class='spacer top'></div>
<div class='content'>
<h1><a href="http://github.com/resque/resque">Resque</a></h1>
</div>
<div class='spacer bottom'></div>
</section>
<section class='slide'>
<div class='spacer top'></div>
<div class='content'>
<h1><a href="http://redis.io">Redis</a> in 120 seconds or Less</h1>
<h4>(Don't time me)</h4>
<h2>Key/value store</h2>
<h2>In memory</h2>
<h2>Few but adequate persistence options</h2>
</div>
<div class='spacer bottom'></div>
</section>
<section class='slide'>
<div class='spacer top'></div>
<div class='content'>
<h1>Redis (cont'd)</h1>
<h2>"value" can itself be a data structure:</h2>
<h3>List</h3>
<h3>Hash</h3>
<h3>Set</h3>
<h3>Sorted set</h3>
</div>
<div class='spacer bottom'></div>
</section>
<section class='slide'>
<div class='spacer top'></div>
<div class='content'>
<h1>Redis (cont'd)</h1>
<h2>Time to Live (TTLs)</h2>
</div>
<div class='spacer bottom'></div>
</section>
<section class='slide'>
<div class='spacer top'></div>
<div class='content'>
<h1>AR::Observer push events to Resque</h1>
</div>
<div class='spacer bottom'></div>
</section>
<section class='slide'>
<div class='spacer top'></div>
<div class='content'>
<h1>Example event</h1>
<h1>&nbsp;</h1>
<!-- Sample event payload shown as a code listing; the &#x000A; entities encode the line breaks inside the textarea. -->
<!-- NOTE(review): class='code' and mode='javascript' are presumably consumed by the deck.js codemirror extension (verify). display='none' is not a standard HTML attribute and has no effect by itself; confirm whether an inline style was intended. -->
<p><textarea class='code' display='none' mode='javascript'>{&#x000A; event_type: "post",&#x000A; action: "create",&#x000A; id: post_id_here,&#x000A; user_id: poster_id_here,&#x000A; tags: ["Player1", "Team1"]&#x000A;}</textarea>
</p>
</div>
<div class='spacer bottom'></div>
</section>
<section class='full-background load slide top-left'>
<div class='spacer top'></div>
<div class='content'>
</div>
<div class='spacer bottom'></div>
</section>
<section class='calculator_workflow full-background slide'>
<div class='spacer top'></div>
<div class='content'>
</div>
<div class='spacer bottom'></div>
</section>
<section class='slide'>
<div class='spacer top'></div>
<div class='content'>
<h1><em>Calculator</em></h1>
<h2>Resque worker</h2>
<h2>Strategy</h2>
<h2>Handles all persistence</h2>
</div>
<div class='spacer bottom'></div>
</section>
<section class='slide'>
<div class='spacer top'></div>
<div class='content'>
<h1>Persistence</h1>
<h2>Hide Redis behind abstraction</h2>
<h2>Duck-typeable</h2>
</div>
<div class='spacer bottom'></div>
</section>
<section class='slide'>
<div class='spacer top'></div>
<div class='content'>
<h1>Trendingness Calculator</h1>
<h2>(Re)compute individual Post's popularity</h2>
<h3>Input</h3>
<h4>Event</h4>
<h3>Output</h3>
<h4>New score for Post</h4>
</div>
<div class='spacer bottom'></div>
</section>
<section class='slide'>
<div class='spacer top'></div>
<div class='content'>
<h1>Post trendingness in Redis</h1>
<h2>Stored as key-value pairs</h2>
<h2>3 day TTL when set</h2>
</div>
<div class='spacer bottom'></div>
</section>
<section class='slide'>
<div class='spacer top'></div>
<div class='content'>
<h1><em>User Interest Calculator</em></h1>
<h2>(Re)compute User's interest in Tags</h2>
<h3>Input</h3>
<h4>Event</h4>
<h3>Output</h3>
<h4>Updated User interest per Tag</h4>
</div>
<div class='spacer bottom'></div>
</section>
<section class='slide'>
<div class='spacer top'></div>
<div class='content'>
<h1>User interest in Redis</h1>
<h2>Hash per User</h2>
<h2>Key: User ID</h2>
<h2>Field names: Tags</h2>
<h2>Values: scalar representation of User's interest by Tag</h2>
<h2>Never expire by intent</h2>
</div>
<div class='spacer bottom'></div>
</section>
<section class='bottom-left full-background recompute_all_the_users slide'>
<div class='spacer top'></div>
<div class='content'>
<h1>Post Score Calculator</h1>
</div>
<div class='spacer bottom'></div>
</section>
<section class='slide'>
<div class='spacer top'></div>
<div class='content'>
<h1><em>Post Score Calculator</em></h1>
<h2>Recompute User interests for all affected users</h2>
<h2>Input</h2>
<h3>Event</h3>
<h3>User's interests per Tag from User Interest Calculator</h3>
<h2>O(n<sup>2</sup>)</h2>
</div>
<div class='spacer bottom'></div>
</section>
<section class='slide'>
<div class='spacer top'></div>
<div class='content'>
<h1>But <a href="http://en.wikipedia.org/wiki/Inverted_index">inverted indices</a> helped</h1>
<h2>Index content to location of content</h2>
<h2>Redis Set of Post IDs keyed by Tag</h2>
<h2>Redis Set of interested User IDs keyed by Tag</h2>
</div>
<div class='spacer bottom'></div>
</section>
<section class='slide'>
<div class='spacer top'></div>
<div class='content'>
<h1>User Post scores in Redis</h1>
<h2>Hash</h2>
<h2>Key: User ID</h2>
<h2>Field: Post ID</h2>
<h2>Value: User interest score</h2>
</div>
<div class='spacer bottom'></div>
</section>
<section class='calculator_workflow full-background slide top-left'>
<div class='spacer top'></div>
<div class='content'>
<h1>What is this?</h1>
</div>
<div class='spacer bottom'></div>
</section>
<section class='full-background series_of_queues slide'>
<div class='spacer top'></div>
<div class='content'>
</div>
<div class='spacer bottom'></div>
</section>
<section class='slide'>
<div class='spacer top'></div>
<div class='content'>
<h1>Design considerations</h1>
</div>
<div class='spacer bottom'></div>
</section>
<section class='slide'>
<div class='spacer top'></div>
<div class='content'>
<h2>Aggressively optimized to reduce DB queries</h2>
</div>
<div class='spacer bottom'></div>
</section>
<section class='slide'>
<div class='spacer top'></div>
<div class='content'>
<h2>Cache interesting primary keys in Redis Sets</h2>
<h2>Reduces need for RDBMS queries</h2>
</div>
<div class='spacer bottom'></div>
</section>
<section class='slide'>
<div class='spacer top'></div>
<div class='content'>
<h2>For example</h2>
<h3>User IDs interested in a particular Tag</h3>
<h3>Post IDs related to a particular Tag</h3>
</div>
<div class='spacer bottom'></div>
</section>
<section class='slide'>
<div class='spacer top'></div>
<div class='content'>
<h2>Break up <em>Calculator</em></h2>
<h2>Wrap each <em>X Calculator</em> in a Resque worker</h2>
<h2>Each <em>X Calculator</em> becomes Resque worker</h2>
<h2>Allows <em>X Calculator</em> to scale independently</h2>
</div>
<div class='spacer bottom'></div>
</section>
<section class='slide'>
<div class='spacer top'></div>
<div class='content'>
<h1>Ideal?</h1>
<h2>Not really</h2>
</div>
<div class='spacer bottom'></div>
</section>
<section class='slide'>
<div class='spacer top'></div>
<div class='content'>
<h1>Ideal enough?</h1>
<h2>It ran successfully in production!</h2>
<h2>Customer was happy!</h2>
</div>
<div class='spacer bottom'></div>
</section>
<section class='slide'>
<div class='spacer top'></div>
<div class='content'>
<h1>Would it scale?</h1>
<h2>Limited by Redis (memory)</h2>
<h2>Limited by O(n<sup>2</sup>)</h2>
<h2>Also, pruning</h2>
<h4>(more on that later)</h4>
</div>
<div class='spacer bottom'></div>
</section>
<section class='slide'>
<div class='spacer top'></div>
<div class='content'>
<h1>Emergent behavior</h1>
<h2>Everything is "taggable" in app</h2>
<h2>Most AR object lifecycle queued as events</h2>
<h2>Engine scored <strong>every</strong> Tag relationship...</h2>
<h2>... including how interested I am in other people!</h2>
</div>
<div class='spacer bottom'></div>
</section>
<section class='slide'>
<div class='spacer top'></div>
<div class='content'>
<h1>Side business as dating site? ;-)</h1>
</div>
<div class='spacer bottom'></div>
</section>
<section class='slide'>
<div class='spacer top'></div>
<div class='content'>
<h1>Lesson learned</h1>
<h2>Statistical methods</h2>
<h3>Because O(n<sup>2</sup>) burnses us preciousssss</h3>
</div>
<div class='spacer bottom'></div>
</section>
<section class='slide'>
<div class='spacer top'></div>
<div class='content'>
<h1>Lesson learned</h1>
<h2>Prefer straight K-V over Hash</h2>
<h3>No per field TTL on Hash</h3>
<h3>Replace with convoluted key names for namespacing</h3>
</div>
<div class='spacer bottom'></div>
</section>
<section class='calculator_workflow full-background slide top-left'>
<div class='spacer top'></div>
<div class='content'>
<h1>Extract smaller workers</h1>
</div>
<div class='spacer bottom'></div>
</section>
<section class='slide'>
<div class='spacer top'></div>
<div class='content'>
<h1>Lesson learned</h1>
<h2>Less chattiness with Redis</h2>
<h3>Redis Lua scripting</h3>
<h4>Wasn't an option ~2 years ago</h4>
<h4>&nbsp;</h4>
<h3>Pipelining</h3>
</div>
<div class='spacer bottom'></div>
</section>
<section class='slide'>
<div class='spacer top'></div>
<div class='content'>
<h1>Lesson learned</h1>
<h2>Pruning</h2>
<h3>Set of Post IDs keyed by Tag</h3>
</div>
<div class='spacer bottom'></div>
</section>
<section class='slide'>
<div class='spacer top'></div>
<div class='content'>
<h1>Lesson learned</h1>
<h2>Runtime faster than Ruby</h2>
<h2>&nbsp;</h2>
<!-- alt text is inferred from the file name (troll.jpg); adjust it if the picture shows something else -->
<h1><img src="images/troll.jpg" alt="Troll face"/></h1>
</div>
<div class='spacer bottom'></div>
</section>
<section class='slide'>
<div class='spacer top'></div>
<div class='content'>
<h1>Conclusion</h1>
</div>
<div class='spacer bottom'></div>
</section>
<section class='slide'>
<div class='spacer top'></div>
<div class='content'>
<h1>Redis is terrific</h1>
<h3>As long as you have enough RAM</h3>
</div>
<div class='spacer bottom'></div>
</section>
<section class='slide'>
<div class='spacer top'></div>
<div class='content'>
<h1>Inverted indices</h1>
<h2>Powerful in K-V data stores</h2>
</div>
<div class='spacer bottom'></div>
</section>
<section class='slide'>
<div class='spacer top'></div>
<div class='content'>
<h1>Make constraints work for you</h1>
</div>
<div class='spacer bottom'></div>
</section>
<section class='slide'>
<div class='spacer top'></div>
<div class='content'>
<h1>Monolithic, but easily divisible, worker</h1>
</div>
<div class='spacer bottom'></div>
</section>
<section class='middle slide'>
<div class='spacer top'></div>
<div class='content'>
<h1>Evan Light</h1>
<h2><a href="http://twitter.com/elight">@elight</a></h2>
<h2><a href="mailto:evan.light@rackspace.com">evan.light@rackspace.com</a></h2>
</div>
<div class='spacer bottom'></div>
</section>
<!-- deck.js navigation extension -->
<!-- The visible content of each link is only a glyph, so aria-label supplies a readable name for assistive technology. -->
<!-- The elements stay anchors with their original href values; only the accessible name is added. -->
<a class='deck-prev-link' href='#' title='Previous' aria-label='Previous slide'>&#8592;</a>
<a class='deck-next-link' href='#' title='Next' aria-label='Next slide'>&#8594;</a>
<!-- deck.js hash extension -->
<a class='deck-permalink' href='.' title='Permalink to this slide' aria-label='Permalink to this slide'>#</a>
<!-- deck.js status extension -->
<!-- The two spans are empty in the markup; presumably the status extension fills in current / total slide numbers (verify). -->
<p class='deck-status'>
<span class='deck-status-current'></span>
/
<span class='deck-status-total'></span>
</p>
<!-- jQuery & deck.js -->
<!-- None of the script tags below use async or defer, so they run in document order. -->
<!-- The inline initializer that follows this list calls $ and $.deck, so these includes must stay ahead of it. -->
<script src="deck.js/support/jquery.1.6.4.min.js" type="text/javascript"></script>
<script src="deck.js/core/deck.core.js" type="text/javascript"></script>
<!-- deck.js extension JS files -->
<!-- NOTE(review): the only mode referenced by markup in this file is mode='javascript'; the other CodeMirror modes look unused here. Confirm against js/rerr.js before pruning any of them. -->
<script src="deck.js/extensions/codemirror/codemirror.js" type="text/javascript"></script>
<script src="deck.js/extensions/codemirror/deck.codemirror.js" type="text/javascript"></script>
<script src="deck.js/extensions/codemirror/mode/clike/clike.js" type="text/javascript"></script>
<script src="deck.js/extensions/codemirror/mode/clojure/clojure.js" type="text/javascript"></script>
<script src="deck.js/extensions/codemirror/mode/coffeescript/coffeescript.js" type="text/javascript"></script>
<script src="deck.js/extensions/codemirror/mode/css/css.js" type="text/javascript"></script>
<script src="deck.js/extensions/codemirror/mode/diff/diff.js" type="text/javascript"></script>
<script src="deck.js/extensions/codemirror/mode/haskell/haskell.js" type="text/javascript"></script>
<script src="deck.js/extensions/codemirror/mode/htmlmixed/htmlmixed.js" type="text/javascript"></script>
<script src="deck.js/extensions/codemirror/mode/javascript/javascript.js" type="text/javascript"></script>
<script src="deck.js/extensions/codemirror/mode/lua/lua.js" type="text/javascript"></script>
<script src="deck.js/extensions/codemirror/mode/php/php.js" type="text/javascript"></script>
<script src="deck.js/extensions/codemirror/mode/plsql/plsql.js" type="text/javascript"></script>
<script src="deck.js/extensions/codemirror/mode/python/python.js" type="text/javascript"></script>
<script src="deck.js/extensions/codemirror/mode/r/r.js" type="text/javascript"></script>
<script src="deck.js/extensions/codemirror/mode/rst/rst.js" type="text/javascript"></script>
<script src="deck.js/extensions/codemirror/mode/ruby/ruby.js" type="text/javascript"></script>
<script src="deck.js/extensions/codemirror/mode/scheme/scheme.js" type="text/javascript"></script>
<script src="deck.js/extensions/codemirror/mode/smalltalk/smalltalk.js" type="text/javascript"></script>
<script src="deck.js/extensions/codemirror/mode/sparql/sparql.js" type="text/javascript"></script>
<script src="deck.js/extensions/codemirror/mode/stex/stex.js" type="text/javascript"></script>
<script src="deck.js/extensions/codemirror/mode/velocity/velocity.js" type="text/javascript"></script>
<script src="deck.js/extensions/codemirror/mode/xml/xml.js" type="text/javascript"></script>
<script src="deck.js/extensions/codemirror/mode/xmlpure/xmlpure.js" type="text/javascript"></script>
<script src="deck.js/extensions/codemirror/mode/yaml/yaml.js" type="text/javascript"></script>
<script src="deck.js/extensions/goto/deck.goto.js" type="text/javascript"></script>
<script src="deck.js/extensions/hash/deck.hash.js" type="text/javascript"></script>
<script src="deck.js/extensions/menu/deck.menu.js" type="text/javascript"></script>
<script src="deck.js/extensions/navigation/deck.navigation.js" type="text/javascript"></script>
<script src="deck.js/extensions/scale/deck.scale.js" type="text/javascript"></script>
<script src="deck.js/extensions/status/deck.status.js" type="text/javascript"></script>
<!-- your custom JS here, including call to initialize deck.js-codemirror -->
<script src="js/rerr.js" type="text/javascript"></script>
<!-- Start the deck once the DOM is ready, passing the '.slide' selector to $.deck. -->
<script type='text/javascript'>
$(document).ready(function () {
  $.deck('.slide');
});
</script>
</body>
</html>
Jump to Line
Something went wrong with that request. Please try again.