In [3]:
%matplotlib inline
import graphlab
import graphlab.aggregate as agg
import numpy as np
import matplotlib.pyplot as plt  
import string
from nltk import PorterStemmer
from nltk.tokenize import RegexpTokenizer
from nltk.corpus import stopwords

# Draw every matplotlib figure in this notebook with the 'ggplot' style sheet.
plt.style.use('ggplot')

# A single module-level PorterStemmer instance.
stemmer = PorterStemmer()

# Load the data and remove unneeded columns

In [6]:
# Columns dropped right after loading; none of them is referenced by the
# category topic-modelling cells that follow.
unneeded_columns = [
    'age', 'average', 'bayesaverage',
    'maxplayers', 'maxplaytime', 'minplayers', 'minplaytime',
    'owned', 'playingtime', 'stddev',
    'trading', 'wanting', 'wishing', 'yearpublished',
]

sf = graphlab.SFrame('./data/boardgames-no-comments')
# Kept as the cell's last expression so the trimmed frame is displayed.
sf.remove_columns(unneeded_columns)

categories,description,id,mechanics
"[Book, Miniatures, Modern Warfare, Wargame] ...",Micro Armour: The Game - Modern provides players ...,26717,"[Dice Rolling, Point to Point Movement, ..."
"[Economic, Negotiation, Sports] ...",Introducing the first ever MONOPOLY game that ...,26718,"[Auction/Bidding, Roll / Spin and Move, Set ..."
[Word Game],Loose Connections is billed as &quot;The Game ...,26719,[Roll / Spin and Move]
"[American West, Educational, Trivia] ...",Klondike is a trivia game that allows players to ...,26720,[Roll / Spin and Move]
[Card Game],Marjapussi (Berry bag) is a marriage-style trick- ...,26721,"[Partnerships, Trick- taking] ..."
"[Bluffing, Card Game, Dice, Exploration, ...",Werewolf's Castle is an unique combination ...,26722,"[Dice Rolling, Memory, Roll / Spin and Move] ..."
[Trivia],The game is simply a collection of 400 ...,26723,[]
"[American West, Card Game, Mythology, Novel- ...",Pecos Bill is a card game played with a slightly ...,26724,[]
[Dice],A German game based on one the most popular ...,26725,"[Acting, Roll / Spin and Move] ..."
"[American West, Movies / TV / Radio theme] ...","A game for 2-4 players, a tie-in to the TV series ...",26726,[]

name,subdomains,usersrated
Modern Micro Armour: The Game ...,[],7
Monopoly: My NHL,[],8
Loose Connections,[],2
Klondike: Trivia Game on the Yukon ...,[],1
Marjapussi,[],13
Werewolf's Castle,[],5
Foodie Craze,[],0
Pecos Bill,[],2
Der Goldene Schuss,[],0
Die Leute von der Shiloh- Ranch ...,[],0


# Convert the list-valued columns (categories, mechanics, subdomains) to dictionaries and remove all game expansions

In [7]:
# Re-encode each list-valued column as a {item: 1, ...} dictionary.
for column in ('categories', 'mechanics', 'subdomains'):
    # `column=column` pins the current column name inside the lambda.
    sf[column] = sf[[column]].apply(
        lambda row, column=column: {item: 1 for item in row[column]})

# Keep only the rows that are not tagged 'Expansion for Base-game'.
is_base_game = sf.apply(lambda row: 'Expansion for Base-game' not in row['categories'])
sf = sf[is_base_game]
len(sf)

69407

In [18]:
def _distinct_keys(frame, column):
    """Return the set of all keys found in the dict-valued `column` of `frame`.

    The column is unstacked on its own, the first row of the result is read,
    and its 'List of <column>' entry is unpacked into one set union.
    """
    stacked = frame[[column]].unstack(column)
    first_row = stacked[0]
    return set().union(*first_row['List of ' + column])


categories = _distinct_keys(sf, 'categories')
subdomains = _distinct_keys(sf, 'subdomains')
mechanics = _distinct_keys(sf, 'mechanics')

# Create a topic model based on categories

In [26]:
# Fit a 15-topic model, run for 500 iterations, on the per-game category
# dictionaries.
category_documents = sf['categories']
category_model = graphlab.topic_model.create(
    category_documents,
    num_topics=15,
    num_iterations=500,
)

PROGRESS: Learning a topic model
PROGRESS:        Number of documents     69407
PROGRESS:            Vocabulary size        83
PROGRESS:    Running collapsed Gibbs sampling
PROGRESS: +-----------+---------------+----------------+-----------------+
PROGRESS: | Iteration | Elapsed Time  | Tokens/Second  | Est. Perplexity |
PROGRESS: +-----------+---------------+----------------+-----------------+
PROGRESS: | 10        | 483.529ms     | 3.43165e+06    | 0               |
PROGRESS: | 20        | 928.739ms     | 3.55098e+06    | 0               |
PROGRESS: | 30        | 1.41s         | 2.5584e+06     | 0               |
PROGRESS: | 40        | 1.85s         | 3.71126e+06    | 0               |
PROGRESS: | 50        | 2.31s         | 3.64185e+06    | 0               |
PROGRESS: | 60        | 2.75s         | 3.71297e+06    | 0               |
PROGRESS: | 70        | 3.20s         | 3.68051e+06    | 0               |
PROGRESS: | 80        | 3.64s         | 3.75246e+06    | 0               |
PR

In [70]:
import graphlab.aggregate as agg

# Step 1: fetch the per-topic word table from the fitted model.
topic_words = category_model.get_topics()

# Step 2: fold the 'score' and 'word' columns of each row into one dict
# stored under 'data'.
word_records = topic_words.pack_columns(
    ['score', 'word'], dtype=dict, new_column_name='data')

# Step 3: one row per topic, with that topic's dicts concatenated under
# 'children'.
topics = word_records.groupby(
    'topic', operations={'children': agg.CONCAT('data')})

In [69]:
import json
from IPython.display import Javascript

# Serialise the grouped topics first, then splice the JSON text into the
# `{}` placeholder so the browser ends up with a global `window.topics`.
topics_json = json.dumps(list(topics))
Javascript("""
           window.topics={};
           """.format(topics_json))

<IPython.core.display.Javascript object>

In [112]:
from IPython.core.display import HTML
# Emit a <style> element with the CSS for the circle-packing chart:
#   * every circle: translucent blue fill with a 1px blue outline
#   * circles inside a `.leaf` element: solid orange fill
#   * text: 10px sans-serif
# NOTE(review): the `circle` and `text` selectors are not scoped to the chart
# container, so they presumably restyle any other SVG rendered on the same
# notebook page as well -- confirm before reusing this cell elsewhere.
HTML("""
<style>

circle {
  fill: rgb(31, 119, 180);
  fill-opacity: .25;
  stroke: rgb(31, 119, 180);
  stroke-width: 1px;
}

.leaf circle {
  fill: #ff7f0e;
  fill-opacity: 1;
}

text {
  font: 10px sans-serif;
}

</style>
""")

In [71]:
%%javascript
// Register the module name 'd3' with the notebook's RequireJS loader and map
// it to the d3 3.4.8 build on cdnjs, so that a later require(['d3'], ...)
// call can load the library. The URL is protocol-relative and has no '.js'
// suffix.
require.config({
    paths: {
        d3: '//cdnjs.cloudflare.com/ajax/libs/d3/3.4.8/d3.min'
    }
});

<IPython.core.display.Javascript object>

In [129]:
%%javascript


require(['d3'], function(d3){

  // Draws a circle-packing chart of the topic model: one enclosing circle per
  // topic, containing one circle per word sized by that word's score.
  // Expects `window.topics` to hold an array of
  // {topic: <id>, children: [{word: ..., score: ...}, ...]} objects and
  // `element` to be the output area this cell renders into.

  // Re-running the cell must not stack a second chart under the first one,
  // so drop any container left over from a previous run.
  $("#chart1").remove();

  var margin = {top: 20, right: 20, bottom: 30, left: 40};
  var width = 880 - margin.left - margin.right;
  var height = 800 - margin.top - margin.bottom;
  var diameter = height;
  var svgHeight = height + 50;

  // Create the container. Its height follows the SVG height; the previous
  // fixed 600px container was shorter than the 800px SVG placed inside it,
  // so the chart overflowed its own box.
  element.append("<div id='chart1'></div>");
  $("#chart1").width("960px");
  $("#chart1").height(svgHeight + "px");

  // Synthetic root node so that all topics share a single hierarchy.
  var root = { topic: 'all', score: 1.0, children: window.topics };

  // Circle areas are driven by the `score` of each node.
  var pack = d3.layout.pack()
      .size([diameter - 4, diameter - 4])
      .value(function(d) { return d.score; });

  // Label for a node: the topic id for topic nodes, the word for leaves, and
  // nothing for nodes whose packed value is below 0.1. Topic ids are numeric
  // and start at 0, so the id must be tested for presence rather than
  // truthiness -- `d.topic || d.word` produced an empty label for topic 0.
  function nodeLabel(d) {
    if (d.value < 0.1) {
      return '';
    }
    return d.topic != null ? d.topic : d.word;
  }

  var svg = d3.select("#chart1").append("svg")
      .style("position", "relative")
      .style("max-width", "960px")
      .attr("width", width + "px")
      .attr("height", svgHeight + "px")
    .append("g")
      .attr("transform", "translate(" + margin.left + "," + margin.top + ")");

  // One <g> per node in the packed hierarchy, moved to the node's centre.
  var node = svg.datum(root).selectAll(".node")
      .data(pack.nodes)
    .enter().append("g")
      .attr("class", function(d) { return d.children ? "node" : "leaf node"; })
      .attr("transform", function(d) { return "translate(" + d.x + "," + d.y + ")"; });

  // Tooltip on every node.
  node.append("title")
      .text(nodeLabel);

  node.append("circle")
      .attr("r", function(d) { return d.r; });

  // Visible text only on leaves (nodes without children).
  node.filter(function(d) { return !d.children; }).append("text")
      .attr("dy", ".3em")
      .style("text-anchor", "middle")
      .text(nodeLabel);

});

<IPython.core.display.Javascript object>

In [28]:
# Run the fitted category model over every game's category dictionary and
# store the prediction in a new 'category_topic' column.
predicted_topics = category_model.predict(sf['categories'])
sf['category_topic'] = predicted_topics

In [32]:
# Preview the first nine rows, now including the 'category_topic' column.
sf.head(n=9)

categories,description,id,mechanics
"{'Miniatures': 1, 'Book': 1, 'Modern Warfare': 1, ...",Micro Armour: The Game - Modern provides players ...,26717,"{'Point to Point Movement': 1, 'Dice ..."
"{'Negotiation': 1, 'Economic': 1, 'Sports': ...",Introducing the first ever MONOPOLY game that ...,26718,"{'Roll / Spin and Move': 1, 'Set Collection': 1, ..."
{'Word Game': 1},Loose Connections is billed as &quot;The Game ...,26719,{'Roll / Spin and Move': 1} ...
"{'Educational': 1, 'Trivia': 1, 'American ...",Klondike is a trivia game that allows players to ...,26720,{'Roll / Spin and Move': 1} ...
{'Card Game': 1},Marjapussi (Berry bag) is a marriage-style trick- ...,26721,"{'Trick-taking': 1, 'Partnerships': 1} ..."
"{'Humor': 1, 'Dice': 1, 'Fantasy': 1, ...",Werewolf's Castle is an unique combination ...,26722,"{'Roll / Spin and Move': 1, 'Dice Rolling': 1, ..."
{'Trivia': 1},The game is simply a collection of 400 ...,26723,{}
"{'Card Game': 1, 'Mythology': 1, 'Amer ...",Pecos Bill is a card game played with a slightly ...,26724,{}
{'Dice': 1},A German game based on one the most popular ...,26725,"{'Acting': 1, 'Roll / Spin and Move': 1} ..."

name,subdomains,usersrated,category_topic
Modern Micro Armour: The Game ...,{},7,1
Monopoly: My NHL,{},8,5
Loose Connections,{},2,3
Klondike: Trivia Game on the Yukon ...,{},1,3
Marjapussi,{},13,11
Werewolf's Castle,{},5,0
Foodie Craze,{},0,3
Pecos Bill,{},2,12
Der Goldene Schuss,{},0,11
