## Comparing Association Rules to Word2Vec models

### Required packages

Use pip to install:
* The fim package by Christian Borgelt: http://www.borgelt.net/fpm.html
* The gensim package by Radim Řehůřek: http://radimrehurek.com/gensim/models/word2vec.html

In [4]:
import  fim, gensim, gensim.corpora

In [5]:
from fim import arules
from gensim.corpora import Dictionary

### Inputs & Pre-process
* Vectors of words
* Dictionary and word index

In [6]:
# Toy corpus: ten documents, each given as a list of keyword tokens.
# Every document contains 'Logic'; the 4th and 8th documents are identical.
sentences = [
    [
        'Century', 'Developed', 'Easily', 'Field', 'Formalize',
        'Logic', 'Mathematical', 'Mathematics', 'Reasoning', 'Symbols',
    ],
    [
        'Dilemma', 'Disambiguation', 'Good', 'Group', 'Logic',
        'Mathematics', 'Meanings', 'Problems', 'Solutions', 'Song',
    ],
    [
        'Called', 'Diagram', 'Disjunction', 'Displaystyle', 'Exclusive',
        'False', 'Inclusive', 'Inputs', 'Logic', 'Lor',
        'Operation', 'Scriptstyle', 'Takes', 'True', 'Venn',
    ],
    [
        'ASCII', 'ATL', 'Cote', 'False', 'Flips',
        'Input', 'Logic', 'Logical', 'Negation', 'Operation',
        'Output', 'Returns', 'Takes', 'True',
    ],
    [
        'Block', 'Called', 'Component', 'Configurable', 'Configured',
        'FPGA', 'Gates', 'Hold', 'Interconnected', 'Logic',
        'Number', 'Simplified',
    ],
    [
        'Constructed', 'Engineered', 'Experiment', 'Experimental', 'Language',
        'Languages', 'Linguistics', 'Logic', 'Logical', 'Philosophical',
        'Philosophy', 'Types',
    ],
    [
        'Attempts', 'Deducted', 'Deduction', 'Deductive', 'Expenditure',
        'Give', 'Logic', 'Logical', 'Model', 'Natural',
        'Naturally', 'Occurs', 'Profits', 'Reasoning', 'Taxation',
    ],
    [
        'ASCII', 'ATL', 'Cote', 'False', 'Flips',
        'Input', 'Logic', 'Logical', 'Negation', 'Operation',
        'Output', 'Returns', 'Takes', 'True',
    ],
    [
        'Commonly', 'Discourse', 'Element', 'Existence', 'Logic',
        'Mirrored', 'Proposition', 'Quantifier', 'True', 'Universe',
        'Written',
    ],
    [
        'Aristotle', 'Grammar', 'Implication', 'Logic', 'Man',
        'Men', 'Mortal', 'Suggest', 'Syllogism', 'True',
        'Wednesday',
    ],
]

# Build the word <-> integer-id vocabulary from the whole corpus in one call;
# the Dictionary constructor registers every document, so no manual
# doc2bow(..., allow_update=True) loop (whose return value was discarded) is needed.
dictionary = Dictionary(sentences)
# word -> id lookup. `token2id` already holds this mapping; it is copied so that
# `inv_map` stays an independent plain dict, as before.
inv_map = dict(dictionary.token2id)

In [7]:
# Encode every sentence as a list of integer word ids; this id-encoded corpus
# is what gets handed to the association-rule miner.
fimdata = [
    [inv_map.get(token) for token in sentence]
    for sentence in sentences
]

## Parameters
* `min_examples`: minimum number of supporting examples, applied to both models
* `min_setsize`: minimum number of words in an association rule (shorter rules are more numerous)
* See the documentation of the imported packages for further options

In [8]:
# Evidence threshold shared by both models: passed to Word2Vec as `min_count`
# and to the rule miner as the (absolute) minimum support.
min_examples = 3
# Smallest rule size, in words, that the rule miner is asked to report.
min_setsize = 4

## Make models

In [9]:
# Train word vectors on the corpus, dropping words that occur fewer than
# `min_examples` times.
# `seed` and `workers=1` are pinned so that Restart & Run All gives repeatable
# neighbours: multi-threaded training is not deterministic. (On Python 3 a
# fixed PYTHONHASHSEED is additionally needed for fully identical runs.)
# NOTE: `size` is the pre-4.0 gensim spelling; gensim >= 4.0 names this
# parameter `vector_size`.
w2v_model = gensim.models.Word2Vec(
    sentences,
    min_count=min_examples,
    size=1000,
    seed=1,
    workers=1,
)

In [10]:
# Mine association rules from the id-encoded sentences.
# Argument meanings per the fim (PyFIM) documentation:
#   supp   - negative value = absolute support count (here: >= min_examples)
#   zmin   - minimum number of items in a reported rule
#   report - 'a' adds the absolute support to each reported rule
association_rules = arules(
    fimdata,
    supp=-min_examples,
    zmin=min_setsize,
    report='a',
)

## Compare results

In [11]:
# Similarity queries belong to the model's KeyedVectors (`.wv`); calling
# most_similar on the Word2Vec model itself is deprecated in gensim and was
# removed in 4.0. Fall back to the model for releases that predate `.wv`.
word_vectors = getattr(w2v_model, "wv", w2v_model)

# Each mined rule unpacks as (conclusion id, premise ids, support count).
for conclusion, premise, count in association_rules:
    # Translate the rule's premise from word ids back to words.
    cue = [dictionary[word_id] for word_id in premise]
    # Ask Word2Vec for the two words most similar to the cue words together.
    matches = word_vectors.most_similar(positive=cue, topn=2)
    # Keep only the words; the similarity scores are not shown.
    response = [word for word, _similarity in matches]
    print(cue,"AR->",dictionary[conclusion],"<-W2V",response)

['Operation', 'False', 'True'] AR-> Logic <-W2V ['Logic', 'Takes']
['Operation', 'False', 'Logic'] AR-> True <-W2V ['True', 'Takes']
['Operation', 'True', 'Logic'] AR-> False <-W2V ['False', 'Takes']
['False', 'True', 'Logic'] AR-> Operation <-W2V ['Takes', 'Operation']
['Takes', 'False', 'True'] AR-> Logic <-W2V ['Logic', 'Operation']
['Takes', 'False', 'Logic'] AR-> True <-W2V ['True', 'Operation']
['Takes', 'True', 'Logic'] AR-> False <-W2V ['False', 'Operation']
['False', 'True', 'Logic'] AR-> Takes <-W2V ['Takes', 'Operation']
['Takes', 'Operation', 'True'] AR-> Logic <-W2V ['Logic', 'False']
['Takes', 'Operation', 'Logic'] AR-> True <-W2V ['True', 'False']
['Takes', 'True', 'Logic'] AR-> Operation <-W2V ['False', 'Operation']
['Operation', 'True', 'Logic'] AR-> Takes <-W2V ['False', 'Takes']
['Takes', 'Operation', 'False'] AR-> Logic <-W2V ['True', 'Logic']
['Takes', 'Operation', 'Logic'] AR-> False <-W2V ['True', 'False']
['Takes', 'False', 'Logic'] AR-> Operation <-W2V ['True',