In [1]:
import os
import Orange
from orangecontrib.associate.fpgrowth import *

In [2]:
# Base directory of the assignment files.
# When the notebook runs inside Docker, the path must be absolute as seen from
# within the container, so it is anchored at the container user's $HOME.
pwd: str = f"{os.environ['HOME']}/work/assignment/assignment-4"

In [3]:
# Load the Titanic data set from the assignment directory into an Orange table,
# then display its domain (attributes | class variable).
titanic = Orange.data.Table(f'{pwd}/Titanic.csv')
titanic.domain

[Class, Sex, Age | Survived]

In [4]:
# Inspect the feature (non-target) variables of the table and their possible values.
titanic.domain.attributes

(DiscreteVariable(name='Class', values=['1st', '2nd', '3rd', 'Crew']),
 DiscreteVariable(name='Sex', values=['Female', 'Male']),
 DiscreteVariable(name='Age', values=['Adult', 'Child']))

In [5]:
# Inspect the target (class) variable of the table and its possible values.
titanic.domain.class_var

DiscreteVariable(name='Survived', values=['No', 'Yes'])

#### Transform titanic into a table X that has a “one-hot” vector for each of the 4 variables

In [6]:
# One-hot encode the table; include_class=True also encodes the class variable
# (Survived) so that it can appear in the mined item sets and rules.
# `mapping` relates each encoded item index to its source variable/value
# (presumably (variable index, value index) pairs -- see the items displayed below).
X, mapping = OneHot.encode(titanic, include_class=True)

In [7]:
# Dimensions of the encoded table: (number of rows, number of one-hot items).
X.shape

(2201, 10)

In [8]:
# Show the raw mapping entries: encoded item index -> pair of indices
# (presumably (variable index, value index) -- confirm against the OneHot docs).
mapping.items()

dict_items([(0, (0, 0)), (1, (0, 1)), (2, (0, 2)), (3, (0, 3)), (4, (1, 0)), (5, (1, 1)), (6, (2, 0)), (7, (2, 1)), (8, (3, 0)), (9, (3, 1))])

#### Get the item names from the indices

In [9]:
# Build a lookup from encoded item index to a readable "Variable=value" label.
# Passing `mapping` as the first argument decodes every encoded item index.
decoded_names = {
    idx: f'{variable.name}={value}'
    for idx, variable, value in OneHot.decode(mapping, titanic, mapping)
}
decoded_names

{0: 'Class=1st',
 1: 'Class=2nd',
 2: 'Class=3rd',
 3: 'Class=Crew',
 4: 'Sex=Female',
 5: 'Sex=Male',
 6: 'Age=Adult',
 7: 'Age=Child',
 8: 'Survived=No',
 9: 'Survived=Yes'}

#### Collect the item indices that correspond to the target variable (Survived)

In [10]:
# Collect the encoded item indices that belong to the target (class) variable.
# Note: despite its name, this set holds item *indices*, not label strings.
decoded_class_names = set(
    idx
    for idx, variable, _value in OneHot.decode(mapping, titanic, mapping)
    if variable is titanic.domain.class_var
)
decoded_class_names

{8, 9}

#### Mine the frequent patterns with a minimum support threshold of 0.1

In [11]:
# Mine all item sets whose relative support is at least 0.1 (10% of the rows)
# and keep them as {itemset: support}; then report how many were found.
item_sets = {
    itemset: support
    for itemset, support in frequent_itemsets(X, 0.1)
}
len(item_sets)

35

In [12]:
# List the frequent item sets themselves (the dictionary keys), in insertion order.
[*item_sets]

[frozenset({0}),
 frozenset({1}),
 frozenset({2}),
 frozenset({3}),
 frozenset({4}),
 frozenset({5}),
 frozenset({2, 5}),
 frozenset({3, 5}),
 frozenset({6}),
 frozenset({0, 6}),
 frozenset({1, 6}),
 frozenset({2, 6}),
 frozenset({3, 6}),
 frozenset({4, 6}),
 frozenset({5, 6}),
 frozenset({2, 5, 6}),
 frozenset({3, 5, 6}),
 frozenset({8}),
 frozenset({2, 8}),
 frozenset({3, 8}),
 frozenset({5, 8}),
 frozenset({2, 5, 8}),
 frozenset({3, 5, 8}),
 frozenset({6, 8}),
 frozenset({2, 6, 8}),
 frozenset({3, 6, 8}),
 frozenset({5, 6, 8}),
 frozenset({2, 5, 6, 8}),
 frozenset({3, 5, 6, 8}),
 frozenset({9}),
 frozenset({4, 9}),
 frozenset({5, 9}),
 frozenset({6, 9}),
 frozenset({4, 6, 9}),
 frozenset({5, 6, 9})]

#### Generate rules from item_sets with a minimum confidence threshold of 0.7, keeping only rules whose consequent is a single target-variable item

In [13]:
# Generate association rules with confidence >= 0.7 and keep only those that
# predict the target variable: the consequent must consist of exactly one item,
# and that item must be one of the class-variable indices.
rules = [
    (antecedent, consequent, support, confidence)
    for antecedent, consequent, support, confidence in association_rules(item_sets, 0.7)
    if len(consequent) == 1 and not consequent.isdisjoint(decoded_class_names)
]
rules

[(frozenset({2, 5, 6}), frozenset({8}), 387, 0.8376623376623377),
 (frozenset({3, 5, 6}), frozenset({8}), 670, 0.777262180974478),
 (frozenset({2, 5}), frozenset({8}), 422, 0.8274509803921568),
 (frozenset({3, 5}), frozenset({8}), 670, 0.777262180974478),
 (frozenset({2, 6}), frozenset({8}), 476, 0.759170653907496),
 (frozenset({3, 6}), frozenset({8}), 673, 0.7604519774011299),
 (frozenset({5, 6}), frozenset({8}), 1329, 0.7972405518896221),
 (frozenset({4, 6}), frozenset({9}), 316, 0.7435294117647059),
 (frozenset({2}), frozenset({8}), 528, 0.7478753541076487),
 (frozenset({3}), frozenset({8}), 673, 0.7604519774011299),
 (frozenset({5}), frozenset({8}), 1364, 0.7879838243789717),
 (frozenset({4}), frozenset({9}), 344, 0.7319148936170212)]

#### Convert the indices in the rules to item names

In [14]:
# Print every rule as "<antecedent items> --> <consequent item> (supp, conf)".
for ante, cons, supp, conf in rules:
    # The antecedent is a frozenset, whose iteration order is not guaranteed;
    # sort the item indices so the items are always listed in a stable order.
    antecedent_names = ', '.join(decoded_names[i] for i in sorted(ante))
    # The rules were filtered to single-item consequents, so take that one item.
    consequent_name = decoded_names[next(iter(cons))]
    print(antecedent_names, '-->', consequent_name,
          '(supp: {}, conf: {})'.format(supp, conf))


Class=3rd, Sex=Male, Age=Adult --> Survived=No (supp: 387, conf: 0.8376623376623377)
Class=Crew, Sex=Male, Age=Adult --> Survived=No (supp: 670, conf: 0.777262180974478)
Class=3rd, Sex=Male --> Survived=No (supp: 422, conf: 0.8274509803921568)
Class=Crew, Sex=Male --> Survived=No (supp: 670, conf: 0.777262180974478)
Class=3rd, Age=Adult --> Survived=No (supp: 476, conf: 0.759170653907496)
Class=Crew, Age=Adult --> Survived=No (supp: 673, conf: 0.7604519774011299)
Sex=Male, Age=Adult --> Survived=No (supp: 1329, conf: 0.7972405518896221)
Sex=Female, Age=Adult --> Survived=Yes (supp: 316, conf: 0.7435294117647059)
Class=3rd --> Survived=No (supp: 528, conf: 0.7478753541076487)
Class=Crew --> Survived=No (supp: 673, conf: 0.7604519774011299)
Sex=Male --> Survived=No (supp: 1364, conf: 0.7879838243789717)
Sex=Female --> Survived=Yes (supp: 344, conf: 0.7319148936170212)
