# Explaining prediction

This notebook shows how to visualize the weights of a trained Vowpal Wabbit model in a human-readable way.

First, train the example model by running `tests/benchmarks/pretrain_model.py`.

In [1]:
import json
import random

from subwabbit import VowpalWabbitBaseFormatter, VowpalWabbitProcess

We will define a formatter that converts feature dicts into Vowpal Wabbit's input format. The important part here is overriding the `parse_element` method:

In [2]:
class BenchmarkFormatter(VowpalWabbitBaseFormatter):
    """
    Formatter for the benchmark features used in this notebook.

    Request (common) features are written into namespaces ``d`` to ``i`` and item features into
    namespaces ``a`` to ``c``. ``parse_element`` maps those namespaces back to human readable names.
    """

    def get_common_line_part(self, common_features, debug_info=None):
        """
        Return part of VW line with features that are common for one call of predict/train.

        :param common_features: Features common for all items. This implementation reads the keys
                                'd', 'e' and 'g' (dicts mapping feature to numeric value),
                                'f' (iterable of features) and 'h', 'i' (single values).
        :param debug_info: Optional dict that can be filled by information useful for debugging
                           (not used by this implementation)
        :return: Part of line that is common for each item in one call. Returned string has to start with '|' symbol.
        """
        # Every feature name is prefixed with its namespace letter; weighted features are rounded to 2 decimals.
        return ' '.join([
            '|d {}'.format(' '.join('d{}:{:.2f}'.format(k, v) for k, v in common_features['d'].items())),
            '|e {}'.format(' '.join('e{}:{:.2f}'.format(k, v) for k, v in common_features['e'].items())),
            '|f {}'.format(' '.join('f{}'.format(f) for f in common_features['f'])),
            '|g {}'.format(' '.join('g{}:{:.2f}'.format(k, v) for k, v in common_features['g'].items())),
            '|h h{}'.format(common_features['h']),
            '|i i{}'.format(common_features['i'])
        ])

    def get_item_line_part(self, common_features, item_features, debug_info=None):
        """
        Return part of VW line with features specific for each item/entity.

        :param common_features: Features common for all items (not used by this implementation)
        :param item_features: Features for item. This implementation reads the keys 'a' and 'b'
                              (single values) and 'c' (iterable of features).
        :param debug_info: Optional dict that can be filled by information useful for debugging
                           (not used by this implementation)
        :return: Part of line that is specific for item. Returned string should not have any namespaces
                 or it has to start with '|' symbol.
        """
        return ' '.join([
            '|a a{}'.format(item_features['a']),
            '|b b{}'.format(item_features['b']),
            '|c {}'.format(' '.join('c{}'.format(c) for c in item_features['c']))
        ])

    def parse_element(self, element, feature_translator=None):
        """
        This method is supposed to translate namespace name and feature name to human readable form.

        For example, element can be "a_item_id^i123" and result can be ('Item ID', 'News of the day: ID of item is 123')

        :param element: namespace name and feature name, e.g. a_item_id^i123
        :param feature_translator: Any object that can help you with translation, e.g. some database connection
                                   (not used by this implementation)
        :return: (human understandable namespace name, human understandable feature name)
        """
        if '^' in element:
            # Split on the first '^' only: a feature name that itself contains '^'
            # must stay intact instead of breaking the two-value unpacking.
            namespace, feature_name = element.split('^', 1)
        else:
            namespace = None
            feature_name = element

        if namespace == 'a':
            return 'Item ID', feature_name
        elif namespace == 'b':
            return 'Item property', feature_name
        elif namespace == 'c':
            return 'Item tag', feature_name
        elif namespace in {'d', 'e', 'f', 'g', 'h', 'i'}:
            return 'Request feature {}'.format(namespace.upper()), feature_name
        # Anything else (including elements without a namespace) is reported as unknown.
        return 'Unknown feature {}'.format(namespace), feature_name
        

Then we instantiate a Vowpal Wabbit process in audit mode.

In [3]:
# The same formatter instance is handed to the process and reused directly in later cells.
formatter = BenchmarkFormatter()

# `-i` loads the pretrained benchmark model.
model = VowpalWabbitProcess(
    audit_mode=True,
    vw_args=['-i', '../tests/benchmarks/model.vw'],
    formatter=formatter,
)

Load some example features that were used to train the model.

In [4]:
# Read the fixtures explicitly as UTF-8 so that decoding does not depend on the platform's default encoding.
with open('../tests/benchmarks/requests.json', encoding='utf-8') as f:
    requests = json.load(f)
with open('../tests/benchmarks/items.json', encoding='utf-8') as f:
    items = json.load(f)

In [5]:
requests[0]

{'d': {},
 'e': {},
 'f': [],
 'g': {'0dceb': 0.0027175746,
  '1b7db': 0.0058458746,
  '1e5ee': 0.0095307727,
  '3a5f9': 0.0051918319,
  '3bb58': 0.0030790292,
  '45e03': 0.0025652254,
  '4e455': 0.0111844653,
  '57ebe': 0.0632717824,
  '5e800': 0.0039546815,
  '61180': 0.08349630250000001,
  '624c5': 0.0076227528,
  '73c83': 0.0026011386,
  '78bbb': 0.150152073,
  '7a677': 0.027146533100000002,
  '856b1': 0.0051462646,
  '8a66c': 0.006105892700000001,
  '8ea1e': 0.0044644849,
  '95766': 0.0048473435,
  '9a32e': 0.0319729173,
  '9ca90': 0.0069261085,
  'a896b': 0.034539330300000004,
  'b1e29': 0.0267581808,
  'b4e62': 0.0205558879,
  'b8e32': 0.017786406600000002,
  'bdcb5': 0.1895181926,
  'bdffc': 0.005942737500000001,
  'c7495': 0.018038030400000002,
  'c7bc7': 0.0151302432,
  'c8829': 0.0032719483000000003,
  'ccbb0': 0.0045337064,
  'cdf49': 0.1394195405,
  'd299a': 0.0104898966,
  'e3743': 0.0469601029,
  'f0128': 0.0043615247,
  'f2dff': 0.0025652254,
  'f9d39': 0.01390013190000

In [6]:
items[0]

{'a': 5741672901246703469, 'b': 39, 'c': [272, 10]}

Translate features into Vowpal Wabbit Input Format:

In [7]:
# Combine one randomly chosen request with one randomly chosen item into a single VW input line.
vw_line = formatter.get_vw_line(
    common_line_part=formatter.get_common_line_part(
        common_features=random.choice(requests)
    ),
    item_line_part=formatter.get_item_line_part(
        # BenchmarkFormatter.get_item_line_part does not read the common features.
        common_features=None,
        item_features=random.choice(items)
    )
)

In [8]:
vw_line

'|d d374:0.17 d30:0.33 d266:0.17 d10:0.17 d53:0.17 |e e243:0.10 e51:0.10 e81:0.10 e164:0.10 e262:0.20 e429:0.10 e404:0.30 |f f2099715132508010584 f16347014626840415796 f5460200835877347079 f15901405900115419411 f2868430111867810921 f16392528256463451848 f5035192394965831262 f16445323628105595511 f16833477272344963305 f646294422815440915 |g g5e800:0.01 g8a2bd:0.05 gd9b64:0.02 g5a110:0.01 gccbb0:0.28 gcdf49:0.00 g75702:0.02 gc8829:0.05 g856b1:0.02 gb6297:0.02 gbdcb5:0.01 geaf90:0.00 g69ddb:0.01 gd787f:0.02 gc1e24:0.01 gfd512:0.02 g1a16a:0.02 g1b7db:0.09 g57ebe:0.01 g8eab9:0.01 g8a66c:0.01 gcb163:0.01 gfc79f:0.00 gdeb91:0.00 gd299a:0.10 gaf8a2:0.00 g9a32e:0.01 g1e5ee:0.00 gf0d48:0.02 gde9d9:0.03 gc7495:0.00 g89fee:0.07 g3bb58:0.02 g8c856:0.00 gb9ea0:0.01 g5b708:0.03 |h h69691 |i iaab32 |a a1760747215799128656 |b b272 |c c49 c22'

Let Vowpal Wabbit explain how it calculated the score. Be aware that when the model uses a link function such as `logistic`, you have to pass `link_function=True`.

In [13]:
# The call yields two values: the score and the explanation string that is parsed in the cells below.
score, explanation_string = model.explain_vw_line(vw_line, link_function=True)

In [14]:
score

-0.130156

The explanation string looks like this:

In [15]:
# Show only the first 150 characters; the full explanation string is very long.
print('{}...'.format(explanation_string[:150]))

b^b272*e^e164:29794476:0.1:-0.300118@0	a^a1760747215799128656*e^e164:18321926:0.1:-0.300118@0	c^c49*h^h69691:18276055:1:0.0167762@0	c^c49*f^f163470146...


Let the formatter parse it:

In [12]:
# Parse the explanation string into a list of dicts, one per feature (not the HTML rendering).
formatter.get_human_readable_explanation(explanation_string)

[{'hashindex': '29794476',
  'names': [('Item property', 'b272'), ('Request feature E', 'e164')],
  'original_feature_name': 'b^b272*e^e164',
  'potential': -0.0300118,
  'relative_potential': 0.04402731723192768,
  'value': 0.1,
  'weight': -0.300118},
 {'hashindex': '18321926',
  'names': [('Item ID', 'a1760747215799128656'),
   ('Request feature E', 'e164')],
  'original_feature_name': 'a^a1760747215799128656*e^e164',
  'potential': -0.0300118,
  'relative_potential': 0.04402731723192768,
  'value': 0.1,
  'weight': -0.300118},
 {'hashindex': '18276055',
  'names': [('Item tag', 'c49'), ('Request feature H', 'h69691')],
  'original_feature_name': 'c^c49*h^h69691',
  'potential': 0.0167762,
  'relative_potential': 0.024610689107160022,
  'value': 1.0,
  'weight': 0.0167762},
 {'hashindex': '15669716',
  'names': [('Item tag', 'c49'),
   ('Request feature F', 'f16347014626840415796')],
  'original_feature_name': 'c^c49*f^f16347014626840415796',
  'potential': -0.0163225,
  'relative_p

You can also use the HTML version of the previous method:

In [23]:
# Render the explanation as a table in the notebook.
# NOTE(review): max_rows presumably caps the number of rendered rows; confirm against the subwabbit docs.
formatter.print_human_readable_explanation(explanation_string, max_rows=100)

Relative potential,Potential,Value,Weight,Feature name
,0.0094,1.0,0.0094,Item ID: a9357144256753103115  IN COMBINATION WITH  Request feature I: i0708d
,0.0093,1.0,0.0093,Item property: b515  IN COMBINATION WITH  Request feature I: i0708d
,-0.0072,0.58,-0.0124,Item ID: a9357144256753103115  IN COMBINATION WITH  Request feature G: g57ebe
,-0.0055,0.58,-0.0094,Item tag: c28  IN COMBINATION WITH  Request feature G: g57ebe
,-0.0046,1.0,-0.0046,Item ID: a9357144256753103115
,-0.004,1.0,-0.004,Item tag: c28  IN COMBINATION WITH  Request feature I: i0708d
,0.0038,1.0,0.0038,Item tag: c35  IN COMBINATION WITH  Request feature H: h69691
,-0.0034,0.58,-0.0059,Request feature G: g57ebe
,0.0031,1.0,0.0031,Item property: b515  IN COMBINATION WITH  Request feature H: h69691
,0.003,0.42,0.0071,Item ID: a9357144256753103115  IN COMBINATION WITH  Request feature G: g33d65
