
Commit

docs update
sidharthms committed Apr 18, 2018
1 parent 4df4e84 commit 584cd22
Showing 28 changed files with 450 additions and 250 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -1,3 +1,5 @@
*.pyc
*.pth
deepmatcher.egg-info/*
examples/.ipynb_checkpoints/*
docs/build
13 changes: 8 additions & 5 deletions deepmatcher/__init__.py
@@ -1,16 +1,19 @@
r"""
The deepmatcher package contains high-level modules used in the construction of deep
learning models for entity matching. It also contains data processing utilities.
"""

from .data import process
from .optim import Optimizer
from .runner import Statistics
from .models import modules
from .models.core import (MatchingModel, AttrSummarizer, AttrComparator,
WordContextualizer, WordComparator, WordAggregator, Classifier)
from .models import (attr_summarizers, word_aggregators, word_comparators,
word_contextualizers)

__all__ = [
attr_summarizers, word_aggregators, word_comparators,
word_contextualizers, process, Optimizer, Statistics, MatchingModel, AttrSummarizer,
AttrComparator, WordContextualizer, WordComparator, WordAggregator, Classifier, modules
attr_summarizers, word_aggregators, word_comparators, word_contextualizers, process,
MatchingModel, AttrSummarizer, AttrComparator, WordContextualizer,
WordComparator, WordAggregator, Classifier, modules
]

_check_nan = True
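For orientation, the names exported in __all__ above cover a typical end-to-end run. The sketch below shows how they are usually combined; the CSV file names, the data path, and the run_train/run_eval method names are illustrative assumptions that do not appear in this diff.

    import deepmatcher as dm

    # Preprocess labeled CSVs into train/validation/test datasets
    # (file names and path are hypothetical placeholders).
    train, validation, test = dm.data.process(
        path='sample_data',
        train='train.csv',
        validation='validation.csv',
        test='test.csv')

    # Build a matching model; 'hybrid' selects dm.attr_summarizers.Hybrid.
    model = dm.MatchingModel(attr_summarizer='hybrid')

    # Train, keeping the best checkpoint, then evaluate on the held-out split.
    model.run_train(train, validation, best_save_path='best_model.pth')
    model.run_eval(test)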
2 changes: 1 addition & 1 deletion deepmatcher/batch.py
@@ -40,7 +40,7 @@ def from_old_metadata(data, old_attrtensor):
return AttrTensor(data, *old_attrtensor[1:])


class Batch(object):
class MatchingBatch(object):

def __init__(self, input, train_dataset):
copy_fields = train_dataset.all_text_fields
1 change: 0 additions & 1 deletion deepmatcher/config.py

This file was deleted.

4 changes: 2 additions & 2 deletions deepmatcher/data/iterator.py
@@ -4,7 +4,7 @@

from torchtext import data

from ..batch import Batch
from ..batch import MatchingBatch

logger = logging.getLogger(__name__)

@@ -44,7 +44,7 @@ def splits(cls, datasets, batch_sizes=None, **kwargs):

def __iter__(self):
for batch in super(MatchingIterator, self).__iter__():
yield Batch(batch, self.train_dataset)
yield MatchingBatch(batch, self.train_dataset)

def create_batches(self):
if self.sort_in_buckets:
40 changes: 0 additions & 40 deletions deepmatcher/loss.py

This file was deleted.

160 changes: 81 additions & 79 deletions deepmatcher/models/attr_summarizers.py
@@ -2,24 +2,28 @@


class SIF(dm.AttrSummarizer):
r"""The attribute summarizer for the SIF (Smooth Inverse Frequency) model.
"""__init__(word_contextualizer=None, word_comparator=None, word_aggregator=None, \
hidden_size=None)
The attribute summarizer for the SIF (Smooth Inverse Frequency) model.
Args:
word_contextualizer (string or :class:`~dm.WordContextualizer` or callable): The word
contextualizer module (refer to :class:`~dm.WordContextualizer` for details) to
use for attribute summarization. The SIF model does not take word context
information into account, hence this defaults to None.
word_comparator (string or :class:`~dm.WordComparator` or callable): The word
comparator module (refer to :class:`~dm.WordComparator` for details) to use for
attribute summarization. The SIF model does not perform word by word comparisons,
hence this defaults to None.
word_aggregator (string or :class:`~dm.WordAggregator` or callable): The word
aggregator module (refer to :class:`~dm.WordAggregator` for details) to use for
attribute summarization. This model uses SIF-based weighted average aggregation
over the word embeddings of an input sequence, hence this defaults to 'sif-pool'.
hidden_size (int): The hidden size to use for all 3 attribute summarization
sub-modules (i.e., word contextualizer, word comparator, and word aggregator),
if they are customized. By default, the SIF model does not use this parameter.
word_contextualizer (string or :class:`~dm.WordContextualizer` or callable): The
word contextualizer module (refer to :class:`~dm.WordContextualizer` for
details) to use for attribute summarization. The SIF model does not take word
context information into account, hence this defaults to None.
word_comparator (string or :class:`~dm.WordComparator` or callable): The word
comparator module (refer to :class:`~dm.WordComparator` for details) to use
for attribute summarization. The SIF model does not perform word by word
comparisons, hence this defaults to None.
word_aggregator (string or :class:`~dm.WordAggregator` or callable): The word
aggregator module (refer to :class:`~dm.WordAggregator` for details) to use
for attribute summarization. This model uses SIF-based weighted average
aggregation over the word embeddings of an input sequence, hence this
defaults to 'sif-pool'.
hidden_size (int): The hidden size to use for all 3 attribute summarization
sub-modules (i.e., word contextualizer, word comparator, and word aggregator),
if they are customized. By default, the SIF model does not use this parameter.
"""

def _init(self,
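To make the SIF defaults above concrete, here is a minimal sketch of selecting this summarizer when building a matching model; the 'sif' string shorthand and the attr_summarizer argument of MatchingModel are assumed from the library's usual conventions and are not part of this hunk.

    import deepmatcher as dm

    # Two equivalent (assumed) ways to pick the SIF attribute summarizer:
    # a string shorthand, or an explicit instance with its documented default
    # aggregator spelled out.
    model = dm.MatchingModel(attr_summarizer='sif')
    model = dm.MatchingModel(
        attr_summarizer=dm.attr_summarizers.SIF(word_aggregator='sif-pool'))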
@@ -39,27 +43,27 @@ class RNN(dm.AttrSummarizer):
r"""The attribute summarizer for the RNN model.
Args:
word_contextualizer (string or :class:`~dm.WordContextualizer` or callable): The word
contextualizer module (refer to :class:`~dm.WordContextualizer` for details) to
use for attribute summarization. This model uses an RNN to take context
information into account, and the default value is 'gru' (i.e., it uses a
bidirectional GRU as the specific RNN instantiation). Other options are 'rnn'
(the vanilla bi-RNN) and 'lstm' (the bi-LSTM model).
word_comparator (string or :class:`~dm.WordComparator` or callable): The word
comparator module (refer to :class:`~dm.WordComparator` for details) to use for
attribute summarization. The RNN model does not perform word by word comparisons,
hence this defaults to None.
word_aggregator (string or :class:`~dm.WordAggregator` or callable): The word
aggregator module (refer to :class:`~dm.WordAggregator` for details) to use for
attribute summarization. The RNN model uses a bidirectional RNN and concatenates
the last outputs of the forward and backward RNNs, hence the default value is
'birnn-last-pool'.
hidden_size (int): The hidden size to use for the word contextualizer. This value
will also be used as the hidden size for the other 2 attribute summarization
sub-modules (i.e., word comparator, and word aggregator), if they are customized.
If not specified, the hidden size for each component will be set to be the same as
its input size. E.g. if the word embedding dimension is 300 and hidden_size is
None, the word contextualizer's hidden size will be 300.
word_contextualizer (string or :class:`~dm.WordContextualizer` or callable): The
word contextualizer module (refer to :class:`~dm.WordContextualizer` for
details) to use for attribute summarization. This model uses an RNN to take
context information into account, and the default value is 'gru' (i.e., it uses
a bidirectional GRU as the specific RNN instantiation). Other options are 'rnn'
(the vanilla bi-RNN) and 'lstm' (the bi-LSTM model).
word_comparator (string or :class:`~dm.WordComparator` or callable): The word
comparator module (refer to :class:`~dm.WordComparator` for details) to use
for attribute summarization. The RNN model does not perform word by word
comparisons, hence this defaults to None.
word_aggregator (string or :class:`~dm.WordAggregator` or callable): The word
aggregator module (refer to :class:`~dm.WordAggregator` for details) to use
for attribute summarization. The RNN model uses a bidirectional RNN and
concatenates the last outputs of the forward and backward RNNs, hence the
default value is 'birnn-last-pool'.
hidden_size (int): The hidden size to use for the word contextualizer. This value
will also be used as the hidden size for the other 2 attribute summarization
sub-modules (i.e., word comparator, and word aggregator), if they are
customized. If not specified, the hidden size for each component will be set
to be the same as its input size. E.g. if the word embedding dimension is 300
and hidden_size is None, the word contextualizer's hidden size will be 300.
"""

def _init(self,
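As a quick illustration of the parameters documented above, the sketch below swaps the default 'gru' contextualizer for 'lstm' and fixes the hidden size; the attr_summarizer argument of MatchingModel is again an assumption, not something shown in this hunk.

    import deepmatcher as dm

    # RNN summarizer with a bi-LSTM contextualizer instead of the default bi-GRU,
    # and an explicit hidden size shared by any customized sub-modules.
    rnn_summarizer = dm.attr_summarizers.RNN(
        word_contextualizer='lstm',
        hidden_size=300)
    model = dm.MatchingModel(attr_summarizer=rnn_summarizer)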
@@ -77,31 +81,30 @@ def _init(self,


class Attention(dm.AttrSummarizer):

r"""The attribute summarizer for the attention-based model.
Args:
word_contextualizer (string or :class:`~dm.WordContextualizer` or callable): The word
contextualizer module (refer to :class:`~dm.WordContextualizer` for details) to
use for attribute summarization. The attention model does not take word context
information into account, hence this defaults to None.
word_comparator (string or :class:`~dm.WordComparator` or callable): The word
comparator module (refer to :class:`~dm.WordComparator` for details) to use for
attribute summarization. The attention model performs word by word comparison with
the decomposable attention mechanism, hence this defaults to
'decomposable-attention'.
word_aggregator (string or :class:`~dm.WordAggregator` or callable): The word
aggregator module (refer to :class:`~dm.WordAggregator` for details) to use for
attribute summarization. The Attention model performs the aggregation by summing
over the comparison results from the word comparator, divided by the length of
the input sequence (to get constant variance through the network flow). Hence
this defaults to 'divsqrt-pool'.
hidden_size (int): The hidden size to use for the word comparator. This value
will also be used as the hidden size for the other 2 attribute summarization
sub-modules (i.e., word contextualizer, and word aggregator), if they are customized.
If not specified, the hidden size for each component will be set to be the same as
its input size. E.g. if the word embedding dimension is 300 and hidden_size is
None, the word contextualizer's hidden size will be 300.
word_contextualizer (string or :class:`~dm.WordContextualizer` or callable): The
word contextualizer module (refer to :class:`~dm.WordContextualizer` for
details) to use for attribute summarization. The attention model does not take
word context information into account, hence this defaults to None.
word_comparator (string or :class:`~dm.WordComparator` or callable): The word
comparator module (refer to :class:`~dm.WordComparator` for details) to use
for attribute summarization. The attention model performs word by word
comparison with the decomposable attention mechanism, hence this defaults to
'decomposable-attention'.
word_aggregator (string or :class:`~dm.WordAggregator` or callable): The word
aggregator module (refer to :class:`~dm.WordAggregator` for details) to use
for attribute summarization. The Attention model performs the aggregation by
summing over the comparison results from the word comparator, divided by the
length of the input sequence (to get constant variance through the network
flow). Hence this defaults to 'divsqrt-pool'.
hidden_size (int): The hidden size to use for the word comparator. This value
will also be used as the hidden size for the other 2 attribute summarization
sub-modules (i.e., word contextualizer, and word aggregator), if they are
customized. If not specified, the hidden size for each component will be set
to be the same as its input size. E.g. if the word embedding dimension is 300
and hidden_size is None, the word contextualizer's hidden size will be 300.
"""

def _init(self,
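Following the same pattern, here is a hedged sketch of the Attention summarizer with its documented defaults written out explicitly; only the parameter names and default strings quoted in the docstring come from this diff, the rest is assumed.

    import deepmatcher as dm

    # Attention summarizer: decomposable-attention word comparison and
    # 'divsqrt-pool' aggregation, stated explicitly rather than left implicit.
    attention_summarizer = dm.attr_summarizers.Attention(
        word_comparator='decomposable-attention',
        word_aggregator='divsqrt-pool')
    model = dm.MatchingModel(attr_summarizer=attention_summarizer)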
@@ -119,29 +122,28 @@ def _init(self,


class Hybrid(dm.AttrSummarizer):

r"""The attribute summarizer for the hybrid model.
Args:
word_contextualizer (string or :class:`~dm.WordContextualizer` or callable): The word
contextualizer module (refer to :class:`~dm.WordContextualizer` for details) to
use for attribute summarization. The hybrid model uses a bidirectional GRU (a
specific type of RNN) to take context information into account. The default
value is 'gru'.
word_comparator (string or :class:`~dm.WordComparator` or callable): The word
comparator module (refer to :class:`~dm.WordComparator` for details) to use for
attribute summarization. The hybrid model performs word by word comparison over
the raw input word embeddings (rather than the RNN hidden states), hence this
defaults to an Attention object with 'decomposable' as the attention mechanism
on the raw input embeddings.
word_aggregator (string or :class:`~dm.WordAggregator` or callable): The word
aggregator module (refer to :class:`~dm.WordAggregator` for details) to use for
attribute summarization. A second layer of attention is used for the
aggregation. Please consult the paper for more information. The default value is
'concat-attention-with-rnn'.
hidden_size (int): The hidden size to use for all 3 attribute summarization
sub-modules (i.e., word contextualizer, word comparator, and word aggregator),
if they are customized.
word_contextualizer (string or :class:`~dm.WordContextualizer` or callable): The
word contextualizer module (refer to :class:`~dm.WordContextualizer` for
details) to use for attribute summarization. The hybrid model uses a
bidirectional GRU (a specific type of RNN) to take context information into
account. The default value is 'gru'.
word_comparator (string or :class:`~dm.WordComparator` or callable): The word
comparator module (refer to :class:`~dm.WordComparator` for details) to use
for attribute summarization. The hybrid model performs word by word comparison
over the raw input word embeddings (rather than the RNN hidden states), hence
this defaults to an Attention object with 'decomposable' as the attention
mechanism on the raw input embeddings.
word_aggregator (string or :class:`~dm.WordAggregator` or callable): The word
aggregator module (refer to :class:`~dm.WordAggregator` for details) to use
for attribute summarization. A second layer of attention is used for the
aggregation. Please consult the paper for more information. The default value
is 'concat-attention-with-rnn'.
hidden_size (int): The hidden size to use for all 3 attribute summarization
sub-modules (i.e., word contextualizer, word comparator, and word aggregator),
if they are customized.
"""

def _init(self,
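To round out the four summarizers, here is a sketch of the Hybrid model with its documented defaults made explicit; the word comparator is left at its default since the docstring describes it as an Attention object rather than a plain string, and as before the MatchingModel usage is assumed rather than shown in this diff.

    import deepmatcher as dm

    # Hybrid summarizer: GRU contextualizer plus attention-based aggregation,
    # matching the defaults described in the docstring above.
    hybrid_summarizer = dm.attr_summarizers.Hybrid(
        word_contextualizer='gru',
        word_aggregator='concat-attention-with-rnn')
    model = dm.MatchingModel(attr_summarizer=hybrid_summarizer)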
