This repository has been archived by the owner on Feb 7, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Use char-ngram embedding for out-of-vocabulary words
Summary: **Description** Provide the DeepText model with the functionality to load a secondary index (a pre-trained char-ngram embedding, e.g. FastText) during training/test. Embeddings of out-of-vocabulary words will be computed on-the-fly during training/test by averaging the char-ngram embeddings. **Approach** This diff provides two custom operators to accomplish this task – ConditionalOp and IndexCharNgramGetOp. We first use IndexCharNgramGetOp to perform a char-ngram index lookup and return a sparse tensor segmented by lengths for each token. The sparse tensor is then used to compute the average embedding provided by the char-ngram index. Finally, we use a ConditionalOp to replace the embeddings of those tokens that were not found in the original index during the feature apply stage. Please refer to the documentation in the code for more details. Reviewed By: jamesr66a Differential Revision: D5666924 fbshipit-source-id: f76605d093154a014d5b9ebf9510de9d79874eee
- Loading branch information
1 parent
725a099
commit 067f704
Showing
3 changed files
with
117 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
#include "caffe2/operators/conditional_op.h" | ||
#include "caffe2/core/operator.h" | ||
#include "caffe2/core/tensor.h" | ||
|
||
namespace caffe2 { | ||
|
||
template <> | ||
bool ConditionalOp<CPUContext>::RunOnDevice() { | ||
auto& condition = Input(0); | ||
auto& dataT = Input(1); | ||
auto& dataF = Input(2); | ||
|
||
// verify the inputs shape | ||
CAFFE_ENFORCE_EQ(condition.ndim(), 1); | ||
CAFFE_ENFORCE(dataT.ndim() >= 1); | ||
CAFFE_ENFORCE(dataT.dims()[0] == condition.dims()[0]); | ||
CAFFE_ENFORCE_EQ(dataT.ndim(), dataF.ndim()); | ||
for (size_t i = 0; i < dataT.dims().size(); i++) { | ||
CAFFE_ENFORCE(dataT.dims().at(i) == dataF.dims().at(i)); | ||
} | ||
const auto innerSize = dataT.size_from_dim(1); | ||
const auto innerSizeBytes = innerSize * dataT.meta().itemsize(); | ||
CAFFE_ENFORCE(innerSize * dataF.meta().itemsize() == innerSizeBytes); | ||
|
||
// initialize output shape | ||
auto* dataOut = Output(0); | ||
const auto* condPtr = condition.template data<bool>(); | ||
dataOut->ResizeLike(dataT); | ||
auto* outPtr = (char*)dataOut->raw_mutable_data(dataT.meta()); | ||
|
||
// perform conditional op along first dimension | ||
const auto* ptrT = (char*)dataT.raw_data(); | ||
const auto* ptrF = (char*)dataF.raw_data(); | ||
for (TIndex i = 0; i < condition.size(); i++) { | ||
auto* dst = outPtr + i * innerSizeBytes; | ||
if (condPtr[i]) { | ||
context_.template CopyItems<CPUContext, CPUContext>( | ||
dataT.meta(), innerSize, ptrT + i * innerSizeBytes, dst); | ||
} else { | ||
context_.template CopyItems<CPUContext, CPUContext>( | ||
dataF.meta(), innerSize, ptrF + i * innerSizeBytes, dst); | ||
} | ||
} | ||
return true; | ||
} | ||
|
||
// Bind the "Conditional" operator name to its CPU implementation.
REGISTER_CPU_OPERATOR(Conditional, ConditionalOp<CPUContext>);

// Schema: three inputs (condition, true-branch data, false-branch data) and a
// single output.
OPERATOR_SCHEMA(Conditional)
    .NumInputs(3)
    .NumOutputs(1)
    .SetDoc(R"DOC(
Given a 1-D tensor of boolean values, apply conditional operator along the first
dimension of DataT and DataF and return DataO. Note, DataT and DataF must
have the exact same shape and type.
)DOC")
    .Input(0, "Condition", "Boolean tensor to select DataT or DataF")
    .Input(1, "DataT", "Data to use when True")
    .Input(2, "DataF", "Data to use when False")
    .Output(0, "DataO", "Output data after applying ConditionalOp");

// The operator is registered without a gradient.
NO_GRADIENT(Conditional);
|
||
} // caffe2 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
// Copyright 2004-present Facebook. All Rights Reserved. | ||
|
||
#ifndef CONDITIONAL_OP_H | ||
#define CONDITIONAL_OP_H | ||
|
||
#include "caffe2/core/context.h" | ||
#include "caffe2/core/operator.h" | ||
#include "caffe2/core/tensor.h" | ||
|
||
namespace caffe2 { | ||
|
||
template <class Context> | ||
class ConditionalOp final : public Operator<Context> { | ||
public: | ||
USE_OPERATOR_CONTEXT_FUNCTIONS; | ||
ConditionalOp(const OperatorDef& operator_def, Workspace* ws) | ||
: Operator<Context>(operator_def, ws) {} | ||
|
||
bool RunOnDevice() override; | ||
}; | ||
|
||
} // caffe2 | ||
|
||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
from __future__ import absolute_import | ||
from __future__ import division | ||
from __future__ import print_function | ||
|
||
import numpy as np | ||
from hypothesis import given | ||
import hypothesis.strategies as st | ||
from caffe2.python import core | ||
import caffe2.python.hypothesis_test_util as hu | ||
|
||
|
||
class TestConditionalOp(hu.HypothesisTestCase):
    """Compares the Conditional operator with a pure-Python reference."""

    @given(rows_num=st.integers(1, 10000), **hu.gcs_cpu_only)
    def test_conditional(self, rows_num, gc, dc):
        # Two random float32 candidates of identical shape, plus a random
        # boolean mask with one entry per row.
        shape = (rows_num, 10, 20)
        data_t = np.random.random(shape).astype(np.float32)
        data_f = np.random.random(shape).astype(np.float32)
        condition = np.random.choice(a=[True, False], size=rows_num)

        op = core.CreateOperator(
            "Conditional", ["condition", "data_t", "data_f"], "output"
        )

        def ref(condition, data_t, data_f):
            # Row-wise selection: take the row from data_t where the mask is
            # set, otherwise the row from data_f.
            picked = [
                row_t if flag else row_f
                for flag, row_t, row_f in zip(condition, data_t, data_f)
            ]
            return (picked,)

        self.assertReferenceChecks(gc, op, [condition, data_t, data_f], ref)