Skip to content
Permalink
Browse files

Updating comments for lint rules

  • Loading branch information...
buckhx committed Jul 29, 2019
1 parent a14f47b commit 6ebbc2bad208ffab89a34e2933d0b8e320752c3b
@@ -35,17 +35,17 @@ func main() {
fmt.Println("Pooling...")
embs := make([]mat.Vector, len(vals))
for s, sent := range vals {
vec := Pool(sent)
vec := pool(sent)
embs[s] = vec
}
fmt.Println("Done Pooling.")
for i := 1; i < len(vals); i++ {
fmt.Printf("%q, %q -> %v\n", texts[0], texts[i], CosSim(embs[0], embs[i]))
fmt.Printf("%q, %q -> %v\n", texts[0], texts[i], cosSim(embs[0], embs[i]))
}
}
}

func Pool(toks [][]float32) mat.Vector {
func pool(toks [][]float32) mat.Vector {
c := len(toks[0])
vec := mat.NewVecDense(c, nil)
x := make([]float64, c)
@@ -58,7 +58,7 @@ func Pool(toks [][]float32) mat.Vector {
return vec
}

func CosSim(x, y mat.Vector) float64 {
// cosSim returns the cosine similarity of x and y: their dot product divided
// by the product of their 2-norms.
//
// There is no guard for zero-norm inputs; in that case the division is 0/0
// and the result is NaN.
func cosSim(x, y mat.Vector) float64 {
	dot := mat.Dot(x, y)
	scale := mat.Norm(x, 2) * mat.Norm(y, 2)
	return dot / scale
}
@@ -1,3 +1,4 @@
// Package main is an example of a semantic search engine using BERT embeddings
package main

import (
@@ -9,6 +10,7 @@ import (
"runtime"
)

// ExitText is the keyword to type from STDIN to exit the query loop
const ExitText = "exit"

// Using a convention for this project that _* is a cmdline arg
@@ -72,10 +74,10 @@ func main() {
}
fmt.Printf("-> %s\n", res[TextHeader])
fmt.Printf("\tSimilarity Score (%.2f)\n", score)
if score < 0.9 {
fmt.Println("\tNot so sure about that, might need to look somewhere else...")
} else {
if score > 0.89 {
fmt.Println("\tLGTM")
} else {
fmt.Println("\tNot so sure about that, might need to look somewhere else...")
}
}
fmt.Printf("\nEnter Query or \"exit\":\n\n")
@@ -1,3 +1,4 @@
// Package model provides functionality for working with exported BERT models
package model

import (
@@ -17,18 +18,25 @@ const (
InputTypeIDsOp = "input_type_ids"
)

// Default values
const (
// DefaultSeqLen is presumably the sequence length used when WithSeqLen is
// not supplied — TODO confirm against NewBert.
DefaultSeqLen = 128
// DefaultVocabFile is presumably the vocabulary file name looked up when no
// explicit vocab path is given — TODO confirm against callers.
DefaultVocabFile = "vocab.txt"
)

// TensorInputFunc maps tensors to an estimator.InputFunc in the Predict pipeline
type TensorInputFunc func(map[string]*tf.Tensor) estimator.InputFunc

// FeatureTensorFunc translates features to tensors
type FeatureTensorFunc func(fs ...tokenize.Feature) (map[string]*tf.Tensor, error)

// ValueProvider is a simple interface for tensor responses without the baggage
type ValueProvider interface {
// Value returns the underlying value as an untyped interface{}; callers
// need to type-assert it to the concrete type they expect.
Value() interface{}
}

// Bert is a model that translates features to values from an exported model. It processes as follows:
// Pipeline: text -> FeatureFactory -> TensorFunc -> InputFunc -> ModelFunc -> Value
type Bert struct {
m *tf.SavedModel
p estimator.Predictor
@@ -39,7 +47,8 @@ type Bert struct {
verbose bool
}

// Pipeline: text -> FeatureFactory -> TensorFunc -> InputFunc -> ModelFunc -> Value
// NewBert will create a new default BERT model from the exported model and vocab.
// Generally used for producing embeddings
func NewBert(m *tf.SavedModel, vocabPath string, opts ...BertOption) (Bert, error) {
voc, err := vocab.FromFile(vocabPath)
if err != nil {
@@ -76,10 +85,13 @@ func NewBert(m *tf.SavedModel, vocabPath string, opts ...BertOption) (Bert, erro

}

// Features converts the given texts into tokenize.Feature values by delegating
// to the model's feature factory.
func (b Bert) Features(texts ...string) []tokenize.Feature {
	feats := b.factory.Features(texts...)
	return feats
}

// PredictValues will run the BERT model on the provided texts.
// The returned values are in the same order as the provided texts.
func (b Bert) PredictValues(texts ...string) ([]ValueProvider, error) {
b.println("Building Features...")
fs := b.factory.Features(texts...)
@@ -108,6 +120,7 @@ func (b Bert) println(msg ...interface{}) {
}
}

// Print is a utility for printing the operations in a saved model
func Print(m *tf.SavedModel) {
fmt.Printf("%+v\n", m)
fmt.Println("Session")
@@ -4,19 +4,21 @@ import (
tf "github.com/tensorflow/tensorflow/tensorflow/go"
)

// DefaultOverrides
const (
ClassifierOutputOp = "probabilities"
ClassifierModelTag = "bert-tuned"
DefaultClassifierSeqLen = 64
ClassifierOutputOp = "probabilities"
ClassifierModelTag = "bert-tuned"
ClassifierSeqLen = 64
)

// NewBertClassifier returns a model configured for classification after being fine-tuned with run_classification.py
func NewBertClassifier(path string, vocabPath string, opts ...BertOption) (Bert, error) {
m, err := tf.LoadSavedModel(path, []string{ClassifierModelTag}, nil)
if err != nil {
return Bert{}, err
}
return NewBert(m, vocabPath, append(opts,
WithSeqLen(DefaultClassifierSeqLen),
WithSeqLen(ClassifierSeqLen),
WithModelFunc(func(m *tf.SavedModel) ([]tf.Output, []*tf.Operation) {
return []tf.Output{
m.Graph.Operation(ClassifierOutputOp).Output(0),

This file was deleted.

@@ -4,6 +4,7 @@ import (
tf "github.com/tensorflow/tensorflow/tensorflow/go"
)

// Embedding Defaults
const (
EmbeddingModelTag = "bert-pretrained"
EmbeddingOp = "embedding"
@@ -1,28 +1,34 @@
// Package estimator is a utility method for interactinfg with tf models
// Package estimator is a utility method for interacting with tf models.
// *** Experimental ***
// This package is meant to be a pseudo-port of the python Estimator API
package estimator

import (
tf "github.com/tensorflow/tensorflow/tensorflow/go"
)

// Input, matches feeds in sessions
// InputFunc matches feeds in sessions
type InputFunc func(m *tf.SavedModel) map[tf.Output]*tf.Tensor

// odelFunc, the returned params match fetches & targers from the API
// ModelFunc returns params that match the fetches & targets from the API
type ModelFunc func(m *tf.SavedModel) ([]tf.Output, []*tf.Operation)

// Estimator matches the tf python Estimator API
type Estimator interface {
Trainer
Evaluator
/*
Trainer
Evaluator
Exporter
*/
Predictor
Exporter
}

// Predictor creates tensors for prediction
type Predictor interface {
// Predict takes an InputFunc that supplies the input feeds and returns the
// resulting tensors, or an error.
Predict(InputFunc) ([]*tf.Tensor, error)
}

/*
type Evaluator interface {
Evaluate(InputFunc) ([]*tf.Tensor, error)
}
@@ -34,3 +40,4 @@ type Trainer interface {
type Exporter interface {
Export(InputFunc) ([]*tf.Tensor, error)
}
*/
@@ -12,6 +12,7 @@ type predictor struct {
targets []*tf.Operation
}

// NewPredictor creates a new Predictor in lieu of a full estimator
func NewPredictor(m *tf.SavedModel, fn ModelFunc) Predictor {
outputs, targets := fn(m)
return predictor{
@@ -21,6 +22,7 @@ func NewPredictor(m *tf.SavedModel, fn ModelFunc) Predictor {
}
}

// Predict applies fn to the estimator model to build the inputs, then runs the session
func (p predictor) Predict(fn InputFunc) ([]*tf.Tensor, error) {
inputs := fn(p.m)
return p.m.Session.Run(inputs, p.outputs, p.targets)
@@ -3,49 +3,47 @@ package model
import (
"github.com/buckhx/gobert/model/estimator"
"github.com/buckhx/gobert/tokenize"
tf "github.com/tensorflow/tensorflow/tensorflow/go"
)

// BertOption configures a BERT model. An option receives the model by value
// and returns the (possibly modified) copy.
type BertOption func(b Bert) Bert

// WithTokenizer returns an option that installs tkz as the tokenizer on the
// model's feature factory.
//
// NOTE(review): factory looks like a pointer (see WithFeatureFactory), so this
// assignment would be visible through every Bert sharing that factory — confirm.
func WithTokenizer(tkz tokenize.VocabTokenizer) BertOption {
	var opt BertOption = func(model Bert) Bert {
		model.factory.Tokenizer = tkz
		return model
	}
	return opt
}

// WithSeqLen returns an option that sets the feature factory's sequence length
// to l. The value should match max_seq_len from the trained model.
//
// NOTE(review): factory looks like a pointer (see WithFeatureFactory), so this
// assignment would be visible through every Bert sharing that factory — confirm.
func WithSeqLen(l int32) BertOption {
	var opt BertOption = func(model Bert) Bert {
		model.factory.SeqLen = l
		return model
	}
	return opt
}

// WithFeatureFactory returns an option that swaps the model's feature factory
// for ff. The pointer is stored as-is; no copy of the factory is made.
func WithFeatureFactory(ff *tokenize.FeatureFactory) BertOption {
	var opt BertOption = func(model Bert) Bert {
		model.factory = ff
		return model
	}
	return opt
}

// WithModelFunc returns an option that sets fn as the model func. Use it when
// the model's outputs do not match the default.
func WithModelFunc(fn estimator.ModelFunc) BertOption {
	var opt BertOption = func(model Bert) Bert {
		model.modelFunc = fn
		return model
	}
	return opt
}

// WithInputFunc returns an option that sets fn as the input func. Use it when
// the input tensors vary from the defaults.
func WithInputFunc(fn TensorInputFunc) BertOption {
	var opt BertOption = func(model Bert) Bert {
		model.inputFunc = fn
		return model
	}
	return opt
}

// WithSavedModel returns an option that sets m as the saved model held by the
// Bert. The pointer is stored as-is.
func WithSavedModel(m *tf.SavedModel) BertOption {
	var opt BertOption = func(model Bert) Bert {
		model.m = m
		return model
	}
	return opt
}
@@ -1,5 +1,6 @@
// Package tokenize supplies tokenizzation operations for BERT.
// Package tokenize supplies tokenization operations for BERT.
// Ports the tokenizer.py capabilities from the core BERT repo
//
// NOTE: All definitions are related to BERT and may vary from unicode definitions,
// for example, BERT considers '$' punctuation, but unicode does not.
package tokenize

0 comments on commit 6ebbc2b

Please sign in to comment.
You can’t perform that action at this time.