Skip to content

Commit

Permalink
feat: add task chunk text (#139)
Browse files Browse the repository at this point in the history
Because

- we want to enrich the available functions with the ability to chunk text for the knowledge base

This commit

- adds the task to chunk text
  • Loading branch information
chuang8511 authored May 31, 2024
1 parent d1cc77d commit 7b36553
Show file tree
Hide file tree
Showing 10 changed files with 916 additions and 126 deletions.
80 changes: 42 additions & 38 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@ module github.com/instill-ai/component
go 1.22.3

require (
cloud.google.com/go/bigquery v1.57.1
cloud.google.com/go/iam v1.1.5
cloud.google.com/go/storage v1.30.1
cloud.google.com/go/bigquery v1.59.1
cloud.google.com/go/iam v1.1.6
cloud.google.com/go/storage v1.38.0
code.sajari.com/docconv v1.3.8
github.com/JohannesKaufmann/html-to-markdown v1.5.0
github.com/PuerkitoBio/goquery v1.9.1
Expand All @@ -30,94 +30,98 @@ require (
github.com/redis/go-redis/v9 v9.5.1
github.com/santhosh-tekuri/jsonschema/v5 v5.3.0
github.com/slack-go/slack v0.12.5
github.com/stretchr/testify v1.8.4
github.com/stretchr/testify v1.9.0
github.com/tmc/langchaingo v0.1.10
go.uber.org/zap v1.24.0
golang.org/x/image v0.15.0
golang.org/x/text v0.15.0
google.golang.org/api v0.149.0
google.golang.org/grpc v1.61.1
google.golang.org/api v0.172.0
google.golang.org/grpc v1.62.1
google.golang.org/protobuf v1.33.0
)

require (
cloud.google.com/go v0.111.0 // indirect
cloud.google.com/go/compute v1.23.3 // indirect
cloud.google.com/go v0.112.1 // indirect
cloud.google.com/go/compute v1.24.0 // indirect
cloud.google.com/go/compute/metadata v0.2.3 // indirect
cloud.google.com/go/longrunning v0.5.4 // indirect
cloud.google.com/go/longrunning v0.5.6 // indirect
github.com/JalfResi/justext v0.0.0-20170829062021-c0282dea7198 // indirect
github.com/advancedlogic/GoOse v0.0.0-20191112112754-e742535969c1 // indirect
github.com/andybalholm/brotli v1.0.4 // indirect
github.com/andybalholm/cascadia v1.3.2 // indirect
github.com/antchfx/htmlquery v1.2.3 // indirect
github.com/antchfx/xmlquery v1.3.1 // indirect
github.com/antchfx/xpath v1.1.10 // indirect
github.com/apache/arrow/go/v12 v12.0.0 // indirect
github.com/apache/thrift v0.16.0 // indirect
github.com/antchfx/htmlquery v1.3.0 // indirect
github.com/antchfx/xmlquery v1.3.17 // indirect
github.com/antchfx/xpath v1.2.4 // indirect
github.com/apache/arrow/go/v14 v14.0.2 // indirect
github.com/araddon/dateparse v0.0.0-20200409225146-d820a6159ab1 // indirect
github.com/cespare/xxhash/v2 v2.2.0 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect
github.com/dlclark/regexp2 v1.10.0 // indirect
github.com/fatih/set v0.2.1 // indirect
github.com/felixge/httpsnoop v1.0.4 // indirect
github.com/gigawattio/window v0.0.0-20180317192513-0f5467e35573 // indirect
github.com/go-logr/logr v1.4.1 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
github.com/gobwas/glob v0.2.3 // indirect
github.com/goccy/go-json v0.9.11 // indirect
github.com/goccy/go-json v0.10.2 // indirect
github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 // indirect
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
github.com/golang/protobuf v1.5.3 // indirect
github.com/golang/snappy v0.0.4 // indirect
github.com/google/flatbuffers v2.0.8+incompatible // indirect
github.com/golang/protobuf v1.5.4 // indirect
github.com/google/flatbuffers v23.5.26+incompatible // indirect
github.com/google/go-cmp v0.6.0 // indirect
github.com/google/s2a-go v0.1.7 // indirect
github.com/google/uuid v1.4.0 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/googleapis/enterprise-certificate-proxy v0.3.2 // indirect
github.com/googleapis/gax-go/v2 v2.12.0 // indirect
github.com/googleapis/gax-go/v2 v2.12.3 // indirect
github.com/gorilla/websocket v1.5.1 // indirect
github.com/grpc-ecosystem/grpc-gateway/v2 v2.19.0 // indirect
github.com/itchyny/timefmt-go v0.1.5 // indirect
github.com/jaytaylor/html2text v0.0.0-20200412013138-3577fbdbcff7 // indirect
github.com/kennygrant/sanitize v1.2.4 // indirect
github.com/klauspost/asmfmt v1.3.2 // indirect
github.com/klauspost/compress v1.15.9 // indirect
github.com/klauspost/cpuid/v2 v2.0.9 // indirect
github.com/klauspost/compress v1.17.2 // indirect
github.com/klauspost/cpuid/v2 v2.2.5 // indirect
github.com/kr/pretty v0.3.1 // indirect
github.com/kr/text v0.2.0 // indirect
github.com/levigross/exp-html v0.0.0-20120902181939-8df60c69a8f5 // indirect
github.com/mattn/go-runewidth v0.0.15 // indirect
github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 // indirect
github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 // indirect
github.com/olekukonko/tablewriter v0.0.4 // indirect
github.com/otiai10/gosseract/v2 v2.2.4 // indirect
github.com/pierrec/lz4/v4 v4.1.15 // indirect
github.com/pierrec/lz4/v4 v4.1.18 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/richardlehane/mscfb v1.0.3 // indirect
github.com/richardlehane/msoleps v1.0.3 // indirect
github.com/rivo/uniseg v0.4.4 // indirect
github.com/rogpeppe/go-internal v1.9.0 // indirect
github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca // indirect
github.com/rogpeppe/go-internal v1.11.0 // indirect
github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d // indirect
github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf // indirect
github.com/temoto/robotstxt v1.1.1 // indirect
github.com/temoto/robotstxt v1.1.2 // indirect
github.com/zeebo/xxh3 v1.0.2 // indirect
gitlab.com/golang-commonmark/html v0.0.0-20191124015941-a22733972181 // indirect
gitlab.com/golang-commonmark/linkify v0.0.0-20191026162114-a0c2df6c8f82 // indirect
gitlab.com/golang-commonmark/markdown v0.0.0-20211110145824-bf3e522c626a // indirect
gitlab.com/golang-commonmark/mdurl v0.0.0-20191124015652-932350d1cb84 // indirect
gitlab.com/golang-commonmark/puny v0.0.0-20191124015043-9f83538fa04f // indirect
go.opencensus.io v0.24.0 // indirect
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.49.0 // indirect
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.49.0 // indirect
go.opentelemetry.io/otel v1.24.0 // indirect
go.opentelemetry.io/otel/metric v1.24.0 // indirect
go.opentelemetry.io/otel/sdk v1.24.0 // indirect
go.opentelemetry.io/otel/trace v1.24.0 // indirect
go.uber.org/atomic v1.9.0 // indirect
go.uber.org/multierr v1.6.0 // indirect
golang.org/x/crypto v0.23.0 // indirect
golang.org/x/mod v0.10.0 // indirect
golang.org/x/mod v0.16.0 // indirect
golang.org/x/net v0.25.0 // indirect
golang.org/x/oauth2 v0.15.0 // indirect
golang.org/x/sync v0.5.0 // indirect
golang.org/x/oauth2 v0.18.0 // indirect
golang.org/x/sync v0.6.0 // indirect
golang.org/x/sys v0.20.0 // indirect
golang.org/x/tools v0.9.1 // indirect
golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect
golang.org/x/time v0.5.0 // indirect
golang.org/x/tools v0.14.0 // indirect
golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 // indirect
google.golang.org/appengine v1.6.8 // indirect
google.golang.org/genproto v0.0.0-20231212172506-995d672761c0 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20240102182953-50ed04b92917 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20240102182953-50ed04b92917 // indirect
google.golang.org/genproto v0.0.0-20240221002015-b0ce06bbee7c // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20240401170217-c3f982113cda // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20240325203815-454cdb8f5daa // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)
Loading

0 comments on commit 7b36553

Please sign in to comment.