Skip to content
This repository has been archived by the owner on Oct 29, 2024. It is now read-only.

Commit

Permalink
feat: Add task to chunk text
Browse files Browse the repository at this point in the history
  • Loading branch information
chuang8511 committed May 24, 2024
1 parent ae12f02 commit 715b78e
Show file tree
Hide file tree
Showing 27 changed files with 2,986 additions and 104 deletions.
5 changes: 5 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,11 @@ require (
github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf // indirect
github.com/temoto/robotstxt v1.1.1 // indirect
github.com/zeebo/xxh3 v1.0.2 // indirect
gitlab.com/golang-commonmark/html v0.0.0-20191124015941-a22733972181 // indirect
gitlab.com/golang-commonmark/linkify v0.0.0-20191026162114-a0c2df6c8f82 // indirect
gitlab.com/golang-commonmark/markdown v0.0.0-20211110145824-bf3e522c626a // indirect
gitlab.com/golang-commonmark/mdurl v0.0.0-20191124015652-932350d1cb84 // indirect
gitlab.com/golang-commonmark/puny v0.0.0-20191124015043-9f83538fa04f // indirect
go.opencensus.io v0.24.0 // indirect
go.opentelemetry.io/otel v1.24.0 // indirect
go.opentelemetry.io/otel/metric v1.24.0 // indirect
Expand Down
51 changes: 51 additions & 0 deletions go.sum

Large diffs are not rendered by default.

30 changes: 30 additions & 0 deletions pkg/external/langchaingo/schema/chat_message_history.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package schema

// import (
// "context"

// "github.com/tmc/langchaingo/llms"
// )

// // ChatMessageHistory is the interface for chat history in memory/store.
// type ChatMessageHistory interface {
// // AddMessage adds a message to the store.
// AddMessage(ctx context.Context, message llms.ChatMessage) error

// // AddUserMessage is a convenience method for adding a human message string
// // to the store.
// AddUserMessage(ctx context.Context, message string) error

// // AddAIMessage is a convenience method for adding an AI message string to
// // the store.
// AddAIMessage(ctx context.Context, message string) error

// // Clear removes all messages from the store.
// Clear(ctx context.Context) error

// // Messages retrieves all messages from the store
// Messages(ctx context.Context) ([]llms.ChatMessage, error)

// // SetMessages replaces existing messages in the store
// SetMessages(ctx context.Context, messages []llms.ChatMessage) error
// }
3 changes: 3 additions & 0 deletions pkg/external/langchaingo/schema/doc.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Package schema implements a shared core set of data types for use in
// langchaingo.
package schema
8 changes: 8 additions & 0 deletions pkg/external/langchaingo/schema/documents.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
package schema

// Document is a plain data struct (not an interface) that bundles a piece of
// text content with its metadata and a score.
type Document struct {
// PageContent is the document's content as a string.
PageContent string
// Metadata is a free-form map of key/value data attached to the document.
Metadata map[string]any
// Score is a float32 score associated with the document.
// NOTE(review): presumably a relevance/similarity score — confirm with producers.
Score float32
}
18 changes: 18 additions & 0 deletions pkg/external/langchaingo/schema/memory.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
package schema

import "context"

// Memory is the interface for memory in chains.
type Memory interface {
// GetMemoryKey returns the memory key.
GetMemoryKey(ctx context.Context) string
// MemoryVariables returns the input keys this memory will load dynamically.
MemoryVariables(ctx context.Context) []string
// LoadMemoryVariables returns key-value pairs given the inputs to the chain.
// NOTE(review): the original comment read "If None, return all memories";
// presumably a nil inputs map means all memories are returned — confirm
// against the implementations.
LoadMemoryVariables(ctx context.Context, inputs map[string]any) (map[string]any, error)
// SaveContext saves the inputs and outputs of a model run to memory.
SaveContext(ctx context.Context, inputs map[string]any, outputs map[string]any) error
// Clear clears the memory contents.
Clear(ctx context.Context) error
}
15 changes: 15 additions & 0 deletions pkg/external/langchaingo/schema/output_parsers.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
package schema

// import "github.com/tmc/langchaingo/llms"

// // OutputParser is an interface for parsing the output of an LLM call.
// type OutputParser[T any] interface {
// // Parse parses the output of an LLM call.
// Parse(text string) (T, error)
// // ParseWithPrompt parses the output of an LLM call with the prompt used.
// ParseWithPrompt(text string, prompt llms.PromptValue) (T, error)
// // GetFormatInstructions returns a string describing the format of the output.
// GetFormatInstructions() string
// // Type returns the string type key uniquely identifying this class of parser
// Type() string
// }
8 changes: 8 additions & 0 deletions pkg/external/langchaingo/schema/retrivers.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
package schema

import "context"

// Retriever is an interface that defines the behavior of a retriever.
type Retriever interface {
// GetRelevantDocuments takes a query string and returns a slice of
// Document values, or an error.
// NOTE(review): ordering and maximum number of results are not specified
// here — confirm per implementation.
GetRelevantDocuments(ctx context.Context, query string) ([]Document, error)
}
21 changes: 21 additions & 0 deletions pkg/external/langchaingo/schema/schema.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package schema

// AgentAction is the agent's action to take.
// All fields are plain strings.
type AgentAction struct {
// Tool identifies the tool for this action.
// NOTE(review): presumably the tool's name — confirm.
Tool string
// ToolInput is the input string associated with the tool.
ToolInput string
// Log is a free-form log string attached to the action.
Log string
// ToolID is a string identifier associated with the tool.
// NOTE(review): presumably a tool-call ID — confirm with producers.
ToolID string
}

// AgentStep is a step of the agent: an action paired with an observation.
type AgentStep struct {
// Action is the AgentAction for this step.
Action AgentAction
// Observation is a string recorded alongside the action.
// NOTE(review): presumably the tool's output for Action — confirm.
Observation string
}

// AgentFinish is the agent's return value.
type AgentFinish struct {
// ReturnValues maps string keys to the values returned by the agent.
ReturnValues map[string]any
// Log is a free-form log string attached to the result.
Log string
}
17 changes: 17 additions & 0 deletions pkg/external/langchaingo/textsplitter/doc.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
/*
Package textsplitter provides tools for splitting long texts into smaller chunks
based on configurable rules and parameters.
It aims to help in processing these chunks more efficiently
when interacting with language models or other text-processing tools.
The main components of this package are:
- TextSplitter interface: a common interface for splitting texts into smaller chunks.
- RecursiveCharacter: a text splitter that recursively splits texts by different characters (separators)
combined with chunk size and overlap settings.
- Helper functions: utility functions for creating documents out of split texts and rejoining them if necessary.
Using the TextSplitter interface, developers can implement custom
splitting strategies for their specific use cases and requirements.
*/
package textsplitter
Loading

0 comments on commit 715b78e

Please sign in to comment.