Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cmd/deepgram/deepgram.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ var Command = &cobra.Command{
RunE: func(cmd *cobra.Command, args []string) error {
apiKey := viper.GetString("deepgram_api_key")
if apiKey == "" {
return errors.New("Deepgram API key not found. Please run 'podscript configure' or set the PODSCRIPT_DEEPGRAM_API_KEY environment variable.")
return errors.New("Deepgram API key not found. Please run 'podscript configure' or set the DEEPGRAM_API_KEY environment variable.")
}

folder, _ := cmd.Flags().GetString("path")
Expand Down
2 changes: 1 addition & 1 deletion cmd/groq/groq.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ var Command = &cobra.Command{
RunE: func(cmd *cobra.Command, args []string) error {
apiKey := viper.GetString("groq_api_key")
if apiKey == "" {
return errors.New("Groq API key not found. Please run 'podscript configure' or set the PODSCRIPT_GROQ_API_KEY environment variable")
return errors.New("Groq API key not found. Please run 'podscript configure' or set the GROQ_API_KEY environment variable")
}

folder, _ := cmd.Flags().GetString("path")
Expand Down
12 changes: 10 additions & 2 deletions cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,12 @@ var rootCmd = &cobra.Command{
Speech-To-Text (STT) APIs.`,
}

// supportedLLMKeys lists the viper configuration keys that initConfig
// explicitly binds to environment variables via viper.BindEnv, so that
// e.g. OPENAI_API_KEY populates "openai_api_key".
var supportedLLMKeys = []string{
	"openai_api_key",
	"anthropic_api_key",
	"groq_api_key",
}

func init() {
cobra.OnInitialize(initConfig)

Expand All @@ -39,8 +45,10 @@ func initConfig() {
viper.SetConfigType("toml")
viper.SetConfigFile(path.Join(homeDir, ".podscript.toml"))

viper.SetEnvPrefix("PODSCRIPT")
viper.AutomaticEnv()
// Bind env values to keys
for _, k := range supportedLLMKeys {
viper.BindEnv(k)
}

// Read in config file and ENV variables if set
if err := viper.ReadInConfig(); err != nil {
Expand Down
158 changes: 46 additions & 112 deletions cmd/ytt/llms.go
Original file line number Diff line number Diff line change
@@ -1,23 +1,23 @@
package ytt

import (
"context"
"errors"
"fmt"
"net/http"
"time"
"unicode"

"github.com/cenkalti/backoff/v4"
"github.com/liushuangls/go-anthropic/v2"
"github.com/sashabaranov/go-openai"
"github.com/spf13/viper"
"github.com/tmc/langchaingo/llms"
"github.com/tmc/langchaingo/llms/anthropic"
"github.com/tmc/langchaingo/llms/openai"
"github.com/tmc/langchaingo/textsplitter"
)

// Model identifies an LLM usable for transcript cleanup. The values are
// the provider-facing model identifier strings.
type Model string

const (
	ChatGPT4o                 Model = "gpt-4o"
	ChatGpt4oMini             Model = "gpt-4o-mini"
	Claude3Dot5Sonnet20240620 Model = "claude-3-5-sonnet-20240620"
)

var (
Expand All @@ -27,118 +27,52 @@ var (
Claude3Dot5Sonnet20240620: 8192}
)

type TranscriptCleaner interface {
CleanupTranscript(string) (string, error)
}

type OpenAITranscriptCleaner struct {
client *openai.Client
model Model
}

type AnthropicTranscriptCleaner struct {
client *anthropic.Client
}

func NewOpenAITranscriptCleaner(apiKey string, model Model) TranscriptCleaner {
return &OpenAITranscriptCleaner{
client: openai.NewClient(apiKey),
model: model,
// getModel constructs a langchaingo LLM client for the given model,
// reading the corresponding API key from the viper configuration.
// It returns an error when the key is missing or the model is unknown.
func getModel(model Model) (llms.Model, error) {
	switch model {
	case ChatGPT4o, ChatGpt4oMini:
		openaiAPIKey := viper.GetString("openai_api_key")
		if openaiAPIKey == "" {
			return nil, errors.New("OpenAI API key not found. Please run 'podscript configure' or set the OPENAI_API_KEY environment variable")
		}
		return openai.New(openai.WithToken(openaiAPIKey), openai.WithModel(string(model)))
	case Claude3Dot5Sonnet20240620:
		anthropicAPIKey := viper.GetString("anthropic_api_key")
		if anthropicAPIKey == "" {
			return nil, errors.New("Anthropic API key not found. Please run 'podscript configure' or set the ANTHROPIC_API_KEY environment variable")
		}
		// Beta header raises the Sonnet 3.5 max-output-token limit.
		return anthropic.New(anthropic.WithToken(anthropicAPIKey), anthropic.WithModel(string(model)), anthropic.WithAnthropicBetaHeader(anthropic.MaxTokensAnthropicSonnet35))
	default:
		// The model string comes from the user-supplied --model flag, so an
		// unknown value is reachable; fail gracefully instead of panicking.
		return nil, fmt.Errorf("unsupported model %q", model)
	}
}

func NewAnthropicTranscriptCleaner(apiKey string) TranscriptCleaner {
return &AnthropicTranscriptCleaner{
client: anthropic.NewClient(apiKey, anthropic.WithBetaVersion(anthropic.BetaMaxTokens35Sonnet20240715)),
}
// calcWordsFromTokens converts a token budget into an approximate word
// budget (words ≈ tokens * 0.75), rounded down to the nearest 1000.
func calcWordsFromTokens(tokens int) int {
	words := float64(tokens) * 0.75
	return int(words/1000) * 1000
}

func (tc *OpenAITranscriptCleaner) CleanupTranscript(text string) (string, error) {
req := openai.ChatCompletionRequest{
Model: string(tc.model),
Messages: []openai.ChatCompletionMessage{
{
Role: openai.ChatMessageRoleUser,
Content: fmt.Sprintf(userPrompt, text),
},
},
MaxTokens: maxTokens[tc.model],
}

backOff := backoff.NewExponentialBackOff()
backOff.MaxElapsedTime = 10 * time.Minute

var resp openai.ChatCompletionResponse
// countWords reports the number of whitespace-separated words in s,
// counting a transition from whitespace (or start of string) into a
// non-space rune as one word. It is used as the length function for the
// text splitter so chunk sizes are measured in words, not bytes.
func countWords(s string) int {
	count := 0
	inWord := false
	for _, char := range s {
		if unicode.IsSpace(char) {
			inWord = false
		} else if !inWord {
			inWord = true
			count++
		}
	}
	return count
}

func (tc *AnthropicTranscriptCleaner) CleanupTranscript(text string) (string, error) {

req := &anthropic.MessagesRequest{
Model: string(Claude3Dot5Sonnet20240620),
Messages: []anthropic.Message{
anthropic.NewUserTextMessage(fmt.Sprintf(userPrompt, text)),
},
MaxTokens: 8192,
}

backOff := backoff.NewExponentialBackOff()
backOff.MaxElapsedTime = 10 * time.Minute

var resp anthropic.MessagesResponse

err := backoff.Retry(func() (err error) {
resp, err = tc.client.CreateMessages(context.Background(), *req)
if err != nil {
var anthropicAPIError *anthropic.APIError
if errors.As(err, &anthropicAPIError) {
if anthropicAPIError.IsRateLimitErr() || anthropicAPIError.IsOverloadedErr() {
fmt.Printf("%v\n", err)
fmt.Println("Retrying…")
return err
}
}
// For any other error, we'll stop retrying
return backoff.Permanent(err)
}
return nil
}, backOff)

if err != nil {
return "", err
}

// TODO: Log this as debug output
// fmt.Printf("Usage: %+v\n", resp.Usage)
return resp.GetFirstContentText(), nil

// splitText partitions text into chunks whose word counts fit within the
// model's output token budget, with no overlap between chunks.
func splitText(text string, model Model) ([]string, error) {
	chunkWords := calcWordsFromTokens(maxTokens[model])
	rc := textsplitter.NewRecursiveCharacter(
		textsplitter.WithChunkSize(chunkWords),
		textsplitter.WithChunkOverlap(0),
		textsplitter.WithLenFunc(countWords),
	)
	return rc.SplitText(text)
}
102 changes: 40 additions & 62 deletions cmd/ytt/ytt.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
package ytt

import (
"bufio"
"errors"
"context"
"fmt"
"os"
"path"
Expand All @@ -12,7 +11,7 @@ import (

"github.com/kkdai/youtube/v2"
"github.com/spf13/cobra"
"github.com/spf13/viper"
"github.com/tmc/langchaingo/llms"
)

const (
Expand Down Expand Up @@ -51,35 +50,41 @@ func extractTranscript(input string) string {
return ""
}

func calcWordsFromTokens(tokens int) int {
// round down to nearest 1000
return int((float64(tokens)*0.75)/1000) * 1000
// transcriptCleaner pairs a configured langchaingo LLM client with the
// model identifier it was built for.
type transcriptCleaner struct {
	modelOpt Model      // selected model; used to look up maxTokens and drive splitText
	model    llms.Model // langchaingo client constructed for modelOpt
}

func chunkTranscript(transcript string, maxWordsPerChunk int) []string {
// Split the transcript into chunks
var chunks []string
scanner := bufio.NewScanner(strings.NewReader(transcript))
scanner.Split(bufio.ScanWords)

var chunkBuilder strings.Builder
wordCount := 0

for scanner.Scan() {
word := scanner.Text()
chunkBuilder.WriteString(word + " ")
wordCount++
if wordCount >= maxWordsPerChunk {
chunks = append(chunks, chunkBuilder.String())
chunkBuilder.Reset()
wordCount = 0
}
func newTranscriptCleaner(model Model) (*transcriptCleaner, error) {
llm, err := getModel(model)
if err != nil {
return nil, err
}
if chunkBuilder.Len() > 0 {
chunks = append(chunks, chunkBuilder.String())
return &transcriptCleaner{modelOpt: model, model: llm}, nil
}

// cleanupTranscript splits the raw transcript into model-sized chunks,
// sends each chunk to the LLM with the cleanup prompt, and concatenates
// the cleaned results. Progress is printed per chunk.
func (tc transcriptCleaner) cleanupTranscript(transcript string) (string, error) {
	chunks, err := splitText(transcript, tc.modelOpt)
	if err != nil {
		return "", fmt.Errorf("error splitting text: %w", err)
	}

	var cleanedTranscript strings.Builder
	for i, chunk := range chunks {
		cleanedChunk, err := llms.GenerateFromSinglePrompt(
			context.Background(),
			tc.model, userPrompt+"\n\n"+chunk,
			llms.WithMaxTokens(maxTokens[tc.modelOpt]),
		)
		if err != nil {
			return "", fmt.Errorf("failed to process chunk: %w", err)
		}
		// Strip any wrapper text the model adds around the transcript.
		cleanedChunk = extractTranscript(cleanedChunk)
		cleanedTranscript.WriteString(cleanedChunk)
		fmt.Printf("transcribed part %d/%d…\n", i+1, len(chunks))
	}
	return cleanedTranscript.String(), nil
}

var Command = &cobra.Command{
Expand Down Expand Up @@ -149,47 +154,20 @@ var Command = &cobra.Command{
}

// Initialize API client
var (
model Model
tc TranscriptCleaner
)
m, _ := cmd.Flags().GetString("model")
model = Model(m)
switch model {
case ChatGPT4o, ChatGpt4oMini:
openaiApiKey := viper.GetString("openai_api_key")
if openaiApiKey == "" {
return errors.New("OpenAI API key not found. Please run 'podscript configure' or set the PODSCRIPT_OPENAI_API_KEY environment variable")
}
tc = NewOpenAITranscriptCleaner(openaiApiKey, model)

case Claude3Dot5Sonnet20240620:
anthropicApiKey := viper.GetString("anthropic_api_key")
if anthropicApiKey == "" {
return errors.New("Anthropic API key not found. Please run 'podscript configure' or set the PODSCRIPT_ANTHROPIC_API_KEY environment variable")
}
tc = NewAnthropicTranscriptCleaner(anthropicApiKey)
default:
// Should never get here
panic(fmt.Sprintf("Cannot initialise API client from model %s", model))
model := Model(m)
tc, err := newTranscriptCleaner(model)
if err != nil {
return fmt.Errorf("failed to initialize model %s: %v", model, err)
}

// Chunk and Send to LLM API
chunks := chunkTranscript(transcriptTxt, calcWordsFromTokens(maxTokens[model]))

var cleanedTranscript strings.Builder
for i, chunk := range chunks {
cleanedChunk, err := tc.CleanupTranscript(chunk)
if err != nil {
return fmt.Errorf("failed to process chunk: %w", err)
}
cleanedChunk = extractTranscript(cleanedChunk)
cleanedTranscript.WriteString(cleanedChunk)
fmt.Printf("transcribed part %d/%d…\n", i+1, len(chunks))
cleanedTranscriptTxt, err := tc.cleanupTranscript(transcriptTxt)
if err != nil {
return fmt.Errorf("failed to transcribe: %w", err)
}

cleanedTranscriptFilename := path.Join(folder, fmt.Sprintf("cleaned_transcript_%s.txt", filenameSuffix))
if err = os.WriteFile(cleanedTranscriptFilename, []byte(cleanedTranscript.String()), 0644); err != nil {
if err = os.WriteFile(cleanedTranscriptFilename, []byte(cleanedTranscriptTxt), 0644); err != nil {
return fmt.Errorf("failed to write cleaned transcript: %w", err)
}
fmt.Printf("wrote cleaned up transcripts to %s\n", cleanedTranscriptFilename)
Expand Down
Loading