From 59b3c4723cb10ad10c7d8a8db59e076d29bbe117 Mon Sep 17 00:00:00 2001 From: ciricc Date: Sat, 13 Sep 2025 15:31:31 +0300 Subject: [PATCH 01/19] feat(go bindings): add state abstraction --- bindings/go/pkg/whisper/consts.go | 1 + bindings/go/pkg/whisper/context.go | 78 +++++------- bindings/go/pkg/whisper/interface.go | 9 ++ bindings/go/pkg/whisper/model.go | 16 +++ bindings/go/pkg/whisper/state.go | 125 +++++++++++++++++++ bindings/go/pkg/whisper/state_test.go | 128 ++++++++++++++++++++ bindings/go/whisper.go | 167 +++++++++++++++++++++++--- bindings/go/whisper_test.go | 160 +++++++++++++++++++++++- 8 files changed, 618 insertions(+), 66 deletions(-) create mode 100644 bindings/go/pkg/whisper/state.go create mode 100644 bindings/go/pkg/whisper/state_test.go diff --git a/bindings/go/pkg/whisper/consts.go b/bindings/go/pkg/whisper/consts.go index 5c22dc13a31..6c778d3d2d7 100644 --- a/bindings/go/pkg/whisper/consts.go +++ b/bindings/go/pkg/whisper/consts.go @@ -16,6 +16,7 @@ var ( ErrProcessingFailed = errors.New("processing failed") ErrUnsupportedLanguage = errors.New("unsupported language") ErrModelNotMultilingual = errors.New("model is not multilingual") + ErrUnableToCreateState = errors.New("unable to create state") ) /////////////////////////////////////////////////////////////////////////////// diff --git a/bindings/go/pkg/whisper/context.go b/bindings/go/pkg/whisper/context.go index cb3d9eb8c1c..a7df02a6653 100644 --- a/bindings/go/pkg/whisper/context.go +++ b/bindings/go/pkg/whisper/context.go @@ -20,9 +20,6 @@ type context struct { params whisper.Params } -// Make sure context adheres to the interface -var _ Context = (*context)(nil) - /////////////////////////////////////////////////////////////////////////////// // LIFECYCLE @@ -241,7 +238,7 @@ func (context *context) Process( return nil } -// Return the next segment of tokens +// NextSegment returns the next segment from the context buffer func (context *context) NextSegment() (Segment, error) { if 
context.model.ctx == nil { return Segment{}, ErrInternalAppError @@ -249,18 +246,41 @@ func (context *context) NextSegment() (Segment, error) { if context.n >= context.model.ctx.Whisper_full_n_segments() { return Segment{}, io.EOF } - - // Populate result result := toSegment(context.model.ctx, context.n) - - // Increment the cursor context.n++ - - // Return success return result, nil } -// Test for text tokens +/////////////////////////////////////////////////////////////////////////////// +// PRIVATE METHODS + +func toSegment(ctx *whisper.Context, n int) Segment { + return Segment{ + Num: n, + Text: strings.TrimSpace(ctx.Whisper_full_get_segment_text(n)), + Start: time.Duration(ctx.Whisper_full_get_segment_t0(n)) * time.Millisecond * 10, + End: time.Duration(ctx.Whisper_full_get_segment_t1(n)) * time.Millisecond * 10, + Tokens: toTokens(ctx, n), + } +} + +func toTokens(ctx *whisper.Context, n int) []Token { + result := make([]Token, ctx.Whisper_full_n_tokens(n)) + for i := 0; i < len(result); i++ { + data := ctx.Whisper_full_get_token_data(n, i) + + result[i] = Token{ + Id: int(ctx.Whisper_full_get_token_id(n, i)), + Text: ctx.Whisper_full_get_token_text(n, i), + P: ctx.Whisper_full_get_token_p(n, i), + Start: time.Duration(data.T0()) * time.Millisecond * 10, + End: time.Duration(data.T1()) * time.Millisecond * 10, + } + } + return result +} + +// Token helpers func (context *context) IsText(t Token) bool { switch { case context.IsBEG(t): @@ -280,37 +300,30 @@ func (context *context) IsText(t Token) bool { } } -// Test for "begin" token func (context *context) IsBEG(t Token) bool { return whisper.Token(t.Id) == context.model.ctx.Whisper_token_beg() } -// Test for "start of transcription" token func (context *context) IsSOT(t Token) bool { return whisper.Token(t.Id) == context.model.ctx.Whisper_token_sot() } -// Test for "end of transcription" token func (context *context) IsEOT(t Token) bool { return whisper.Token(t.Id) == context.model.ctx.Whisper_token_eot() } 
-// Test for "start of prev" token func (context *context) IsPREV(t Token) bool { return whisper.Token(t.Id) == context.model.ctx.Whisper_token_prev() } -// Test for "start of lm" token func (context *context) IsSOLM(t Token) bool { return whisper.Token(t.Id) == context.model.ctx.Whisper_token_solm() } -// Test for "No timestamps" token func (context *context) IsNOT(t Token) bool { return whisper.Token(t.Id) == context.model.ctx.Whisper_token_not() } -// Test for token associated with a specific language func (context *context) IsLANG(t Token, lang string) bool { if id := context.model.ctx.Whisper_lang_id(lang); id >= 0 { return whisper.Token(t.Id) == context.model.ctx.Whisper_token_lang(id) @@ -318,32 +331,3 @@ func (context *context) IsLANG(t Token, lang string) bool { return false } } - -/////////////////////////////////////////////////////////////////////////////// -// PRIVATE METHODS - -func toSegment(ctx *whisper.Context, n int) Segment { - return Segment{ - Num: n, - Text: strings.TrimSpace(ctx.Whisper_full_get_segment_text(n)), - Start: time.Duration(ctx.Whisper_full_get_segment_t0(n)) * time.Millisecond * 10, - End: time.Duration(ctx.Whisper_full_get_segment_t1(n)) * time.Millisecond * 10, - Tokens: toTokens(ctx, n), - } -} - -func toTokens(ctx *whisper.Context, n int) []Token { - result := make([]Token, ctx.Whisper_full_n_tokens(n)) - for i := 0; i < len(result); i++ { - data := ctx.Whisper_full_get_token_data(n, i) - - result[i] = Token{ - Id: int(ctx.Whisper_full_get_token_id(n, i)), - Text: ctx.Whisper_full_get_token_text(n, i), - P: ctx.Whisper_full_get_token_p(n, i), - Start: time.Duration(data.T0()) * time.Millisecond * 10, - End: time.Duration(data.T1()) * time.Millisecond * 10, - } - } - return result -} diff --git a/bindings/go/pkg/whisper/interface.go b/bindings/go/pkg/whisper/interface.go index e3122c44b76..b4705ec1c67 100644 --- a/bindings/go/pkg/whisper/interface.go +++ b/bindings/go/pkg/whisper/interface.go @@ -85,6 +85,15 @@ type Context 
interface { SystemInfo() string } +// State is a per-request speech recognition state which shares the loaded model +// but isolates recognition results. It embeds Context, so any state-specific +// methods can be added later without breaking existing API. +type State interface { + io.Closer + + Context +} + // Segment is the text result of a speech recognition. type Segment struct { // Segment Number diff --git a/bindings/go/pkg/whisper/model.go b/bindings/go/pkg/whisper/model.go index 68a150223c7..0142a787aa9 100644 --- a/bindings/go/pkg/whisper/model.go +++ b/bindings/go/pkg/whisper/model.go @@ -99,3 +99,19 @@ func (model *model) NewContext() (Context, error) { // Return new context return newContext(model, params) } + +// NewState returns a new per-request state sharing the loaded model +func (model *model) NewState() (State, error) { + if model.ctx == nil { + return nil, ErrInternalAppError + } + params := model.ctx.Whisper_full_default_params(whisper.SAMPLING_GREEDY) + params.SetTranslate(false) + params.SetPrintSpecial(false) + params.SetPrintProgress(false) + params.SetPrintRealtime(false) + params.SetPrintTimestamps(false) + params.SetThreads(runtime.NumCPU()) + params.SetNoContext(true) + return newState(model, params) +} diff --git a/bindings/go/pkg/whisper/state.go b/bindings/go/pkg/whisper/state.go new file mode 100644 index 00000000000..4ae81baf790 --- /dev/null +++ b/bindings/go/pkg/whisper/state.go @@ -0,0 +1,125 @@ +package whisper + +import ( + "io" + "strings" + "time" + + // Bindings + whisper "github.com/ggerganov/whisper.cpp/bindings/go" +) + +// state embeds context behavior and carries a low-level state pointer +// for isolated processing results. +type state struct { + *context + st *whisper.State +} + +// NewState creates a new per-request State from a Model without changing the Model interface. 
+func NewState(m Model) (State, error) { + impl, ok := m.(*model) + if !ok { + return nil, ErrInternalAppError + } + params := impl.ctx.Whisper_full_default_params(whisper.SAMPLING_GREEDY) + params.SetTranslate(false) + params.SetPrintSpecial(false) + params.SetPrintProgress(false) + params.SetPrintRealtime(false) + params.SetPrintTimestamps(false) + return newState(impl, params) +} + +// internal constructor used by model.NewState +func newState(model *model, params whisper.Params) (State, error) { + ctx := &context{model: model, params: params} + st := model.ctx.Whisper_init_state() + if st == nil { + return nil, ErrUnableToCreateState + } + return &state{context: ctx, st: st}, nil +} + +// Process using an isolated state for concurrency +func (s *state) Process( + data []float32, + callEncoderBegin EncoderBeginCallback, + callNewSegment SegmentCallback, + callProgress ProgressCallback, +) error { + if s.model.ctx == nil || s.st == nil { + return ErrInternalAppError + } + if callNewSegment != nil { + s.params.SetSingleSegment(true) + } + if err := s.model.ctx.Whisper_full_with_state(s.st, s.params, data, callEncoderBegin, + func(new int) { + if callNewSegment != nil { + num_segments := s.model.ctx.Whisper_full_n_segments_from_state(s.st) + s0 := num_segments - new + for i := s0; i < num_segments; i++ { + callNewSegment(toSegmentFromState(s.model.ctx, s.st, i)) + } + } + }, func(progress int) { + if callProgress != nil { + callProgress(progress) + } + }); err != nil { + return err + } + return nil +} + +// Return the next segment of tokens for state +func (s *state) NextSegment() (Segment, error) { + if s.model.ctx == nil { + return Segment{}, ErrInternalAppError + } + if s.n >= s.model.ctx.Whisper_full_n_segments_from_state(s.st) { + return Segment{}, io.EOF + } + result := toSegmentFromState(s.model.ctx, s.st, s.n) + s.n++ + return result, nil +} + +func (s *state) Close() error { + if s.st != nil { + s.st.Whisper_free_state() + s.st = nil + } + return nil +} + 
+// Helpers specific to state-based results +func toSegmentFromState(ctx *whisper.Context, st *whisper.State, n int) Segment { + return Segment{ + Num: n, + Text: stringsTrim(ctx.Whisper_full_get_segment_text_from_state(st, n)), + Start: duration10x(ctx.Whisper_full_get_segment_t0_from_state(st, n)), + End: duration10x(ctx.Whisper_full_get_segment_t1_from_state(st, n)), + Tokens: toTokensFromState(ctx, st, n), + } +} + +func toTokensFromState(ctx *whisper.Context, st *whisper.State, n int) []Token { + result := make([]Token, ctx.Whisper_full_n_tokens_from_state(st, n)) + for i := 0; i < len(result); i++ { + data := ctx.Whisper_full_get_token_data_from_state(st, n, i) + result[i] = Token{ + Id: int(ctx.Whisper_full_get_token_id_from_state(st, n, i)), + Text: ctx.Whisper_full_get_token_text_from_state(st, n, i), + P: ctx.Whisper_full_get_token_p_from_state(st, n, i), + Start: duration10x(data.T0()), + End: duration10x(data.T1()), + } + } + return result +} + +// small shared helpers to avoid importing time/strings here unnecessarily +func stringsTrim(s string) string { return strings.TrimSpace(s) } +func duration10x(ms10 int64) time.Duration { return time.Duration(ms10) * time.Millisecond * 10 } diff --git a/bindings/go/pkg/whisper/state_test.go b/bindings/go/pkg/whisper/state_test.go new file mode 100644 index 00000000000..f893b8d2b87 --- /dev/null +++ b/bindings/go/pkg/whisper/state_test.go @@ -0,0 +1,128 @@ +package whisper_test + +import ( + "os" + "sync" + "testing" + + "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper" + "github.com/go-audio/wav" + assert "github.com/stretchr/testify/assert" +) + +func TestState_Process(t *testing.T) { + assert := assert.New(t) + + if _, err := os.Stat(ModelPath); os.IsNotExist(err) { + t.Skip("Skipping test, model not found:", ModelPath) + } + if _, err := os.Stat(SamplePath); os.IsNotExist(err) { + t.Skip("Skipping test, sample not found:", SamplePath) + } + + fh, err := os.Open(SamplePath) + assert.NoError(err) + 
defer fh.Close() + + dec := wav.NewDecoder(fh) + buf, err := dec.FullPCMBuffer() + assert.NoError(err) + assert.Equal(uint16(1), dec.NumChans) + data := buf.AsFloat32Buffer().Data + + model, err := whisper.New(ModelPath) + assert.NoError(err) + assert.NotNil(model) + defer model.Close() + + st, err := whisper.NewState(model) + assert.NoError(err) + assert.NotNil(st) + defer func() { _ = st.Close() }() + + err = st.Process(data, nil, nil, nil) + assert.NoError(err) + + seg, err := st.NextSegment() + assert.NoError(err) + assert.NotEmpty(seg.Text) +} + +func TestState_Parallel_DifferentInputs(t *testing.T) { + assert := assert.New(t) + + if _, err := os.Stat(ModelPath); os.IsNotExist(err) { + t.Skip("Skipping test, model not found:", ModelPath) + } + if _, err := os.Stat(SamplePath); os.IsNotExist(err) { + t.Skip("Skipping test, sample not found:", SamplePath) + } + + fh, err := os.Open(SamplePath) + assert.NoError(err) + defer fh.Close() + + dec := wav.NewDecoder(fh) + buf, err := dec.FullPCMBuffer() + assert.NoError(err) + assert.Equal(uint16(1), dec.NumChans) + data := buf.AsFloat32Buffer().Data + assert.Greater(len(data), 10) + + // Create half-sample (second half) + half := make([]float32, len(data)/2) + copy(half, data[len(data)/2:]) + + model, err := whisper.New(ModelPath) + assert.NoError(err) + assert.NotNil(model) + defer model.Close() + + st1, err := whisper.NewState(model) + assert.NoError(err) + st2, err := whisper.NewState(model) + assert.NoError(err) + defer func() { _ = st1.Close() }() + defer func() { _ = st2.Close() }() + + // Run in parallel, but guard core call to respect context safety + var wg sync.WaitGroup + var first1, first2 string + var e1, e2 error + + wg.Add(2) + + // No mutex needed because each state is isolated + go func() { + defer wg.Done() + e1 = st1.Process(data, nil, nil, nil) + if e1 == nil { + seg, err := st1.NextSegment() + if err == nil { + first1 = seg.Text + } else { + e1 = err + } + } + }() + + go func() { + defer wg.Done() 
+ e2 = st2.Process(half, nil, nil, nil) + if e2 == nil { + seg, err := st2.NextSegment() + if err == nil { + first2 = seg.Text + } else { + e2 = err + } + } + }() + + wg.Wait() + assert.NoError(e1) + assert.NoError(e2) + assert.NotEmpty(first1) + assert.NotEmpty(first2) + assert.NotEqual(first1, first2, "first segments should differ for different inputs") +} diff --git a/bindings/go/whisper.go b/bindings/go/whisper.go index 3ef73414d90..cb5907ffe06 100644 --- a/bindings/go/whisper.go +++ b/bindings/go/whisper.go @@ -2,6 +2,7 @@ package whisper import ( "errors" + "sync" "unsafe" ) @@ -67,6 +68,7 @@ import "C" type ( Context C.struct_whisper_context + State C.struct_whisper_state Token C.whisper_token TokenData C.struct_whisper_token_data SamplingStrategy C.enum_whisper_sampling_strategy @@ -116,6 +118,19 @@ func (ctx *Context) Whisper_free() { C.whisper_free((*C.struct_whisper_context)(ctx)) } +// Allocates a new state associated with the context. Returns nil on failure. +func (ctx *Context) Whisper_init_state() *State { + if s := C.whisper_init_state((*C.struct_whisper_context)(ctx)); s != nil { + return (*State)(s) + } + return nil +} + +// Frees all memory allocated by the state. +func (s *State) Whisper_free_state() { + C.whisper_free_state((*C.struct_whisper_state)(s)) +} + // Convert RAW PCM audio to log mel spectrogram. // The resulting spectrogram is stored inside the provided whisper context. func (ctx *Context) Whisper_pcm_to_mel(data []float32, threads int) error { @@ -126,6 +141,15 @@ func (ctx *Context) Whisper_pcm_to_mel(data []float32, threads int) error { } } +// Convert RAW PCM audio to log mel spectrogram into the provided state. 
+func (ctx *Context) Whisper_pcm_to_mel_with_state(state *State, data []float32, threads int) error { + if C.whisper_pcm_to_mel_with_state((*C.struct_whisper_context)(ctx), (*C.struct_whisper_state)(state), (*C.float)(&data[0]), C.int(len(data)), C.int(threads)) == 0 { + return nil + } else { + return ErrConversionFailed + } +} + // This can be used to set a custom log mel spectrogram inside the provided whisper context. // Use this instead of whisper_pcm_to_mel() if you want to provide your own log mel spectrogram. // n_mel must be 80 @@ -137,6 +161,15 @@ func (ctx *Context) Whisper_set_mel(data []float32, n_mel int) error { } } +// Set a custom log mel spectrogram into the provided state. +func (ctx *Context) Whisper_set_mel_with_state(state *State, data []float32, n_mel int) error { + if C.whisper_set_mel_with_state((*C.struct_whisper_context)(ctx), (*C.struct_whisper_state)(state), (*C.float)(&data[0]), C.int(len(data)), C.int(n_mel)) == 0 { + return nil + } else { + return ErrConversionFailed + } +} + // Run the Whisper encoder on the log mel spectrogram stored inside the provided whisper context. // Make sure to call whisper_pcm_to_mel() or whisper_set_mel() first. // offset can be used to specify the offset of the first frame in the spectrogram. @@ -148,6 +181,15 @@ func (ctx *Context) Whisper_encode(offset, threads int) error { } } +// Run the Whisper encoder using the provided state. +func (ctx *Context) Whisper_encode_with_state(state *State, offset, threads int) error { + if C.whisper_encode_with_state((*C.struct_whisper_context)(ctx), (*C.struct_whisper_state)(state), C.int(offset), C.int(threads)) == 0 { + return nil + } else { + return ErrConversionFailed + } +} + // Run the Whisper decoder to obtain the logits and probabilities for the next token. // Make sure to call whisper_encode() first. // tokens + n_tokens is the provided context for the decoder. 
@@ -160,6 +202,15 @@ func (ctx *Context) Whisper_decode(tokens []Token, past, threads int) error { } } +// Run the Whisper decoder using the provided state. +func (ctx *Context) Whisper_decode_with_state(state *State, tokens []Token, past, threads int) error { + if C.whisper_decode_with_state((*C.struct_whisper_context)(ctx), (*C.struct_whisper_state)(state), (*C.whisper_token)(&tokens[0]), C.int(len(tokens)), C.int(past), C.int(threads)) == 0 { + return nil + } else { + return ErrConversionFailed + } +} + // Convert the provided text into tokens. The tokens pointer must be large enough to hold the resulting tokens. // Returns the number of tokens on success func (ctx *Context) Whisper_tokenize(text string, tokens []Token) (int, error) { @@ -205,6 +256,16 @@ func (ctx *Context) Whisper_lang_auto_detect(offset_ms, n_threads int) ([]float3 } } +// Use mel data at offset_ms to auto-detect language using the provided state. +func (ctx *Context) Whisper_lang_auto_detect_with_state(state *State, offset_ms, n_threads int) ([]float32, error) { + probs := make([]float32, Whisper_lang_max_id()+1) + if n := int(C.whisper_lang_auto_detect_with_state((*C.struct_whisper_context)(ctx), (*C.struct_whisper_state)(state), C.int(offset_ms), C.int(n_threads), (*C.float)(&probs[0]))); n < 0 { + return nil, ErrAutoDetectFailed + } else { + return probs, nil + } +} + func (ctx *Context) Whisper_n_len() int { return int(C.whisper_n_len((*C.struct_whisper_context)(ctx))) } @@ -323,6 +384,28 @@ func (ctx *Context) Whisper_full( } } +// Run the entire model using the provided state: PCM -> mel -> encoder -> decoder -> text +func (ctx *Context) Whisper_full_with_state( + state *State, + params Params, + samples []float32, + encoderBeginCallback func() bool, + newSegmentCallback func(int), + progressCallback func(int), +) error { + registerEncoderBeginCallback(ctx, encoderBeginCallback) + registerNewSegmentCallback(ctx, newSegmentCallback) + registerProgressCallback(ctx, progressCallback) + 
defer registerEncoderBeginCallback(ctx, nil) + defer registerNewSegmentCallback(ctx, nil) + defer registerProgressCallback(ctx, nil) + if C.whisper_full_with_state((*C.struct_whisper_context)(ctx), (*C.struct_whisper_state)(state), (C.struct_whisper_full_params)(params), (*C.float)(&samples[0]), C.int(len(samples))) == 0 { + return nil + } else { + return ErrConversionFailed + } +} + // Split the input audio in chunks and process each chunk separately using whisper_full() // It seems this approach can offer some speedup in some cases. // However, the transcription accuracy can be worse at the beginning and end of each chunk. @@ -357,102 +440,152 @@ func (ctx *Context) Whisper_full_n_segments() int { return int(C.whisper_full_n_segments((*C.struct_whisper_context)(ctx))) } +func (ctx *Context) Whisper_full_n_segments_from_state(state *State) int { + return int(C.whisper_full_n_segments_from_state((*C.struct_whisper_state)(state))) +} + // Get the start and end time of the specified segment. func (ctx *Context) Whisper_full_get_segment_t0(segment int) int64 { return int64(C.whisper_full_get_segment_t0((*C.struct_whisper_context)(ctx), C.int(segment))) } +func (ctx *Context) Whisper_full_get_segment_t0_from_state(state *State, segment int) int64 { + return int64(C.whisper_full_get_segment_t0_from_state((*C.struct_whisper_state)(state), C.int(segment))) +} + // Get the start and end time of the specified segment. func (ctx *Context) Whisper_full_get_segment_t1(segment int) int64 { return int64(C.whisper_full_get_segment_t1((*C.struct_whisper_context)(ctx), C.int(segment))) } +func (ctx *Context) Whisper_full_get_segment_t1_from_state(state *State, segment int) int64 { + return int64(C.whisper_full_get_segment_t1_from_state((*C.struct_whisper_state)(state), C.int(segment))) +} + // Get the text of the specified segment. 
func (ctx *Context) Whisper_full_get_segment_text(segment int) string { return C.GoString(C.whisper_full_get_segment_text((*C.struct_whisper_context)(ctx), C.int(segment))) } +func (ctx *Context) Whisper_full_get_segment_text_from_state(state *State, segment int) string { + return C.GoString(C.whisper_full_get_segment_text_from_state((*C.struct_whisper_state)(state), C.int(segment))) +} + // Get number of tokens in the specified segment. func (ctx *Context) Whisper_full_n_tokens(segment int) int { return int(C.whisper_full_n_tokens((*C.struct_whisper_context)(ctx), C.int(segment))) } +func (ctx *Context) Whisper_full_n_tokens_from_state(state *State, segment int) int { + return int(C.whisper_full_n_tokens_from_state((*C.struct_whisper_state)(state), C.int(segment))) +} + // Get the token text of the specified token index in the specified segment. func (ctx *Context) Whisper_full_get_token_text(segment int, token int) string { return C.GoString(C.whisper_full_get_token_text((*C.struct_whisper_context)(ctx), C.int(segment), C.int(token))) } +func (ctx *Context) Whisper_full_get_token_text_from_state(state *State, segment int, token int) string { + return C.GoString(C.whisper_full_get_token_text_from_state((*C.struct_whisper_context)(ctx), (*C.struct_whisper_state)(state), C.int(segment), C.int(token))) +} + // Get the token of the specified token index in the specified segment. func (ctx *Context) Whisper_full_get_token_id(segment int, token int) Token { return Token(C.whisper_full_get_token_id((*C.struct_whisper_context)(ctx), C.int(segment), C.int(token))) } +func (ctx *Context) Whisper_full_get_token_id_from_state(state *State, segment int, token int) Token { + return Token(C.whisper_full_get_token_id_from_state((*C.struct_whisper_state)(state), C.int(segment), C.int(token))) +} + // Get token data for the specified token in the specified segment. // This contains probabilities, timestamps, etc. 
func (ctx *Context) Whisper_full_get_token_data(segment int, token int) TokenData { return TokenData(C.whisper_full_get_token_data((*C.struct_whisper_context)(ctx), C.int(segment), C.int(token))) } +func (ctx *Context) Whisper_full_get_token_data_from_state(state *State, segment int, token int) TokenData { + return TokenData(C.whisper_full_get_token_data_from_state((*C.struct_whisper_state)(state), C.int(segment), C.int(token))) +} + // Get the probability of the specified token in the specified segment. func (ctx *Context) Whisper_full_get_token_p(segment int, token int) float32 { return float32(C.whisper_full_get_token_p((*C.struct_whisper_context)(ctx), C.int(segment), C.int(token))) } +func (ctx *Context) Whisper_full_get_token_p_from_state(state *State, segment int, token int) float32 { + return float32(C.whisper_full_get_token_p_from_state((*C.struct_whisper_state)(state), C.int(segment), C.int(token))) +} + +func (ctx *Context) Whisper_full_lang_id_from_state(state *State) int { + return int(C.whisper_full_lang_id_from_state((*C.struct_whisper_state)(state))) +} + +func (ctx *Context) Whisper_n_len_from_state(state *State) int { + return int(C.whisper_n_len_from_state((*C.struct_whisper_state)(state))) +} + +func (ctx *Context) Whisper_get_logits_from_state(state *State) []float32 { + return (*[1 << 30]float32)(unsafe.Pointer(C.whisper_get_logits_from_state((*C.struct_whisper_state)(state))))[:ctx.Whisper_n_vocab()] +} + /////////////////////////////////////////////////////////////////////////////// // CALLBACKS var ( - cbNewSegment = make(map[unsafe.Pointer]func(int)) - cbProgress = make(map[unsafe.Pointer]func(int)) - cbEncoderBegin = make(map[unsafe.Pointer]func() bool) + cbNewSegment sync.Map // map[unsafe.Pointer]func(int) + cbProgress sync.Map // map[unsafe.Pointer]func(int) + cbEncoderBegin sync.Map // map[unsafe.Pointer]func() bool ) func registerNewSegmentCallback(ctx *Context, fn func(int)) { + k := unsafe.Pointer(ctx) if fn == nil { - 
delete(cbNewSegment, unsafe.Pointer(ctx)) + cbNewSegment.Delete(k) } else { - cbNewSegment[unsafe.Pointer(ctx)] = fn + cbNewSegment.Store(k, fn) } } func registerProgressCallback(ctx *Context, fn func(int)) { + k := unsafe.Pointer(ctx) if fn == nil { - delete(cbProgress, unsafe.Pointer(ctx)) + cbProgress.Delete(k) } else { - cbProgress[unsafe.Pointer(ctx)] = fn + cbProgress.Store(k, fn) } } func registerEncoderBeginCallback(ctx *Context, fn func() bool) { + k := unsafe.Pointer(ctx) if fn == nil { - delete(cbEncoderBegin, unsafe.Pointer(ctx)) + cbEncoderBegin.Delete(k) } else { - cbEncoderBegin[unsafe.Pointer(ctx)] = fn + cbEncoderBegin.Store(k, fn) } } //export callNewSegment func callNewSegment(user_data unsafe.Pointer, new C.int) { - if fn, ok := cbNewSegment[user_data]; ok { - fn(int(new)) + if v, ok := cbNewSegment.Load(user_data); ok { + v.(func(int))(int(new)) } } //export callProgress func callProgress(user_data unsafe.Pointer, progress C.int) { - if fn, ok := cbProgress[user_data]; ok { - fn(int(progress)) + if v, ok := cbProgress.Load(user_data); ok { + v.(func(int))(int(progress)) } } //export callEncoderBegin func callEncoderBegin(user_data unsafe.Pointer) C.bool { - if fn, ok := cbEncoderBegin[user_data]; ok { - if fn() { + if v, ok := cbEncoderBegin.Load(user_data); ok { + if v.(func() bool)() { return C.bool(true) - } else { - return C.bool(false) } + return C.bool(false) } return true } diff --git a/bindings/go/whisper_test.go b/bindings/go/whisper_test.go index 40648ffa8d4..23bbfbff01a 100644 --- a/bindings/go/whisper_test.go +++ b/bindings/go/whisper_test.go @@ -1,8 +1,10 @@ package whisper_test import ( + "errors" "os" "runtime" + "sync" "testing" "time" @@ -39,7 +41,7 @@ func Test_Whisper_001(t *testing.T) { // Open samples fh, err := os.Open(SamplePath) assert.NoError(err) - defer fh.Close() + defer func() { _ = fh.Close() }() // Read samples d := wav.NewDecoder(fh) @@ -89,7 +91,7 @@ func Test_Whisper_003(t *testing.T) { // Open samples fh, err 
:= os.Open(SamplePath) assert.NoError(err) - defer fh.Close() + defer func() { _ = fh.Close() }() // Read samples d := wav.NewDecoder(fh) @@ -111,3 +113,157 @@ func Test_Whisper_003(t *testing.T) { t.Logf("%s: %f", whisper.Whisper_lang_str(i), p) } } + +func Test_Whisper_State_Init_Free(t *testing.T) { + assert := assert.New(t) + if _, err := os.Stat(ModelPath); os.IsNotExist(err) { + t.Skip("Skipping test, model not found:", ModelPath) + } + + ctx := whisper.Whisper_init(ModelPath) + assert.NotNil(ctx) + defer ctx.Whisper_free() + + state := ctx.Whisper_init_state() + assert.NotNil(state) + state.Whisper_free_state() +} + +func Test_Whisper_Full_With_State(t *testing.T) { + assert := assert.New(t) + if _, err := os.Stat(ModelPath); os.IsNotExist(err) { + t.Skip("Skipping test, model not found:", ModelPath) + } + if _, err := os.Stat(SamplePath); os.IsNotExist(err) { + t.Skip("Skipping test, sample not found:", SamplePath) + } + + // Open samples + fh, err := os.Open(SamplePath) + assert.NoError(err) + defer func() { _ = fh.Close() }() + + // Read samples + d := wav.NewDecoder(fh) + buf, err := d.FullPCMBuffer() + assert.NoError(err) + data := buf.AsFloat32Buffer().Data + + ctx := whisper.Whisper_init(ModelPath) + assert.NotNil(ctx) + defer ctx.Whisper_free() + + state := ctx.Whisper_init_state() + assert.NotNil(state) + defer state.Whisper_free_state() + + params := ctx.Whisper_full_default_params(whisper.SAMPLING_GREEDY) + // Run using state + err = ctx.Whisper_full_with_state(state, params, data, nil, nil, nil) + assert.NoError(err) + + // Validate results are stored in state + nSegments := ctx.Whisper_full_n_segments_from_state(state) + assert.GreaterOrEqual(nSegments, 1) + text := ctx.Whisper_full_get_segment_text_from_state(state, 0) + assert.NotEmpty(text) +} + +func Test_Whisper_Lang_Auto_Detect_With_State(t *testing.T) { + assert := assert.New(t) + if _, err := os.Stat(ModelPath); os.IsNotExist(err) { + t.Skip("Skipping test, model not found:", ModelPath) 
+ } + if _, err := os.Stat(SamplePath); os.IsNotExist(err) { + t.Skip("Skipping test, sample not found:", SamplePath) + } + + // Open samples + fh, err := os.Open(SamplePath) + assert.NoError(err) + defer func() { _ = fh.Close() }() + + // Read samples + d := wav.NewDecoder(fh) + buf, err := d.FullPCMBuffer() + assert.NoError(err) + data := buf.AsFloat32Buffer().Data + + ctx := whisper.Whisper_init(ModelPath) + assert.NotNil(ctx) + defer ctx.Whisper_free() + + state := ctx.Whisper_init_state() + assert.NotNil(state) + defer state.Whisper_free_state() + + threads := runtime.NumCPU() + // Prepare mel into state then detect + assert.NoError(ctx.Whisper_pcm_to_mel_with_state(state, data, threads)) + probs, err := ctx.Whisper_lang_auto_detect_with_state(state, 0, threads) + assert.NoError(err) + assert.Equal(whisper.Whisper_lang_max_id()+1, len(probs)) +} + +func Test_Whisper_Concurrent_With_State(t *testing.T) { + assert := assert.New(t) + if _, err := os.Stat(ModelPath); os.IsNotExist(err) { + t.Skip("Skipping test, model not found:", ModelPath) + } + if _, err := os.Stat(SamplePath); os.IsNotExist(err) { + t.Skip("Skipping test, sample not found:", SamplePath) + } + + // Load audio once + fh, err := os.Open(SamplePath) + assert.NoError(err) + defer func() { _ = fh.Close() }() + dec := wav.NewDecoder(fh) + buf, err := dec.FullPCMBuffer() + assert.NoError(err) + data := buf.AsFloat32Buffer().Data + + ctx := whisper.Whisper_init(ModelPath) + assert.NotNil(ctx) + defer ctx.Whisper_free() + + // Each goroutine has its own state + state1 := ctx.Whisper_init_state() + state2 := ctx.Whisper_init_state() + assert.NotNil(state1) + assert.NotNil(state2) + defer state1.Whisper_free_state() + defer state2.Whisper_free_state() + + params := ctx.Whisper_full_default_params(whisper.SAMPLING_GREEDY) + + var wg sync.WaitGroup + var mu sync.Mutex // guard calls into shared ctx, per upstream note not thread-safe for same context + errs := make(chan error, 2) + + worker := func(state 
*whisper.State) { + defer wg.Done() + mu.Lock() + err := ctx.Whisper_full_with_state(state, params, data, nil, nil, nil) + if err == nil { + n := ctx.Whisper_full_n_segments_from_state(state) + if n <= 0 { + err = errors.New("no segments") + } else { + _ = ctx.Whisper_full_get_segment_text_from_state(state, 0) + } + } + mu.Unlock() + errs <- err + } + + wg.Add(2) + go worker(state1) + go worker(state2) + wg.Wait() + close(errs) + + for e := range errs { + assert.NoError(e) + } +} From ebbcf3f17ffcc9bb4bc6ffdfb7dcebfde9fcb231 Mon Sep 17 00:00:00 2001 From: ciricc Date: Sat, 13 Sep 2025 20:47:13 +0300 Subject: [PATCH 02/19] refactor(go bindings): make thread-safe and stateful context --- bindings/go/pkg/whisper/consts.go | 1 + bindings/go/pkg/whisper/context.go | 356 ++++++++------------ bindings/go/pkg/whisper/context_test.go | 172 ++++++++++ bindings/go/pkg/whisper/interface.go | 202 ++++++++--- bindings/go/pkg/whisper/model.go | 91 +++-- bindings/go/pkg/whisper/params_wrap.go | 69 ++++ bindings/go/pkg/whisper/state.go | 125 ------- bindings/go/pkg/whisper/state_test.go | 128 ------- bindings/go/pkg/whisper/token_identifier.go | 115 +++++++ bindings/go/pkg/whisper/whisper_ctx.go | 49 +++ bindings/go/pkg/whisper/whisper_state.go | 37 ++ bindings/go/whisper.go | 4 + 12 files changed, 813 insertions(+), 536 deletions(-) create mode 100644 bindings/go/pkg/whisper/params_wrap.go delete mode 100644 bindings/go/pkg/whisper/state.go delete mode 100644 bindings/go/pkg/whisper/state_test.go create mode 100644 bindings/go/pkg/whisper/token_identifier.go create mode 100644 bindings/go/pkg/whisper/whisper_ctx.go create mode 100644 bindings/go/pkg/whisper/whisper_state.go diff --git a/bindings/go/pkg/whisper/consts.go b/bindings/go/pkg/whisper/consts.go index 6c778d3d2d7..0af45ee8c20 100644 --- a/bindings/go/pkg/whisper/consts.go +++ b/bindings/go/pkg/whisper/consts.go @@ -17,6 +17,7 @@ var ( ErrUnsupportedLanguage = errors.New("unsupported language") ErrModelNotMultilingual = 
errors.New("model is not multilingual") ErrUnableToCreateState = errors.New("unable to create state") + ErrModelClosed = errors.New("model has been closed") ) /////////////////////////////////////////////////////////////////////////////// diff --git a/bindings/go/pkg/whisper/context.go b/bindings/go/pkg/whisper/context.go index a7df02a6653..01a510fe742 100644 --- a/bindings/go/pkg/whisper/context.go +++ b/bindings/go/pkg/whisper/context.go @@ -11,160 +11,77 @@ import ( whisper "github.com/ggerganov/whisper.cpp/bindings/go" ) -/////////////////////////////////////////////////////////////////////////////// -// TYPES - type context struct { n int - model *model - params whisper.Params + model Model + st WhisperState + params Parameters + Parameters } -/////////////////////////////////////////////////////////////////////////////// -// LIFECYCLE - -func newContext(model *model, params whisper.Params) (Context, error) { - context := new(context) - context.model = model - context.params = params - - // Return success - return context, nil -} +func newContext(model Model, params whisper.Params) (Context, error) { + c := new(context) + c.model = model -/////////////////////////////////////////////////////////////////////////////// -// PUBLIC METHODS + c.params = newParameters(&params) + c.Parameters = c.params -// Set the language to use for speech recognition.
-func (context *context) SetLanguage(lang string) error { - if context.model.ctx == nil { - return ErrInternalAppError - } - if !context.model.IsMultilingual() { - return ErrModelNotMultilingual + // allocate isolated state per context + ctx, err := model.WhisperContext().UnsafeContext() + if err != nil { + return nil, err } - if lang == "auto" { - context.params.SetLanguage(-1) - } else if id := context.model.ctx.Whisper_lang_id(lang); id < 0 { - return ErrUnsupportedLanguage - } else if err := context.params.SetLanguage(id); err != nil { - return err + st := ctx.Whisper_init_state() + if st == nil { + return nil, ErrUnableToCreateState } - // Return success - return nil -} -func (context *context) IsMultilingual() bool { - return context.model.IsMultilingual() -} + c.st = newWhisperState(st) -// Get language -func (context *context) Language() string { - id := context.params.Language() - if id == -1 { - return "auto" - } - return whisper.Whisper_lang_str(context.params.Language()) + // Return success + return c, nil } +// DetectedLanguage returns the detected language for the current context data func (context *context) DetectedLanguage() string { - return whisper.Whisper_lang_str(context.model.ctx.Whisper_full_lang_id()) -} - -// Set translate flag -func (context *context) SetTranslate(v bool) { - context.params.SetTranslate(v) -} - -func (context *context) SetSplitOnWord(v bool) { - context.params.SetSplitOnWord(v) -} - -// Set number of threads to use -func (context *context) SetThreads(v uint) { - context.params.SetThreads(int(v)) -} - -// Set time offset -func (context *context) SetOffset(v time.Duration) { - context.params.SetOffset(int(v.Milliseconds())) -} - -// Set duration of audio to process -func (context *context) SetDuration(v time.Duration) { - context.params.SetDuration(int(v.Milliseconds())) -} - -// Set timestamp token probability threshold (~0.01) -func (context *context) SetTokenThreshold(t float32) { - context.params.SetTokenThreshold(t) -} - 
-// Set timestamp token sum probability threshold (~0.01) -func (context *context) SetTokenSumThreshold(t float32) { - context.params.SetTokenSumThreshold(t) -} - -// Set max segment length in characters -func (context *context) SetMaxSegmentLength(n uint) { - context.params.SetMaxSegmentLength(int(n)) -} - -// Set token timestamps flag -func (context *context) SetTokenTimestamps(b bool) { - context.params.SetTokenTimestamps(b) -} - -// Set max tokens per segment (0 = no limit) -func (context *context) SetMaxTokensPerSegment(n uint) { - context.params.SetMaxTokensPerSegment(int(n)) -} - -// Set audio encoder context -func (context *context) SetAudioCtx(n uint) { - context.params.SetAudioCtx(int(n)) -} - -// Set maximum number of text context tokens to store -func (context *context) SetMaxContext(n int) { - context.params.SetMaxContext(n) -} - -// Set Beam Size -func (context *context) SetBeamSize(n int) { - context.params.SetBeamSize(n) -} + ctx, err := context.model.WhisperContext().UnsafeContext() + if err != nil { + return "" + } -// Set Entropy threshold -func (context *context) SetEntropyThold(t float32) { - context.params.SetEntropyThold(t) -} + st, err := context.st.UnsafeState() + if err != nil { + return "" + } -// Set Temperature -func (context *context) SetTemperature(t float32) { - context.params.SetTemperature(t) + return whisper.Whisper_lang_str( + ctx.Whisper_full_lang_id_from_state( + st, + ), + ) } -// Set the fallback temperature incrementation -// Pass -1.0 to disable this feature -func (context *context) SetTemperatureFallback(t float32) { - context.params.SetTemperatureFallback(t) +// Close frees the whisper state and marks the context as closed. 
+func (context *context) Close() error { + return context.st.Close() } -// Set initial prompt -func (context *context) SetInitialPrompt(prompt string) { - context.params.SetInitialPrompt(prompt) +// Params returns a high-level parameters wrapper +func (context *context) Params() Parameters { + return context.params } -// ResetTimings resets the mode timings. Should be called before processing +// ResetTimings resets the model performance timing counters. +// Deprecated: Use Model.ResetTimings() instead - these are model-level performance metrics. func (context *context) ResetTimings() { - context.model.ctx.Whisper_reset_timings() + context.model.ResetTimings() } -// PrintTimings prints the model timings to stdout. +// PrintTimings prints the model performance timings to stdout. +// Deprecated: Use Model.PrintTimings() instead - these are model-level performance metrics. func (context *context) PrintTimings() { - context.model.ctx.Whisper_print_timings() + context.model.PrintTimings() } // SystemInfo returns the system information @@ -178,12 +95,23 @@ func (context *context) SystemInfo() string { // Use mel data at offset_ms to try and auto-detect the spoken language // Make sure to call whisper_pcm_to_mel() or whisper_set_mel() first. -// Returns the probabilities of all languages. +// Returns the probabilities of all languages for this context's state. 
func (context *context) WhisperLangAutoDetect(offset_ms int, n_threads int) ([]float32, error) { - langProbs, err := context.model.ctx.Whisper_lang_auto_detect(offset_ms, n_threads) + ctx, err := context.model.WhisperContext().UnsafeContext() + if err != nil { + return nil, err + } + + st, err := context.st.UnsafeState() + if err != nil { + return nil, err + } + + langProbs, err := ctx.Whisper_lang_auto_detect_with_state(st, offset_ms, n_threads) if err != nil { return nil, err } + return langProbs, nil } @@ -194,36 +122,33 @@ func (context *context) Process( callNewSegment SegmentCallback, callProgress ProgressCallback, ) error { - if context.model.ctx == nil { - return ErrInternalAppError + ctx, err := context.model.WhisperContext().UnsafeContext() + if err != nil { + return err } + // If the callback is defined then we force on single_segment mode if callNewSegment != nil { context.params.SetSingleSegment(true) } - // We don't do parallel processing at the moment - processors := 0 - if processors > 1 { - if err := context.model.ctx.Whisper_full_parallel(context.params, data, processors, callEncoderBegin, - func(new int) { - if callNewSegment != nil { - num_segments := context.model.ctx.Whisper_full_n_segments() - s0 := num_segments - new - for i := s0; i < num_segments; i++ { - callNewSegment(toSegment(context.model.ctx, i)) - } - } - }); err != nil { - return err - } - } else if err := context.model.ctx.Whisper_full(context.params, data, callEncoderBegin, + lowLevelParams := context.params.WhisperParams() + if lowLevelParams == nil { + return fmt.Errorf("lowLevelParams is nil: %w", ErrInternalAppError) + } + + st, err := context.st.UnsafeState() + if err != nil { + return err + } + + if err := ctx.Whisper_full_with_state(st, *lowLevelParams, data, callEncoderBegin, func(new int) { if callNewSegment != nil { - num_segments := context.model.ctx.Whisper_full_n_segments() + num_segments := ctx.Whisper_full_n_segments_from_state(st) s0 := num_segments - new for i := 
s0; i < num_segments; i++ { - callNewSegment(toSegment(context.model.ctx, i)) + callNewSegment(toSegmentFromState(ctx, st, i)) } } }, func(progress int) { @@ -240,94 +165,111 @@ func (context *context) Process( // NextSegment returns the next segment from the context buffer func (context *context) NextSegment() (Segment, error) { - if context.model.ctx == nil { - return Segment{}, ErrInternalAppError + ctx, err := context.model.WhisperContext().UnsafeContext() + if err != nil { + return Segment{}, err + } + + st, err := context.st.UnsafeState() + if err != nil { + return Segment{}, err } - if context.n >= context.model.ctx.Whisper_full_n_segments() { + + if context.n >= ctx.Whisper_full_n_segments_from_state(st) { return Segment{}, io.EOF } - result := toSegment(context.model.ctx, context.n) - context.n++ - return result, nil -} -/////////////////////////////////////////////////////////////////////////////// -// PRIVATE METHODS + result := toSegmentFromState(ctx, st, context.n) + context.n++ -func toSegment(ctx *whisper.Context, n int) Segment { - return Segment{ - Num: n, - Text: strings.TrimSpace(ctx.Whisper_full_get_segment_text(n)), - Start: time.Duration(ctx.Whisper_full_get_segment_t0(n)) * time.Millisecond * 10, - End: time.Duration(ctx.Whisper_full_get_segment_t1(n)) * time.Millisecond * 10, - Tokens: toTokens(ctx, n), - } + return result, nil } -func toTokens(ctx *whisper.Context, n int) []Token { - result := make([]Token, ctx.Whisper_full_n_tokens(n)) - for i := 0; i < len(result); i++ { - data := ctx.Whisper_full_get_token_data(n, i) - - result[i] = Token{ - Id: int(ctx.Whisper_full_get_token_id(n, i)), - Text: ctx.Whisper_full_get_token_text(n, i), - P: ctx.Whisper_full_get_token_p(n, i), - Start: time.Duration(data.T0()) * time.Millisecond * 10, - End: time.Duration(data.T1()) * time.Millisecond * 10, - } - } - return result +func (context *context) IsMultilingual() bool { + return context.model.IsMultilingual() } // Token helpers +// Deprecated: Use 
Model.IsText() instead - token checking is model-specific. func (context *context) IsText(t Token) bool { - switch { - case context.IsBEG(t): - return false - case context.IsSOT(t): - return false - case whisper.Token(t.Id) >= context.model.ctx.Whisper_token_eot(): - return false - case context.IsPREV(t): - return false - case context.IsSOLM(t): - return false - case context.IsNOT(t): - return false - default: - return true - } + result, _ := context.model.TokenIdentifier().IsText(t) + return result } +// Deprecated: Use Model.IsBEG() instead - token checking is model-specific. func (context *context) IsBEG(t Token) bool { - return whisper.Token(t.Id) == context.model.ctx.Whisper_token_beg() + result, _ := context.model.TokenIdentifier().IsBEG(t) + return result } +// Deprecated: Use Model.IsSOT() instead - token checking is model-specific. func (context *context) IsSOT(t Token) bool { - return whisper.Token(t.Id) == context.model.ctx.Whisper_token_sot() + result, _ := context.model.TokenIdentifier().IsSOT(t) + return result } +// Deprecated: Use Model.IsEOT() instead - token checking is model-specific. func (context *context) IsEOT(t Token) bool { - return whisper.Token(t.Id) == context.model.ctx.Whisper_token_eot() + result, _ := context.model.TokenIdentifier().IsEOT(t) + return result } +// Deprecated: Use Model.IsPREV() instead - token checking is model-specific. func (context *context) IsPREV(t Token) bool { - return whisper.Token(t.Id) == context.model.ctx.Whisper_token_prev() + result, _ := context.model.TokenIdentifier().IsPREV(t) + return result } +// Deprecated: Use Model.IsSOLM() instead - token checking is model-specific. func (context *context) IsSOLM(t Token) bool { - return whisper.Token(t.Id) == context.model.ctx.Whisper_token_solm() + result, _ := context.model.TokenIdentifier().IsSOLM(t) + return result } +// Deprecated: Use Model.IsNOT() instead - token checking is model-specific. 
func (context *context) IsNOT(t Token) bool { - return whisper.Token(t.Id) == context.model.ctx.Whisper_token_not() + result, _ := context.model.TokenIdentifier().IsNOT(t) + return result } +func (context *context) SetLanguage(lang string) error { + if !context.model.IsMultilingual() { + return ErrModelNotMultilingual + } + + return context.params.SetLanguage(lang) +} + +// Deprecated: Use Model.IsLANG() instead - token checking is model-specific. func (context *context) IsLANG(t Token, lang string) bool { - if id := context.model.ctx.Whisper_lang_id(lang); id >= 0 { - return whisper.Token(t.Id) == context.model.ctx.Whisper_token_lang(id) - } else { - return false + result, _ := context.model.TokenIdentifier().IsLANG(t, lang) + return result +} + +// State-backed helper functions +func toSegmentFromState(ctx *whisper.Context, st *whisper.State, n int) Segment { + return Segment{ + Num: n, + Text: strings.TrimSpace(ctx.Whisper_full_get_segment_text_from_state(st, n)), + Start: time.Duration(ctx.Whisper_full_get_segment_t0_from_state(st, n)) * time.Millisecond * 10, + End: time.Duration(ctx.Whisper_full_get_segment_t1_from_state(st, n)) * time.Millisecond * 10, + Tokens: toTokensFromState(ctx, st, n), } } + +func toTokensFromState(ctx *whisper.Context, st *whisper.State, n int) []Token { + result := make([]Token, ctx.Whisper_full_n_tokens_from_state(st, n)) + + for i := 0; i < len(result); i++ { + data := ctx.Whisper_full_get_token_data_from_state(st, n, i) + result[i] = Token{ + Id: int(ctx.Whisper_full_get_token_id_from_state(st, n, i)), + Text: ctx.Whisper_full_get_token_text_from_state(st, n, i), + P: ctx.Whisper_full_get_token_p_from_state(st, n, i), + Start: time.Duration(data.T0()) * time.Millisecond * 10, + End: time.Duration(data.T1()) * time.Millisecond * 10, + } + } + + return result +} diff --git a/bindings/go/pkg/whisper/context_test.go b/bindings/go/pkg/whisper/context_test.go index e98a4c2b80b..305446624f9 100644 --- 
a/bindings/go/pkg/whisper/context_test.go +++ b/bindings/go/pkg/whisper/context_test.go @@ -2,11 +2,13 @@ package whisper_test import ( "os" + "sync" "testing" "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper" "github.com/go-audio/wav" assert "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func TestSetLanguage(t *testing.T) { @@ -122,3 +124,173 @@ func TestDetectedLanguage(t *testing.T) { actualLanguage := context.DetectedLanguage() assert.Equal(expectedLanguage, actualLanguage) } + +// TestContext_ConcurrentProcessing tests that multiple contexts can process concurrently +// without interfering with each other (validates the whisper_state isolation fix) +func TestContext_ConcurrentProcessing(t *testing.T) { + assert := assert.New(t) + + if _, err := os.Stat(ModelPath); os.IsNotExist(err) { + t.Skip("Skipping test, model not found:", ModelPath) + } + if _, err := os.Stat(SamplePath); os.IsNotExist(err) { + t.Skip("Skipping test, sample not found:", SamplePath) + } + + fh, err := os.Open(SamplePath) + assert.NoError(err) + defer fh.Close() + + dec := wav.NewDecoder(fh) + buf, err := dec.FullPCMBuffer() + assert.NoError(err) + assert.Equal(uint16(1), dec.NumChans) + data := buf.AsFloat32Buffer().Data + + model, err := whisper.New(ModelPath) + assert.NoError(err) + assert.NotNil(model) + defer model.Close() + + ctx, err := model.NewContext() + assert.NoError(err) + assert.NotNil(ctx) + defer ctx.Close() + + err = ctx.Process(data, nil, nil, nil) + assert.NoError(err) + + seg, err := ctx.NextSegment() + assert.NoError(err) + assert.NotEmpty(seg.Text) +} + +// TestContext_Parallel_DifferentInputs tests concurrent processing with different inputs +// This validates that each context maintains isolated state for concurrent processing +func TestContext_Parallel_DifferentInputs(t *testing.T) { + assert := assert.New(t) + + if _, err := os.Stat(ModelPath); os.IsNotExist(err) { + t.Skip("Skipping test, model not found:", ModelPath) + } 
+ if _, err := os.Stat(SamplePath); os.IsNotExist(err) { + t.Skip("Skipping test, sample not found:", SamplePath) + } + + fh, err := os.Open(SamplePath) + assert.NoError(err) + defer fh.Close() + + dec := wav.NewDecoder(fh) + buf, err := dec.FullPCMBuffer() + assert.NoError(err) + assert.Equal(uint16(1), dec.NumChans) + data := buf.AsFloat32Buffer().Data + assert.Greater(len(data), 10) + + // Create half-sample (second half) + half := make([]float32, len(data)/2) + copy(half, data[len(data)/2:]) + + model, err := whisper.New(ModelPath) + assert.NoError(err) + assert.NotNil(model) + defer model.Close() + + ctx1, err := model.NewContext() + assert.NoError(err) + defer ctx1.Close() + ctx2, err := model.NewContext() + assert.NoError(err) + defer ctx2.Close() + + // Run in parallel - each context has isolated whisper_state + var wg sync.WaitGroup + var first1, first2 string + var e1, e2 error + + wg.Add(2) + + // No mutex needed because each context is isolated by whisper_state + go func() { + defer wg.Done() + e1 = ctx1.Process(data, nil, nil, nil) + if e1 == nil { + seg, err := ctx1.NextSegment() + if err == nil { + first1 = seg.Text + } else { + e1 = err + } + } + }() + + go func() { + defer wg.Done() + e2 = ctx2.Process(half, nil, nil, nil) + if e2 == nil { + seg, err := ctx2.NextSegment() + if err == nil { + first2 = seg.Text + } else { + e2 = err + } + } + }() + + wg.Wait() + assert.NoError(e1) + assert.NoError(e2) + assert.NotEmpty(first1) + assert.NotEmpty(first2) + assert.NotEqual(first1, first2, "first segments should differ for different inputs") +} + +// TestContext_Close tests that Context.Close() properly frees resources +// and that calls on an already-closed context return an error or empty result +func TestContext_Close(t *testing.T) { + assert := assert.New(t) + + if _, err := os.Stat(ModelPath); os.IsNotExist(err) { + t.Skip("Skipping test, model not found:", ModelPath) + } + + model, err := whisper.New(ModelPath) + assert.NoError(err) + assert.NotNil(model) + defer
model.Close() + + ctx, err := model.NewContext() + assert.NoError(err) + assert.NotNil(ctx) + + // Close the context + err = ctx.Close() + require.NoError(t, err) + + // Try to use closed context - should return errors + err = ctx.Process([]float32{0.1, 0.2, 0.3}, nil, nil, nil) + require.ErrorIs(t, err, whisper.ErrModelClosed) + + lang := ctx.DetectedLanguage() + require.Empty(t, lang) + + // Multiple closes should be safe + err = ctx.Close() + require.NoError(t, err) +} + +func Test_Close_Context_of_Closed_Model(t *testing.T) { + assert := assert.New(t) + + model, err := whisper.New(ModelPath) + assert.NoError(err) + assert.NotNil(model) + + ctx, err := model.NewContext() + assert.NoError(err) + assert.NotNil(ctx) + + require.NoError(t, model.Close()) + require.NoError(t, ctx.Close()) +} diff --git a/bindings/go/pkg/whisper/interface.go b/bindings/go/pkg/whisper/interface.go index b4705ec1c67..30a41db3e50 100644 --- a/bindings/go/pkg/whisper/interface.go +++ b/bindings/go/pkg/whisper/interface.go @@ -3,6 +3,8 @@ package whisper import ( "io" "time" + + whisper "github.com/ggerganov/whisper.cpp/bindings/go" ) /////////////////////////////////////////////////////////////////////////////// @@ -20,6 +22,32 @@ type ProgressCallback func(int) // continue processing. 
It is called during the Process function type EncoderBeginCallback func() bool +type TokenIdentifier interface { + // Test for "begin" token + IsBEG(Token) (bool, error) + + // Test for "start of transcription" token + IsSOT(Token) (bool, error) + + // Test for "end of transcription" token + IsEOT(Token) (bool, error) + + // Test for "start of prev" token + IsPREV(Token) (bool, error) + + // Test for "start of lm" token + IsSOLM(Token) (bool, error) + + // Test for "no timestamps" token + IsNOT(Token) (bool, error) + + // Test for token associated with a specific language + IsLANG(Token, string) (bool, error) + + // Test for text token + IsText(Token) (bool, error) +} + // Model is the interface to a whisper model. Create a new model with the // function whisper.New(string) type Model interface { @@ -33,32 +61,116 @@ type Model interface { // Return all languages supported. Languages() []string + + // Model performance timing methods + // Print model performance timings to stdout + PrintTimings() + + // Reset model performance timing counters + ResetTimings() + + // WhisperContext returns the memory-safe whisper context wrapper of the raw whisper context + WhisperContext() WhisperContext + + // Token identifier + TokenIdentifier() TokenIdentifier +} + +// Parameters configures decode / processing behavior +type Parameters interface { + SetTranslate(bool) + SetSplitOnWord(bool) + SetThreads(uint) + SetOffset(time.Duration) + SetDuration(time.Duration) + SetTokenThreshold(float32) + SetTokenSumThreshold(float32) + SetMaxSegmentLength(uint) + SetTokenTimestamps(bool) + SetMaxTokensPerSegment(uint) + SetAudioCtx(uint) + SetMaxContext(n int) + SetBeamSize(n int) + SetEntropyThold(t float32) + SetInitialPrompt(prompt string) + + // Set the temperature + SetTemperature(t float32) + + // Set the fallback temperature incrementation + // Pass -1.0 to disable this feature + SetTemperatureFallback(t float32) + SetLanguage(string) error + + // Set single segment mode + 
SetSingleSegment(bool) + + // Getter methods + Language() string + Threads() int + WhisperParams() *whisper.Params } // Context is the speech recognition context. type Context interface { - SetLanguage(string) error // Set the language to use for speech recognition, use "auto" for auto detect language. - SetTranslate(bool) // Set translate flag - IsMultilingual() bool // Return true if the model is multilingual. - Language() string // Get language - DetectedLanguage() string // Get detected language - - SetOffset(time.Duration) // Set offset - SetDuration(time.Duration) // Set duration - SetThreads(uint) // Set number of threads to use - SetSplitOnWord(bool) // Set split on word flag - SetTokenThreshold(float32) // Set timestamp token probability threshold - SetTokenSumThreshold(float32) // Set timestamp token sum probability threshold - SetMaxSegmentLength(uint) // Set max segment length in characters - SetTokenTimestamps(bool) // Set token timestamps flag - SetMaxTokensPerSegment(uint) // Set max tokens per segment (0 = no limit) - SetAudioCtx(uint) // Set audio encoder context - SetMaxContext(n int) // Set maximum number of text context tokens to store - SetBeamSize(n int) // Set Beam Size - SetEntropyThold(t float32) // Set Entropy threshold - SetInitialPrompt(prompt string) // Set initial prompt - SetTemperature(t float32) // Set temperature - SetTemperatureFallback(t float32) // Set temperature incrementation + io.Closer + // Deprecated: Use Params().SetLanguage() instead + SetLanguage(string) error + + // Deprecated: Use Params().SetTranslate() instead + SetTranslate(bool) + // Deprecated: Use Params().SetSplitOnWord() instead + SetSplitOnWord(bool) + // Deprecated: Use Params().SetThreads() instead + SetThreads(uint) + + // Deprecated: Use Params().SetOffset() instead + SetOffset(time.Duration) + // Deprecated: Use Params().SetDuration() instead + SetDuration(time.Duration) + // Deprecated: Use Params().SetTokenThreshold() instead + 
SetTokenThreshold(float32) + + // Deprecated: Use Params().SetTokenSumThreshold() instead + SetTokenSumThreshold(float32) + // Deprecated: Use Params().SetMaxSegmentLength() instead + SetMaxSegmentLength(uint) + // Deprecated: Use Params().SetTokenTimestamps() instead + SetTokenTimestamps(bool) + + // Deprecated: Use Params().SetMaxTokensPerSegment() instead + SetMaxTokensPerSegment(uint) + + // Deprecated: Use Params().SetAudioCtx() instead + SetAudioCtx(uint) + + // Deprecated: Use Params().SetMaxContext() instead + SetMaxContext(int) + + // Deprecated: Use Params().SetBeamSize() instead + SetBeamSize(int) + + // Deprecated: Use Params().SetEntropyThold() instead + SetEntropyThold(float32) + + // Deprecated: Use Params().SetTemperature() instead + SetTemperature(float32) + + // Deprecated: Use Params().SetTemperatureFallback() instead + SetTemperatureFallback(float32) + + // Deprecated: Use Params().SetInitialPrompt() instead + SetInitialPrompt(string) + + // Get language of the context parameters + // Deprecated: Use Params().Language() instead + Language() string + + // Return true if the model is multilingual. + IsMultilingual() bool + + // Get detected language + DetectedLanguage() string // Process mono audio data and return any errors. // If defined, newly generated segments are passed to the @@ -69,29 +181,41 @@ type Context interface { // is reached, when io.EOF is returned. NextSegment() (Segment, error) - IsBEG(Token) bool // Test for "begin" token - IsSOT(Token) bool // Test for "start of transcription" token - IsEOT(Token) bool // Test for "end of transcription" token - IsPREV(Token) bool // Test for "start of prev" token - IsSOLM(Token) bool // Test for "start of lm" token - IsNOT(Token) bool // Test for "No timestamps" token - IsLANG(Token, string) bool // Test for token associated with a specific language - IsText(Token) bool // Test for text token + // Deprecated token methods - use Model.IsBEG(), Model.IsSOT(), etc. 
instead + // Deprecated: Use Model.IsBEG() instead + IsBEG(Token) bool - // Timings + // Deprecated: Use Model.IsSOT() instead + IsSOT(Token) bool + + // Deprecated: Use Model.IsEOT() instead + IsEOT(Token) bool + + // Deprecated: Use Model.IsPREV() instead + IsPREV(Token) bool + + // Deprecated: Use Model.IsSOLM() instead + IsSOLM(Token) bool + + // Deprecated: Use Model.IsNOT() instead + IsNOT(Token) bool + + // Deprecated: Use Model.IsLANG() instead + IsLANG(Token, string) bool + + // Deprecated: Use Model.IsText() instead + IsText(Token) bool + + // Deprecated: Use Model.PrintTimings() instead - these are model-level performance metrics PrintTimings() + + // Deprecated: Use Model.ResetTimings() instead - these are model-level performance metrics ResetTimings() SystemInfo() string -} - -// State is a per-request speech recognition state which shares the loaded model -// but isolates recognition results. It embeds Context, so any state-specific -// methods can be added later without breaking existing API. -type State interface { - io.Closer - Context + // Params returns a high-level parameters wrapper - preferred method + Params() Parameters } // Segment is the text result of a speech recognition. 
diff --git a/bindings/go/pkg/whisper/model.go b/bindings/go/pkg/whisper/model.go index 0142a787aa9..752c7cf02bb 100644 --- a/bindings/go/pkg/whisper/model.go +++ b/bindings/go/pkg/whisper/model.go @@ -9,20 +9,15 @@ import ( whisper "github.com/ggerganov/whisper.cpp/bindings/go" ) -/////////////////////////////////////////////////////////////////////////////// -// TYPES - type model struct { - path string - ctx *whisper.Context + path string + ctx *whisperCtx + tokenIdentifier *tokenIdentifier } // Make sure model adheres to the interface var _ Model = (*model)(nil) -/////////////////////////////////////////////////////////////////////////////// -// LIFECYCLE - func New(path string) (Model, error) { model := new(model) if _, err := os.Stat(path); err != nil { @@ -30,7 +25,8 @@ func New(path string) (Model, error) { } else if ctx := whisper.Whisper_init(path); ctx == nil { return nil, ErrUnableToLoadModel } else { - model.ctx = ctx + model.ctx = newWhisperCtx(ctx) + model.tokenIdentifier = newTokenIdentifier(model.ctx) model.path = path } @@ -39,15 +35,11 @@ func New(path string) (Model, error) { } func (model *model) Close() error { - if model.ctx != nil { - model.ctx.Whisper_free() - } - - // Release resources - model.ctx = nil + return model.ctx.Close() +} - // Return success - return nil +func (model *model) WhisperContext() WhisperContext { + return model.ctx } /////////////////////////////////////////////////////////////////////////////// @@ -58,6 +50,7 @@ func (model *model) String() string { if model.ctx != nil { str += fmt.Sprintf(" model=%q", model.path) } + return str + ">" } @@ -66,28 +59,43 @@ func (model *model) String() string { // Return true if model is multilingual (language and translation options are supported) func (model *model) IsMultilingual() bool { - return model.ctx.Whisper_is_multilingual() != 0 + ctx, err := model.ctx.UnsafeContext() + if err != nil { + return false + } + + return ctx.Whisper_is_multilingual() != 0 } // Return all 
recognized languages. Initially it is set to auto-detect func (model *model) Languages() []string { + ctx, err := model.ctx.UnsafeContext() + if err != nil { + return nil + } + result := make([]string, 0, whisper.Whisper_lang_max_id()) for i := 0; i < whisper.Whisper_lang_max_id(); i++ { str := whisper.Whisper_lang_str(i) - if model.ctx.Whisper_lang_id(str) >= 0 { + if ctx.Whisper_lang_id(str) >= 0 { result = append(result, str) } } + return result } +// NewContext creates a new speech-to-text context. +// Each context is backed by an isolated whisper_state for safe concurrent processing. func (model *model) NewContext() (Context, error) { - if model.ctx == nil { - return nil, ErrInternalAppError + ctx, err := model.ctx.UnsafeContext() + if err != nil { + return nil, ErrModelClosed } - // Create new context - params := model.ctx.Whisper_full_default_params(whisper.SAMPLING_GREEDY) + // Create new context with default params + params := ctx.Whisper_full_default_params(whisper.SAMPLING_GREEDY) + params.SetTranslate(false) params.SetPrintSpecial(false) params.SetPrintProgress(false) @@ -96,22 +104,31 @@ func (model *model) NewContext() (Context, error) { params.SetThreads(runtime.NumCPU()) params.SetNoContext(true) - // Return new context + // Return new context (now state-backed) return newContext(model, params) } -// NewState returns a new per-request state sharing the loaded model -func (model *model) NewState() (State, error) { - if model.ctx == nil { - return nil, ErrInternalAppError +// PrintTimings prints the model performance timings to stdout. 
+func (model *model) PrintTimings() { + ctx, err := model.ctx.UnsafeContext() + if err != nil { + return } - params := model.ctx.Whisper_full_default_params(whisper.SAMPLING_GREEDY) - params.SetTranslate(false) - params.SetPrintSpecial(false) - params.SetPrintProgress(false) - params.SetPrintRealtime(false) - params.SetPrintTimestamps(false) - params.SetThreads(runtime.NumCPU()) - params.SetNoContext(true) - return newState(model, params) + + ctx.Whisper_print_timings() +} + +// ResetTimings resets the model performance timing counters. +func (model *model) ResetTimings() { + ctx, err := model.ctx.UnsafeContext() + if err != nil { + return + } + + ctx.Whisper_reset_timings() +} + +// TokenIdentifier returns the token identifier associated with this model. +func (model *model) TokenIdentifier() TokenIdentifier { + return model.tokenIdentifier } diff --git a/bindings/go/pkg/whisper/params_wrap.go b/bindings/go/pkg/whisper/params_wrap.go new file mode 100644 index 00000000000..55a41453765 --- /dev/null +++ b/bindings/go/pkg/whisper/params_wrap.go @@ -0,0 +1,69 @@ +package whisper + +import ( + "time" + + // Bindings + whisper "github.com/ggerganov/whisper.cpp/bindings/go" +) + +// parameters is a high-level wrapper that implements the Parameters interface +// and delegates to the underlying low-level whisper.Params.
+type parameters struct { + p *whisper.Params +} + +func newParameters(p *whisper.Params) Parameters { return &parameters{p: p} } + +func (w *parameters) SetTranslate(v bool) { w.p.SetTranslate(v) } +func (w *parameters) SetSplitOnWord(v bool) { w.p.SetSplitOnWord(v) } +func (w *parameters) SetThreads(v uint) { w.p.SetThreads(int(v)) } +func (w *parameters) SetOffset(d time.Duration) { w.p.SetOffset(int(d.Milliseconds())) } +func (w *parameters) SetDuration(d time.Duration) { w.p.SetDuration(int(d.Milliseconds())) } +func (w *parameters) SetTokenThreshold(t float32) { w.p.SetTokenThreshold(t) } +func (w *parameters) SetTokenSumThreshold(t float32) { w.p.SetTokenSumThreshold(t) } +func (w *parameters) SetMaxSegmentLength(n uint) { w.p.SetMaxSegmentLength(int(n)) } +func (w *parameters) SetTokenTimestamps(b bool) { w.p.SetTokenTimestamps(b) } +func (w *parameters) SetMaxTokensPerSegment(n uint) { w.p.SetMaxTokensPerSegment(int(n)) } +func (w *parameters) SetAudioCtx(n uint) { w.p.SetAudioCtx(int(n)) } +func (w *parameters) SetMaxContext(n int) { w.p.SetMaxContext(n) } +func (w *parameters) SetBeamSize(n int) { w.p.SetBeamSize(n) } +func (w *parameters) SetEntropyThold(t float32) { w.p.SetEntropyThold(t) } +func (w *parameters) SetInitialPrompt(prompt string) { w.p.SetInitialPrompt(prompt) } +func (w *parameters) SetTemperature(t float32) { w.p.SetTemperature(t) } +func (w *parameters) SetTemperatureFallback(t float32) { w.p.SetTemperatureFallback(t) } + +func (w *parameters) SetLanguage(lang string) error { + if lang == "auto" { + return w.p.SetLanguage(-1) + } + id := whisper.Whisper_lang_id_str(lang) + if id < 0 { + return ErrUnsupportedLanguage + } + return w.p.SetLanguage(id) +} + +func (w *parameters) SetSingleSegment(v bool) { + w.p.SetSingleSegment(v) +} + +// Getter methods for Parameters interface +func (w *parameters) Language() string { + id := w.p.Language() + if id == -1 { + return "auto" + } + + return whisper.Whisper_lang_str(id) +} + +func (w *parameters)
Threads() int { + return w.p.Threads() +} + +func (w *parameters) WhisperParams() *whisper.Params { + return w.p +} + +var _ Parameters = &parameters{} diff --git a/bindings/go/pkg/whisper/state.go b/bindings/go/pkg/whisper/state.go deleted file mode 100644 index 4ae81baf790..00000000000 --- a/bindings/go/pkg/whisper/state.go +++ /dev/null @@ -1,125 +0,0 @@ -package whisper - -import ( - "io" - "strings" - "time" - - // Bindings - whisper "github.com/ggerganov/whisper.cpp/bindings/go" -) - -// state embeds context behavior and carries a low-level state pointer -// for isolated processing results. -type state struct { - *context - st *whisper.State -} - -// NewState creates a new per-request State from a Model without changing the Model interface. -func NewState(m Model) (State, error) { - impl, ok := m.(*model) - if !ok { - return nil, ErrInternalAppError - } - params := impl.ctx.Whisper_full_default_params(whisper.SAMPLING_GREEDY) - params.SetTranslate(false) - params.SetPrintSpecial(false) - params.SetPrintProgress(false) - params.SetPrintRealtime(false) - params.SetPrintTimestamps(false) - return newState(impl, params) -} - -// internal constructor used by model.NewState -func newState(model *model, params whisper.Params) (State, error) { - ctx := &context{model: model, params: params} - st := model.ctx.Whisper_init_state() - if st == nil { - return nil, ErrUnableToCreateState - } - return &state{context: ctx, st: st}, nil -} - -// Process using an isolated state for concurrency -func (s *state) Process( - data []float32, - callEncoderBegin EncoderBeginCallback, - callNewSegment SegmentCallback, - callProgress ProgressCallback, -) error { - if s.model.ctx == nil || s.st == nil { - return ErrInternalAppError - } - if callNewSegment != nil { - s.params.SetSingleSegment(true) - } - if err := s.model.ctx.Whisper_full_with_state(s.st, s.params, data, callEncoderBegin, - func(new int) { - if callNewSegment != nil { - num_segments := 
s.model.ctx.Whisper_full_n_segments_from_state(s.st) - s0 := num_segments - new - for i := s0; i < num_segments; i++ { - callNewSegment(toSegmentFromState(s.model.ctx, s.st, i)) - } - } - }, func(progress int) { - if callProgress != nil { - callProgress(progress) - } - }); err != nil { - return err - } - return nil -} - -// Return the next segment of tokens for state -func (s *state) NextSegment() (Segment, error) { - if s.model.ctx == nil { - return Segment{}, ErrInternalAppError - } - if s.n >= s.model.ctx.Whisper_full_n_segments_from_state(s.st) { - return Segment{}, io.EOF - } - result := toSegmentFromState(s.model.ctx, s.st, s.n) - s.n++ - return result, nil -} - -func (s *state) Close() error { - if s.st != nil { - s.st.Whisper_free_state() - s.st = nil - } - return nil -} - -// Helpers specific to state-based results -func toSegmentFromState(ctx *whisper.Context, st *whisper.State, n int) Segment { - return Segment{ - Num: n, - Text: stringsTrim(ctx.Whisper_full_get_segment_text_from_state(st, n)), - Start: duration10x(ctx.Whisper_full_get_segment_t0_from_state(st, n)), - End: duration10x(ctx.Whisper_full_get_segment_t1_from_state(st, n)), - Tokens: toTokensFromState(ctx, st, n), - } -} - -func toTokensFromState(ctx *whisper.Context, st *whisper.State, n int) []Token { - result := make([]Token, ctx.Whisper_full_n_tokens_from_state(st, n)) - for i := 0; i < len(result); i++ { - data := ctx.Whisper_full_get_token_data_from_state(st, n, i) - result[i] = Token{ - Id: int(ctx.Whisper_full_get_token_id_from_state(st, n, i)), - Text: ctx.Whisper_full_get_token_text_from_state(st, n, i), - P: ctx.Whisper_full_get_token_p_from_state(st, n, i), - Start: duration10x(data.T0()), - End: duration10x(data.T1()), - } - } - return result -} - -// small shared helpers to avoid importing time/strings here unnecessarily -func stringsTrim(s string) string { return strings.TrimSpace(s) } -func duration10x(ms10 int64) time.Duration { return time.Duration(ms10) * time.Millisecond * 
10 } diff --git a/bindings/go/pkg/whisper/state_test.go b/bindings/go/pkg/whisper/state_test.go deleted file mode 100644 index f893b8d2b87..00000000000 --- a/bindings/go/pkg/whisper/state_test.go +++ /dev/null @@ -1,128 +0,0 @@ -package whisper_test - -import ( - "os" - "sync" - "testing" - - "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper" - "github.com/go-audio/wav" - assert "github.com/stretchr/testify/assert" -) - -func TestState_Process(t *testing.T) { - assert := assert.New(t) - - if _, err := os.Stat(ModelPath); os.IsNotExist(err) { - t.Skip("Skipping test, model not found:", ModelPath) - } - if _, err := os.Stat(SamplePath); os.IsNotExist(err) { - t.Skip("Skipping test, sample not found:", SamplePath) - } - - fh, err := os.Open(SamplePath) - assert.NoError(err) - defer fh.Close() - - dec := wav.NewDecoder(fh) - buf, err := dec.FullPCMBuffer() - assert.NoError(err) - assert.Equal(uint16(1), dec.NumChans) - data := buf.AsFloat32Buffer().Data - - model, err := whisper.New(ModelPath) - assert.NoError(err) - assert.NotNil(model) - defer model.Close() - - st, err := whisper.NewState(model) - assert.NoError(err) - assert.NotNil(st) - defer func() { _ = st.Close() }() - - err = st.Process(data, nil, nil, nil) - assert.NoError(err) - - seg, err := st.NextSegment() - assert.NoError(err) - assert.NotEmpty(seg.Text) -} - -func TestState_Parallel_DifferentInputs(t *testing.T) { - assert := assert.New(t) - - if _, err := os.Stat(ModelPath); os.IsNotExist(err) { - t.Skip("Skipping test, model not found:", ModelPath) - } - if _, err := os.Stat(SamplePath); os.IsNotExist(err) { - t.Skip("Skipping test, sample not found:", SamplePath) - } - - fh, err := os.Open(SamplePath) - assert.NoError(err) - defer fh.Close() - - dec := wav.NewDecoder(fh) - buf, err := dec.FullPCMBuffer() - assert.NoError(err) - assert.Equal(uint16(1), dec.NumChans) - data := buf.AsFloat32Buffer().Data - assert.Greater(len(data), 10) - - // Create half-sample (second half) - half := 
make([]float32, len(data)/2) - copy(half, data[len(data)/2:]) - - model, err := whisper.New(ModelPath) - assert.NoError(err) - assert.NotNil(model) - defer model.Close() - - st1, err := whisper.NewState(model) - assert.NoError(err) - st2, err := whisper.NewState(model) - assert.NoError(err) - defer func() { _ = st1.Close() }() - defer func() { _ = st2.Close() }() - - // Run in parallel, but guard core call to respect context safety - var wg sync.WaitGroup - var first1, first2 string - var e1, e2 error - - wg.Add(2) - - // No mutex needed because each state is isolated - go func() { - defer wg.Done() - e1 = st1.Process(data, nil, nil, nil) - if e1 == nil { - seg, err := st1.NextSegment() - if err == nil { - first1 = seg.Text - } else { - e1 = err - } - } - }() - - go func() { - defer wg.Done() - e2 = st2.Process(half, nil, nil, nil) - if e2 == nil { - seg, err := st2.NextSegment() - if err == nil { - first2 = seg.Text - } else { - e2 = err - } - } - }() - - wg.Wait() - assert.NoError(e1) - assert.NoError(e2) - assert.NotEmpty(first1) - assert.NotEmpty(first2) - assert.NotEqual(first1, first2, "first segments should differ for different inputs") -} diff --git a/bindings/go/pkg/whisper/token_identifier.go b/bindings/go/pkg/whisper/token_identifier.go new file mode 100644 index 00000000000..059386c1e8d --- /dev/null +++ b/bindings/go/pkg/whisper/token_identifier.go @@ -0,0 +1,115 @@ +package whisper + +import whisper "github.com/ggerganov/whisper.cpp/bindings/go" + +type tokenIdentifier struct { + ctx *whisperCtx +} + +func newTokenIdentifier(whisperContext *whisperCtx) *tokenIdentifier { + return &tokenIdentifier{ + ctx: whisperContext, + } +} + +// Token type checking methods (model-specific vocabulary) +func (ti *tokenIdentifier) IsBEG(t Token) (bool, error) { + ctx, err := ti.ctx.UnsafeContext() + if err != nil { + return false, err + } + + return whisper.Token(t.Id) == ctx.Whisper_token_beg(), nil +} + +func (ti *tokenIdentifier) IsEOT(t Token) (bool, error) { + 
ctx, err := ti.ctx.UnsafeContext() + if err != nil { + return false, err + } + + return whisper.Token(t.Id) == ctx.Whisper_token_eot(), nil +} + +func (ti *tokenIdentifier) IsSOT(t Token) (bool, error) { + ctx, err := ti.ctx.UnsafeContext() + if err != nil { + return false, err + } + + return whisper.Token(t.Id) == ctx.Whisper_token_sot(), nil +} + +func (ti *tokenIdentifier) IsPREV(t Token) (bool, error) { + ctx, err := ti.ctx.UnsafeContext() + if err != nil { + return false, err + } + + return whisper.Token(t.Id) == ctx.Whisper_token_prev(), nil +} + +func (ti *tokenIdentifier) IsSOLM(t Token) (bool, error) { + ctx, err := ti.ctx.UnsafeContext() + if err != nil { + return false, err + } + + return whisper.Token(t.Id) == ctx.Whisper_token_solm(), nil +} + +func (ti *tokenIdentifier) IsNOT(t Token) (bool, error) { + ctx, err := ti.ctx.UnsafeContext() + if err != nil { + return false, err + } + + return whisper.Token(t.Id) == ctx.Whisper_token_not(), nil +} + +func (ti *tokenIdentifier) IsLANG(t Token, lang string) (bool, error) { + ctx, err := ti.ctx.UnsafeContext() + if err != nil { + return false, err + } + + if id := ctx.Whisper_lang_id(lang); id >= 0 { + return whisper.Token(t.Id) == ctx.Whisper_token_lang(id), nil + } + + return false, nil +} + +func (ti *tokenIdentifier) IsText(t Token) (bool, error) { + // Check if it's any of the special tokens + if isBeg, _ := ti.IsBEG(t); isBeg { + return false, nil + } + + if isSot, _ := ti.IsSOT(t); isSot { + return false, nil + } + + ctx, err := ti.ctx.UnsafeContext() + if err != nil { + return false, err + } + + if whisper.Token(t.Id) >= ctx.Whisper_token_eot() { + return false, nil + } + + if isPrev, _ := ti.IsPREV(t); isPrev { + return false, nil + } + + if isSolm, _ := ti.IsSOLM(t); isSolm { + return false, nil + } + + if isNot, _ := ti.IsNOT(t); isNot { + return false, nil + } + + return true, nil +} diff --git a/bindings/go/pkg/whisper/whisper_ctx.go b/bindings/go/pkg/whisper/whisper_ctx.go new file mode 100644 
index 00000000000..7e570bc3d6f --- /dev/null +++ b/bindings/go/pkg/whisper/whisper_ctx.go @@ -0,0 +1,49 @@ +package whisper + +import whisper "github.com/ggerganov/whisper.cpp/bindings/go" + +type WhisperContext interface { + // Close closes the whisper context + Close() error + + // IsClosed returns true if the whisper context is closed + IsClosed() bool + + // UnsafeContext returns the raw whisper context + UnsafeContext() (*whisper.Context, error) +} + +type whisperCtx struct { + ctx *whisper.Context +} + +func newWhisperCtx(ctx *whisper.Context) *whisperCtx { + return &whisperCtx{ + ctx: ctx, + } +} + +func (ctx *whisperCtx) Close() error { + if ctx.ctx == nil { + return nil + } + + ctx.ctx.Whisper_free() + ctx.ctx = nil + + return nil +} + +func (ctx *whisperCtx) IsClosed() bool { + return ctx.ctx == nil +} + +func (ctx *whisperCtx) UnsafeContext() (*whisper.Context, error) { + if ctx.IsClosed() { + return nil, ErrModelClosed + } + + return ctx.ctx, nil +} + +var _ WhisperContext = (*whisperCtx)(nil) diff --git a/bindings/go/pkg/whisper/whisper_state.go b/bindings/go/pkg/whisper/whisper_state.go new file mode 100644 index 00000000000..24ca4ed44d7 --- /dev/null +++ b/bindings/go/pkg/whisper/whisper_state.go @@ -0,0 +1,37 @@ +package whisper + +import whisper "github.com/ggerganov/whisper.cpp/bindings/go" + +type WhisperState interface { + Close() error + UnsafeState() (*whisper.State, error) +} + +type whisperState struct { + state *whisper.State +} + +func newWhisperState(state *whisper.State) WhisperState { + return &whisperState{ + state: state, + } +} + +func (s *whisperState) Close() error { + if s.state == nil { + return nil + } + + s.state.Whisper_free_state() + s.state = nil + + return nil +} + +func (s *whisperState) UnsafeState() (*whisper.State, error) { + if s.state == nil { + return nil, ErrModelClosed + } + + return s.state, nil +} diff --git a/bindings/go/whisper.go b/bindings/go/whisper.go index cb5907ffe06..b6ef48a8531 100644 --- 
a/bindings/go/whisper.go +++ b/bindings/go/whisper.go @@ -232,6 +232,10 @@ func (ctx *Context) Whisper_lang_id(lang string) int { return int(C.whisper_lang_id(C.CString(lang))) } +func Whisper_lang_id_str(lang string) int { + return int(C.whisper_lang_id(C.CString(lang))) +} + // Largest language id (i.e. number of available languages - 1) func Whisper_lang_max_id() int { return int(C.whisper_lang_max_id()) From 97e6ce2bc4155c28a07706cfc27c88e782eac7aa Mon Sep 17 00:00:00 2001 From: ciricc Date: Sat, 13 Sep 2025 22:28:14 +0300 Subject: [PATCH 03/19] feat(go bindings): add VAD and Diarization parameters --- bindings/go/params.go | 38 +++++++++ bindings/go/pkg/whisper/consts.go | 7 ++ bindings/go/pkg/whisper/context.go | 17 ++-- bindings/go/pkg/whisper/context_test.go | 109 ++++++++++++++++++++---- bindings/go/pkg/whisper/interface.go | 32 ++++++- bindings/go/pkg/whisper/model.go | 65 ++++++++++++-- bindings/go/pkg/whisper/params_wrap.go | 26 +++++- bindings/go/whisper.go | 5 ++ 8 files changed, 264 insertions(+), 35 deletions(-) diff --git a/bindings/go/params.go b/bindings/go/params.go index 95c5bfaf934..8f669e34383 100644 --- a/bindings/go/params.go +++ b/bindings/go/params.go @@ -47,6 +47,44 @@ func (p *Params) SetPrintTimestamps(v bool) { p.print_timestamps = toBool(v) } +// Enable tinydiarize speaker turn detection +func (p *Params) SetDiarize(v bool) { + p.tdrz_enable = toBool(v) +} + +// Voice Activity Detection (VAD) +func (p *Params) SetVAD(v bool) { + p.vad = toBool(v) +} + +func (p *Params) SetVADModelPath(path string) { + p.vad_model_path = C.CString(path) +} + +func (p *Params) SetVADThreshold(t float32) { + p.vad_params.threshold = C.float(t) +} + +func (p *Params) SetVADMinSpeechMs(ms int) { + p.vad_params.min_speech_duration_ms = C.int(ms) +} + +func (p *Params) SetVADMinSilenceMs(ms int) { + p.vad_params.min_silence_duration_ms = C.int(ms) +} + +func (p *Params) SetVADMaxSpeechSec(s float32) { + p.vad_params.max_speech_duration_s = C.float(s) +} + 
+func (p *Params) SetVADSpeechPadMs(ms int) { + p.vad_params.speech_pad_ms = C.int(ms) +} + +func (p *Params) SetVADSamplesOverlap(sec float32) { + p.vad_params.samples_overlap = C.float(sec) +} + // Set language id func (p *Params) SetLanguage(lang int) error { if lang == -1 { diff --git a/bindings/go/pkg/whisper/consts.go b/bindings/go/pkg/whisper/consts.go index 0af45ee8c20..ee002cff047 100644 --- a/bindings/go/pkg/whisper/consts.go +++ b/bindings/go/pkg/whisper/consts.go @@ -28,3 +28,10 @@ const SampleRate = whisper.SampleRate // SampleBits is the number of bytes per sample. const SampleBits = whisper.SampleBits + +type SamplingStrategy whisper.SamplingStrategy + +const ( + SAMPLING_GREEDY SamplingStrategy = SamplingStrategy(whisper.SAMPLING_GREEDY) + SAMPLING_BEAM_SEARCH SamplingStrategy = SamplingStrategy(whisper.SAMPLING_BEAM_SEARCH) +) diff --git a/bindings/go/pkg/whisper/context.go b/bindings/go/pkg/whisper/context.go index 01a510fe742..09b35be68e7 100644 --- a/bindings/go/pkg/whisper/context.go +++ b/bindings/go/pkg/whisper/context.go @@ -19,11 +19,11 @@ type context struct { Parameters } -func newContext(model Model, params whisper.Params) (Context, error) { +func newContext(model Model, params Parameters) (Context, error) { c := new(context) c.model = model - c.params = newParameters(&params) + c.params = params c.Parameters = c.params // allocate isolated state per context @@ -132,7 +132,7 @@ func (context *context) Process( context.params.SetSingleSegment(true) } - lowLevelParams := context.params.WhisperParams() + lowLevelParams := context.params.UnsafeParams() if lowLevelParams == nil { return fmt.Errorf("lowLevelParams is nil: %w", ErrInternalAppError) } @@ -249,11 +249,12 @@ func (context *context) IsLANG(t Token, lang string) bool { // State-backed helper functions func toSegmentFromState(ctx *whisper.Context, st *whisper.State, n int) Segment { return Segment{ - Num: n, - Text: strings.TrimSpace(ctx.Whisper_full_get_segment_text_from_state(st, n)), 
- Start: time.Duration(ctx.Whisper_full_get_segment_t0_from_state(st, n)) * time.Millisecond * 10, - End: time.Duration(ctx.Whisper_full_get_segment_t1_from_state(st, n)) * time.Millisecond * 10, - Tokens: toTokensFromState(ctx, st, n), + Num: n, + Text: strings.TrimSpace(ctx.Whisper_full_get_segment_text_from_state(st, n)), + Start: time.Duration(ctx.Whisper_full_get_segment_t0_from_state(st, n)) * time.Millisecond * 10, + End: time.Duration(ctx.Whisper_full_get_segment_t1_from_state(st, n)) * time.Millisecond * 10, + Tokens: toTokensFromState(ctx, st, n), + SpeakerTurnNext: ctx.Whisper_full_get_segment_speaker_turn_next_from_state(st, n), } } diff --git a/bindings/go/pkg/whisper/context_test.go b/bindings/go/pkg/whisper/context_test.go index 305446624f9..f94c9139d6e 100644 --- a/bindings/go/pkg/whisper/context_test.go +++ b/bindings/go/pkg/whisper/context_test.go @@ -17,7 +17,7 @@ func TestSetLanguage(t *testing.T) { model, err := whisper.New(ModelPath) assert.NoError(err) assert.NotNil(model) - defer model.Close() + defer func() { _ = model.Close() }() context, err := model.NewContext() assert.NoError(err) @@ -35,7 +35,7 @@ func TestContextModelIsMultilingual(t *testing.T) { model, err := whisper.New(ModelPath) assert.NoError(err) assert.NotNil(model) - defer model.Close() + defer func() { _ = model.Close() }() context, err := model.NewContext() assert.NoError(err) @@ -54,7 +54,7 @@ func TestLanguage(t *testing.T) { model, err := whisper.New(ModelPath) assert.NoError(err) assert.NotNil(model) - defer model.Close() + defer func() { _ = model.Close() }() context, err := model.NewContext() assert.NoError(err) @@ -72,7 +72,7 @@ func TestProcess(t *testing.T) { fh, err := os.Open(SamplePath) assert.NoError(err) - defer fh.Close() + defer func() { _ = fh.Close() }() // Decode the WAV file - load the full buffer dec := wav.NewDecoder(fh) @@ -85,7 +85,7 @@ func TestProcess(t *testing.T) { model, err := whisper.New(ModelPath) assert.NoError(err) assert.NotNil(model) - 
defer model.Close() + defer func() { _ = model.Close() }() context, err := model.NewContext() assert.NoError(err) @@ -99,7 +99,7 @@ func TestDetectedLanguage(t *testing.T) { fh, err := os.Open(SamplePath) assert.NoError(err) - defer fh.Close() + defer func() { _ = fh.Close() }() // Decode the WAV file - load the full buffer dec := wav.NewDecoder(fh) @@ -112,7 +112,7 @@ func TestDetectedLanguage(t *testing.T) { model, err := whisper.New(ModelPath) assert.NoError(err) assert.NotNil(model) - defer model.Close() + defer func() { _ = model.Close() }() context, err := model.NewContext() assert.NoError(err) @@ -139,7 +139,7 @@ func TestContext_ConcurrentProcessing(t *testing.T) { fh, err := os.Open(SamplePath) assert.NoError(err) - defer fh.Close() + defer func() { _ = fh.Close() }() dec := wav.NewDecoder(fh) buf, err := dec.FullPCMBuffer() @@ -150,12 +150,12 @@ func TestContext_ConcurrentProcessing(t *testing.T) { model, err := whisper.New(ModelPath) assert.NoError(err) assert.NotNil(model) - defer model.Close() + defer func() { _ = model.Close() }() ctx, err := model.NewContext() assert.NoError(err) assert.NotNil(ctx) - defer ctx.Close() + defer func() { _ = ctx.Close() }() err = ctx.Process(data, nil, nil, nil) assert.NoError(err) @@ -179,7 +179,7 @@ func TestContext_Parallel_DifferentInputs(t *testing.T) { fh, err := os.Open(SamplePath) assert.NoError(err) - defer fh.Close() + defer func() { _ = fh.Close() }() dec := wav.NewDecoder(fh) buf, err := dec.FullPCMBuffer() @@ -195,14 +195,14 @@ func TestContext_Parallel_DifferentInputs(t *testing.T) { model, err := whisper.New(ModelPath) assert.NoError(err) assert.NotNil(model) - defer model.Close() + defer func() { _ = model.Close() }() ctx1, err := model.NewContext() assert.NoError(err) - defer ctx1.Close() + defer func() { _ = ctx1.Close() }() ctx2, err := model.NewContext() assert.NoError(err) - defer ctx2.Close() + defer func() { _ = ctx2.Close() }() // Run in parallel - each context has isolated whisper_state var wg 
sync.WaitGroup @@ -258,7 +258,7 @@ func TestContext_Close(t *testing.T) { model, err := whisper.New(ModelPath) assert.NoError(err) assert.NotNil(model) - defer model.Close() + defer func() { _ = model.Close() }() ctx, err := model.NewContext() assert.NoError(err) @@ -294,3 +294,82 @@ func Test_Close_Context_of_Closed_Model(t *testing.T) { require.NoError(t, model.Close()) require.NoError(t, ctx.Close()) } + +func TestContext_VAD_And_Diarization_Params_DoNotPanic(t *testing.T) { + assert := assert.New(t) + + if _, err := os.Stat(ModelPath); os.IsNotExist(err) { + t.Skip("Skipping test, model not found:", ModelPath) + } + if _, err := os.Stat(SamplePath); os.IsNotExist(err) { + t.Skip("Skipping test, sample not found:", SamplePath) + } + + fh, err := os.Open(SamplePath) + assert.NoError(err) + defer func() { _ = fh.Close() }() + + dec := wav.NewDecoder(fh) + buf, err := dec.FullPCMBuffer() + assert.NoError(err) + assert.Equal(uint16(1), dec.NumChans) + data := buf.AsFloat32Buffer().Data + + model, err := whisper.New(ModelPath) + assert.NoError(err) + defer func() { _ = model.Close() }() + + ctx, err := model.NewContext() + assert.NoError(err) + defer func() { _ = ctx.Close() }() + + p := ctx.Params() + p.SetDiarize(true) + p.SetVAD(true) + p.SetVADThreshold(0.5) + p.SetVADMinSpeechMs(200) + p.SetVADMinSilenceMs(100) + p.SetVADMaxSpeechSec(10) + p.SetVADSpeechPadMs(30) + p.SetVADSamplesOverlap(0.02) + + err = ctx.Process(data, nil, nil, nil) + assert.NoError(err) +} + +func TestContext_SpeakerTurnNext_Field_Present(t *testing.T) { + assert := assert.New(t) + + if _, err := os.Stat(ModelPath); os.IsNotExist(err) { + t.Skip("Skipping test, model not found:", ModelPath) + } + if _, err := os.Stat(SamplePath); os.IsNotExist(err) { + t.Skip("Skipping test, sample not found:", SamplePath) + } + + fh, err := os.Open(SamplePath) + assert.NoError(err) + defer func() { _ = fh.Close() }() + + dec := wav.NewDecoder(fh) + buf, err := dec.FullPCMBuffer() + assert.NoError(err) + 
assert.Equal(uint16(1), dec.NumChans) + data := buf.AsFloat32Buffer().Data + + model, err := whisper.New(ModelPath) + assert.NoError(err) + defer func() { _ = model.Close() }() + + ctx, err := model.NewContext() + assert.NoError(err) + defer func() { _ = ctx.Close() }() + + err = ctx.Process(data, nil, nil, nil) + assert.NoError(err) + + seg, err := ctx.NextSegment() + assert.NoError(err) + t.Logf("SpeakerTurnNext: %v", seg.SpeakerTurnNext) + _ = seg.SpeakerTurnNext // ensure field exists and is readable +} diff --git a/bindings/go/pkg/whisper/interface.go b/bindings/go/pkg/whisper/interface.go index 30a41db3e50..4aa679c6666 100644 --- a/bindings/go/pkg/whisper/interface.go +++ b/bindings/go/pkg/whisper/interface.go @@ -48,6 +48,8 @@ type TokenIdentifier interface { IsText(Token) (bool, error) } +type ParamsConfigure func(Parameters) + // Model is the interface to a whisper model. Create a new model with the // function whisper.New(string) type Model interface { @@ -56,6 +58,11 @@ type Model interface { // Return a new speech-to-text context. NewContext() (Context, error) + NewParams( + sampling SamplingStrategy, + configure ParamsConfigure, + ) (Parameters, error) + // Return true if the model is multilingual. 
IsMultilingual() bool @@ -94,6 +101,25 @@ type Parameters interface { SetEntropyThold(t float32) SetInitialPrompt(prompt string) + SetNoContext(bool) + SetPrintSpecial(bool) + SetPrintProgress(bool) + SetPrintRealtime(bool) + SetPrintTimestamps(bool) + + // Diarization (tinydiarize) + SetDiarize(bool) + + // Voice Activity Detection (VAD) + SetVAD(bool) + SetVADModelPath(string) + SetVADThreshold(float32) + SetVADMinSpeechMs(int) + SetVADMinSilenceMs(int) + SetVADMaxSpeechSec(float32) + SetVADSpeechPadMs(int) + SetVADSamplesOverlap(float32) + // Set the temperature SetTemperature(t float32) @@ -108,7 +134,8 @@ type Parameters interface { // Getter methods Language() string Threads() int - WhisperParams() *whisper.Params + + UnsafeParams() *whisper.Params } // Context is the speech recognition context. @@ -231,6 +258,9 @@ type Segment struct { // The tokens of the segment. Tokens []Token + + // True if the next segment is predicted as a speaker turn (tinydiarize) + SpeakerTurnNext bool } // Token is a text or special token diff --git a/bindings/go/pkg/whisper/model.go b/bindings/go/pkg/whisper/model.go index 752c7cf02bb..203ec7b20b3 100644 --- a/bindings/go/pkg/whisper/model.go +++ b/bindings/go/pkg/whisper/model.go @@ -3,7 +3,6 @@ package whisper import ( "fmt" "os" - "runtime" // Bindings whisper "github.com/ggerganov/whisper.cpp/bindings/go" @@ -88,24 +87,72 @@ func (model *model) Languages() []string { // NewContext creates a new speech-to-text context. // Each context is backed by an isolated whisper_state for safe concurrent processing. 
func (model *model) NewContext() (Context, error) { - ctx, err := model.ctx.UnsafeContext() + // Create new context with default params + params, err := model.newParams(SAMPLING_GREEDY, nil) if err != nil { - return nil, ErrModelClosed + return nil, err } - // Create new context with default params - params := ctx.Whisper_full_default_params(whisper.SAMPLING_GREEDY) + // Return new context (now state-backed) + return newContext( + model, + params, + ) +} + +func (model *model) NewParams( + sampling SamplingStrategy, + configure ParamsConfigure, +) (Parameters, error) { + return model.newParams(sampling, nil) +} + +// NewContextWithParams creates a new speech-to-text context and allows +// callers to customize the decoding parameters before the state is used. +// The resulting Context is backed by an isolated whisper_state for safe +// concurrent processing. +func (model *model) NewContextWithParams( + sampling SamplingStrategy, + configure ParamsConfigure, +) (Context, error) { + params, err := model.newParams(sampling, configure) + if err != nil { + return nil, err + } + return newContext( + model, + params, + ) +} + +func defaultParamsConfigure(params Parameters) { params.SetTranslate(false) params.SetPrintSpecial(false) params.SetPrintProgress(false) params.SetPrintRealtime(false) params.SetPrintTimestamps(false) - params.SetThreads(runtime.NumCPU()) - params.SetNoContext(true) +} - // Return new context (now state-backed) - return newContext(model, params) +func (m *model) newParams( + sampling SamplingStrategy, + configure ParamsConfigure, +) (Parameters, error) { + ctx, err := m.ctx.UnsafeContext() + if err != nil { + return nil, ErrModelClosed + } + + p := ctx.Whisper_full_default_params(whisper.SamplingStrategy(sampling)) + safeParams := newParameters(&p) + + defaultParamsConfigure(safeParams) + + if configure != nil { + configure(safeParams) + } + + return safeParams, nil } // PrintTimings prints the model performance timings to stdout. 
diff --git a/bindings/go/pkg/whisper/params_wrap.go b/bindings/go/pkg/whisper/params_wrap.go index 55a41453765..ac3a1ccc1a9 100644 --- a/bindings/go/pkg/whisper/params_wrap.go +++ b/bindings/go/pkg/whisper/params_wrap.go @@ -13,7 +13,11 @@ type parameters struct { p *whisper.Params } -func newParameters(p *whisper.Params) Parameters { return &parameters{p: p} } +func newParameters(whisperParams *whisper.Params) Parameters { + return &parameters{ + p: whisperParams, + } +} func (w *parameters) SetTranslate(v bool) { w.p.SetTranslate(v) } func (w *parameters) SetSplitOnWord(v bool) { w.p.SetSplitOnWord(v) } @@ -32,6 +36,24 @@ func (w *parameters) SetEntropyThold(t float32) { w.p.SetEntropyThold(t) func (w *parameters) SetInitialPrompt(prompt string) { w.p.SetInitialPrompt(prompt) } func (w *parameters) SetTemperature(t float32) { w.p.SetTemperature(t) } func (w *parameters) SetTemperatureFallback(t float32) { w.p.SetTemperatureFallback(t) } +func (w *parameters) SetNoContext(v bool) { w.p.SetNoContext(v) } +func (w *parameters) SetPrintSpecial(v bool) { w.p.SetPrintSpecial(v) } +func (w *parameters) SetPrintProgress(v bool) { w.p.SetPrintProgress(v) } +func (w *parameters) SetPrintRealtime(v bool) { w.p.SetPrintRealtime(v) } +func (w *parameters) SetPrintTimestamps(v bool) { w.p.SetPrintTimestamps(v) } + +// Diarization (tinydiarize) +func (w *parameters) SetDiarize(v bool) { w.p.SetDiarize(v) } + +// Voice Activity Detection (VAD) +func (w *parameters) SetVAD(v bool) { w.p.SetVAD(v) } +func (w *parameters) SetVADModelPath(p string) { w.p.SetVADModelPath(p) } +func (w *parameters) SetVADThreshold(t float32) { w.p.SetVADThreshold(t) } +func (w *parameters) SetVADMinSpeechMs(ms int) { w.p.SetVADMinSpeechMs(ms) } +func (w *parameters) SetVADMinSilenceMs(ms int) { w.p.SetVADMinSilenceMs(ms) } +func (w *parameters) SetVADMaxSpeechSec(s float32) { w.p.SetVADMaxSpeechSec(s) } +func (w *parameters) SetVADSpeechPadMs(ms int) { w.p.SetVADSpeechPadMs(ms) } +func (w *parameters) 
SetVADSamplesOverlap(sec float32) { w.p.SetVADSamplesOverlap(sec) } func (w *parameters) SetLanguage(lang string) error { if lang == "auto" { @@ -62,7 +84,7 @@ func (w *parameters) Threads() int { return w.p.Threads() } -func (w *parameters) WhisperParams() *whisper.Params { +func (w *parameters) UnsafeParams() *whisper.Params { return w.p } diff --git a/bindings/go/whisper.go b/bindings/go/whisper.go index b6ef48a8531..83089a26f12 100644 --- a/bindings/go/whisper.go +++ b/bindings/go/whisper.go @@ -533,6 +533,11 @@ func (ctx *Context) Whisper_get_logits_from_state(state *State) []float32 { return (*[1 << 30]float32)(unsafe.Pointer(C.whisper_get_logits_from_state((*C.struct_whisper_state)(state))))[:ctx.Whisper_n_vocab()] } +// Get whether the next segment is predicted as a speaker turn (tinydiarize) +func (ctx *Context) Whisper_full_get_segment_speaker_turn_next_from_state(state *State, segment int) bool { + return bool(C.whisper_full_get_segment_speaker_turn_next_from_state((*C.struct_whisper_state)(state), C.int(segment))) +} + /////////////////////////////////////////////////////////////////////////////// // CALLBACKS From 2f16f8039d22c3c963c1fbf359282cf2bddfe915 Mon Sep 17 00:00:00 2001 From: ciricc Date: Sun, 14 Sep 2025 00:20:10 +0300 Subject: [PATCH 04/19] refactor(go bindings): add diarization unit tests --- bindings/go/.gitignore | 1 + bindings/go/Makefile | 11 +++ .../go/examples/go-model-download/main.go | 14 +++- bindings/go/params.go | 5 ++ bindings/go/pkg/whisper/context.go | 3 + bindings/go/pkg/whisper/context_test.go | 71 +++++++++++++++++++ bindings/go/pkg/whisper/interface.go | 7 ++ bindings/go/pkg/whisper/params_wrap.go | 1 + bindings/go/pkg/whisper/util_test.go | 6 +- 9 files changed, 114 insertions(+), 5 deletions(-) diff --git a/bindings/go/.gitignore b/bindings/go/.gitignore index 036df1d3b0d..20a1b6e48e3 100644 --- a/bindings/go/.gitignore +++ b/bindings/go/.gitignore @@ -1,2 +1,3 @@ build models +samples/a13.wav diff --git 
a/bindings/go/Makefile b/bindings/go/Makefile index e4436a6a291..c9ab66255f1 100644 --- a/bindings/go/Makefile +++ b/bindings/go/Makefile @@ -49,6 +49,9 @@ examples: $(EXAMPLES_DIR) model-small: mkdir examples/go-model-download @${BUILD_DIR}/go-model-download -out models ggml-small.en.bin +model-small-tdrz: mkdir examples/go-model-download + @${BUILD_DIR}/go-model-download -out models ggml-small.en-tdrz.bin + $(EXAMPLES_DIR): mkdir whisper modtidy @echo Build example $(notdir $@) ifeq ($(UNAME_S),Darwin) @@ -57,6 +60,14 @@ else @C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} go build ${BUILD_FLAGS} -o ${BUILD_DIR}/$(notdir $@) ./$@ endif +.PHONY: samples +samples: + @echo "Downloading samples..." + @mkdir -p samples + @wget --quiet --show-progress -O samples/a13.mp3 https://upload.wikimedia.org/wikipedia/commons/transcoded/6/6f/Apollo13-wehaveaproblem.ogg/Apollo13-wehaveaproblem.ogg.mp3 + @ffmpeg -loglevel -0 -y -i samples/a13.mp3 -ar 16000 -ac 1 -c:a pcm_s16le -ss 00:00:00 -to 00:00:30 samples/a13.wav + @rm samples/a13.mp3 + mkdir: @echo Mkdir ${BUILD_DIR} @install -d ${BUILD_DIR} diff --git a/bindings/go/examples/go-model-download/main.go b/bindings/go/examples/go-model-download/main.go index 728c6df53d4..6ecd1a26840 100644 --- a/bindings/go/examples/go-model-download/main.go +++ b/bindings/go/examples/go-model-download/main.go @@ -18,9 +18,10 @@ import ( // CONSTANTS const ( - srcUrl = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/" // The location of the models - srcExt = ".bin" // Filename extension - bufSize = 1024 * 64 // Size of the buffer used for downloading the model + srcUrl = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/" // The location of the models + srcUrlTinydiarize = "https://huggingface.co/akashmjn/tinydiarize-whisper.cpp/resolve/main/" + srcExt = ".bin" // Filename extension + bufSize = 1024 * 64 // Size of the buffer used for downloading the model ) var ( @@ -38,6 +39,7 @@ var ( "large-v2", 
"large-v2-q5_0", "large-v2-q8_0", "large-v3", "large-v3-q5_0", "large-v3-turbo", "large-v3-turbo-q5_0", "large-v3-turbo-q8_0", + "small.en-tdrz", } ) @@ -219,6 +221,12 @@ func URLForModel(model string) (string, error) { model += srcExt } + srcUrl := srcUrl + + if strings.Contains(model, "tdrz") { + srcUrl = srcUrlTinydiarize + } + // Parse the base URL url, err := url.Parse(srcUrl) if err != nil { diff --git a/bindings/go/params.go b/bindings/go/params.go index 8f669e34383..07801649300 100644 --- a/bindings/go/params.go +++ b/bindings/go/params.go @@ -47,6 +47,11 @@ func (p *Params) SetPrintTimestamps(v bool) { p.print_timestamps = toBool(v) } +// Enable extra debug information +func (p *Params) SetDebugMode(v bool) { + p.debug_mode = toBool(v) +} + // Enable tinydiarize speaker turn detection func (p *Params) SetDiarize(v bool) { p.tdrz_enable = toBool(v) diff --git a/bindings/go/pkg/whisper/context.go b/bindings/go/pkg/whisper/context.go index 09b35be68e7..7e20948156f 100644 --- a/bindings/go/pkg/whisper/context.go +++ b/bindings/go/pkg/whisper/context.go @@ -3,6 +3,7 @@ package whisper import ( "fmt" "io" + "log" "runtime" "strings" "time" @@ -137,6 +138,8 @@ func (context *context) Process( return fmt.Errorf("lowLevelParams is nil: %w", ErrInternalAppError) } + log.Println("lowLevelParams", lowLevelParams) + st, err := context.st.UnsafeState() if err != nil { return err diff --git a/bindings/go/pkg/whisper/context_test.go b/bindings/go/pkg/whisper/context_test.go index f94c9139d6e..3c3df9601a5 100644 --- a/bindings/go/pkg/whisper/context_test.go +++ b/bindings/go/pkg/whisper/context_test.go @@ -1,6 +1,7 @@ package whisper_test import ( + "io" "os" "sync" "testing" @@ -337,6 +338,76 @@ func TestContext_VAD_And_Diarization_Params_DoNotPanic(t *testing.T) { assert.NoError(err) } +func TestDiarization_TwoSpeakers_Boundaries(t *testing.T) { + fh, err := os.Open(MultiSpeakerSamplePath) + require.NoError(t, err) + defer func() { _ = fh.Close() }() + + dec := 
wav.NewDecoder(fh) + buf, err := dec.FullPCMBuffer() + assert.Equal(t, uint16(1), dec.NumChans) + require.NoError(t, err) + data := buf.AsFloat32Buffer().Data + + model, err := whisper.New(ModelTinydiarizePath) + require.NoError(t, err) + defer func() { _ = model.Close() }() + + // diarize ON with beam search and tighter segmentation + ctxOn, err := model.NewContextWithParams(whisper.SAMPLING_GREEDY, func(p whisper.Parameters) { + p.SetDiarize(true) + p.SetVAD(false) + p.SetSplitOnWord(true) + p.SetMaxSegmentLength(1) + p.SetMaxTokensPerSegment(64) + p.SetTokenTimestamps(true) + }) + require.NoError(t, err) + defer func() { _ = ctxOn.Close() }() + + require.NoError(t, ctxOn.Process(data, nil, nil, nil)) + var turnsOn int + for { + seg, err := ctxOn.NextSegment() + if err == io.EOF { + break + } + require.NoError(t, err) + if seg.SpeakerTurnNext { + turnsOn++ + } + } + require.Greater(t, turnsOn, 0, "expected speaker turn boundaries with diarization enabled") + + // diarize OFF baseline with same segmentation and beam + ctxOff, err := model.NewContextWithParams(whisper.SAMPLING_BEAM_SEARCH, func(p whisper.Parameters) { + p.SetBeamSize(3) + p.SetDiarize(false) + p.SetVAD(false) + p.SetSplitOnWord(true) + p.SetMaxSegmentLength(40) + p.SetMaxTokensPerSegment(64) + p.SetTokenTimestamps(true) + }) + require.NoError(t, err) + defer func() { _ = ctxOff.Close() }() + + require.NoError(t, ctxOff.Process(data, nil, nil, nil)) + var turnsOff int + for { + seg, err := ctxOff.NextSegment() + if err == io.EOF { + break + } + require.NoError(t, err) + if seg.SpeakerTurnNext { + turnsOff++ + } + } + + require.GreaterOrEqual(t, turnsOn, turnsOff, "diarization should not reduce turn boundaries") +} + func TestContext_SpeakerTurnNext_Field_Present(t *testing.T) { assert := assert.New(t) diff --git a/bindings/go/pkg/whisper/interface.go b/bindings/go/pkg/whisper/interface.go index 4aa679c6666..9040718e013 100644 --- a/bindings/go/pkg/whisper/interface.go +++ 
b/bindings/go/pkg/whisper/interface.go @@ -63,6 +63,10 @@ type Model interface { configure ParamsConfigure, ) (Parameters, error) + // Return a new speech-to-text context configured via the provided function + // and sampling strategy. The context is backed by an isolated whisper_state. + NewContextWithParams(sampling SamplingStrategy, configure ParamsConfigure) (Context, error) + // Return true if the model is multilingual. IsMultilingual() bool @@ -107,6 +111,9 @@ type Parameters interface { SetPrintRealtime(bool) SetPrintTimestamps(bool) + // Enable extra debug info (e.g., dump log_mel) + SetDebugMode(bool) + // Diarization (tinydiarize) SetDiarize(bool) diff --git a/bindings/go/pkg/whisper/params_wrap.go b/bindings/go/pkg/whisper/params_wrap.go index ac3a1ccc1a9..17b70b19e92 100644 --- a/bindings/go/pkg/whisper/params_wrap.go +++ b/bindings/go/pkg/whisper/params_wrap.go @@ -41,6 +41,7 @@ func (w *parameters) SetPrintSpecial(v bool) { w.p.SetPrintSpecial(v) func (w *parameters) SetPrintProgress(v bool) { w.p.SetPrintProgress(v) } func (w *parameters) SetPrintRealtime(v bool) { w.p.SetPrintRealtime(v) } func (w *parameters) SetPrintTimestamps(v bool) { w.p.SetPrintTimestamps(v) } +func (w *parameters) SetDebugMode(v bool) { w.p.SetDebugMode(v) } // Diarization (tinydiarize) func (w *parameters) SetDiarize(v bool) { w.p.SetDiarize(v) } diff --git a/bindings/go/pkg/whisper/util_test.go b/bindings/go/pkg/whisper/util_test.go index 8ea2d5b4781..1b27255ae70 100644 --- a/bindings/go/pkg/whisper/util_test.go +++ b/bindings/go/pkg/whisper/util_test.go @@ -1,6 +1,8 @@ package whisper_test const ( - ModelPath = "../../models/ggml-small.en.bin" - SamplePath = "../../samples/jfk.wav" + ModelPath = "../../models/ggml-small.en.bin" + ModelTinydiarizePath = "../../models/ggml-small.en-tdrz.bin" + SamplePath = "../../samples/jfk.wav" + MultiSpeakerSamplePath = "../../samples/a13.wav" ) From 01f5c6b708121e5fd1381ea642e28f246afdf1ee Mon Sep 17 00:00:00 2001 From: ciricc Date: Sun, 
14 Sep 2025 01:47:57 +0300 Subject: [PATCH 05/19] refactor(go bindings): remove public method accessing unsafe whisper --- bindings/go/pkg/whisper/context.go | 31 ++++--- bindings/go/pkg/whisper/interface.go | 73 +++++++++++----- bindings/go/pkg/whisper/model.go | 16 ++-- bindings/go/pkg/whisper/model_test.go | 8 +- bindings/go/pkg/whisper/params_wrap.go | 6 +- bindings/go/pkg/whisper/token_identifier.go | 16 ++-- bindings/go/pkg/whisper/whisper_ctx.go | 5 +- bindings/go/pkg/whisper/whisper_ctx_test.go | 85 +++++++++++++++++++ bindings/go/pkg/whisper/whisper_state_test.go | 53 ++++++++++++ 9 files changed, 233 insertions(+), 60 deletions(-) create mode 100644 bindings/go/pkg/whisper/whisper_ctx_test.go create mode 100644 bindings/go/pkg/whisper/whisper_state_test.go diff --git a/bindings/go/pkg/whisper/context.go b/bindings/go/pkg/whisper/context.go index 7e20948156f..e48617114a7 100644 --- a/bindings/go/pkg/whisper/context.go +++ b/bindings/go/pkg/whisper/context.go @@ -3,7 +3,6 @@ package whisper import ( "fmt" "io" - "log" "runtime" "strings" "time" @@ -14,13 +13,13 @@ import ( type context struct { n int - model Model + model *model st WhisperState - params Parameters + params *parameters Parameters } -func newContext(model Model, params Parameters) (Context, error) { +func newContext(model *model, params *parameters) (Context, error) { c := new(context) c.model = model @@ -28,7 +27,7 @@ func newContext(model Model, params Parameters) (Context, error) { c.Parameters = c.params // allocate isolated state per context - ctx, err := model.WhisperContext().UnsafeContext() + ctx, err := model.whisperContext().unsafeContext() if err != nil { return nil, err } @@ -46,7 +45,7 @@ func newContext(model Model, params Parameters) (Context, error) { // DetectedLanguage returns the detected language for the current context data func (context *context) DetectedLanguage() string { - ctx, err := context.model.WhisperContext().UnsafeContext() + ctx, err := 
context.model.whisperContext().unsafeContext() if err != nil { return "" } @@ -98,7 +97,7 @@ func (context *context) SystemInfo() string { // Make sure to call whisper_pcm_to_mel() or whisper_set_mel() first. // Returns the probabilities of all languages for this context's state. func (context *context) WhisperLangAutoDetect(offset_ms int, n_threads int) ([]float32, error) { - ctx, err := context.model.WhisperContext().UnsafeContext() + ctx, err := context.model.whisperContext().unsafeContext() if err != nil { return nil, err } @@ -123,7 +122,7 @@ func (context *context) Process( callNewSegment SegmentCallback, callProgress ProgressCallback, ) error { - ctx, err := context.model.WhisperContext().UnsafeContext() + ctx, err := context.model.whisperContext().unsafeContext() if err != nil { return err } @@ -133,13 +132,11 @@ func (context *context) Process( context.params.SetSingleSegment(true) } - lowLevelParams := context.params.UnsafeParams() - if lowLevelParams == nil { - return fmt.Errorf("lowLevelParams is nil: %w", ErrInternalAppError) + lowLevelParams, err := context.params.unsafeParams() + if err != nil { + return err } - log.Println("lowLevelParams", lowLevelParams) - st, err := context.st.UnsafeState() if err != nil { return err @@ -168,7 +165,7 @@ func (context *context) Process( // NextSegment returns the next segment from the context buffer func (context *context) NextSegment() (Segment, error) { - ctx, err := context.model.WhisperContext().UnsafeContext() + ctx, err := context.model.whisperContext().unsafeContext() if err != nil { return Segment{}, err } @@ -277,3 +274,9 @@ func toTokensFromState(ctx *whisper.Context, st *whisper.State, n int) []Token { return result } + +func (context *context) Model() Model { + return context.model +} + +var _ Context = (*context)(nil) diff --git a/bindings/go/pkg/whisper/interface.go b/bindings/go/pkg/whisper/interface.go index 9040718e013..56974769cea 100644 --- a/bindings/go/pkg/whisper/interface.go +++ 
b/bindings/go/pkg/whisper/interface.go @@ -3,8 +3,6 @@ package whisper import ( "io" "time" - - whisper "github.com/ggerganov/whisper.cpp/bindings/go" ) /////////////////////////////////////////////////////////////////////////////// @@ -56,8 +54,13 @@ type Model interface { io.Closer // Return a new speech-to-text context. + // It may return an error is the model is not loaded or closed NewContext() (Context, error) + // Return a new parameters wrapper + // sampling is the sampling strategy to use + // configure is the function to configure the parameters + // It may return an error is the model is not loaded or closed NewParams( sampling SamplingStrategy, configure ParamsConfigure, @@ -65,9 +68,14 @@ type Model interface { // Return a new speech-to-text context configured via the provided function // and sampling strategy. The context is backed by an isolated whisper_state. - NewContextWithParams(sampling SamplingStrategy, configure ParamsConfigure) (Context, error) + // It may return an error is the model is not loaded or closed + NewContextWithParams( + sampling SamplingStrategy, + configure ParamsConfigure, + ) (Context, error) // Return true if the model is multilingual. + // It returns false if the model is not loaded or closed IsMultilingual() bool // Return all languages supported. 
@@ -81,6 +89,8 @@ type Model interface { ResetTimings() // WhisperContext returns the memory-safe whisper context wrapper of the raw whisper context + // You may need to use this to get the raw whisper context + // Ot check that the model's context is not closed WhisperContext() WhisperContext // Token identifier @@ -113,7 +123,6 @@ type Parameters interface { // Enable extra debug info (e.g., dump log_mel) SetDebugMode(bool) - // Diarization (tinydiarize) SetDiarize(bool) @@ -133,6 +142,9 @@ type Parameters interface { // Set the fallback temperature incrementation // Pass -1.0 to disable this feature SetTemperatureFallback(t float32) + + // Set the language + // If the model is not multilingual, this will return an error SetLanguage(string) error // Set single segment mode @@ -141,34 +153,39 @@ type Parameters interface { // Getter methods Language() string Threads() int - - UnsafeParams() *whisper.Params } // Context is the speech recognition context. type Context interface { io.Closer + // Deprecated: Use Params().SetLanguage() instead SetLanguage(string) error // Deprecated: Use Params().SetTranslate() instead SetTranslate(bool) + // Deprecated: Use Params().SetSplitOnWord() instead SetSplitOnWord(bool) + // Deprecated: Use Params().SetThreads() instead SetThreads(uint) // Deprecated: Use Params().SetOffset() instead SetOffset(time.Duration) + // Deprecated: Use Params().SetDuration() instead SetDuration(time.Duration) + // Deprecated: Use Params().SetTokenThreshold() instead SetTokenThreshold(float32) // Deprecated: Use Params().SetTokenSumThreshold() instead SetTokenSumThreshold(float32) // Deprecated: Use Params().SetMaxSegmentLength() instead + SetMaxSegmentLength(uint) + // Deprecated: Use Params().SetTokenTimestamps() instead SetTokenTimestamps(bool) @@ -200,7 +217,10 @@ type Context interface { // Deprecated: Use Params().Language() instead Language() string - // Return true if the model is multilingual. 
+ // Return the model that the context is backed by + Model() Model + + // Deprecated: Use Model().IsMultilingual() instead IsMultilingual() bool // Get detected language @@ -215,37 +235,39 @@ type Context interface { // is reached, when io.EOF is returned. NextSegment() (Segment, error) - // Deprecated token methods - use Model.IsBEG(), Model.IsSOT(), etc. instead - // Deprecated: Use Model.IsBEG() instead + // Deprecated: Use Model().TokenIdentifier().IsBEG() instead IsBEG(Token) bool - // Deprecated: Use Model.IsSOT() instead + // Deprecated: Use Model().TokenIdentifier().IsSOT() instead IsSOT(Token) bool - // Deprecated: Use Model.IsEOT() instead + // Deprecated: Use Model().TokenIdentifier().IsEOT() instead IsEOT(Token) bool - // Deprecated: Use Model.IsPREV() instead + // Deprecated: Use Model().TokenIdentifier().IsPREV() instead IsPREV(Token) bool - // Deprecated: Use Model.IsSOLM() instead + // Deprecated: Use Model().TokenIdentifier().IsSOLM() instead IsSOLM(Token) bool - // Deprecated: Use Model.IsNOT() instead + // Deprecated: Use Model().TokenIdentifier().IsNOT() instead IsNOT(Token) bool - // Deprecated: Use Model.IsLANG() instead + // Deprecated: Use Model().TokenIdentifier().IsLANG() instead IsLANG(Token, string) bool - // Deprecated: Use Model.IsText() instead + // Deprecated: Use Model().TokenIdentifier().IsText() instead IsText(Token) bool - // Deprecated: Use Model.PrintTimings() instead - these are model-level performance metrics + // Deprecated: Use Model().PrintTimings() instead + // these are model-level performance metrics PrintTimings() - // Deprecated: Use Model.ResetTimings() instead - these are model-level performance metrics + // Deprecated: Use Model().ResetTimings() instead + // these are model-level performance metrics ResetTimings() + // SystemInfo returns the system information SystemInfo() string // Params returns a high-level parameters wrapper - preferred method @@ -267,13 +289,22 @@ type Segment struct { Tokens []Token // True 
if the next segment is predicted as a speaker turn (tinydiarize) + // It works only with the diarization supporting models (like small.en-tdrz.bin) with the diarization enabled + // using Parameters.SetDiarize(true) SpeakerTurnNext bool } // Token is a text or special token type Token struct { - Id int - Text string - P float32 + // ID of the token + Id int + + // Text of the token + Text string + + // Probability of the token + P float32 + + // Timestamp of the token Start, End time.Duration } diff --git a/bindings/go/pkg/whisper/model.go b/bindings/go/pkg/whisper/model.go index 203ec7b20b3..988ae15224a 100644 --- a/bindings/go/pkg/whisper/model.go +++ b/bindings/go/pkg/whisper/model.go @@ -41,6 +41,10 @@ func (model *model) WhisperContext() WhisperContext { return model.ctx } +func (model *model) whisperContext() *whisperCtx { + return model.ctx +} + /////////////////////////////////////////////////////////////////////////////// // STRINGIFY @@ -58,7 +62,7 @@ func (model *model) String() string { // Return true if model is multilingual (language and translation options are supported) func (model *model) IsMultilingual() bool { - ctx, err := model.ctx.UnsafeContext() + ctx, err := model.ctx.unsafeContext() if err != nil { return false } @@ -68,7 +72,7 @@ func (model *model) IsMultilingual() bool { // Return all recognized languages. Initially it is set to auto-detect func (model *model) Languages() []string { - ctx, err := model.ctx.UnsafeContext() + ctx, err := model.ctx.unsafeContext() if err != nil { return nil } @@ -137,8 +141,8 @@ func defaultParamsConfigure(params Parameters) { func (m *model) newParams( sampling SamplingStrategy, configure ParamsConfigure, -) (Parameters, error) { - ctx, err := m.ctx.UnsafeContext() +) (*parameters, error) { + ctx, err := m.ctx.unsafeContext() if err != nil { return nil, ErrModelClosed } @@ -157,7 +161,7 @@ func (m *model) newParams( // PrintTimings prints the model performance timings to stdout. 
func (model *model) PrintTimings() { - ctx, err := model.ctx.UnsafeContext() + ctx, err := model.ctx.unsafeContext() if err != nil { return } @@ -167,7 +171,7 @@ func (model *model) PrintTimings() { // ResetTimings resets the model performance timing counters. func (model *model) ResetTimings() { - ctx, err := model.ctx.UnsafeContext() + ctx, err := model.ctx.unsafeContext() if err != nil { return } diff --git a/bindings/go/pkg/whisper/model_test.go b/bindings/go/pkg/whisper/model_test.go index 8797f0d0fd0..d080d8fe301 100644 --- a/bindings/go/pkg/whisper/model_test.go +++ b/bindings/go/pkg/whisper/model_test.go @@ -13,7 +13,7 @@ func TestNew(t *testing.T) { model, err := whisper.New(ModelPath) assert.NoError(err) assert.NotNil(model) - defer model.Close() + defer func() { _ = model.Close() }() }) @@ -42,7 +42,7 @@ func TestNewContext(t *testing.T) { model, err := whisper.New(ModelPath) assert.NoError(err) assert.NotNil(model) - defer model.Close() + defer func() { _ = model.Close() }() context, err := model.NewContext() assert.NoError(err) @@ -55,7 +55,7 @@ func TestIsMultilingual(t *testing.T) { model, err := whisper.New(ModelPath) assert.NoError(err) assert.NotNil(model) - defer model.Close() + defer func() { _ = model.Close() }() isMultilingual := model.IsMultilingual() @@ -71,7 +71,7 @@ func TestLanguages(t *testing.T) { model, err := whisper.New(ModelPath) assert.NoError(err) assert.NotNil(model) - defer model.Close() + defer func() { _ = model.Close() }() expectedLanguages := []string{ "en", "zh", "de", "es", "ru", "ko", "fr", "ja", "pt", "tr", "pl", diff --git a/bindings/go/pkg/whisper/params_wrap.go b/bindings/go/pkg/whisper/params_wrap.go index 17b70b19e92..ad772d88b60 100644 --- a/bindings/go/pkg/whisper/params_wrap.go +++ b/bindings/go/pkg/whisper/params_wrap.go @@ -13,7 +13,7 @@ type parameters struct { p *whisper.Params } -func newParameters(whisperParams *whisper.Params) Parameters { +func newParameters(whisperParams *whisper.Params) *parameters { 
return &parameters{ p: whisperParams, } @@ -85,8 +85,8 @@ func (w *parameters) Threads() int { return w.p.Threads() } -func (w *parameters) UnsafeParams() *whisper.Params { - return w.p +func (w *parameters) unsafeParams() (*whisper.Params, error) { + return w.p, nil } var _ Parameters = &parameters{} diff --git a/bindings/go/pkg/whisper/token_identifier.go b/bindings/go/pkg/whisper/token_identifier.go index 059386c1e8d..13867273d51 100644 --- a/bindings/go/pkg/whisper/token_identifier.go +++ b/bindings/go/pkg/whisper/token_identifier.go @@ -14,7 +14,7 @@ func newTokenIdentifier(whisperContext *whisperCtx) *tokenIdentifier { // Token type checking methods (model-specific vocabulary) func (ti *tokenIdentifier) IsBEG(t Token) (bool, error) { - ctx, err := ti.ctx.UnsafeContext() + ctx, err := ti.ctx.unsafeContext() if err != nil { return false, err } @@ -23,7 +23,7 @@ func (ti *tokenIdentifier) IsBEG(t Token) (bool, error) { } func (ti *tokenIdentifier) IsEOT(t Token) (bool, error) { - ctx, err := ti.ctx.UnsafeContext() + ctx, err := ti.ctx.unsafeContext() if err != nil { return false, err } @@ -32,7 +32,7 @@ func (ti *tokenIdentifier) IsEOT(t Token) (bool, error) { } func (ti *tokenIdentifier) IsSOT(t Token) (bool, error) { - ctx, err := ti.ctx.UnsafeContext() + ctx, err := ti.ctx.unsafeContext() if err != nil { return false, err } @@ -41,7 +41,7 @@ func (ti *tokenIdentifier) IsSOT(t Token) (bool, error) { } func (ti *tokenIdentifier) IsPREV(t Token) (bool, error) { - ctx, err := ti.ctx.UnsafeContext() + ctx, err := ti.ctx.unsafeContext() if err != nil { return false, err } @@ -50,7 +50,7 @@ func (ti *tokenIdentifier) IsPREV(t Token) (bool, error) { } func (ti *tokenIdentifier) IsSOLM(t Token) (bool, error) { - ctx, err := ti.ctx.UnsafeContext() + ctx, err := ti.ctx.unsafeContext() if err != nil { return false, err } @@ -59,7 +59,7 @@ func (ti *tokenIdentifier) IsSOLM(t Token) (bool, error) { } func (ti *tokenIdentifier) IsNOT(t Token) (bool, error) { - ctx, err :=
ti.ctx.UnsafeContext() + ctx, err := ti.ctx.unsafeContext() if err != nil { return false, err } @@ -68,7 +68,7 @@ func (ti *tokenIdentifier) IsNOT(t Token) (bool, error) { } func (ti *tokenIdentifier) IsLANG(t Token, lang string) (bool, error) { - ctx, err := ti.ctx.UnsafeContext() + ctx, err := ti.ctx.unsafeContext() if err != nil { return false, err } @@ -90,7 +90,7 @@ func (ti *tokenIdentifier) IsText(t Token) (bool, error) { return false, nil } - ctx, err := ti.ctx.UnsafeContext() + ctx, err := ti.ctx.unsafeContext() if err != nil { return false, err } diff --git a/bindings/go/pkg/whisper/whisper_ctx.go b/bindings/go/pkg/whisper/whisper_ctx.go index 7e570bc3d6f..bdc166cf8b0 100644 --- a/bindings/go/pkg/whisper/whisper_ctx.go +++ b/bindings/go/pkg/whisper/whisper_ctx.go @@ -8,9 +8,6 @@ type WhisperContext interface { // IsClosed returns true if the whisper context is closed IsClosed() bool - - // UnsafeContext returns the raw whisper context - UnsafeContext() (*whisper.Context, error) } type whisperCtx struct { @@ -38,7 +35,7 @@ func (ctx *whisperCtx) IsClosed() bool { return ctx.ctx == nil } -func (ctx *whisperCtx) UnsafeContext() (*whisper.Context, error) { +func (ctx *whisperCtx) unsafeContext() (*whisper.Context, error) { if ctx.IsClosed() { return nil, ErrModelClosed } diff --git a/bindings/go/pkg/whisper/whisper_ctx_test.go b/bindings/go/pkg/whisper/whisper_ctx_test.go new file mode 100644 index 00000000000..68a094bd26b --- /dev/null +++ b/bindings/go/pkg/whisper/whisper_ctx_test.go @@ -0,0 +1,85 @@ +package whisper + +import ( + "os" + "testing" + + w "github.com/ggerganov/whisper.cpp/bindings/go" + assert "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +const testModelPathCtx = "../../models/ggml-small.en.bin" + +func TestWhisperCtx_NilWrapper(t *testing.T) { + wctx := newWhisperCtx(nil) + + assert.True(t, wctx.IsClosed()) + + raw, err := wctx.unsafeContext() + assert.Nil(t, raw) + require.ErrorIs(t, err, ErrModelClosed) 
+ + require.NoError(t, wctx.Close()) + // idempotent + require.NoError(t, wctx.Close()) +} + +func TestWhisperCtx_Lifecycle(t *testing.T) { + if _, err := os.Stat(testModelPathCtx); os.IsNotExist(err) { + t.Skip("Skipping test, model not found:", testModelPathCtx) + } + + raw := w.Whisper_init(testModelPathCtx) + require.NotNil(t, raw) + + wctx := newWhisperCtx(raw) + assert.False(t, wctx.IsClosed()) + + got, err := wctx.unsafeContext() + require.NoError(t, err) + require.NotNil(t, got) + + // close frees underlying ctx and marks closed + require.NoError(t, wctx.Close()) + assert.True(t, wctx.IsClosed()) + + got, err = wctx.unsafeContext() + assert.Nil(t, got) + require.ErrorIs(t, err, ErrModelClosed) + + // idempotent + require.NoError(t, wctx.Close()) + // no further free; raw already freed by wctx.Close() +} + +func TestWhisperCtx_FromModelLifecycle(t *testing.T) { + if _, err := os.Stat(testModelPathCtx); os.IsNotExist(err) { + t.Skip("Skipping test, model not found:", testModelPathCtx) + } + + modelNew, err := New(testModelPathCtx) + require.NoError(t, err) + require.NotNil(t, modelNew) + + model := modelNew.(*model) + + wc := model.whisperContext() + require.NotNil(t, wc) + + // Should be usable before model.Close + raw, err := wc.unsafeContext() + require.NoError(t, err) + require.NotNil(t, raw) + + // Close model should close underlying context + require.NoError(t, model.Close()) + + assert.True(t, wc.IsClosed()) + raw, err = wc.unsafeContext() + assert.Nil(t, raw) + require.ErrorIs(t, err, ErrModelClosed) + + // Idempotent close on wrapper + require.NoError(t, wc.Close()) +} diff --git a/bindings/go/pkg/whisper/whisper_state_test.go b/bindings/go/pkg/whisper/whisper_state_test.go new file mode 100644 index 00000000000..37d828acb37 --- /dev/null +++ b/bindings/go/pkg/whisper/whisper_state_test.go @@ -0,0 +1,53 @@ +package whisper + +import ( + "os" + "testing" + + w "github.com/ggerganov/whisper.cpp/bindings/go" + assert "github.com/stretchr/testify/assert" 
+ "github.com/stretchr/testify/require" +) + +const testModelPathState = "../../models/ggml-small.en.bin" + +func TestWhisperState_NilWrapper(t *testing.T) { + ws := newWhisperState(nil) + + state, err := ws.UnsafeState() + assert.Nil(t, state) + require.ErrorIs(t, err, ErrModelClosed) + + require.NoError(t, ws.Close()) + // idempotent + require.NoError(t, ws.Close()) +} + +func TestWhisperState_Lifecycle(t *testing.T) { + if _, err := os.Stat(testModelPathState); os.IsNotExist(err) { + t.Skip("Skipping test, model not found:", testModelPathState) + } + + ctx := w.Whisper_init(testModelPathState) + require.NotNil(t, ctx) + defer ctx.Whisper_free() + + state := ctx.Whisper_init_state() + require.NotNil(t, state) + + ws := newWhisperState(state) + + got, err := ws.UnsafeState() + require.NoError(t, err) + require.NotNil(t, got) + + // close frees underlying state and marks closed + require.NoError(t, ws.Close()) + + got, err = ws.UnsafeState() + assert.Nil(t, got) + require.ErrorIs(t, err, ErrModelClosed) + + // idempotent + require.NoError(t, ws.Close()) +} From ba990ab0a89825b3a6b28f9c07a6859b1f81b134 Mon Sep 17 00:00:00 2001 From: ciricc Date: Sun, 14 Sep 2025 02:04:06 +0300 Subject: [PATCH 06/19] refactor(go bindings): fix backward compatibility for error andling logic --- bindings/go/pkg/whisper/consts.go | 9 ++++++--- bindings/go/pkg/whisper/context.go | 5 +++++ bindings/go/pkg/whisper/context_test.go | 9 +++++++++ bindings/go/pkg/whisper/model_test.go | 14 ++++++++++++++ 4 files changed, 34 insertions(+), 3 deletions(-) diff --git a/bindings/go/pkg/whisper/consts.go b/bindings/go/pkg/whisper/consts.go index ee002cff047..c6d5ad388d9 100644 --- a/bindings/go/pkg/whisper/consts.go +++ b/bindings/go/pkg/whisper/consts.go @@ -11,13 +11,16 @@ import ( // ERRORS var ( - ErrUnableToLoadModel = errors.New("unable to load model") - ErrInternalAppError = errors.New("internal application error") + ErrUnableToLoadModel = errors.New("unable to load model") + + // 
Deprecated: Use ErrModelClosed instead for checking the model is closed error + ErrInternalAppError = errors.New("internal application error") + ErrProcessingFailed = errors.New("processing failed") ErrUnsupportedLanguage = errors.New("unsupported language") ErrModelNotMultilingual = errors.New("model is not multilingual") ErrUnableToCreateState = errors.New("unable to create state") - ErrModelClosed = errors.New("model has been closed") + ErrModelClosed = errors.Join(errors.New("model has been closed"), ErrInternalAppError) ) /////////////////////////////////////////////////////////////////////////////// diff --git a/bindings/go/pkg/whisper/context.go b/bindings/go/pkg/whisper/context.go index e48617114a7..a18bb95ed18 100644 --- a/bindings/go/pkg/whisper/context.go +++ b/bindings/go/pkg/whisper/context.go @@ -233,6 +233,11 @@ func (context *context) IsNOT(t Token) bool { } func (context *context) SetLanguage(lang string) error { + if context.model.whisperContext().IsClosed() { + // TODO: remove this logic after deprecating the ErrInternalAppError + return ErrModelClosed + } + if !context.model.IsMultilingual() { return ErrModelNotMultilingual } diff --git a/bindings/go/pkg/whisper/context_test.go b/bindings/go/pkg/whisper/context_test.go index 3c3df9601a5..1cda693d9ec 100644 --- a/bindings/go/pkg/whisper/context_test.go +++ b/bindings/go/pkg/whisper/context_test.go @@ -273,9 +273,18 @@ func TestContext_Close(t *testing.T) { err = ctx.Process([]float32{0.1, 0.2, 0.3}, nil, nil, nil) require.ErrorIs(t, err, whisper.ErrModelClosed) + // TODO: remove this logic after deprecating the ErrInternalAppError + require.ErrorIs(t, err, whisper.ErrInternalAppError) + lang := ctx.DetectedLanguage() require.Empty(t, lang) + _, err = ctx.NextSegment() + assert.ErrorIs(err, whisper.ErrModelClosed) + + // TODO: remove this logic after deprecating the ErrInternalAppError + assert.ErrorIs(err, whisper.ErrInternalAppError) + // Multiple closes should be safe err = ctx.Close() 
require.NoError(t, err) diff --git a/bindings/go/pkg/whisper/model_test.go b/bindings/go/pkg/whisper/model_test.go index d080d8fe301..bd152088060 100644 --- a/bindings/go/pkg/whisper/model_test.go +++ b/bindings/go/pkg/whisper/model_test.go @@ -49,6 +49,20 @@ func TestNewContext(t *testing.T) { assert.NotNil(context) } +func TestNewContext_ClosedModel(t *testing.T) { + assert := assert.New(t) + + model, err := whisper.New(ModelPath) + assert.NoError(err) + assert.NotNil(model) + assert.NoError(model.Close()) + + context, err := model.NewContext() + assert.ErrorIs(err, whisper.ErrInternalAppError) + assert.ErrorIs(err, whisper.ErrModelClosed) + assert.Nil(context) +} + func TestIsMultilingual(t *testing.T) { assert := assert.New(t) From 221e93a5d7267798553daa9ab1ff9ade694a1e68 Mon Sep 17 00:00:00 2001 From: ciricc Date: Sun, 14 Sep 2025 02:31:10 +0300 Subject: [PATCH 07/19] refactor(go bindings): fix backward compatibility for the interfaces --- bindings/go/pkg/whisper/context.go | 26 ++--- bindings/go/pkg/whisper/context_test.go | 23 ++-- bindings/go/pkg/whisper/interface.go | 149 ++++++++++-------------- bindings/go/pkg/whisper/model.go | 67 ++--------- bindings/go/pkg/whisper/params_wrap.go | 118 ++++++++++++------- 5 files changed, 164 insertions(+), 219 deletions(-) diff --git a/bindings/go/pkg/whisper/context.go b/bindings/go/pkg/whisper/context.go index a18bb95ed18..6f162877ff8 100644 --- a/bindings/go/pkg/whisper/context.go +++ b/bindings/go/pkg/whisper/context.go @@ -12,19 +12,17 @@ import ( ) type context struct { - n int - model *model - st WhisperState - params *parameters - Parameters + n int + model *model + st WhisperState + *Parameters } -func newContext(model *model, params *parameters) (Context, error) { +func NewContext(model *model, params *Parameters) (*context, error) { c := new(context) c.model = model - c.params = params - c.Parameters = c.params + c.Parameters = params // allocate isolated state per context ctx, err := 
model.whisperContext().unsafeContext() @@ -68,8 +66,8 @@ func (context *context) Close() error { } // Params returns a high-level parameters wrapper -func (context *context) Params() Parameters { - return context.params +func (context *context) Params() *Parameters { + return context.Parameters } // ResetTimings resets the model performance timing counters. @@ -87,7 +85,7 @@ func (context *context) PrintTimings() { // SystemInfo returns the system information func (context *context) SystemInfo() string { return fmt.Sprintf("system_info: n_threads = %d / %d | %s\n", - context.params.Threads(), + context.Parameters.Threads(), runtime.NumCPU(), whisper.Whisper_print_system_info(), ) @@ -129,10 +127,10 @@ func (context *context) Process( // If the callback is defined then we force on single_segment mode if callNewSegment != nil { - context.params.SetSingleSegment(true) + context.Parameters.SetSingleSegment(true) } - lowLevelParams, err := context.params.unsafeParams() + lowLevelParams, err := context.Parameters.unsafeParams() if err != nil { return err } @@ -242,7 +240,7 @@ func (context *context) SetLanguage(lang string) error { return ErrModelNotMultilingual } - return context.params.SetLanguage(lang) + return context.Parameters.SetLanguage(lang) } // Deprecated: Use Model.IsLANG() instead - token checking is model-specific. 
diff --git a/bindings/go/pkg/whisper/context_test.go b/bindings/go/pkg/whisper/context_test.go index 1cda693d9ec..70f941b2983 100644 --- a/bindings/go/pkg/whisper/context_test.go +++ b/bindings/go/pkg/whisper/context_test.go @@ -325,11 +325,11 @@ func TestContext_VAD_And_Diarization_Params_DoNotPanic(t *testing.T) { assert.Equal(uint16(1), dec.NumChans) data := buf.AsFloat32Buffer().Data - model, err := whisper.New(ModelPath) + model, err := whisper.NewModel(ModelPath) assert.NoError(err) defer func() { _ = model.Close() }() - ctx, err := model.NewContext() + ctx, err := whisper.NewContext(model, nil) assert.NoError(err) defer func() { _ = ctx.Close() }() @@ -358,12 +358,11 @@ func TestDiarization_TwoSpeakers_Boundaries(t *testing.T) { require.NoError(t, err) data := buf.AsFloat32Buffer().Data - model, err := whisper.New(ModelTinydiarizePath) + model, err := whisper.NewModel(ModelTinydiarizePath) require.NoError(t, err) defer func() { _ = model.Close() }() - // diarize ON with beam search and tighter segmentation - ctxOn, err := model.NewContextWithParams(whisper.SAMPLING_GREEDY, func(p whisper.Parameters) { + params, err := whisper.NewParameters(model, whisper.SAMPLING_GREEDY, func(p *whisper.Parameters) { p.SetDiarize(true) p.SetVAD(false) p.SetSplitOnWord(true) @@ -372,6 +371,10 @@ func TestDiarization_TwoSpeakers_Boundaries(t *testing.T) { p.SetTokenTimestamps(true) }) require.NoError(t, err) + + // diarize ON with beam search and tighter segmentation + ctxOn, err := whisper.NewContext(model, params) + require.NoError(t, err) defer func() { _ = ctxOn.Close() }() require.NoError(t, ctxOn.Process(data, nil, nil, nil)) @@ -389,15 +392,7 @@ func TestDiarization_TwoSpeakers_Boundaries(t *testing.T) { require.Greater(t, turnsOn, 0, "expected speaker turn boundaries with diarization enabled") // diarize OFF baseline with same segmentation and beam - ctxOff, err := model.NewContextWithParams(whisper.SAMPLING_BEAM_SEARCH, func(p whisper.Parameters) { - p.SetBeamSize(3) 
- p.SetDiarize(false) - p.SetVAD(false) - p.SetSplitOnWord(true) - p.SetMaxSegmentLength(40) - p.SetMaxTokensPerSegment(64) - p.SetTokenTimestamps(true) - }) + ctxOff, err := whisper.NewContext(model, params) require.NoError(t, err) defer func() { _ = ctxOff.Close() }() diff --git a/bindings/go/pkg/whisper/interface.go b/bindings/go/pkg/whisper/interface.go index 56974769cea..9a0cecfb810 100644 --- a/bindings/go/pkg/whisper/interface.go +++ b/bindings/go/pkg/whisper/interface.go @@ -46,34 +46,19 @@ type TokenIdentifier interface { IsText(Token) (bool, error) } -type ParamsConfigure func(Parameters) +type ParamsConfigure func(*Parameters) // Model is the interface to a whisper model. Create a new model with the // function whisper.New(string) +// Deprecated: Use NewModel implementation struct instead of relying on this interface type Model interface { io.Closer // Return a new speech-to-text context. // It may return an error is the model is not loaded or closed + // Deprecated: Use NewContext implementation struct instead of relying on this interface NewContext() (Context, error) - // Return a new parameters wrapper - // sampling is the sampling strategy to use - // configure is the function to configure the parameters - // It may return an error is the model is not loaded or closed - NewParams( - sampling SamplingStrategy, - configure ParamsConfigure, - ) (Parameters, error) - - // Return a new speech-to-text context configured via the provided function - // and sampling strategy. The context is backed by an isolated whisper_state. - // It may return an error is the model is not loaded or closed - NewContextWithParams( - sampling SamplingStrategy, - configure ParamsConfigure, - ) (Context, error) - // Return true if the model is multilingual. 
// It returns false if the model is not loaded or closed IsMultilingual() bool @@ -87,73 +72,65 @@ type Model interface { // Reset model performance timing counters ResetTimings() - - // WhisperContext returns the memory-safe whisper context wrapper of the raw whisper context - // You may need to use this to get the raw whisper context - // Ot check that the model's context is not closed - WhisperContext() WhisperContext - - // Token identifier - TokenIdentifier() TokenIdentifier } -// Parameters configures decode / processing behavior -type Parameters interface { - SetTranslate(bool) - SetSplitOnWord(bool) - SetThreads(uint) - SetOffset(time.Duration) - SetDuration(time.Duration) - SetTokenThreshold(float32) - SetTokenSumThreshold(float32) - SetMaxSegmentLength(uint) - SetTokenTimestamps(bool) - SetMaxTokensPerSegment(uint) - SetAudioCtx(uint) - SetMaxContext(n int) - SetBeamSize(n int) - SetEntropyThold(t float32) - SetInitialPrompt(prompt string) - - SetNoContext(bool) - SetPrintSpecial(bool) - SetPrintProgress(bool) - SetPrintRealtime(bool) - SetPrintTimestamps(bool) - - // Enable extra debug info (e.g., dump log_mel) - SetDebugMode(bool) - // Diarization (tinydiarize) - SetDiarize(bool) - - // Voice Activity Detection (VAD) - SetVAD(bool) - SetVADModelPath(string) - SetVADThreshold(float32) - SetVADMinSpeechMs(int) - SetVADMinSilenceMs(int) - SetVADMaxSpeechSec(float32) - SetVADSpeechPadMs(int) - SetVADSamplesOverlap(float32) - - // Set the temperature - SetTemperature(t float32) - - // Set the fallback temperature incrementation - // Pass -1.0 to disable this feature - SetTemperatureFallback(t float32) - - // Set the language - // If the model is not multilingual, this will return an error - SetLanguage(string) error - - // Set single segment mode - SetSingleSegment(bool) - - // Getter methods - Language() string - Threads() int -} +// // Parameters configures decode / processing behavior +// type Parameters interface { +// SetTranslate(bool) +// 
SetSplitOnWord(bool) +// SetThreads(uint) +// SetOffset(time.Duration) +// SetDuration(time.Duration) +// SetTokenThreshold(float32) +// SetTokenSumThreshold(float32) +// SetMaxSegmentLength(uint) +// SetTokenTimestamps(bool) +// SetMaxTokensPerSegment(uint) +// SetAudioCtx(uint) +// SetMaxContext(n int) +// SetBeamSize(n int) +// SetEntropyThold(t float32) +// SetInitialPrompt(prompt string) + +// SetNoContext(bool) +// SetPrintSpecial(bool) +// SetPrintProgress(bool) +// SetPrintRealtime(bool) +// SetPrintTimestamps(bool) + +// // Enable extra debug info (e.g., dump log_mel) +// SetDebugMode(bool) +// // Diarization (tinydiarize) +// SetDiarize(bool) + +// // Voice Activity Detection (VAD) +// SetVAD(bool) +// SetVADModelPath(string) +// SetVADThreshold(float32) +// SetVADMinSpeechMs(int) +// SetVADMinSilenceMs(int) +// SetVADMaxSpeechSec(float32) +// SetVADSpeechPadMs(int) +// SetVADSamplesOverlap(float32) + +// // Set the temperature +// SetTemperature(t float32) + +// // Set the fallback temperature incrementation +// // Pass -1.0 to disable this feature +// SetTemperatureFallback(t float32) + +// // Set the language +// // If the model is not multilingual, this will return an error +// SetLanguage(string) error + +// // Set single segment mode +// SetSingleSegment(bool) + +// // Getter methods +// Language() string +// Threads() int +// } // Context is the speech recognition context. type Context interface { @@ -217,9 +194,6 @@ type Context interface { // Deprecated: Use Params().Language() instead Language() string - // Return the model that the context is backed by - Model() Model - // Deprecated: Use Model().IsMultilingual() instead IsMultilingual() bool @@ -269,9 +243,6 @@ type Context interface { // SystemInfo returns the system information SystemInfo() string - - // Params returns a high-level parameters wrapper - preferred method - Params() Parameters } // Segment is the text result of a speech recognition. 
diff --git a/bindings/go/pkg/whisper/model.go b/bindings/go/pkg/whisper/model.go index 988ae15224a..d9aabdaf7a0 100644 --- a/bindings/go/pkg/whisper/model.go +++ b/bindings/go/pkg/whisper/model.go @@ -17,7 +17,15 @@ type model struct { // Make sure model adheres to the interface var _ Model = (*model)(nil) +// Deprecated: Use NewModel instead func New(path string) (Model, error) { + return NewModel(path) +} + +// NewModel creates a new model without initializing the context +func NewModel( + path string, +) (*model, error) { model := new(model) if _, err := os.Stat(path); err != nil { return nil, err @@ -92,73 +100,18 @@ func (model *model) Languages() []string { // Each context is backed by an isolated whisper_state for safe concurrent processing. func (model *model) NewContext() (Context, error) { // Create new context with default params - params, err := model.newParams(SAMPLING_GREEDY, nil) + params, err := NewParameters(model, SAMPLING_GREEDY, nil) if err != nil { return nil, err } // Return new context (now state-backed) - return newContext( - model, - params, - ) -} - -func (model *model) NewParams( - sampling SamplingStrategy, - configure ParamsConfigure, -) (Parameters, error) { - return model.newParams(sampling, nil) -} - -// NewContextWithParams creates a new speech-to-text context and allows -// callers to customize the decoding parameters before the state is used. -// The resulting Context is backed by an isolated whisper_state for safe -// concurrent processing. 
-func (model *model) NewContextWithParams( - sampling SamplingStrategy, - configure ParamsConfigure, -) (Context, error) { - params, err := model.newParams(sampling, configure) - if err != nil { - return nil, err - } - - return newContext( + return NewContext( model, params, ) } -func defaultParamsConfigure(params Parameters) { - params.SetTranslate(false) - params.SetPrintSpecial(false) - params.SetPrintProgress(false) - params.SetPrintRealtime(false) - params.SetPrintTimestamps(false) -} - -func (m *model) newParams( - sampling SamplingStrategy, - configure ParamsConfigure, -) (*parameters, error) { - ctx, err := m.ctx.unsafeContext() - if err != nil { - return nil, ErrModelClosed - } - - p := ctx.Whisper_full_default_params(whisper.SamplingStrategy(sampling)) - safeParams := newParameters(&p) - - defaultParamsConfigure(safeParams) - - if configure != nil { - configure(safeParams) - } - - return safeParams, nil -} - // PrintTimings prints the model performance timings to stdout. func (model *model) PrintTimings() { ctx, err := model.ctx.unsafeContext() diff --git a/bindings/go/pkg/whisper/params_wrap.go b/bindings/go/pkg/whisper/params_wrap.go index ad772d88b60..05a1f04bceb 100644 --- a/bindings/go/pkg/whisper/params_wrap.go +++ b/bindings/go/pkg/whisper/params_wrap.go @@ -1,62 +1,92 @@ package whisper import ( + "runtime" "time" // Bindings whisper "github.com/ggerganov/whisper.cpp/bindings/go" ) -// parameters is a high-level wrapper that implements the Parameters interface +// Parameters is a high-level wrapper that implements the Parameters interface // and delegates to the underlying low-level whisper.Params. 
-type parameters struct { +type Parameters struct { p *whisper.Params } -func newParameters(whisperParams *whisper.Params) *parameters { - return ¶meters{ - p: whisperParams, +func defaultParamsConfigure(params *Parameters) { + params.SetTranslate(false) + params.SetPrintSpecial(false) + params.SetPrintProgress(false) + params.SetPrintRealtime(false) + params.SetPrintTimestamps(false) + // Default behavior backward compatibility + params.SetThreads(uint(runtime.NumCPU())) + params.SetNoContext(true) +} + +func NewParameters( + model *model, + sampling SamplingStrategy, + configure ParamsConfigure, +) (*Parameters, error) { + ctx, err := model.ctx.unsafeContext() + if err != nil { + return nil, ErrModelClosed + } + + p := ctx.Whisper_full_default_params(whisper.SamplingStrategy(sampling)) + safeParams := &Parameters{ + p: &p, + } + + defaultParamsConfigure(safeParams) + + if configure != nil { + configure(safeParams) } + + return safeParams, nil } -func (w *parameters) SetTranslate(v bool) { w.p.SetTranslate(v) } -func (w *parameters) SetSplitOnWord(v bool) { w.p.SetSplitOnWord(v) } -func (w *parameters) SetThreads(v uint) { w.p.SetThreads(int(v)) } -func (w *parameters) SetOffset(d time.Duration) { w.p.SetOffset(int(d.Milliseconds())) } -func (w *parameters) SetDuration(d time.Duration) { w.p.SetDuration(int(d.Milliseconds())) } -func (w *parameters) SetTokenThreshold(t float32) { w.p.SetTokenThreshold(t) } -func (w *parameters) SetTokenSumThreshold(t float32) { w.p.SetTokenSumThreshold(t) } -func (w *parameters) SetMaxSegmentLength(n uint) { w.p.SetMaxSegmentLength(int(n)) } -func (w *parameters) SetTokenTimestamps(b bool) { w.p.SetTokenTimestamps(b) } -func (w *parameters) SetMaxTokensPerSegment(n uint) { w.p.SetMaxTokensPerSegment(int(n)) } -func (w *parameters) SetAudioCtx(n uint) { w.p.SetAudioCtx(int(n)) } -func (w *parameters) SetMaxContext(n int) { w.p.SetMaxContext(n) } -func (w *parameters) SetBeamSize(n int) { w.p.SetBeamSize(n) } -func (w *parameters) 
SetEntropyThold(t float32) { w.p.SetEntropyThold(t) } -func (w *parameters) SetInitialPrompt(prompt string) { w.p.SetInitialPrompt(prompt) } -func (w *parameters) SetTemperature(t float32) { w.p.SetTemperature(t) } -func (w *parameters) SetTemperatureFallback(t float32) { w.p.SetTemperatureFallback(t) } -func (w *parameters) SetNoContext(v bool) { w.p.SetNoContext(v) } -func (w *parameters) SetPrintSpecial(v bool) { w.p.SetPrintSpecial(v) } -func (w *parameters) SetPrintProgress(v bool) { w.p.SetPrintProgress(v) } -func (w *parameters) SetPrintRealtime(v bool) { w.p.SetPrintRealtime(v) } -func (w *parameters) SetPrintTimestamps(v bool) { w.p.SetPrintTimestamps(v) } -func (w *parameters) SetDebugMode(v bool) { w.p.SetDebugMode(v) } +func (w *Parameters) SetTranslate(v bool) { w.p.SetTranslate(v) } +func (w *Parameters) SetSplitOnWord(v bool) { w.p.SetSplitOnWord(v) } +func (w *Parameters) SetThreads(v uint) { w.p.SetThreads(int(v)) } +func (w *Parameters) SetOffset(d time.Duration) { w.p.SetOffset(int(d.Milliseconds())) } +func (w *Parameters) SetDuration(d time.Duration) { w.p.SetDuration(int(d.Milliseconds())) } +func (w *Parameters) SetTokenThreshold(t float32) { w.p.SetTokenThreshold(t) } +func (w *Parameters) SetTokenSumThreshold(t float32) { w.p.SetTokenSumThreshold(t) } +func (w *Parameters) SetMaxSegmentLength(n uint) { w.p.SetMaxSegmentLength(int(n)) } +func (w *Parameters) SetTokenTimestamps(b bool) { w.p.SetTokenTimestamps(b) } +func (w *Parameters) SetMaxTokensPerSegment(n uint) { w.p.SetMaxTokensPerSegment(int(n)) } +func (w *Parameters) SetAudioCtx(n uint) { w.p.SetAudioCtx(int(n)) } +func (w *Parameters) SetMaxContext(n int) { w.p.SetMaxContext(n) } +func (w *Parameters) SetBeamSize(n int) { w.p.SetBeamSize(n) } +func (w *Parameters) SetEntropyThold(t float32) { w.p.SetEntropyThold(t) } +func (w *Parameters) SetInitialPrompt(prompt string) { w.p.SetInitialPrompt(prompt) } +func (w *Parameters) SetTemperature(t float32) { w.p.SetTemperature(t) } +func 
(w *Parameters) SetTemperatureFallback(t float32) { w.p.SetTemperatureFallback(t) } +func (w *Parameters) SetNoContext(v bool) { w.p.SetNoContext(v) } +func (w *Parameters) SetPrintSpecial(v bool) { w.p.SetPrintSpecial(v) } +func (w *Parameters) SetPrintProgress(v bool) { w.p.SetPrintProgress(v) } +func (w *Parameters) SetPrintRealtime(v bool) { w.p.SetPrintRealtime(v) } +func (w *Parameters) SetPrintTimestamps(v bool) { w.p.SetPrintTimestamps(v) } +func (w *Parameters) SetDebugMode(v bool) { w.p.SetDebugMode(v) } // Diarization (tinydiarize) -func (w *parameters) SetDiarize(v bool) { w.p.SetDiarize(v) } +func (w *Parameters) SetDiarize(v bool) { w.p.SetDiarize(v) } // Voice Activity Detection (VAD) -func (w *parameters) SetVAD(v bool) { w.p.SetVAD(v) } -func (w *parameters) SetVADModelPath(p string) { w.p.SetVADModelPath(p) } -func (w *parameters) SetVADThreshold(t float32) { w.p.SetVADThreshold(t) } -func (w *parameters) SetVADMinSpeechMs(ms int) { w.p.SetVADMinSpeechMs(ms) } -func (w *parameters) SetVADMinSilenceMs(ms int) { w.p.SetVADMinSilenceMs(ms) } -func (w *parameters) SetVADMaxSpeechSec(s float32) { w.p.SetVADMaxSpeechSec(s) } -func (w *parameters) SetVADSpeechPadMs(ms int) { w.p.SetVADSpeechPadMs(ms) } -func (w *parameters) SetVADSamplesOverlap(sec float32) { w.p.SetVADSamplesOverlap(sec) } - -func (w *parameters) SetLanguage(lang string) error { +func (w *Parameters) SetVAD(v bool) { w.p.SetVAD(v) } +func (w *Parameters) SetVADModelPath(p string) { w.p.SetVADModelPath(p) } +func (w *Parameters) SetVADThreshold(t float32) { w.p.SetVADThreshold(t) } +func (w *Parameters) SetVADMinSpeechMs(ms int) { w.p.SetVADMinSpeechMs(ms) } +func (w *Parameters) SetVADMinSilenceMs(ms int) { w.p.SetVADMinSilenceMs(ms) } +func (w *Parameters) SetVADMaxSpeechSec(s float32) { w.p.SetVADMaxSpeechSec(s) } +func (w *Parameters) SetVADSpeechPadMs(ms int) { w.p.SetVADSpeechPadMs(ms) } +func (w *Parameters) SetVADSamplesOverlap(sec float32) { w.p.SetVADSamplesOverlap(sec) } + 
+func (w *Parameters) SetLanguage(lang string) error { if lang == "auto" { return w.p.SetLanguage(-1) } @@ -67,12 +97,12 @@ func (w *parameters) SetLanguage(lang string) error { return w.p.SetLanguage(id) } -func (w *parameters) SetSingleSegment(v bool) { +func (w *Parameters) SetSingleSegment(v bool) { w.p.SetSingleSegment(v) } // Getter methods for Parameters interface -func (w *parameters) Language() string { +func (w *Parameters) Language() string { id := w.p.Language() if id == -1 { return "auto" @@ -81,12 +111,10 @@ func (w *parameters) Language() string { return whisper.Whisper_lang_str(id) } -func (w *parameters) Threads() int { +func (w *Parameters) Threads() int { return w.p.Threads() } -func (w *parameters) unsafeParams() (*whisper.Params, error) { +func (w *Parameters) unsafeParams() (*whisper.Params, error) { return w.p, nil } - -var _ Parameters = ¶meters{} From b751ec1f5579fe443f2182d5e6c9bbdb3be5e419 Mon Sep 17 00:00:00 2001 From: ciricc Date: Sun, 14 Sep 2025 02:41:28 +0300 Subject: [PATCH 08/19] fix(go bindings): unit tests and exported interfaces --- bindings/go/pkg/whisper/consts.go | 6 +- bindings/go/pkg/whisper/context.go | 40 +++++---- bindings/go/pkg/whisper/context_test.go | 6 +- bindings/go/pkg/whisper/interface.go | 85 +------------------ bindings/go/pkg/whisper/model.go | 25 ++---- bindings/go/pkg/whisper/whisper_ctx.go | 16 +--- bindings/go/pkg/whisper/whisper_ctx_test.go | 18 ++-- bindings/go/pkg/whisper/whisper_state.go | 11 +-- bindings/go/pkg/whisper/whisper_state_test.go | 14 +-- 9 files changed, 64 insertions(+), 157 deletions(-) diff --git a/bindings/go/pkg/whisper/consts.go b/bindings/go/pkg/whisper/consts.go index c6d5ad388d9..eab223ce582 100644 --- a/bindings/go/pkg/whisper/consts.go +++ b/bindings/go/pkg/whisper/consts.go @@ -19,8 +19,12 @@ var ( ErrProcessingFailed = errors.New("processing failed") ErrUnsupportedLanguage = errors.New("unsupported language") ErrModelNotMultilingual = errors.New("model is not multilingual") - 
ErrUnableToCreateState = errors.New("unable to create state") ErrModelClosed = errors.Join(errors.New("model has been closed"), ErrInternalAppError) + + // Private errors + errParametersRequired = errors.New("parameters are required") + errModelRequired = errors.New("model is required") + errUnableToCreateState = errors.New("unable to create state") ) /////////////////////////////////////////////////////////////////////////////// diff --git a/bindings/go/pkg/whisper/context.go b/bindings/go/pkg/whisper/context.go index 6f162877ff8..ad0ce5c46ad 100644 --- a/bindings/go/pkg/whisper/context.go +++ b/bindings/go/pkg/whisper/context.go @@ -14,11 +14,19 @@ import ( type context struct { n int model *model - st WhisperState + st *whisperState *Parameters } func NewContext(model *model, params *Parameters) (*context, error) { + if model == nil { + return nil, errModelRequired + } + + if params == nil { + return nil, errParametersRequired + } + c := new(context) c.model = model @@ -32,7 +40,7 @@ func NewContext(model *model, params *Parameters) (*context, error) { st := ctx.Whisper_init_state() if st == nil { - return nil, ErrUnableToCreateState + return nil, errUnableToCreateState } c.st = newWhisperState(st) @@ -48,7 +56,7 @@ func (context *context) DetectedLanguage() string { return "" } - st, err := context.st.UnsafeState() + st, err := context.st.unsafeState() if err != nil { return "" } @@ -62,7 +70,7 @@ func (context *context) DetectedLanguage() string { // Close frees the whisper state and marks the context as closed. 
func (context *context) Close() error { - return context.st.Close() + return context.st.close() } // Params returns a high-level parameters wrapper @@ -100,7 +108,7 @@ func (context *context) WhisperLangAutoDetect(offset_ms int, n_threads int) ([]f return nil, err } - st, err := context.st.UnsafeState() + st, err := context.st.unsafeState() if err != nil { return nil, err } @@ -135,7 +143,7 @@ func (context *context) Process( return err } - st, err := context.st.UnsafeState() + st, err := context.st.unsafeState() if err != nil { return err } @@ -168,7 +176,7 @@ func (context *context) NextSegment() (Segment, error) { return Segment{}, err } - st, err := context.st.UnsafeState() + st, err := context.st.unsafeState() if err != nil { return Segment{}, err } @@ -190,48 +198,48 @@ func (context *context) IsMultilingual() bool { // Token helpers // Deprecated: Use Model.IsText() instead - token checking is model-specific. func (context *context) IsText(t Token) bool { - result, _ := context.model.TokenIdentifier().IsText(t) + result, _ := context.model.tokenIdentifier().IsText(t) return result } // Deprecated: Use Model.IsBEG() instead - token checking is model-specific. func (context *context) IsBEG(t Token) bool { - result, _ := context.model.TokenIdentifier().IsBEG(t) + result, _ := context.model.tokenIdentifier().IsBEG(t) return result } // Deprecated: Use Model.IsSOT() instead - token checking is model-specific. func (context *context) IsSOT(t Token) bool { - result, _ := context.model.TokenIdentifier().IsSOT(t) + result, _ := context.model.tokenIdentifier().IsSOT(t) return result } // Deprecated: Use Model.IsEOT() instead - token checking is model-specific. func (context *context) IsEOT(t Token) bool { - result, _ := context.model.TokenIdentifier().IsEOT(t) + result, _ := context.model.tokenIdentifier().IsEOT(t) return result } // Deprecated: Use Model.IsPREV() instead - token checking is model-specific. 
func (context *context) IsPREV(t Token) bool { - result, _ := context.model.TokenIdentifier().IsPREV(t) + result, _ := context.model.tokenIdentifier().IsPREV(t) return result } // Deprecated: Use Model.IsSOLM() instead - token checking is model-specific. func (context *context) IsSOLM(t Token) bool { - result, _ := context.model.TokenIdentifier().IsSOLM(t) + result, _ := context.model.tokenIdentifier().IsSOLM(t) return result } // Deprecated: Use Model.IsNOT() instead - token checking is model-specific. func (context *context) IsNOT(t Token) bool { - result, _ := context.model.TokenIdentifier().IsNOT(t) + result, _ := context.model.tokenIdentifier().IsNOT(t) return result } func (context *context) SetLanguage(lang string) error { - if context.model.whisperContext().IsClosed() { + if context.model.whisperContext().isClosed() { // TODO: remove this logic after deprecating the ErrInternalAppError return ErrModelClosed } @@ -245,7 +253,7 @@ func (context *context) SetLanguage(lang string) error { // Deprecated: Use Model.IsLANG() instead - token checking is model-specific. 
func (context *context) IsLANG(t Token, lang string) bool { - result, _ := context.model.TokenIdentifier().IsLANG(t, lang) + result, _ := context.model.tokenIdentifier().IsLANG(t, lang) return result } diff --git a/bindings/go/pkg/whisper/context_test.go b/bindings/go/pkg/whisper/context_test.go index 70f941b2983..02f918b38b3 100644 --- a/bindings/go/pkg/whisper/context_test.go +++ b/bindings/go/pkg/whisper/context_test.go @@ -329,7 +329,11 @@ func TestContext_VAD_And_Diarization_Params_DoNotPanic(t *testing.T) { assert.NoError(err) defer func() { _ = model.Close() }() - ctx, err := whisper.NewContext(model, nil) + params, err := whisper.NewParameters(model, whisper.SAMPLING_GREEDY, nil) + assert.NoError(err) + assert.NotNil(params) + + ctx, err := whisper.NewContext(model, params) assert.NoError(err) defer func() { _ = ctx.Close() }() diff --git a/bindings/go/pkg/whisper/interface.go b/bindings/go/pkg/whisper/interface.go index 9a0cecfb810..a4d7db8b086 100644 --- a/bindings/go/pkg/whisper/interface.go +++ b/bindings/go/pkg/whisper/interface.go @@ -20,32 +20,6 @@ type ProgressCallback func(int) // continue processing. It is called during the Process function type EncoderBeginCallback func() bool -type TokenIdentifier interface { - // Test for "begin" token - IsBEG(Token) (bool, error) - - // Test for "start of transcription" token - IsSOT(Token) (bool, error) - - // Test for "end of transcription" token - IsEOT(Token) (bool, error) - - // Test for "start of prev" token - IsPREV(Token) (bool, error) - - // Test for "start of lm" token - IsSOLM(Token) (bool, error) - - // Test for "no timestamps" token - IsNOT(Token) (bool, error) - - // Test for token associated with a specific language - IsLANG(Token, string) (bool, error) - - // Test for text token - IsText(Token) (bool, error) -} - type ParamsConfigure func(*Parameters) // Model is the interface to a whisper model. 
Create a new model with the @@ -74,65 +48,8 @@ type Model interface { ResetTimings() } -// // Parameters configures decode / processing behavior -// type Parameters interface { -// SetTranslate(bool) -// SetSplitOnWord(bool) -// SetThreads(uint) -// SetOffset(time.Duration) -// SetDuration(time.Duration) -// SetTokenThreshold(float32) -// SetTokenSumThreshold(float32) -// SetMaxSegmentLength(uint) -// SetTokenTimestamps(bool) -// SetMaxTokensPerSegment(uint) -// SetAudioCtx(uint) -// SetMaxContext(n int) -// SetBeamSize(n int) -// SetEntropyThold(t float32) -// SetInitialPrompt(prompt string) - -// SetNoContext(bool) -// SetPrintSpecial(bool) -// SetPrintProgress(bool) -// SetPrintRealtime(bool) -// SetPrintTimestamps(bool) - -// // Enable extra debug info (e.g., dump log_mel) -// SetDebugMode(bool) -// // Diarization (tinydiarize) -// SetDiarize(bool) - -// // Voice Activity Detection (VAD) -// SetVAD(bool) -// SetVADModelPath(string) -// SetVADThreshold(float32) -// SetVADMinSpeechMs(int) -// SetVADMinSilenceMs(int) -// SetVADMaxSpeechSec(float32) -// SetVADSpeechPadMs(int) -// SetVADSamplesOverlap(float32) - -// // Set the temperature -// SetTemperature(t float32) - -// // Set the fallback temperature incrementation -// // Pass -1.0 to disable this feature -// SetTemperatureFallback(t float32) - -// // Set the language -// // If the model is not multilingual, this will return an error -// SetLanguage(string) error - -// // Set single segment mode -// SetSingleSegment(bool) - -// // Getter methods -// Language() string -// Threads() int -// } - // Context is the speech recognition context. 
+// Deprecated: Use NewContext implementation struct instead of relying on this interface type Context interface { io.Closer diff --git a/bindings/go/pkg/whisper/model.go b/bindings/go/pkg/whisper/model.go index d9aabdaf7a0..7ce31432462 100644 --- a/bindings/go/pkg/whisper/model.go +++ b/bindings/go/pkg/whisper/model.go @@ -9,9 +9,9 @@ import ( ) type model struct { - path string - ctx *whisperCtx - tokenIdentifier *tokenIdentifier + path string + ctx *whisperCtx + tokId *tokenIdentifier } // Make sure model adheres to the interface @@ -33,7 +33,7 @@ func NewModel( return nil, ErrUnableToLoadModel } else { model.ctx = newWhisperCtx(ctx) - model.tokenIdentifier = newTokenIdentifier(model.ctx) + model.tokId = newTokenIdentifier(model.ctx) model.path = path } @@ -42,20 +42,13 @@ func NewModel( } func (model *model) Close() error { - return model.ctx.Close() -} - -func (model *model) WhisperContext() WhisperContext { - return model.ctx + return model.ctx.close() } func (model *model) whisperContext() *whisperCtx { return model.ctx } -/////////////////////////////////////////////////////////////////////////////// -// STRINGIFY - func (model *model) String() string { str := "" } -/////////////////////////////////////////////////////////////////////////////// -// PUBLIC METHODS - // Return true if model is multilingual (language and translation options are supported) func (model *model) IsMultilingual() bool { ctx, err := model.ctx.unsafeContext() @@ -132,7 +122,6 @@ func (model *model) ResetTimings() { ctx.Whisper_reset_timings() } -// WhisperContext returns the low-level whisper context, or error if the model is closed. 
-func (model *model) TokenIdentifier() TokenIdentifier { - return model.tokenIdentifier +func (model *model) tokenIdentifier() *tokenIdentifier { + return model.tokId } diff --git a/bindings/go/pkg/whisper/whisper_ctx.go b/bindings/go/pkg/whisper/whisper_ctx.go index bdc166cf8b0..d84e8460579 100644 --- a/bindings/go/pkg/whisper/whisper_ctx.go +++ b/bindings/go/pkg/whisper/whisper_ctx.go @@ -2,14 +2,6 @@ package whisper import whisper "github.com/ggerganov/whisper.cpp/bindings/go" -type WhisperContext interface { - // Close closes the whisper context - Close() error - - // IsClosed returns true if the whisper context is closed - IsClosed() bool -} - type whisperCtx struct { ctx *whisper.Context } @@ -20,7 +12,7 @@ func newWhisperCtx(ctx *whisper.Context) *whisperCtx { } } -func (ctx *whisperCtx) Close() error { +func (ctx *whisperCtx) close() error { if ctx.ctx == nil { return nil } @@ -31,16 +23,14 @@ func (ctx *whisperCtx) Close() error { return nil } -func (ctx *whisperCtx) IsClosed() bool { +func (ctx *whisperCtx) isClosed() bool { return ctx.ctx == nil } func (ctx *whisperCtx) unsafeContext() (*whisper.Context, error) { - if ctx.IsClosed() { + if ctx.isClosed() { return nil, ErrModelClosed } return ctx.ctx, nil } - -var _ WhisperContext = (*whisperCtx)(nil) diff --git a/bindings/go/pkg/whisper/whisper_ctx_test.go b/bindings/go/pkg/whisper/whisper_ctx_test.go index 68a094bd26b..0a3679db29b 100644 --- a/bindings/go/pkg/whisper/whisper_ctx_test.go +++ b/bindings/go/pkg/whisper/whisper_ctx_test.go @@ -14,15 +14,15 @@ const testModelPathCtx = "../../models/ggml-small.en.bin" func TestWhisperCtx_NilWrapper(t *testing.T) { wctx := newWhisperCtx(nil) - assert.True(t, wctx.IsClosed()) + assert.True(t, wctx.isClosed()) raw, err := wctx.unsafeContext() assert.Nil(t, raw) require.ErrorIs(t, err, ErrModelClosed) - require.NoError(t, wctx.Close()) + require.NoError(t, wctx.close()) // idempotent - require.NoError(t, wctx.Close()) + require.NoError(t, wctx.close()) } func 
TestWhisperCtx_Lifecycle(t *testing.T) { @@ -34,22 +34,22 @@ func TestWhisperCtx_Lifecycle(t *testing.T) { require.NotNil(t, raw) wctx := newWhisperCtx(raw) - assert.False(t, wctx.IsClosed()) + assert.False(t, wctx.isClosed()) got, err := wctx.unsafeContext() require.NoError(t, err) require.NotNil(t, got) // close frees underlying ctx and marks closed - require.NoError(t, wctx.Close()) - assert.True(t, wctx.IsClosed()) + require.NoError(t, wctx.close()) + assert.True(t, wctx.isClosed()) got, err = wctx.unsafeContext() assert.Nil(t, got) require.ErrorIs(t, err, ErrModelClosed) // idempotent - require.NoError(t, wctx.Close()) + require.NoError(t, wctx.close()) // no further free; raw already freed by wctx.Close() } @@ -75,11 +75,11 @@ func TestWhisperCtx_FromModelLifecycle(t *testing.T) { // Close model should close underlying context require.NoError(t, model.Close()) - assert.True(t, wc.IsClosed()) + assert.True(t, wc.isClosed()) raw, err = wc.unsafeContext() assert.Nil(t, raw) require.ErrorIs(t, err, ErrModelClosed) // Idempotent close on wrapper - require.NoError(t, wc.Close()) + require.NoError(t, wc.close()) } diff --git a/bindings/go/pkg/whisper/whisper_state.go b/bindings/go/pkg/whisper/whisper_state.go index 24ca4ed44d7..cee48948731 100644 --- a/bindings/go/pkg/whisper/whisper_state.go +++ b/bindings/go/pkg/whisper/whisper_state.go @@ -2,22 +2,17 @@ package whisper import whisper "github.com/ggerganov/whisper.cpp/bindings/go" -type WhisperState interface { - Close() error - UnsafeState() (*whisper.State, error) -} - type whisperState struct { state *whisper.State } -func newWhisperState(state *whisper.State) WhisperState { +func newWhisperState(state *whisper.State) *whisperState { return &whisperState{ state: state, } } -func (s *whisperState) Close() error { +func (s *whisperState) close() error { if s.state == nil { return nil } @@ -28,7 +23,7 @@ func (s *whisperState) Close() error { return nil } -func (s *whisperState) UnsafeState() (*whisper.State, 
error) { +func (s *whisperState) unsafeState() (*whisper.State, error) { if s.state == nil { return nil, ErrModelClosed } diff --git a/bindings/go/pkg/whisper/whisper_state_test.go b/bindings/go/pkg/whisper/whisper_state_test.go index 37d828acb37..2c4c6dd305e 100644 --- a/bindings/go/pkg/whisper/whisper_state_test.go +++ b/bindings/go/pkg/whisper/whisper_state_test.go @@ -14,13 +14,13 @@ const testModelPathState = "../../models/ggml-small.en.bin" func TestWhisperState_NilWrapper(t *testing.T) { ws := newWhisperState(nil) - state, err := ws.UnsafeState() + state, err := ws.unsafeState() assert.Nil(t, state) require.ErrorIs(t, err, ErrModelClosed) - require.NoError(t, ws.Close()) + require.NoError(t, ws.close()) // idempotent - require.NoError(t, ws.Close()) + require.NoError(t, ws.close()) } func TestWhisperState_Lifecycle(t *testing.T) { @@ -37,17 +37,17 @@ func TestWhisperState_Lifecycle(t *testing.T) { ws := newWhisperState(state) - got, err := ws.UnsafeState() + got, err := ws.unsafeState() require.NoError(t, err) require.NotNil(t, got) // close frees underlying state and marks closed - require.NoError(t, ws.Close()) + require.NoError(t, ws.close()) - got, err = ws.UnsafeState() + got, err = ws.unsafeState() assert.Nil(t, got) require.ErrorIs(t, err, ErrModelClosed) // idempotent - require.NoError(t, ws.Close()) + require.NoError(t, ws.close()) } From 99722972d6b2a4ab5ef13e3ea24df1e8505fb22c Mon Sep 17 00:00:00 2001 From: ciricc Date: Sun, 14 Sep 2025 02:47:51 +0300 Subject: [PATCH 09/19] refactor(go bindings): remove public Parameters from context --- bindings/go/pkg/whisper/context.go | 127 ++++++++++++++++++++++++++--- 1 file changed, 116 insertions(+), 11 deletions(-) diff --git a/bindings/go/pkg/whisper/context.go b/bindings/go/pkg/whisper/context.go index ad0ce5c46ad..8a3a5ec101a 100644 --- a/bindings/go/pkg/whisper/context.go +++ b/bindings/go/pkg/whisper/context.go @@ -12,10 +12,10 @@ import ( ) type context struct { - n int - model *model - st 
*whisperState - *Parameters + n int + model *model + st *whisperState + params *Parameters } func NewContext(model *model, params *Parameters) (*context, error) { @@ -29,8 +29,7 @@ func NewContext(model *model, params *Parameters) (*context, error) { c := new(context) c.model = model - - c.Parameters = params + c.params = params // allocate isolated state per context ctx, err := model.whisperContext().unsafeContext() @@ -75,7 +74,7 @@ func (context *context) Close() error { // Params returns a high-level parameters wrapper func (context *context) Params() *Parameters { - return context.Parameters + return context.params } // ResetTimings resets the model performance timing counters. @@ -93,7 +92,7 @@ func (context *context) PrintTimings() { // SystemInfo returns the system information func (context *context) SystemInfo() string { return fmt.Sprintf("system_info: n_threads = %d / %d | %s\n", - context.Parameters.Threads(), + context.params.Threads(), runtime.NumCPU(), whisper.Whisper_print_system_info(), ) @@ -135,10 +134,10 @@ func (context *context) Process( // If the callback is defined then we force on single_segment mode if callNewSegment != nil { - context.Parameters.SetSingleSegment(true) + context.params.SetSingleSegment(true) } - lowLevelParams, err := context.Parameters.unsafeParams() + lowLevelParams, err := context.params.unsafeParams() if err != nil { return err } @@ -248,7 +247,7 @@ func (context *context) SetLanguage(lang string) error { return ErrModelNotMultilingual } - return context.Parameters.SetLanguage(lang) + return context.params.SetLanguage(lang) } // Deprecated: Use Model.IsLANG() instead - token checking is model-specific. 
@@ -290,4 +289,110 @@ func (context *context) Model() Model { return context.model } +// Deprecated: Use Params().Language() instead +func (context *context) Language() string { + return context.params.Language() +} + +// Deprecated: Use Params().SetAudioCtx() instead +func (context *context) SetAudioCtx(n uint) { + context.params.SetAudioCtx(n) +} + +// SetBeamSize implements Context. +// Deprecated: Use Params().SetBeamSize() instead +func (context *context) SetBeamSize(v int) { + context.params.SetBeamSize(v) +} + +// SetDuration implements Context. +// Deprecated: Use Params().SetDuration() instead +func (context *context) SetDuration(v time.Duration) { + context.params.SetDuration(v) +} + +// SetEntropyThold implements Context. +// Deprecated: Use Params().SetEntropyThold() instead +func (context *context) SetEntropyThold(v float32) { + context.params.SetEntropyThold(v) +} + +// SetInitialPrompt implements Context. +// Deprecated: Use Params().SetInitialPrompt() instead +func (context *context) SetInitialPrompt(v string) { + context.params.SetInitialPrompt(v) +} + +// SetMaxContext implements Context. +// Deprecated: Use Params().SetMaxContext() instead +func (context *context) SetMaxContext(v int) { + context.params.SetMaxContext(v) +} + +// SetMaxSegmentLength implements Context. +// Deprecated: Use Params().SetMaxSegmentLength() instead +func (context *context) SetMaxSegmentLength(v uint) { + context.params.SetMaxSegmentLength(v) +} + +// SetMaxTokensPerSegment implements Context. +// Deprecated: Use Params().SetMaxTokensPerSegment() instead +func (context *context) SetMaxTokensPerSegment(v uint) { + context.params.SetMaxTokensPerSegment(v) +} + +// SetOffset implements Context. +// Deprecated: Use Params().SetOffset() instead +func (context *context) SetOffset(v time.Duration) { + context.params.SetOffset(v) +} + +// SetSplitOnWord implements Context. 
+// Deprecated: Use Params().SetSplitOnWord() instead +func (context *context) SetSplitOnWord(v bool) { + context.params.SetSplitOnWord(v) +} + +// SetTemperature implements Context. +// Deprecated: Use Params().SetTemperature() instead +func (context *context) SetTemperature(v float32) { + context.params.SetTemperature(v) +} + +// SetTemperatureFallback implements Context. +// Deprecated: Use Params().SetTemperatureFallback() instead +func (context *context) SetTemperatureFallback(v float32) { + context.params.SetTemperatureFallback(v) +} + +// SetThreads implements Context. +// Deprecated: Use Params().SetThreads() instead +func (context *context) SetThreads(v uint) { + context.params.SetThreads(v) +} + +// SetTokenSumThreshold implements Context. +// Deprecated: Use Params().SetTokenSumThreshold() instead +func (context *context) SetTokenSumThreshold(v float32) { + context.params.SetTokenSumThreshold(v) +} + +// SetTokenThreshold implements Context. +// Deprecated: Use Params().SetTokenThreshold() instead +func (context *context) SetTokenThreshold(v float32) { + context.params.SetTokenThreshold(v) +} + +// SetTokenTimestamps implements Context. +// Deprecated: Use Params().SetTokenTimestamps() instead +func (context *context) SetTokenTimestamps(v bool) { + context.params.SetTokenTimestamps(v) +} + +// SetTranslate implements Context. 
+// Deprecated: Use Params().SetTranslate() instead +func (context *context) SetTranslate(v bool) { + context.params.SetTranslate(v) +} + var _ Context = (*context)(nil) From f943c8f1d4be6c01a62dbccc71390518704398fd Mon Sep 17 00:00:00 2001 From: ciricc Date: Sun, 14 Sep 2025 02:53:13 +0300 Subject: [PATCH 10/19] refactor(go bindings): remove public Model() method --- bindings/go/pkg/whisper/context.go | 4 ---- 1 file changed, 4 deletions(-) diff --git a/bindings/go/pkg/whisper/context.go b/bindings/go/pkg/whisper/context.go index 8a3a5ec101a..866ccff793d 100644 --- a/bindings/go/pkg/whisper/context.go +++ b/bindings/go/pkg/whisper/context.go @@ -285,10 +285,6 @@ func toTokensFromState(ctx *whisper.Context, st *whisper.State, n int) []Token { return result } -func (context *context) Model() Model { - return context.model -} - // Deprecated: Use Params().Language() instead func (context *context) Language() string { return context.params.Language() From 125ea6122be7da8c26a6a09c067a1d14eab782d1 Mon Sep 17 00:00:00 2001 From: ciricc Date: Sun, 14 Sep 2025 02:59:42 +0300 Subject: [PATCH 11/19] refactor(go bindings): remove new methods from the Model interface --- bindings/go/pkg/whisper/interface.go | 7 ------- 1 file changed, 7 deletions(-) diff --git a/bindings/go/pkg/whisper/interface.go b/bindings/go/pkg/whisper/interface.go index a4d7db8b086..4bd0262be06 100644 --- a/bindings/go/pkg/whisper/interface.go +++ b/bindings/go/pkg/whisper/interface.go @@ -39,13 +39,6 @@ type Model interface { // Return all languages supported. Languages() []string - - // Model performance timing methods - // Print model performance timings to stdout - PrintTimings() - - // Reset model performance timing counters - ResetTimings() } // Context is the speech recognition context. 
From 8f9ad60fcaaca63d37aa6f052884c6fb3fab1ab0 Mon Sep 17 00:00:00 2001 From: ciricc Date: Sun, 14 Sep 2025 04:39:26 +0300 Subject: [PATCH 12/19] refactor(go bindings): make ModelContext and StatefulContext public --- bindings/go/pkg/whisper/context.go | 97 +++++++++++---------- bindings/go/pkg/whisper/context_test.go | 10 +-- bindings/go/pkg/whisper/model.go | 56 ++++++------ bindings/go/pkg/whisper/params_wrap.go | 4 +- bindings/go/pkg/whisper/token_identifier.go | 20 ++--- bindings/go/pkg/whisper/whisper_ctx.go | 12 +-- bindings/go/pkg/whisper/whisper_ctx_test.go | 18 ++-- 7 files changed, 109 insertions(+), 108 deletions(-) diff --git a/bindings/go/pkg/whisper/context.go b/bindings/go/pkg/whisper/context.go index 866ccff793d..4b178f9646b 100644 --- a/bindings/go/pkg/whisper/context.go +++ b/bindings/go/pkg/whisper/context.go @@ -11,14 +11,15 @@ import ( whisper "github.com/ggerganov/whisper.cpp/bindings/go" ) -type context struct { +type StatefulContext struct { n int - model *model + model *ModelContext st *whisperState params *Parameters } -func NewContext(model *model, params *Parameters) (*context, error) { +// NewStatefulContext creates a new stateful context +func NewStatefulContext(model *ModelContext, params *Parameters) (*StatefulContext, error) { if model == nil { return nil, errModelRequired } @@ -27,12 +28,12 @@ func NewContext(model *model, params *Parameters) (*context, error) { return nil, errParametersRequired } - c := new(context) + c := new(StatefulContext) c.model = model c.params = params // allocate isolated state per context - ctx, err := model.whisperContext().unsafeContext() + ctx, err := model.ctxAccessor().context() if err != nil { return nil, err } @@ -49,8 +50,8 @@ func NewContext(model *model, params *Parameters) (*context, error) { } // DetectedLanguage returns the detected language for the current context data -func (context *context) DetectedLanguage() string { - ctx, err := context.model.whisperContext().unsafeContext() +func 
(context *StatefulContext) DetectedLanguage() string { + ctx, err := context.model.ctxAccessor().context() if err != nil { return "" } @@ -68,29 +69,29 @@ func (context *context) DetectedLanguage() string { } // Close frees the whisper state and marks the context as closed. -func (context *context) Close() error { +func (context *StatefulContext) Close() error { return context.st.close() } // Params returns a high-level parameters wrapper -func (context *context) Params() *Parameters { +func (context *StatefulContext) Params() *Parameters { return context.params } // ResetTimings resets the model performance timing counters. // Deprecated: Use Model.ResetTimings() instead - these are model-level performance metrics. -func (context *context) ResetTimings() { +func (context *StatefulContext) ResetTimings() { context.model.ResetTimings() } // PrintTimings prints the model performance timings to stdout. // Deprecated: Use Model.PrintTimings() instead - these are model-level performance metrics. -func (context *context) PrintTimings() { +func (context *StatefulContext) PrintTimings() { context.model.PrintTimings() } // SystemInfo returns the system information -func (context *context) SystemInfo() string { +func (context *StatefulContext) SystemInfo() string { return fmt.Sprintf("system_info: n_threads = %d / %d | %s\n", context.params.Threads(), runtime.NumCPU(), @@ -101,8 +102,8 @@ func (context *context) SystemInfo() string { // Use mel data at offset_ms to try and auto-detect the spoken language // Make sure to call whisper_pcm_to_mel() or whisper_set_mel() first. // Returns the probabilities of all languages for this context's state. 
-func (context *context) WhisperLangAutoDetect(offset_ms int, n_threads int) ([]float32, error) { - ctx, err := context.model.whisperContext().unsafeContext() +func (context *StatefulContext) WhisperLangAutoDetect(offset_ms int, n_threads int) ([]float32, error) { + ctx, err := context.model.ctxAccessor().context() if err != nil { return nil, err } @@ -121,13 +122,13 @@ func (context *context) WhisperLangAutoDetect(offset_ms int, n_threads int) ([]f } // Process new sample data and return any errors -func (context *context) Process( +func (context *StatefulContext) Process( data []float32, callEncoderBegin EncoderBeginCallback, callNewSegment SegmentCallback, callProgress ProgressCallback, ) error { - ctx, err := context.model.whisperContext().unsafeContext() + ctx, err := context.model.ctxAccessor().context() if err != nil { return err } @@ -169,8 +170,8 @@ func (context *context) Process( } // NextSegment returns the next segment from the context buffer -func (context *context) NextSegment() (Segment, error) { - ctx, err := context.model.whisperContext().unsafeContext() +func (context *StatefulContext) NextSegment() (Segment, error) { + ctx, err := context.model.ctxAccessor().context() if err != nil { return Segment{}, err } @@ -190,55 +191,55 @@ func (context *context) NextSegment() (Segment, error) { return result, nil } -func (context *context) IsMultilingual() bool { +func (context *StatefulContext) IsMultilingual() bool { return context.model.IsMultilingual() } // Token helpers // Deprecated: Use Model.IsText() instead - token checking is model-specific. -func (context *context) IsText(t Token) bool { +func (context *StatefulContext) IsText(t Token) bool { result, _ := context.model.tokenIdentifier().IsText(t) return result } // Deprecated: Use Model.IsBEG() instead - token checking is model-specific. 
-func (context *context) IsBEG(t Token) bool { +func (context *StatefulContext) IsBEG(t Token) bool { result, _ := context.model.tokenIdentifier().IsBEG(t) return result } // Deprecated: Use Model.IsSOT() instead - token checking is model-specific. -func (context *context) IsSOT(t Token) bool { +func (context *StatefulContext) IsSOT(t Token) bool { result, _ := context.model.tokenIdentifier().IsSOT(t) return result } // Deprecated: Use Model.IsEOT() instead - token checking is model-specific. -func (context *context) IsEOT(t Token) bool { +func (context *StatefulContext) IsEOT(t Token) bool { result, _ := context.model.tokenIdentifier().IsEOT(t) return result } // Deprecated: Use Model.IsPREV() instead - token checking is model-specific. -func (context *context) IsPREV(t Token) bool { +func (context *StatefulContext) IsPREV(t Token) bool { result, _ := context.model.tokenIdentifier().IsPREV(t) return result } // Deprecated: Use Model.IsSOLM() instead - token checking is model-specific. -func (context *context) IsSOLM(t Token) bool { +func (context *StatefulContext) IsSOLM(t Token) bool { result, _ := context.model.tokenIdentifier().IsSOLM(t) return result } // Deprecated: Use Model.IsNOT() instead - token checking is model-specific. -func (context *context) IsNOT(t Token) bool { +func (context *StatefulContext) IsNOT(t Token) bool { result, _ := context.model.tokenIdentifier().IsNOT(t) return result } -func (context *context) SetLanguage(lang string) error { - if context.model.whisperContext().isClosed() { +func (context *StatefulContext) SetLanguage(lang string) error { + if context.model.ctxAccessor().isClosed() { // TODO: remove this logic after deprecating the ErrInternalAppError return ErrModelClosed } @@ -251,7 +252,7 @@ func (context *context) SetLanguage(lang string) error { } // Deprecated: Use Model.IsLANG() instead - token checking is model-specific. 
-func (context *context) IsLANG(t Token, lang string) bool { +func (context *StatefulContext) IsLANG(t Token, lang string) bool { result, _ := context.model.tokenIdentifier().IsLANG(t, lang) return result } @@ -286,109 +287,109 @@ func toTokensFromState(ctx *whisper.Context, st *whisper.State, n int) []Token { } // Deprecated: Use Params().Language() instead -func (context *context) Language() string { +func (context *StatefulContext) Language() string { return context.params.Language() } // Deprecated: Use Params().SetAudioCtx() instead -func (context *context) SetAudioCtx(n uint) { +func (context *StatefulContext) SetAudioCtx(n uint) { context.params.SetAudioCtx(n) } // SetBeamSize implements Context. // Deprecated: Use Params().SetBeamSize() instead -func (context *context) SetBeamSize(v int) { +func (context *StatefulContext) SetBeamSize(v int) { context.params.SetBeamSize(v) } // SetDuration implements Context. // Deprecated: Use Params().SetDuration() instead -func (context *context) SetDuration(v time.Duration) { +func (context *StatefulContext) SetDuration(v time.Duration) { context.params.SetDuration(v) } // SetEntropyThold implements Context. // Deprecated: Use Params().SetEntropyThold() instead -func (context *context) SetEntropyThold(v float32) { +func (context *StatefulContext) SetEntropyThold(v float32) { context.params.SetEntropyThold(v) } // SetInitialPrompt implements Context. // Deprecated: Use Params().SetInitialPrompt() instead -func (context *context) SetInitialPrompt(v string) { +func (context *StatefulContext) SetInitialPrompt(v string) { context.params.SetInitialPrompt(v) } // SetMaxContext implements Context. // Deprecated: Use Params().SetMaxContext() instead -func (context *context) SetMaxContext(v int) { +func (context *StatefulContext) SetMaxContext(v int) { context.params.SetMaxContext(v) } // SetMaxSegmentLength implements Context. 
// Deprecated: Use Params().SetMaxSegmentLength() instead -func (context *context) SetMaxSegmentLength(v uint) { +func (context *StatefulContext) SetMaxSegmentLength(v uint) { context.params.SetMaxSegmentLength(v) } // SetMaxTokensPerSegment implements Context. // Deprecated: Use Params().SetMaxTokensPerSegment() instead -func (context *context) SetMaxTokensPerSegment(v uint) { +func (context *StatefulContext) SetMaxTokensPerSegment(v uint) { context.params.SetMaxTokensPerSegment(v) } // SetOffset implements Context. // Deprecated: Use Params().SetOffset() instead -func (context *context) SetOffset(v time.Duration) { +func (context *StatefulContext) SetOffset(v time.Duration) { context.params.SetOffset(v) } // SetSplitOnWord implements Context. // Deprecated: Use Params().SetSplitOnWord() instead -func (context *context) SetSplitOnWord(v bool) { +func (context *StatefulContext) SetSplitOnWord(v bool) { context.params.SetSplitOnWord(v) } // SetTemperature implements Context. // Deprecated: Use Params().SetTemperature() instead -func (context *context) SetTemperature(v float32) { +func (context *StatefulContext) SetTemperature(v float32) { context.params.SetTemperature(v) } // SetTemperatureFallback implements Context. // Deprecated: Use Params().SetTemperatureFallback() instead -func (context *context) SetTemperatureFallback(v float32) { +func (context *StatefulContext) SetTemperatureFallback(v float32) { context.params.SetTemperatureFallback(v) } // SetThreads implements Context. // Deprecated: Use Params().SetThreads() instead -func (context *context) SetThreads(v uint) { +func (context *StatefulContext) SetThreads(v uint) { context.params.SetThreads(v) } // SetTokenSumThreshold implements Context. // Deprecated: Use Params().SetTokenSumThreshold() instead -func (context *context) SetTokenSumThreshold(v float32) { +func (context *StatefulContext) SetTokenSumThreshold(v float32) { context.params.SetTokenSumThreshold(v) } // SetTokenThreshold implements Context. 
// Deprecated: Use Params().SetTokenThreshold() instead -func (context *context) SetTokenThreshold(v float32) { +func (context *StatefulContext) SetTokenThreshold(v float32) { context.params.SetTokenThreshold(v) } // SetTokenTimestamps implements Context. // Deprecated: Use Params().SetTokenTimestamps() instead -func (context *context) SetTokenTimestamps(v bool) { +func (context *StatefulContext) SetTokenTimestamps(v bool) { context.params.SetTokenTimestamps(v) } // SetTranslate implements Context. // Deprecated: Use Params().SetTranslate() instead -func (context *context) SetTranslate(v bool) { +func (context *StatefulContext) SetTranslate(v bool) { context.params.SetTranslate(v) } -var _ Context = (*context)(nil) +var _ Context = (*StatefulContext)(nil) diff --git a/bindings/go/pkg/whisper/context_test.go b/bindings/go/pkg/whisper/context_test.go index 02f918b38b3..a56d355ae2a 100644 --- a/bindings/go/pkg/whisper/context_test.go +++ b/bindings/go/pkg/whisper/context_test.go @@ -325,7 +325,7 @@ func TestContext_VAD_And_Diarization_Params_DoNotPanic(t *testing.T) { assert.Equal(uint16(1), dec.NumChans) data := buf.AsFloat32Buffer().Data - model, err := whisper.NewModel(ModelPath) + model, err := whisper.NewModelContext(ModelPath) assert.NoError(err) defer func() { _ = model.Close() }() @@ -333,7 +333,7 @@ func TestContext_VAD_And_Diarization_Params_DoNotPanic(t *testing.T) { assert.NoError(err) assert.NotNil(params) - ctx, err := whisper.NewContext(model, params) + ctx, err := whisper.NewStatefulContext(model, params) assert.NoError(err) defer func() { _ = ctx.Close() }() @@ -362,7 +362,7 @@ func TestDiarization_TwoSpeakers_Boundaries(t *testing.T) { require.NoError(t, err) data := buf.AsFloat32Buffer().Data - model, err := whisper.NewModel(ModelTinydiarizePath) + model, err := whisper.NewModelContext(ModelTinydiarizePath) require.NoError(t, err) defer func() { _ = model.Close() }() @@ -377,7 +377,7 @@ func TestDiarization_TwoSpeakers_Boundaries(t *testing.T) { 
require.NoError(t, err) // diarize ON with beam search and tighter segmentation - ctxOn, err := whisper.NewContext(model, params) + ctxOn, err := whisper.NewStatefulContext(model, params) require.NoError(t, err) defer func() { _ = ctxOn.Close() }() @@ -396,7 +396,7 @@ func TestDiarization_TwoSpeakers_Boundaries(t *testing.T) { require.Greater(t, turnsOn, 0, "expected speaker turn boundaries with diarization enabled") // diarize OFF baseline with same segmentation and beam - ctxOff, err := whisper.NewContext(model, params) + ctxOff, err := whisper.NewStatefulContext(model, params) require.NoError(t, err) defer func() { _ = ctxOff.Close() }() diff --git a/bindings/go/pkg/whisper/model.go b/bindings/go/pkg/whisper/model.go index 7ce31432462..a16d6a13476 100644 --- a/bindings/go/pkg/whisper/model.go +++ b/bindings/go/pkg/whisper/model.go @@ -8,32 +8,32 @@ import ( whisper "github.com/ggerganov/whisper.cpp/bindings/go" ) -type model struct { +type ModelContext struct { path string - ctx *whisperCtx + ca *ctxAccessor tokId *tokenIdentifier } // Make sure model adheres to the interface -var _ Model = (*model)(nil) +var _ Model = (*ModelContext)(nil) -// Deprecated: Use NewModel instead +// Deprecated: Use NewModelContext instead func New(path string) (Model, error) { - return NewModel(path) + return NewModelContext(path) } -// NewModel creates a new model without initializing the context -func NewModel( +// NewModelContext creates a new model context +func NewModelContext( path string, -) (*model, error) { - model := new(model) +) (*ModelContext, error) { + model := new(ModelContext) if _, err := os.Stat(path); err != nil { return nil, err } else if ctx := whisper.Whisper_init(path); ctx == nil { return nil, ErrUnableToLoadModel } else { - model.ctx = newWhisperCtx(ctx) - model.tokId = newTokenIdentifier(model.ctx) + model.ca = newCtxAccessor(ctx) + model.tokId = newTokenIdentifier(model.ca) model.path = path } @@ -41,17 +41,17 @@ func NewModel( return model, nil } -func 
(model *model) Close() error { - return model.ctx.close() +func (model *ModelContext) Close() error { + return model.ca.close() } -func (model *model) whisperContext() *whisperCtx { - return model.ctx +func (model *ModelContext) ctxAccessor() *ctxAccessor { + return model.ca } -func (model *model) String() string { +func (model *ModelContext) String() string { str := " Date: Sat, 20 Sep 2025 15:56:48 +0300 Subject: [PATCH 13/19] feat: split context into stateful/stateless; add concurrency gate, added model context params, benchmarks, silence mode for the ggml --- bindings/go/.gitignore | 1 + bindings/go/Makefile | 7 + bindings/go/go.mod | 2 +- bindings/go/pkg/whisper/concurrency_gate.go | 58 ++ bindings/go/pkg/whisper/consts.go | 1 + .../go/pkg/whisper/context_benchmark_test.go | 239 +++++++ bindings/go/pkg/whisper/context_test.go | 631 +++++++++++------- bindings/go/pkg/whisper/log.go | 9 + bindings/go/pkg/whisper/model.go | 76 ++- .../go/pkg/whisper/model_context_params.go | 27 + .../{context.go => stateful_context.go} | 2 + .../go/pkg/whisper/stateful_context_test.go | 81 +++ bindings/go/pkg/whisper/stateless_context.go | 377 +++++++++++ .../go/pkg/whisper/stateless_context_test.go | 52 ++ bindings/go/pkg/whisper/test_helpers_test.go | 129 ++++ bindings/go/pkg/whisper/util_test.go | 10 + bindings/go/whisper.go | 81 +++ bindings/go/whisper_test.go | 5 + .../download/golang.org/x/tools/gopls/@v/list | 1 + .../golang.org/x/tools/gopls/@v/v0.20.0.info | 1 + .../golang.org/x/tools/gopls/@v/v0.20.0.mod | 32 + .../lookup/golang.org/x/tools/gopls@v0.20.0 | 9 + .../sumdb/sum.golang.org/tile/8/0/x161/441 | Bin 0 -> 8192 bytes .../sum.golang.org/tile/8/0/x170/113.p/24 | Bin 0 -> 768 bytes .../sumdb/sum.golang.org/tile/8/1/630 | Bin 0 -> 8192 bytes .../sumdb/sum.golang.org/tile/8/1/664.p/129 | Bin 0 -> 4128 bytes .../sumdb/sum.golang.org/tile/8/2/002.p/152 | Bin 0 -> 4864 bytes .../sumdb/sum.golang.org/tile/8/3/000.p/2 | 3 + pkg/sumdb/sum.golang.org/latest | 5 + 29 files 
changed, 1575 insertions(+), 264 deletions(-) create mode 100644 bindings/go/pkg/whisper/concurrency_gate.go create mode 100644 bindings/go/pkg/whisper/context_benchmark_test.go create mode 100644 bindings/go/pkg/whisper/log.go create mode 100644 bindings/go/pkg/whisper/model_context_params.go rename bindings/go/pkg/whisper/{context.go => stateful_context.go} (98%) create mode 100644 bindings/go/pkg/whisper/stateful_context_test.go create mode 100644 bindings/go/pkg/whisper/stateless_context.go create mode 100644 bindings/go/pkg/whisper/stateless_context_test.go create mode 100644 bindings/go/pkg/whisper/test_helpers_test.go create mode 100644 pkg/mod/cache/download/golang.org/x/tools/gopls/@v/list create mode 100644 pkg/mod/cache/download/golang.org/x/tools/gopls/@v/v0.20.0.info create mode 100644 pkg/mod/cache/download/golang.org/x/tools/gopls/@v/v0.20.0.mod create mode 100644 pkg/mod/cache/download/sumdb/sum.golang.org/lookup/golang.org/x/tools/gopls@v0.20.0 create mode 100644 pkg/mod/cache/download/sumdb/sum.golang.org/tile/8/0/x161/441 create mode 100644 pkg/mod/cache/download/sumdb/sum.golang.org/tile/8/0/x170/113.p/24 create mode 100644 pkg/mod/cache/download/sumdb/sum.golang.org/tile/8/1/630 create mode 100644 pkg/mod/cache/download/sumdb/sum.golang.org/tile/8/1/664.p/129 create mode 100644 pkg/mod/cache/download/sumdb/sum.golang.org/tile/8/2/002.p/152 create mode 100644 pkg/mod/cache/download/sumdb/sum.golang.org/tile/8/3/000.p/2 create mode 100644 pkg/sumdb/sum.golang.org/latest diff --git a/bindings/go/.gitignore b/bindings/go/.gitignore index 20a1b6e48e3..bfdd3e3c790 100644 --- a/bindings/go/.gitignore +++ b/bindings/go/.gitignore @@ -1,3 +1,4 @@ build models samples/a13.wav +samples/benchmark_out.wav diff --git a/bindings/go/Makefile b/bindings/go/Makefile index c9ab66255f1..fb57d0fc9f8 100644 --- a/bindings/go/Makefile +++ b/bindings/go/Makefile @@ -46,6 +46,13 @@ endif examples: $(EXAMPLES_DIR) +benchmark: model-small whisper modtidy +ifeq 
($(UNAME_S),Darwin) + @C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} GGML_METAL_PATH_RESOURCES=${GGML_METAL_PATH_RESOURCES} go test -ldflags "-extldflags '$(EXT_LDFLAGS)'" -bench=BenchmarkContextProcess -benchmem -run '^$$' ./pkg/whisper/... +else + @C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} go test -benchmem -run '^$$' ./pkg/whisper/... +endif + model-small: mkdir examples/go-model-download @${BUILD_DIR}/go-model-download -out models ggml-small.en.bin diff --git a/bindings/go/go.mod b/bindings/go/go.mod index 7c92c7b4890..5cfd3268af1 100644 --- a/bindings/go/go.mod +++ b/bindings/go/go.mod @@ -3,13 +3,13 @@ module github.com/ggerganov/whisper.cpp/bindings/go go 1.23 require ( + github.com/go-audio/audio v1.0.0 github.com/go-audio/wav v1.1.0 github.com/stretchr/testify v1.9.0 ) require ( github.com/davecgh/go-spew v1.1.1 // indirect - github.com/go-audio/audio v1.0.0 // indirect github.com/go-audio/riff v1.0.0 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect diff --git a/bindings/go/pkg/whisper/concurrency_gate.go b/bindings/go/pkg/whisper/concurrency_gate.go new file mode 100644 index 00000000000..03469a29ce7 --- /dev/null +++ b/bindings/go/pkg/whisper/concurrency_gate.go @@ -0,0 +1,58 @@ +package whisper + +import ( + "sync" + "sync/atomic" + + // Bindings + whisper "github.com/ggerganov/whisper.cpp/bindings/go" +) + +// Gate provides a simple acquire/release contract per key. +// The default implementation is a single-entry lock per key (limit=1). 
+type Gate interface { + // Acquire returns true if the key was acquired; false if already held + Acquire(key any) bool + // Release releases the key if currently held + Release(key any) +} + +// singleFlightGate is a minimal lock with limit=1 per key +type singleFlightGate struct { + m sync.Map // key -> *int32 (0 available, 1 held) +} + +func (g *singleFlightGate) Acquire(key any) bool { + ptr, _ := g.m.LoadOrStore(key, new(int32)) + busy := ptr.(*int32) + return atomic.CompareAndSwapInt32(busy, 0, 1) +} + +func (g *singleFlightGate) Release(key any) { + if v, ok := g.m.Load(key); ok { + atomic.StoreInt32(v.(*int32), 0) + } +} + +var defaultGate Gate = &singleFlightGate{} + +// SetGate allows applications to override the default gate (e.g., for custom policies) +// Passing nil resets to the default singleFlightGate. +func SetGate(g Gate) { + if g == nil { + defaultGate = &singleFlightGate{} + return + } + defaultGate = g +} + +func gate() Gate { return defaultGate } + +// modelKey derives a stable key per underlying model context for guarding stateless ops +func modelKey(model *ModelContext) *whisper.Context { + if model == nil || model.ctxAccessor() == nil { + return nil + } + ctx, _ := model.ctxAccessor().context() + return ctx +} diff --git a/bindings/go/pkg/whisper/consts.go b/bindings/go/pkg/whisper/consts.go index eab223ce582..fbdd9c310a5 100644 --- a/bindings/go/pkg/whisper/consts.go +++ b/bindings/go/pkg/whisper/consts.go @@ -20,6 +20,7 @@ var ( ErrUnsupportedLanguage = errors.New("unsupported language") ErrModelNotMultilingual = errors.New("model is not multilingual") ErrModelClosed = errors.Join(errors.New("model has been closed"), ErrInternalAppError) + ErrStatelessBusy = errors.New("stateless context is busy; concurrent processing not supported") // Private errors errParametersRequired = errors.New("parameters are required") diff --git a/bindings/go/pkg/whisper/context_benchmark_test.go b/bindings/go/pkg/whisper/context_benchmark_test.go new file mode 
100644 index 00000000000..04cf5c8977f --- /dev/null +++ b/bindings/go/pkg/whisper/context_benchmark_test.go @@ -0,0 +1,239 @@ +package whisper_test + +import ( + "fmt" + "math" + "os" + "runtime" + "testing" + "time" + + whisper "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper" + "github.com/go-audio/audio" + wav "github.com/go-audio/wav" +) + +// benchProcessVariants runs the common benchmark matrix across context kinds, +// thread sets, and callback modes, for given samples. If singleIteration is true +// it runs only one iteration regardless of b.N. If printTimings is true, the custom +// ms_process metric is reported for every run; model timings are reset and printed for NoCallback runs only. +func benchProcessVariants( + b *testing.B, + samples []float32, + singleIteration bool, + printTimings bool, + useGPU bool, +) { + threadSets := []uint{1, 2, 4, uint(runtime.NumCPU())} + + device := "cpu" + if useGPU { + device = "gpu" + } + + // Initialize model per device mode + mp := whisper.NewModelContextParams() + mp.SetUseGPU(useGPU) + model, err := whisper.NewModelContextWithParams(ModelPath, mp) + if err != nil { + b.Fatalf("load model (%s): %v", device, err) + } + defer func() { _ = model.Close() }() + + // Context kinds: stateless and stateful + ctxKinds := []struct { + name string + new func() (whisper.Context, error) + }{ + { + name: "stateless", + new: func() (whisper.Context, error) { + params, err := whisper.NewParameters(model, whisper.SAMPLING_GREEDY, func(p *whisper.Parameters) {}) + if err != nil { + return nil, err + } + return whisper.NewStatelessContext(model, params) + }, + }, + { + name: "stateful", + new: func() (whisper.Context, error) { + params, err := whisper.NewParameters(model, whisper.SAMPLING_GREEDY, nil) + if err != nil { + return nil, err + } + return whisper.NewStatefulContext(model, params) + }, + }, + } + + for _, kind := range ctxKinds { + b.Run(device+"/"+kind.name, func(b *testing.B) { + for _, threads := range threadSets { +
b.Run(fmt.Sprintf("threads=%d/NoCallback", threads), func(b *testing.B) { + b.ReportAllocs() + b.SetBytes(int64(len(samples) * 4)) + ctx, err := kind.new() + if err != nil { + b.Fatalf("new %s context: %v", kind.name, err) + } + defer func() { _ = ctx.Close() }() + ctx.SetThreads(threads) + + iters := b.N + if singleIteration { + iters = 1 + } + + b.ResetTimer() + for i := 0; i < iters; i++ { + if printTimings { + model.ResetTimings() + } + start := time.Now() + if err := ctx.Process(samples, nil, nil, nil); err != nil { + b.Fatalf("process: %v", err) + } + if printTimings { + elapsed := time.Since(start) + model.PrintTimings() + b.ReportMetric(float64(elapsed.Milliseconds()), "ms_process") + } + } + }) + + b.Run(fmt.Sprintf("threads=%d/WithSegmentCallback", threads), func(b *testing.B) { + b.ReportAllocs() + b.SetBytes(int64(len(samples) * 4)) + ctx, err := kind.new() + if err != nil { + b.Fatalf("new %s context: %v", kind.name, err) + } + defer func() { _ = ctx.Close() }() + ctx.SetThreads(threads) + + iters := b.N + if singleIteration { + iters = 1 + } + + b.ResetTimer() + for i := 0; i < iters; i++ { + start := time.Now() + // Passing a segment callback forces single-segment mode and exercises token extraction + if err := ctx.Process(samples, nil, func(seg whisper.Segment) {}, nil); err != nil { + b.Fatalf("process with callback: %v", err) + } + if printTimings { + elapsed := time.Since(start) + b.ReportMetric(float64(elapsed.Milliseconds()), "ms_process") + } + } + }) + } + }) + } +} + +// BenchmarkContextProcessCPU runs the high-level Context.Process on the CPU across +// different thread counts, with and without segment callbacks.
+func BenchmarkContextProcessCPU(b *testing.B) { + if _, err := os.Stat(ModelPath); os.IsNotExist(err) { + b.Skipf("model not found: %s", ModelPath) + } + if _, err := os.Stat(SamplePath); os.IsNotExist(err) { + b.Skipf("sample not found: %s", SamplePath) + } + + // Load audio once (reuse helper) + data := helperLoadSample(b, SamplePath) + + benchProcessVariants(b, data, false, true, false) +} + +// BenchmarkContextProcessBigCPU runs one single iteration over a big input +// (the short sample concatenated 10x) to simulate long audio processing. +// This is complementary to BenchmarkContextProcessCPU which runs many iterations +// over the short sample. +func BenchmarkContextProcessBigCPU(b *testing.B) { + if _, err := os.Stat(ModelPath); os.IsNotExist(err) { + b.Skipf("model not found: %s", ModelPath) + } + if _, err := os.Stat(SamplePath); os.IsNotExist(err) { + b.Skipf("sample not found: %s", SamplePath) + } + + // Load audio once (reuse helper with meta) + data, sampleRate, numChans := helperLoadSampleWithMeta(b, SamplePath) + + // Build big dataset: input concatenated 10x + bigData := make([]float32, len(data)*10) + for i := 0; i < 10; i++ { + copy(bigData[i*len(data):(i+1)*len(data)], data) + } + + // Write the big dataset to a wav file for inspection + outPath := "../../samples/benchmark_out.wav" + fout, err := os.Create(outPath) + if err != nil { + b.Fatalf("create output wav: %v", err) + } + enc := wav.NewEncoder(fout, sampleRate, 16, numChans, 1) + intBuf := &audio.IntBuffer{ + Format: &audio.Format{NumChannels: numChans, SampleRate: sampleRate}, + SourceBitDepth: 16, + Data: make([]int, len(bigData)), + } + for i, s := range bigData { + v := int(math.Round(float64(s) * 32767.0)) + if v > 32767 { + v = 32767 + } else if v < -32768 { + v = -32768 + } + intBuf.Data[i] = v + } + if err := enc.Write(intBuf); err != nil { + _ = fout.Close() + b.Fatalf("encode wav: %v", err) + } + if err := enc.Close(); err != nil { + _ = fout.Close() + b.Fatalf("close encoder: %v",
err) + } + _ = fout.Close() + + benchProcessVariants(b, bigData, true, true, false) +} + +// GPU variants reuse model-level GPU enablement via model params +func BenchmarkContextProcessGPU(b *testing.B) { + if _, err := os.Stat(ModelPath); os.IsNotExist(err) { + b.Skipf("model not found: %s", ModelPath) + } + if _, err := os.Stat(SamplePath); os.IsNotExist(err) { + b.Skipf("sample not found: %s", SamplePath) + } + + data := helperLoadSample(b, SamplePath) + + benchProcessVariants(b, data, false, true, true) +} + +func BenchmarkContextProcessBigGPU(b *testing.B) { + if _, err := os.Stat(ModelPath); os.IsNotExist(err) { + b.Skipf("model not found: %s", ModelPath) + } + if _, err := os.Stat(SamplePath); os.IsNotExist(err) { + b.Skipf("sample not found: %s", SamplePath) + } + + data, _, _ := helperLoadSampleWithMeta(b, SamplePath) + + bigData := make([]float32, len(data)*10) + for i := 0; i < 10; i++ { + copy(bigData[i*len(data):(i+1)*len(data)], data) + } + + benchProcessVariants(b, bigData, true, true, true) +} diff --git a/bindings/go/pkg/whisper/context_test.go b/bindings/go/pkg/whisper/context_test.go index a56d355ae2a..f18238ba54b 100644 --- a/bindings/go/pkg/whisper/context_test.go +++ b/bindings/go/pkg/whisper/context_test.go @@ -3,11 +3,9 @@ package whisper_test import ( "io" "os" - "sync" "testing" "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper" - "github.com/go-audio/wav" assert "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -15,120 +13,133 @@ import ( func TestSetLanguage(t *testing.T) { assert := assert.New(t) - model, err := whisper.New(ModelPath) - assert.NoError(err) - assert.NotNil(model) - defer func() { _ = model.Close() }() + cases := []struct { + name string + new func(t *testing.T) (whisper.Context, func()) + }{ + {name: "stateless", new: helperNewStatelessContext}, + {name: "stateful", new: helperNewStatefulContext}, + } - context, err := model.NewContext() - assert.NoError(err) + for _, tc := range 
cases { + t.Run(tc.name, func(t *testing.T) { + ctx, cleanup := tc.new(t) + defer cleanup() - // This returns an error since - // the model 'models/ggml-small.en.bin' - // that is loaded is not multilingual - err = context.SetLanguage("en") - assert.Error(err) + // This returns an error since the small.en model is not multilingual + err := ctx.SetLanguage("en") + assert.Error(err) + }) + } } func TestContextModelIsMultilingual(t *testing.T) { assert := assert.New(t) - model, err := whisper.New(ModelPath) - assert.NoError(err) - assert.NotNil(model) - defer func() { _ = model.Close() }() - - context, err := model.NewContext() - assert.NoError(err) - - isMultilingual := context.IsMultilingual() + cases := []struct { + name string + new func(t *testing.T) (whisper.Context, func()) + }{ + {name: "stateless", new: helperNewStatelessContext}, + {name: "stateful", new: helperNewStatefulContext}, + } - // This returns false since - // the model 'models/ggml-small.en.bin' - // that is loaded is not multilingual - assert.False(isMultilingual) + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + ctx, cleanup := tc.new(t) + defer cleanup() + assert.False(ctx.IsMultilingual()) + }) + } } func TestLanguage(t *testing.T) { assert := assert.New(t) - model, err := whisper.New(ModelPath) - assert.NoError(err) - assert.NotNil(model) - defer func() { _ = model.Close() }() - - context, err := model.NewContext() - assert.NoError(err) + cases := []struct { + name string + new func(t *testing.T) (whisper.Context, func()) + }{ + {name: "stateless", new: helperNewStatelessContext}, + {name: "stateful", new: helperNewStatefulContext}, + } - // This always returns en since - // the model 'models/ggml-small.en.bin' - // that is loaded is not multilingual - expectedLanguage := "en" - actualLanguage := context.Language() - assert.Equal(expectedLanguage, actualLanguage) + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + ctx, cleanup := tc.new(t) + defer cleanup() 
+ expectedLanguage := "en" + actualLanguage := ctx.Language() + assert.Equal(expectedLanguage, actualLanguage) + }) + } } -func TestProcess(t *testing.T) { +// Generic behavior: Language() and DetectedLanguage() match for both context types +func TestContext_Generic_LanguageAndDetectedLanguage(t *testing.T) { assert := assert.New(t) - fh, err := os.Open(SamplePath) - assert.NoError(err) - defer func() { _ = fh.Close() }() - - // Decode the WAV file - load the full buffer - dec := wav.NewDecoder(fh) - buf, err := dec.FullPCMBuffer() - assert.NoError(err) - assert.Equal(uint16(1), dec.NumChans) + if _, err := os.Stat(ModelPath); os.IsNotExist(err) { + t.Skip("Skipping test, model not found:", ModelPath) + } + if _, err := os.Stat(SamplePath); os.IsNotExist(err) { + t.Skip("Skipping test, sample not found:", SamplePath) + } - data := buf.AsFloat32Buffer().Data + data := helperLoadSample(t, SamplePath) - model, err := whisper.New(ModelPath) - assert.NoError(err) - assert.NotNil(model) - defer func() { _ = model.Close() }() + cases := []struct { + name string + new func(t *testing.T) (whisper.Context, func()) + }{ + {name: "stateless", new: helperNewStatelessContext}, + {name: "stateful", new: helperNewStatefulContext}, + } - context, err := model.NewContext() - assert.NoError(err) + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + ctx, cleanup := tc.new(t) + defer cleanup() - err = context.Process(data, nil, nil, nil) - assert.NoError(err) + langBefore := ctx.Language() + assert.NoError(ctx.Process(data, nil, nil, nil)) + detected := ctx.DetectedLanguage() + assert.Equal(langBefore, detected) + }) + } } -func TestDetectedLanguage(t *testing.T) { +func TestProcess(t *testing.T) { assert := assert.New(t) - fh, err := os.Open(SamplePath) - assert.NoError(err) - defer func() { _ = fh.Close() }() - - // Decode the WAV file - load the full buffer - dec := wav.NewDecoder(fh) - buf, err := dec.FullPCMBuffer() - assert.NoError(err) - assert.Equal(uint16(1), 
dec.NumChans) - - data := buf.AsFloat32Buffer().Data - - model, err := whisper.New(ModelPath) - assert.NoError(err) - assert.NotNil(model) - defer func() { _ = model.Close() }() + if _, err := os.Stat(ModelPath); os.IsNotExist(err) { + t.Skip("Skipping test, model not found:", ModelPath) + } + if _, err := os.Stat(SamplePath); os.IsNotExist(err) { + t.Skip("Skipping test, sample not found:", SamplePath) + } - context, err := model.NewContext() - assert.NoError(err) + data := helperLoadSample(t, SamplePath) - err = context.Process(data, nil, nil, nil) - assert.NoError(err) + cases := []struct { + name string + new func(t *testing.T) (whisper.Context, func()) + }{ + {name: "stateless", new: helperNewStatelessContext}, + {name: "stateful", new: helperNewStatefulContext}, + } - expectedLanguage := "en" - actualLanguage := context.DetectedLanguage() - assert.Equal(expectedLanguage, actualLanguage) + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + ctx, cleanup := tc.new(t) + defer cleanup() + err := ctx.Process(data, nil, nil, nil) + assert.NoError(err) + }) + } } -// TestContext_ConcurrentProcessing tests that multiple contexts can process concurrently -// without interfering with each other (validates the whisper_state isolation fix) -func TestContext_ConcurrentProcessing(t *testing.T) { +func TestDetectedLanguage(t *testing.T) { assert := assert.New(t) if _, err := os.Stat(ModelPath); os.IsNotExist(err) { @@ -138,37 +149,32 @@ func TestContext_ConcurrentProcessing(t *testing.T) { t.Skip("Skipping test, sample not found:", SamplePath) } - fh, err := os.Open(SamplePath) - assert.NoError(err) - defer func() { _ = fh.Close() }() - - dec := wav.NewDecoder(fh) - buf, err := dec.FullPCMBuffer() - assert.NoError(err) - assert.Equal(uint16(1), dec.NumChans) - data := buf.AsFloat32Buffer().Data - - model, err := whisper.New(ModelPath) - assert.NoError(err) - assert.NotNil(model) - defer func() { _ = model.Close() }() - - ctx, err := model.NewContext() - 
assert.NoError(err) - assert.NotNil(ctx) - defer func() { _ = ctx.Close() }() + data := helperLoadSample(t, SamplePath) - err = ctx.Process(data, nil, nil, nil) - assert.NoError(err) + cases := []struct { + name string + new func(t *testing.T) (whisper.Context, func()) + }{ + {name: "stateless", new: helperNewStatelessContext}, + {name: "stateful", new: helperNewStatefulContext}, + } - seg, err := ctx.NextSegment() - assert.NoError(err) - assert.NotEmpty(seg.Text) + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + ctx, cleanup := tc.new(t) + defer cleanup() + err := ctx.Process(data, nil, nil, nil) + assert.NoError(err) + expectedLanguage := "en" + actualLanguage := ctx.DetectedLanguage() + assert.Equal(expectedLanguage, actualLanguage) + }) + } } -// TestContext_Parallel_DifferentInputs tests concurrent processing with different inputs -// This validates that each context maintains isolated state for concurrent processing -func TestContext_Parallel_DifferentInputs(t *testing.T) { +// TestContext_ConcurrentProcessing runs Process once per context kind (stateless and stateful) and +// checks that the first segment is non-empty; despite its name it does not run contexts in parallel +func TestContext_ConcurrentProcessing(t *testing.T) { assert := assert.New(t) if _, err := os.Stat(ModelPath); os.IsNotExist(err) { @@ -178,73 +184,29 @@ func TestContext_Parallel_DifferentInputs(t *testing.T) { t.Skip("Skipping test, sample not found:", SamplePath) } - fh, err := os.Open(SamplePath) - assert.NoError(err) - defer func() { _ = fh.Close() }() + data := helperLoadSample(t, SamplePath) - dec := wav.NewDecoder(fh) - buf, err := dec.FullPCMBuffer() - assert.NoError(err) - assert.Equal(uint16(1), dec.NumChans) - data := buf.AsFloat32Buffer().Data - assert.Greater(len(data), 10) + cases := []struct { + name string + new func(t *testing.T) (whisper.Context, func()) + }{ + {name: "stateless", new: helperNewStatelessContext}, + {name: "stateful", new: helperNewStatefulContext}, + } - //
Create half-sample (second half) - half := make([]float32, len(data)/2) - copy(half, data[len(data)/2:]) + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + ctx, cleanup := tc.new(t) + defer cleanup() - model, err := whisper.New(ModelPath) - assert.NoError(err) - assert.NotNil(model) - defer func() { _ = model.Close() }() + err := ctx.Process(data, nil, nil, nil) + assert.NoError(err) - ctx1, err := model.NewContext() - assert.NoError(err) - defer func() { _ = ctx1.Close() }() - ctx2, err := model.NewContext() - assert.NoError(err) - defer func() { _ = ctx2.Close() }() - - // Run in parallel - each context has isolated whisper_state - var wg sync.WaitGroup - var first1, first2 string - var e1, e2 error - - wg.Add(2) - - // No mutex needed because each context is isolated by whisper_state - go func() { - defer wg.Done() - e1 = ctx1.Process(data, nil, nil, nil) - if e1 == nil { - seg, err := ctx1.NextSegment() - if err == nil { - first1 = seg.Text - } else { - e1 = err - } - } - }() - - go func() { - defer wg.Done() - e2 = ctx2.Process(half, nil, nil, nil) - if e2 == nil { - seg, err := ctx2.NextSegment() - if err == nil { - first2 = seg.Text - } else { - e2 = err - } - } - }() - - wg.Wait() - assert.NoError(e1) - assert.NoError(e2) - assert.NotEmpty(first1) - assert.NotEmpty(first2) - assert.NotEqual(first1, first2, "first segments should differ for different inputs") + seg, err := ctx.NextSegment() + assert.NoError(err) + assert.NotEmpty(seg.Text) + }) + } } // TestContext_Close tests that Context.Close() properly frees resources @@ -256,53 +218,72 @@ func TestContext_Close(t *testing.T) { t.Skip("Skipping test, model not found:", ModelPath) } - model, err := whisper.New(ModelPath) - assert.NoError(err) - assert.NotNil(model) - defer func() { _ = model.Close() }() - - ctx, err := model.NewContext() - assert.NoError(err) - assert.NotNil(ctx) - - // Close the context - err = ctx.Close() - require.NoError(t, err) - - // Try to use closed context - 
should return errors - err = ctx.Process([]float32{0.1, 0.2, 0.3}, nil, nil, nil) - require.ErrorIs(t, err, whisper.ErrModelClosed) - - // TODO: remove this logic after deprecating the ErrInternalAppError - require.ErrorIs(t, err, whisper.ErrInternalAppError) - - lang := ctx.DetectedLanguage() - require.Empty(t, lang) - - _, err = ctx.NextSegment() - assert.ErrorIs(err, whisper.ErrModelClosed) - - // TODO: remove this logic after deprecating the ErrInternalAppError - assert.ErrorIs(err, whisper.ErrInternalAppError) + cases := []struct { + name string + new func(t *testing.T) (whisper.Context, func()) + }{ + {name: "stateless", new: helperNewStatelessContext}, + {name: "stateful", new: helperNewStatefulContext}, + } - // Multiple closes should be safe - err = ctx.Close() - require.NoError(t, err) + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + ctx, cleanup := tc.new(t) + defer cleanup() + + // Close the context + err := ctx.Close() + require.NoError(t, err) + + // Try to use closed context - should return errors + err = ctx.Process([]float32{0.1, 0.2, 0.3}, nil, nil, nil) + require.ErrorIs(t, err, whisper.ErrModelClosed) + // TODO: remove this logic after deprecating the ErrInternalAppError + require.ErrorIs(t, err, whisper.ErrInternalAppError) + + lang := ctx.DetectedLanguage() + require.Empty(t, lang) + + _, err = ctx.NextSegment() + assert.ErrorIs(err, whisper.ErrModelClosed) + // TODO: remove this logic after deprecating the ErrInternalAppError + assert.ErrorIs(err, whisper.ErrInternalAppError) + + // Multiple closes should be safe + err = ctx.Close() + require.NoError(t, err) + }) + } } func Test_Close_Context_of_Closed_Model(t *testing.T) { assert := assert.New(t) - model, err := whisper.New(ModelPath) - assert.NoError(err) - assert.NotNil(model) + if _, err := os.Stat(ModelPath); os.IsNotExist(err) { + t.Skip("Skipping test, model not found:", ModelPath) + } - ctx, err := model.NewContext() - assert.NoError(err) - assert.NotNil(ctx) + 
t.Run("stateless", func(t *testing.T) { + model, err := whisper.NewModelContext(ModelPath) + assert.NoError(err) + defer func() { _ = model.Close() }() + params := helperNewParams(t, model, nil) + ctx, err := whisper.NewStatelessContext(model, params) + assert.NoError(err) + require.NoError(t, model.Close()) + require.NoError(t, ctx.Close()) + }) - require.NoError(t, model.Close()) - require.NoError(t, ctx.Close()) + t.Run("stateful", func(t *testing.T) { + model, err := whisper.NewModelContext(ModelPath) + assert.NoError(err) + defer func() { _ = model.Close() }() + params := helperNewParams(t, model, nil) + ctx, err := whisper.NewStatefulContext(model, params) + assert.NoError(err) + require.NoError(t, model.Close()) + require.NoError(t, ctx.Close()) + }) } func TestContext_VAD_And_Diarization_Params_DoNotPanic(t *testing.T) { @@ -315,15 +296,7 @@ func TestContext_VAD_And_Diarization_Params_DoNotPanic(t *testing.T) { t.Skip("Skipping test, sample not found:", SamplePath) } - fh, err := os.Open(SamplePath) - assert.NoError(err) - defer func() { _ = fh.Close() }() - - dec := wav.NewDecoder(fh) - buf, err := dec.FullPCMBuffer() - assert.NoError(err) - assert.Equal(uint16(1), dec.NumChans) - data := buf.AsFloat32Buffer().Data + data := helperLoadSample(t, SamplePath) model, err := whisper.NewModelContext(ModelPath) assert.NoError(err) @@ -352,15 +325,7 @@ func TestContext_VAD_And_Diarization_Params_DoNotPanic(t *testing.T) { } func TestDiarization_TwoSpeakers_Boundaries(t *testing.T) { - fh, err := os.Open(MultiSpeakerSamplePath) - require.NoError(t, err) - defer func() { _ = fh.Close() }() - - dec := wav.NewDecoder(fh) - buf, err := dec.FullPCMBuffer() - assert.Equal(t, uint16(1), dec.NumChans) - require.NoError(t, err) - data := buf.AsFloat32Buffer().Data + data := helperLoadSample(t, MultiSpeakerSamplePath) model, err := whisper.NewModelContext(ModelTinydiarizePath) require.NoError(t, err) @@ -426,29 +391,193 @@ func TestContext_SpeakerTurnNext_Field_Present(t 
*testing.T) { t.Skip("Skipping test, sample not found:", SamplePath) } - fh, err := os.Open(SamplePath) - assert.NoError(err) - defer func() { _ = fh.Close() }() + data := helperLoadSample(t, SamplePath) - dec := wav.NewDecoder(fh) - buf, err := dec.FullPCMBuffer() - assert.NoError(err) - assert.Equal(uint16(1), dec.NumChans) - data := buf.AsFloat32Buffer().Data + cases := []struct { + name string + new func(t *testing.T) (whisper.Context, func()) + }{ + {name: "stateless", new: helperNewStatelessContext}, + {name: "stateful", new: helperNewStatefulContext}, + } - model, err := whisper.New(ModelPath) - assert.NoError(err) + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + ctx, cleanup := tc.new(t) + defer cleanup() + + err := ctx.Process(data, nil, nil, nil) + assert.NoError(err) + + seg, err := ctx.NextSegment() + assert.NoError(err) + t.Logf("SpeakerTurnNext: %v", seg.SpeakerTurnNext) + _ = seg.SpeakerTurnNext + }) + } +} + +// Ensure Process produces at least one segment for both stateless and stateful contexts +func TestContext_Process_ProducesSegments_BothKinds(t *testing.T) { + assert := assert.New(t) + + if _, err := os.Stat(ModelPath); os.IsNotExist(err) { + t.Skip("Skipping test, model not found:", ModelPath) + } + if _, err := os.Stat(SamplePath); os.IsNotExist(err) { + t.Skip("Skipping test, sample not found:", SamplePath) + } + + data := helperLoadSample(t, SamplePath) + + // Stateless + stateless, cleanupS := helperNewStatelessContext(t) + defer cleanupS() + require.NoError(t, stateless.Process(data, nil, nil, nil)) + var statelessCount int + for { + _, err := stateless.NextSegment() + if err == io.EOF { + break + } + require.NoError(t, err) + statelessCount++ + } + assert.Greater(statelessCount, 0, "stateless should produce at least one segment") + + // Stateful + stateful, cleanupSt := helperNewStatefulContext(t) + defer cleanupSt() + require.NoError(t, stateful.Process(data, nil, nil, nil)) + var statefulCount int + for { + _, err 
:= stateful.NextSegment() + if err == io.EOF { + break + } + require.NoError(t, err) + statefulCount++ + } + assert.Greater(statefulCount, 0, "stateful should produce at least one segment") +} + +// With temperature=0 (greedy), stateless and stateful should produce identical segments +func TestContext_Process_SameResults_TemperatureZero(t *testing.T) { + assert := assert.New(t) + + if _, err := os.Stat(ModelPath); os.IsNotExist(err) { + t.Skip("Skipping test, model not found:", ModelPath) + } + if _, err := os.Stat(SamplePath); os.IsNotExist(err) { + t.Skip("Skipping test, sample not found:", SamplePath) + } + + data := helperLoadSample(t, SamplePath) + + // Use a single model to avoid environment differences + model, err := whisper.NewModelContext(ModelPath) + require.NoError(t, err) defer func() { _ = model.Close() }() - ctx, err := model.NewContext() - assert.NoError(err) - defer func() { _ = ctx.Close() }() + // Independent params with temperature=0 for determinism + p := helperNewParams(t, model, func(p *whisper.Parameters) { + p.SetTemperature(0) + p.SetThreads(1) + }) - err = ctx.Process(data, nil, nil, nil) - assert.NoError(err) + stateless, err := whisper.NewStatelessContext(model, p) + require.NoError(t, err) + defer func() { _ = stateless.Close() }() - seg, err := ctx.NextSegment() - assert.NoError(err) - t.Logf("SpeakerTurnNext: %v", seg.SpeakerTurnNext) - _ = seg.SpeakerTurnNext // ensure field exists and is readable + stateful, err := whisper.NewStatefulContext(model, p) + require.NoError(t, err) + defer func() { _ = stateful.Close() }() + + require.NoError(t, stateless.Process(data, nil, nil, nil)) + require.NoError(t, stateful.Process(data, nil, nil, nil)) + + // Collect segment texts + var segsStateless, segsStateful []string + for { + seg, err := stateless.NextSegment() + if err == io.EOF { + break + } + require.NoError(t, err) + segsStateless = append(segsStateless, seg.Text) + } + for { + seg, err := stateful.NextSegment() + if err == io.EOF { + 
break + } + require.NoError(t, err) + segsStateful = append(segsStateful, seg.Text) + } + + // Both should have at least one segment and be identical; the length check is fatal so the index loop below cannot go out of range + require.Greater(t, len(segsStateless), 0) + require.Greater(t, len(segsStateful), 0) + require.Equal(t, len(segsStateful), len(segsStateless)) + for i := range segsStateless { + assert.Equal(segsStateless[i], segsStateful[i], "segment %d text differs", i) + } +} + +// Model.GetTimings: stateless processing updates model timings (non-zero), +// stateful processing does not (zero timings) +func TestModel_GetTimings_Stateless_NonZero_Stateful_Zero(t *testing.T) { + assert := assert.New(t) + + if _, err := os.Stat(ModelPath); os.IsNotExist(err) { + t.Skip("Skipping test, model not found:", ModelPath) + } + if _, err := os.Stat(SamplePath); os.IsNotExist(err) { + t.Skip("Skipping test, sample not found:", SamplePath) + } + + data := helperLoadSample(t, SamplePath) + + model, err := whisper.NewModelContext(ModelPath) + require.NoError(t, err) + defer func() { _ = model.Close() }() + + // Stateless should produce non-zero timings + t.Run("stateless", func(t *testing.T) { + model.ResetTimings() + params := helperNewParams(t, model, nil) + ctx, err := whisper.NewStatelessContext(model, params) + require.NoError(t, err) + defer func() { _ = ctx.Close() }() + + require.NoError(t, ctx.Process(data, nil, nil, nil)) + + timings, ok := model.GetTimings() + require.True(t, ok, "expected timings to be available after stateless processing") + nonZero := timings.SampleMS > 0 || timings.EncodeMS > 0 || timings.DecodeMS > 0 || timings.BatchdMS > 0 || timings.PromptMS > 0 + assert.True(nonZero, "expected at least one non-zero timing after stateless processing: %#v", timings) + }) + + // Stateful should keep model-level timings at zero + t.Run("stateful", func(t *testing.T) { + model.ResetTimings() + params := helperNewParams(t, model, nil) + ctx, err := whisper.NewStatefulContext(model, params) + require.NoError(t, err) + defer func() { _ =
ctx.Close() }() + + require.NoError(t, ctx.Process(data, nil, nil, nil)) + + timings, ok := model.GetTimings() + // Expect timings present but all zero; if not present at all, treat as zero-equivalent + if ok { + assert.Equal(float32(0), timings.SampleMS) + assert.Equal(float32(0), timings.EncodeMS) + assert.Equal(float32(0), timings.DecodeMS) + assert.Equal(float32(0), timings.BatchdMS) + assert.Equal(float32(0), timings.PromptMS) + } else { + t.Log("timings not available for stateful processing; treating as zero") + } + }) } diff --git a/bindings/go/pkg/whisper/log.go b/bindings/go/pkg/whisper/log.go new file mode 100644 index 00000000000..66eb0d5c78c --- /dev/null +++ b/bindings/go/pkg/whisper/log.go @@ -0,0 +1,9 @@ +package whisper + +import low "github.com/ggerganov/whisper.cpp/bindings/go" + +// DisableLogs disables all C-side logging from whisper.cpp and ggml. +// Call once early in your program before creating models/contexts. +func DisableLogs() { + low.DisableLogs() +} diff --git a/bindings/go/pkg/whisper/model.go b/bindings/go/pkg/whisper/model.go index a16d6a13476..c22490fb84c 100644 --- a/bindings/go/pkg/whisper/model.go +++ b/bindings/go/pkg/whisper/model.go @@ -5,7 +5,7 @@ import ( "os" // Bindings - whisper "github.com/ggerganov/whisper.cpp/bindings/go" + low "github.com/ggerganov/whisper.cpp/bindings/go" ) type ModelContext struct { @@ -17,27 +17,50 @@ type ModelContext struct { // Make sure model adheres to the interface var _ Model = (*ModelContext)(nil) +// Timings is a compact, high-level timing snapshot in milliseconds +type Timings struct { + SampleMS float32 + EncodeMS float32 + DecodeMS float32 + BatchdMS float32 + PromptMS float32 +} + // Deprecated: Use NewModelContext instead func New(path string) (Model, error) { return NewModelContext(path) } // NewModelContext creates a new model context +// by delegating to NewModelContextWithParams with the params from NewModelContextParams(). func NewModelContext( path string, +) (*ModelContext, error) { + return NewModelContextWithParams( + path, + NewModelContextParams(), + ) +} + +//
NewModelContextWithParams creates a new model context with custom initialization params +func NewModelContextWithParams( + path string, + params ModelContextParams, ) (*ModelContext, error) { model := new(ModelContext) if _, err := os.Stat(path); err != nil { return nil, err - } else if ctx := whisper.Whisper_init(path); ctx == nil { + } + + ctx := low.Whisper_init_with_params(path, params.toLow()) + if ctx == nil { return nil, ErrUnableToLoadModel - } else { - model.ca = newCtxAccessor(ctx) - model.tokId = newTokenIdentifier(model.ca) - model.path = path } - // Return success + model.ca = newCtxAccessor(ctx) + model.tokId = newTokenIdentifier(model.ca) + model.path = path + return model, nil } @@ -75,9 +98,9 @@ func (model *ModelContext) Languages() []string { return nil } - result := make([]string, 0, whisper.Whisper_lang_max_id()) - for i := 0; i < whisper.Whisper_lang_max_id(); i++ { - str := whisper.Whisper_lang_str(i) + result := make([]string, 0, low.Whisper_lang_max_id()) + for i := 0; i < low.Whisper_lang_max_id(); i++ { + str := low.Whisper_lang_str(i) if ctx.Whisper_lang_id(str) >= 0 { result = append(result, str) } @@ -95,8 +118,8 @@ func (model *ModelContext) NewContext() (Context, error) { return nil, err } - // Return new context (now state-backed) - return NewStatefulContext( + // Return new context (stateless for backward compatibility with timings) + return NewStatelessContext( model, params, ) @@ -122,6 +145,35 @@ func (model *ModelContext) ResetTimings() { ctx.Whisper_reset_timings() } +// GetTimings returns a compact snapshot of model-level processing timings. +// +// Behavior notes: +// - Stateless contexts (created via ModelContext.NewContext or NewStatelessContext) +// update model-level timings during Process. After a stateless Process call, +// the returned timings are expected to be non-zero (ok == true). +// - Stateful contexts (created via NewStatefulContext) use a per-state backend +// and do not affect model-level timings. 
After a stateful Process call, +// the returned timings are expected to be zero values (fields equal 0) or +// the call may return ok == false depending on the underlying implementation. +// +// Use ResetTimings before measurement to clear previous values. +func (model *ModelContext) GetTimings() (Timings, bool) { + ctx, err := model.ca.context() + if err != nil { + return Timings{}, false + } + if t, ok := ctx.Whisper_get_timings_go(); ok { + return Timings{ + SampleMS: t.SampleMS, + EncodeMS: t.EncodeMS, + DecodeMS: t.DecodeMS, + BatchdMS: t.BatchdMS, + PromptMS: t.PromptMS, + }, true + } + return Timings{}, false +} + func (model *ModelContext) tokenIdentifier() *tokenIdentifier { return model.tokId } diff --git a/bindings/go/pkg/whisper/model_context_params.go b/bindings/go/pkg/whisper/model_context_params.go new file mode 100644 index 00000000000..62733b9bbd1 --- /dev/null +++ b/bindings/go/pkg/whisper/model_context_params.go @@ -0,0 +1,27 @@ +package whisper + +import ( + low "github.com/ggerganov/whisper.cpp/bindings/go" +) + +type ModelContextParams struct { + p low.ContextParams +} + +func NewModelContextParams() ModelContextParams { + return ModelContextParams{ + p: low.Whisper_context_default_params(), + } +} + +func (p *ModelContextParams) SetUseGPU(v bool) { + p.p.SetUseGPU(v) +} + +func (p *ModelContextParams) SetGPUDevice(n int) { + p.p.SetGPUDevice(n) +} + +func (p *ModelContextParams) toLow() low.ContextParams { + return p.p +} diff --git a/bindings/go/pkg/whisper/context.go b/bindings/go/pkg/whisper/stateful_context.go similarity index 98% rename from bindings/go/pkg/whisper/context.go rename to bindings/go/pkg/whisper/stateful_context.go index 4b178f9646b..08e04094c09 100644 --- a/bindings/go/pkg/whisper/context.go +++ b/bindings/go/pkg/whisper/stateful_context.go @@ -392,4 +392,6 @@ func (context *StatefulContext) SetTranslate(v bool) { context.params.SetTranslate(v) } +// Make stateful context compatible with the old deprecated interface for 
+// the simple migration into multi-threaded processing. var _ Context = (*StatefulContext)(nil) diff --git a/bindings/go/pkg/whisper/stateful_context_test.go b/bindings/go/pkg/whisper/stateful_context_test.go new file mode 100644 index 00000000000..0062aed10fa --- /dev/null +++ b/bindings/go/pkg/whisper/stateful_context_test.go @@ -0,0 +1,81 @@ +package whisper_test + +import ( + "os" + "sync" + "testing" + + whisper "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper" + assert "github.com/stretchr/testify/assert" +) + +// Stateful-specific: parallel processing supported +func TestContext_Parallel_DifferentInputs_Stateful(t *testing.T) { + assert := assert.New(t) + + if _, err := os.Stat(ModelPath); os.IsNotExist(err) { + t.Skip("Skipping test, model not found:", ModelPath) + } + if _, err := os.Stat(SamplePath); os.IsNotExist(err) { + t.Skip("Skipping test, sample not found:", SamplePath) + } + + data := helperLoadSample(t, SamplePath) + assert.Greater(len(data), 10) + + // Create half-sample (second half) + half := make([]float32, len(data)/2) + copy(half, data[len(data)/2:]) + + model, err := whisper.NewModelContext(ModelPath) + assert.NoError(err) + defer func() { _ = model.Close() }() + + params1 := helperNewParams(t, model, nil) + params2 := helperNewParams(t, model, nil) + + ctx1, err := whisper.NewStatefulContext(model, params1) + assert.NoError(err) + defer func() { _ = ctx1.Close() }() + ctx2, err := whisper.NewStatefulContext(model, params2) + assert.NoError(err) + defer func() { _ = ctx2.Close() }() + + var wg sync.WaitGroup + var first1, first2 string + var e1, e2 error + wg.Add(2) + + go func() { + defer wg.Done() + e1 = ctx1.Process(data, nil, nil, nil) + if e1 == nil { + seg, err := ctx1.NextSegment() + if err == nil { + first1 = seg.Text + } else { + e1 = err + } + } + }() + + go func() { + defer wg.Done() + e2 = ctx2.Process(half, nil, nil, nil) + if e2 == nil { + seg, err := ctx2.NextSegment() + if err == nil { + first2 = seg.Text + } else 
{ + e2 = err + } + } + }() + + wg.Wait() + assert.NoError(e1) + assert.NoError(e2) + assert.NotEmpty(first1) + assert.NotEmpty(first2) + assert.NotEqual(first1, first2, "first segments should differ for different inputs") +} diff --git a/bindings/go/pkg/whisper/stateless_context.go b/bindings/go/pkg/whisper/stateless_context.go new file mode 100644 index 00000000000..7dbe8be29f7 --- /dev/null +++ b/bindings/go/pkg/whisper/stateless_context.go @@ -0,0 +1,377 @@ +package whisper + +import ( + "fmt" + "io" + "runtime" + "strings" + "time" + + // Bindings + whisper "github.com/ggerganov/whisper.cpp/bindings/go" +) + +type StatelessContext struct { + n int + model *ModelContext + params *Parameters + closed bool +} + +// NewStatelessContext creates a new stateless context backed by the model's context +func NewStatelessContext(model *ModelContext, params *Parameters) (*StatelessContext, error) { + if model == nil { + return nil, errModelRequired + } + + if params == nil { + return nil, errParametersRequired + } + + // Ensure model context is available + if _, err := model.ctxAccessor().context(); err != nil { + return nil, err + } + + c := new(StatelessContext) + c.model = model + c.params = params + + return c, nil +} + +// DetectedLanguage returns the detected language for the current context data +func (context *StatelessContext) DetectedLanguage() string { + if context.closed { + return "" + } + ctx, err := context.model.ctxAccessor().context() + if err != nil { + return "" + } + return whisper.Whisper_lang_str(ctx.Whisper_full_lang_id()) +} + +// Close marks the context as closed. +func (context *StatelessContext) Close() error { + context.closed = true + return nil +} + +// Params returns a high-level parameters wrapper +func (context *StatelessContext) Params() *Parameters { + return context.params +} + +// ResetTimings resets the model performance timing counters. +// Deprecated: Use Model.ResetTimings() instead - these are model-level performance metrics. 
+func (context *StatelessContext) ResetTimings() { + context.model.ResetTimings() +} + +// PrintTimings prints the model performance timings to stdout. +// Deprecated: Use Model.PrintTimings() instead - these are model-level performance metrics. +func (context *StatelessContext) PrintTimings() { + context.model.PrintTimings() +} + +// SystemInfo returns the system information +func (context *StatelessContext) SystemInfo() string { + return fmt.Sprintf("system_info: n_threads = %d / %d | %s\n", + context.params.Threads(), + runtime.NumCPU(), + whisper.Whisper_print_system_info(), + ) +} + +// Use mel data at offset_ms to try and auto-detect the spoken language +// Make sure to call whisper_pcm_to_mel() or whisper_set_mel() first. +// Returns the probabilities of all languages for this context. +func (context *StatelessContext) WhisperLangAutoDetect(offset_ms int, n_threads int) ([]float32, error) { + if context.closed { + return nil, ErrModelClosed + } + ctx, err := context.model.ctxAccessor().context() + if err != nil { + return nil, err + } + langProbs, err := ctx.Whisper_lang_auto_detect(offset_ms, n_threads) + if err != nil { + return nil, err + } + return langProbs, nil +} + +// Process new sample data and return any errors +func (context *StatelessContext) Process( + data []float32, + callEncoderBegin EncoderBeginCallback, + callNewSegment SegmentCallback, + callProgress ProgressCallback, +) error { + if context.closed { + return ErrModelClosed + } + ctx, err := context.model.ctxAccessor().context() + if err != nil { + return err + } + // Concurrency guard: prevent concurrent stateless processing on shared model ctx + k := modelKey(context.model) + if !gate().Acquire(k) { + return ErrStatelessBusy + } + defer gate().Release(k) + + // If the callback is defined then we force on single_segment mode + if callNewSegment != nil { + context.params.SetSingleSegment(true) + } + + lowLevelParams, err := context.params.unsafeParams() + if err != nil { + return err + } + 
+ if err := ctx.Whisper_full(*lowLevelParams, data, callEncoderBegin, + func(new int) { + if callNewSegment != nil { + num_segments := ctx.Whisper_full_n_segments() + s0 := num_segments - new + for i := s0; i < num_segments; i++ { + callNewSegment(toSegmentFromContext(ctx, i)) + } + } + }, func(progress int) { + if callProgress != nil { + callProgress(progress) + } + }); err != nil { + return err + } + + // Return success + return nil +} + +// NextSegment returns the next segment from the context buffer +func (context *StatelessContext) NextSegment() (Segment, error) { + if context.closed { + return Segment{}, ErrModelClosed + } + ctx, err := context.model.ctxAccessor().context() + if err != nil { + return Segment{}, err + } + + if context.n >= ctx.Whisper_full_n_segments() { + return Segment{}, io.EOF + } + + result := toSegmentFromContext(ctx, context.n) + context.n++ + + return result, nil +} + +func (context *StatelessContext) IsMultilingual() bool { + return context.model.IsMultilingual() +} + +// Token helpers +// Deprecated: Use Model.IsText() instead - token checking is model-specific. +func (context *StatelessContext) IsText(t Token) bool { + result, _ := context.model.tokenIdentifier().IsText(t) + return result +} + +// Deprecated: Use Model.IsBEG() instead - token checking is model-specific. +func (context *StatelessContext) IsBEG(t Token) bool { + result, _ := context.model.tokenIdentifier().IsBEG(t) + return result +} + +// Deprecated: Use Model.IsSOT() instead - token checking is model-specific. +func (context *StatelessContext) IsSOT(t Token) bool { + result, _ := context.model.tokenIdentifier().IsSOT(t) + return result +} + +// Deprecated: Use Model.IsEOT() instead - token checking is model-specific. +func (context *StatelessContext) IsEOT(t Token) bool { + result, _ := context.model.tokenIdentifier().IsEOT(t) + return result +} + +// Deprecated: Use Model.IsPREV() instead - token checking is model-specific. 
+func (context *StatelessContext) IsPREV(t Token) bool { + result, _ := context.model.tokenIdentifier().IsPREV(t) + return result +} + +// Deprecated: Use Model.IsSOLM() instead - token checking is model-specific. +func (context *StatelessContext) IsSOLM(t Token) bool { + result, _ := context.model.tokenIdentifier().IsSOLM(t) + return result +} + +// Deprecated: Use Model.IsNOT() instead - token checking is model-specific. +func (context *StatelessContext) IsNOT(t Token) bool { + result, _ := context.model.tokenIdentifier().IsNOT(t) + return result +} + +func (context *StatelessContext) SetLanguage(lang string) error { + if context.closed || context.model.ctxAccessor().isClosed() { + return ErrModelClosed + } + + if !context.model.IsMultilingual() { + return ErrModelNotMultilingual + } + + return context.params.SetLanguage(lang) +} + +// Deprecated: Use Model.IsLANG() instead - token checking is model-specific. +func (context *StatelessContext) IsLANG(t Token, lang string) bool { + result, _ := context.model.tokenIdentifier().IsLANG(t, lang) + return result +} + +// Context-backed helper functions +func toSegmentFromContext(ctx *whisper.Context, n int) Segment { + return Segment{ + Num: n, + Text: strings.TrimSpace(ctx.Whisper_full_get_segment_text(n)), + Start: time.Duration(ctx.Whisper_full_get_segment_t0(n)) * time.Millisecond * 10, + End: time.Duration(ctx.Whisper_full_get_segment_t1(n)) * time.Millisecond * 10, + Tokens: toTokensFromContext(ctx, n), + SpeakerTurnNext: false, // speaker turn available only with state-backed accessors + } +} + +func toTokensFromContext(ctx *whisper.Context, n int) []Token { + result := make([]Token, ctx.Whisper_full_n_tokens(n)) + + for i := 0; i < len(result); i++ { + data := ctx.Whisper_full_get_token_data(n, i) + result[i] = Token{ + Id: int(ctx.Whisper_full_get_token_id(n, i)), + Text: ctx.Whisper_full_get_token_text(n, i), + P: ctx.Whisper_full_get_token_p(n, i), + Start: time.Duration(data.T0()) * time.Millisecond * 10, + 
End: time.Duration(data.T1()) * time.Millisecond * 10, + } + } + + return result +} + +// Deprecated: Use Params().Language() instead +func (context *StatelessContext) Language() string { + return context.params.Language() +} + +// Deprecated: Use Params().SetAudioCtx() instead +func (context *StatelessContext) SetAudioCtx(n uint) { + context.params.SetAudioCtx(n) +} + +// SetBeamSize implements Context. +// Deprecated: Use Params().SetBeamSize() instead +func (context *StatelessContext) SetBeamSize(v int) { + context.params.SetBeamSize(v) +} + +// SetDuration implements Context. +// Deprecated: Use Params().SetDuration() instead +func (context *StatelessContext) SetDuration(v time.Duration) { + context.params.SetDuration(v) +} + +// SetEntropyThold implements Context. +// Deprecated: Use Params().SetEntropyThold() instead +func (context *StatelessContext) SetEntropyThold(v float32) { + context.params.SetEntropyThold(v) +} + +// SetInitialPrompt implements Context. +// Deprecated: Use Params().SetInitialPrompt() instead +func (context *StatelessContext) SetInitialPrompt(v string) { + context.params.SetInitialPrompt(v) +} + +// SetMaxContext implements Context. +// Deprecated: Use Params().SetMaxContext() instead +func (context *StatelessContext) SetMaxContext(v int) { + context.params.SetMaxContext(v) +} + +// SetMaxSegmentLength implements Context. +// Deprecated: Use Params().SetMaxSegmentLength() instead +func (context *StatelessContext) SetMaxSegmentLength(v uint) { + context.params.SetMaxSegmentLength(v) +} + +// SetMaxTokensPerSegment implements Context. +// Deprecated: Use Params().SetMaxTokensPerSegment() instead +func (context *StatelessContext) SetMaxTokensPerSegment(v uint) { + context.params.SetMaxTokensPerSegment(v) +} + +// SetOffset implements Context. +// Deprecated: Use Params().SetOffset() instead +func (context *StatelessContext) SetOffset(v time.Duration) { + context.params.SetOffset(v) +} + +// SetSplitOnWord implements Context. 
+// Deprecated: Use Params().SetSplitOnWord() instead +func (context *StatelessContext) SetSplitOnWord(v bool) { + context.params.SetSplitOnWord(v) +} + +// SetTemperature implements Context. +// Deprecated: Use Params().SetTemperature() instead +func (context *StatelessContext) SetTemperature(v float32) { + context.params.SetTemperature(v) +} + +// SetTemperatureFallback implements Context. +// Deprecated: Use Params().SetTemperatureFallback() instead +func (context *StatelessContext) SetTemperatureFallback(v float32) { + context.params.SetTemperatureFallback(v) +} + +// SetThreads implements Context. +// Deprecated: Use Params().SetThreads() instead +func (context *StatelessContext) SetThreads(v uint) { + context.params.SetThreads(v) +} + +// SetTokenSumThreshold implements Context. +// Deprecated: Use Params().SetTokenSumThreshold() instead +func (context *StatelessContext) SetTokenSumThreshold(v float32) { + context.params.SetTokenSumThreshold(v) +} + +// SetTokenThreshold implements Context. +// Deprecated: Use Params().SetTokenThreshold() instead +func (context *StatelessContext) SetTokenThreshold(v float32) { + context.params.SetTokenThreshold(v) +} + +// SetTokenTimestamps implements Context. +// Deprecated: Use Params().SetTokenTimestamps() instead +func (context *StatelessContext) SetTokenTimestamps(v bool) { + context.params.SetTokenTimestamps(v) +} + +// SetTranslate implements Context. 
+// Deprecated: Use Params().SetTranslate() instead +func (context *StatelessContext) SetTranslate(v bool) { + context.params.SetTranslate(v) +} + +var _ Context = (*StatelessContext)(nil) diff --git a/bindings/go/pkg/whisper/stateless_context_test.go b/bindings/go/pkg/whisper/stateless_context_test.go new file mode 100644 index 00000000000..0eb867d1914 --- /dev/null +++ b/bindings/go/pkg/whisper/stateless_context_test.go @@ -0,0 +1,52 @@ +package whisper_test + +import ( + "sync" + "testing" + + whisper "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper" + assert "github.com/stretchr/testify/assert" +) + +// Ensure stateless contexts cannot process in parallel without isolation +func TestStatelessContext_NotParallelSafe(t *testing.T) { + data := helperLoadSample(t, SamplePath) + + model, closeModel := helperNewModelContext(t) + defer closeModel() + + params := helperNewParams(t, model, nil) + + // Create two stateless contexts sharing the same underlying model context + ctx1, err := whisper.NewStatelessContext(model, params) + assert.NoError(t, err) + defer func() { _ = ctx1.Close() }() + + ctx2, err := whisper.NewStatelessContext(model, params) + assert.NoError(t, err) + defer func() { _ = ctx2.Close() }() + + // Run both in parallel - Process guards the shared model context, so the overlapping + // call is expected to be rejected with ErrStatelessBusy; no panic is expected or captured.
+ var wg sync.WaitGroup + wg.Add(2) + + var err1, err2 error + + go func() { + defer wg.Done() + err1 = ctx1.Process(data, nil, nil, nil) + }() + + go func() { + defer wg.Done() + err2 = ctx2.Process(data, nil, nil, nil) + }() + + wg.Wait() + + // At least one should return ErrStatelessBusy + if err1 != whisper.ErrStatelessBusy && err2 != whisper.ErrStatelessBusy { + t.Fatalf("expected ErrStatelessBusy when processing in parallel with StatelessContext, got err1=%v err2=%v", err1, err2) + } +} diff --git a/bindings/go/pkg/whisper/test_helpers_test.go b/bindings/go/pkg/whisper/test_helpers_test.go new file mode 100644 index 00000000000..15fedc9613a --- /dev/null +++ b/bindings/go/pkg/whisper/test_helpers_test.go @@ -0,0 +1,129 @@ +package whisper_test + +import ( + "os" + "testing" + + whisper "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper" + wav "github.com/go-audio/wav" +) + +func helperLoadSample(tb testing.TB, path string) []float32 { + tb.Helper() + fh, err := os.Open(path) + if err != nil { + tb.Fatalf("open sample: %v", err) + } + defer func() { _ = fh.Close() }() + + dec := wav.NewDecoder(fh) + buf, err := dec.FullPCMBuffer() + if err != nil { + tb.Fatalf("decode wav: %v", err) + } + if dec.NumChans != 1 { + tb.Fatalf("expected mono wav, got channels=%d", dec.NumChans) + } + return buf.AsFloat32Buffer().Data +} + +// helperLoadSampleWithMeta loads wav and returns samples with sample rate and channels +func helperLoadSampleWithMeta(tb testing.TB, path string) ([]float32, int, int) { + tb.Helper() + fh, err := os.Open(path) + if err != nil { + tb.Fatalf("open sample: %v", err) + } + defer func() { _ = fh.Close() }() + + dec := wav.NewDecoder(fh) + buf, err := dec.FullPCMBuffer() + if err != nil { + tb.Fatalf("decode wav: %v", err) + } + if dec.NumChans != 1 { + tb.Fatalf("expected mono wav, got channels=%d", dec.NumChans) + } + return buf.AsFloat32Buffer().Data, int(dec.SampleRate), int(dec.NumChans) +} + +func helperNewModel(t *testing.T) 
(whisper.Model, func()) { + t.Helper() + if _, err := os.Stat(ModelPath); os.IsNotExist(err) { + t.Skip("Skipping test, model not found:", ModelPath) + } + model, err := whisper.New(ModelPath) + if err != nil { + t.Fatalf("load model: %v", err) + } + return model, func() { _ = model.Close() } +} + +func helperNewModelContext(t *testing.T) (*whisper.ModelContext, func()) { + t.Helper() + if _, err := os.Stat(ModelPath); os.IsNotExist(err) { + t.Skip("Skipping test, model not found:", ModelPath) + } + model, err := whisper.NewModelContext(ModelPath) + if err != nil { + t.Fatalf("load model ctx: %v", err) + } + return model, func() { _ = model.Close() } +} + +func helperNewParams(t *testing.T, model *whisper.ModelContext, configure whisper.ParamsConfigure) *whisper.Parameters { + t.Helper() + params, err := whisper.NewParameters(model, whisper.SAMPLING_GREEDY, configure) + if err != nil { + t.Fatalf("new params: %v", err) + } + return params +} + +func helperProcessOnce(t *testing.T, ctx whisper.Context, data []float32) { + t.Helper() + if err := ctx.Process(data, nil, nil, nil); err != nil { + t.Fatalf("process: %v", err) + } +} + +func helperFirstSegmentText(t *testing.T, ctx whisper.Context) string { + t.Helper() + seg, err := ctx.NextSegment() + if err != nil { + t.Fatalf("next segment: %v", err) + } + return seg.Text +} + +// helperNewStatelessContext creates a fresh stateless context and returns a cleanup func +func helperNewStatelessContext(t *testing.T) (whisper.Context, func()) { + t.Helper() + model, closeModel := helperNewModelContext(t) + params := helperNewParams(t, model, nil) + ctx, err := whisper.NewStatelessContext(model, params) + if err != nil { + t.Fatalf("new stateless context: %v", err) + } + cleanup := func() { + _ = ctx.Close() + closeModel() + } + return ctx, cleanup +} + +// helperNewStatefulContext creates a fresh stateful context and returns a cleanup func +func helperNewStatefulContext(t *testing.T) (whisper.Context, func()) { + t.Helper() 
+ model, closeModel := helperNewModelContext(t) + params := helperNewParams(t, model, nil) + ctx, err := whisper.NewStatefulContext(model, params) + if err != nil { + t.Fatalf("new stateful context: %v", err) + } + cleanup := func() { + _ = ctx.Close() + closeModel() + } + return ctx, cleanup +} diff --git a/bindings/go/pkg/whisper/util_test.go b/bindings/go/pkg/whisper/util_test.go index 1b27255ae70..a2fadca5885 100644 --- a/bindings/go/pkg/whisper/util_test.go +++ b/bindings/go/pkg/whisper/util_test.go @@ -1,8 +1,18 @@ package whisper_test +import ( + "os" + "testing" +) + const ( ModelPath = "../../models/ggml-small.en.bin" ModelTinydiarizePath = "../../models/ggml-small.en-tdrz.bin" SamplePath = "../../samples/jfk.wav" MultiSpeakerSamplePath = "../../samples/a13.wav" ) + +func TestMain(m *testing.M) { + // whisper.DisableLogs() + os.Exit(m.Run()) +} diff --git a/bindings/go/whisper.go b/bindings/go/whisper.go index 83089a26f12..023a33d26db 100644 --- a/bindings/go/whisper.go +++ b/bindings/go/whisper.go @@ -15,6 +15,7 @@ import ( #cgo darwin LDFLAGS: -lggml-metal -lggml-blas #cgo darwin LDFLAGS: -framework Accelerate -framework Metal -framework Foundation -framework CoreGraphics #include <whisper.h> +#include <ggml.h> #include <stdlib.h> extern void callNewSegment(void* user_data, int new); @@ -60,6 +61,22 @@ static struct whisper_full_params whisper_full_default_params_cb(struct whisper_ params.progress_callback_user_data = (void*)(ctx); return params; } + +// Disable all C-side logging (whisper.cpp and ggml) +static void go_cb_log_disable(enum ggml_log_level level, const char * text, void * user_data) { + (void) level; (void) text; (void) user_data; +} + +static void whisper_log_disable_all(void) { + ggml_log_set(go_cb_log_disable, NULL); + whisper_log_set(go_cb_log_disable, NULL); +} + +// Enable default logging (stderr) for whisper.cpp and ggml +static void whisper_log_enable_default(void) { + ggml_log_set(NULL, NULL); + whisper_log_set(NULL, NULL); +} */ import "C" @@ -73,6 +90,8 @@ type
( TokenData C.struct_whisper_token_data SamplingStrategy C.enum_whisper_sampling_strategy Params C.struct_whisper_full_params + Timings C.struct_whisper_timings + ContextParams C.struct_whisper_context_params ) /////////////////////////////////////////////////////////////////////////////// @@ -98,6 +117,12 @@ var ( ErrInvalidLanguage = errors.New("invalid language") ) +// DisableLogs disables all logging coming from the C libraries (whisper.cpp and ggml). +// Call once early in program startup if you want to silence device/backend prints. +func DisableLogs() { + C.whisper_log_disable_all() +} + /////////////////////////////////////////////////////////////////////////////// // PUBLIC METHODS @@ -113,6 +138,36 @@ func Whisper_init(path string) *Context { } } +// Whisper_context_default_params returns default model context params +func Whisper_context_default_params() ContextParams { + return ContextParams(C.whisper_context_default_params()) +} + +// SetUseGPU enables or disables GPU acceleration on the model context (if available) +func (p *ContextParams) SetUseGPU(v bool) { + if v { + p.use_gpu = C.bool(true) + } else { + p.use_gpu = C.bool(false) + } +} + +// SetGPUDevice selects the GPU device index for the model context (CUDA) +func (p *ContextParams) SetGPUDevice(n int) { + p.gpu_device = C.int(n) +} + +// Whisper_init_with_params allocates and initializes a model using custom context params +func Whisper_init_with_params(path string, params ContextParams) *Context { + cPath := C.CString(path) + defer C.free(unsafe.Pointer(cPath)) + if ctx := C.whisper_init_from_file_with_params(cPath, (C.struct_whisper_context_params)(params)); ctx != nil { + return (*Context)(ctx) + } else { + return nil + } +} + // Frees all memory allocated by the model. 
func (ctx *Context) Whisper_free() { C.whisper_free((*C.struct_whisper_context)(ctx)) @@ -355,6 +410,32 @@ func (ctx *Context) Whisper_reset_timings() { C.whisper_reset_timings((*C.struct_whisper_context)(ctx)) } +// TimingsGo is a Go-friendly copy of whisper_timings +type TimingsGo struct { + SampleMS float32 + EncodeMS float32 + DecodeMS float32 + BatchdMS float32 + PromptMS float32 +} + +// Whisper_get_timings_go retrieves timing counters and converts them to TimingsGo +func (ctx *Context) Whisper_get_timings_go() (TimingsGo, bool) { + t := C.whisper_get_timings((*C.struct_whisper_context)(ctx)) + if t == nil { + return TimingsGo{}, false + } + // The C struct is 5 consecutive floats; reinterpret and copy + arr := (*[5]C.float)(unsafe.Pointer(t)) + return TimingsGo{ + SampleMS: float32(arr[0]), + EncodeMS: float32(arr[1]), + DecodeMS: float32(arr[2]), + BatchdMS: float32(arr[3]), + PromptMS: float32(arr[4]), + }, true +} + // Print system information func Whisper_print_system_info() string { return C.GoString(C.whisper_print_system_info()) diff --git a/bindings/go/whisper_test.go b/bindings/go/whisper_test.go index 23bbfbff01a..330981fb453 100644 --- a/bindings/go/whisper_test.go +++ b/bindings/go/whisper_test.go @@ -19,6 +19,11 @@ const ( SamplePath = "samples/jfk.wav" ) +func TestMain(m *testing.M) { + whisper.DisableLogs() + os.Exit(m.Run()) +} + func Test_Whisper_000(t *testing.T) { assert := assert.New(t) if _, err := os.Stat(ModelPath); os.IsNotExist(err) { diff --git a/pkg/mod/cache/download/golang.org/x/tools/gopls/@v/list b/pkg/mod/cache/download/golang.org/x/tools/gopls/@v/list new file mode 100644 index 00000000000..1847373e96d --- /dev/null +++ b/pkg/mod/cache/download/golang.org/x/tools/gopls/@v/list @@ -0,0 +1 @@ +v0.20.0 diff --git a/pkg/mod/cache/download/golang.org/x/tools/gopls/@v/v0.20.0.info b/pkg/mod/cache/download/golang.org/x/tools/gopls/@v/v0.20.0.info new file mode 100644 index 00000000000..08057762fab --- /dev/null +++ 
b/pkg/mod/cache/download/golang.org/x/tools/gopls/@v/v0.20.0.info @@ -0,0 +1 @@ +{"Version":"v0.20.0","Time":"2025-07-28T18:28:48Z","Origin":{"VCS":"git","URL":"https://go.googlesource.com/tools","Subdir":"gopls","Hash":"2e31135b736b96cd609904370c71563ce5447826","Ref":"refs/tags/gopls/v0.20.0"}} \ No newline at end of file diff --git a/pkg/mod/cache/download/golang.org/x/tools/gopls/@v/v0.20.0.mod b/pkg/mod/cache/download/golang.org/x/tools/gopls/@v/v0.20.0.mod new file mode 100644 index 00000000000..47caff989be --- /dev/null +++ b/pkg/mod/cache/download/golang.org/x/tools/gopls/@v/v0.20.0.mod @@ -0,0 +1,32 @@ +module golang.org/x/tools/gopls + +go 1.24.2 + +require ( + github.com/fatih/gomodifytags v1.17.1-0.20250423142747-f3939df9aa3c + github.com/fsnotify/fsnotify v1.9.0 + github.com/google/go-cmp v0.7.0 + github.com/jba/templatecheck v0.7.1 + golang.org/x/mod v0.26.0 + golang.org/x/sync v0.16.0 + golang.org/x/telemetry v0.0.0-20250710130107-8d8967aff50b + golang.org/x/text v0.27.0 + golang.org/x/tools v0.35.1-0.20250728180453-01a3475a31bc + golang.org/x/vuln v1.1.4 + gopkg.in/yaml.v3 v3.0.1 + honnef.co/go/tools v0.7.0-0.dev.0.20250523013057-bbc2f4dd71ea + mvdan.cc/gofumpt v0.8.0 + mvdan.cc/xurls/v2 v2.6.0 +) + +require ( + github.com/BurntSushi/toml v1.5.0 // indirect + github.com/fatih/camelcase v1.0.0 // indirect + github.com/fatih/structtag v1.2.0 // indirect + github.com/google/safehtml v0.1.0 // indirect + golang.org/x/exp/typeparams v0.0.0-20250620022241-b7579e27df2b // indirect + golang.org/x/sys v0.34.0 // indirect + golang.org/x/tools/go/expect v0.1.1-deprecated // indirect + golang.org/x/tools/go/packages/packagestest v0.1.1-deprecated // indirect + gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 // indirect +) diff --git a/pkg/mod/cache/download/sumdb/sum.golang.org/lookup/golang.org/x/tools/gopls@v0.20.0 b/pkg/mod/cache/download/sumdb/sum.golang.org/lookup/golang.org/x/tools/gopls@v0.20.0 new file mode 100644 index 00000000000..fd63ef44621 --- 
/dev/null +++ b/pkg/mod/cache/download/sumdb/sum.golang.org/lookup/golang.org/x/tools/gopls@v0.20.0 @@ -0,0 +1,9 @@ +41328958 +golang.org/x/tools/gopls v0.20.0 h1:fxOYZXKl6IsOTKIh6IgjDbIDHlr5btOtOUkrGOgFDB4= +golang.org/x/tools/gopls v0.20.0/go.mod h1:vxYUZ8l4swjbvTQJJONmVfbHsd1ovixCwB7sodBbTYI= + +go.sum database tree +43548952 +nX6jrsdthQ8kDPrwxKP2h/3CAC+o/Tzl00DK+QUiDxE= + +— sum.golang.org Az3grtVCRqi+V2+TLDpRvXhgZDzixz81eDxCTse8HVQFKkxvm3+CBHWwrkincl2+LzuJetgKkMzjLg5M1SI/XmJT7AQ= diff --git a/pkg/mod/cache/download/sumdb/sum.golang.org/tile/8/0/x161/441 b/pkg/mod/cache/download/sumdb/sum.golang.org/tile/8/0/x161/441 new file mode 100644 index 0000000000000000000000000000000000000000..20eff0ee6d9284c82c0e98d3804f9791854c6d3b GIT binary patch literal 8192 zcmV+bAphSdj%vR;)I5Uxx2s1R{E`}CgGoWpa$z1`_VHr5Xo*bqDD!60P{&nKyEaOu}g9`1-<@zKxWDndZ?G6<8GoIJOl_BDJ zTw!bs>AeP8*Y?43ZPk*$@=g_E$jHzPp2-L?wGq0Uw}A>Ml2eoF1AZr(#KT**d)Gif zn&U}6$UP_32NE`cNbzhYJHg#|x>ql(bq1*B3Tf|E*ZgwXHsbrGcvSiB%Xy4Ze5Xa- zwLc;-1LxZcMrNis++#OA@f(jD>8&^F&)wI&s zxetlaT&wmppG;EA zH;?U7MA~=M#J0bH!Z_Z`tp0i>SK^!<-j0{($J;5q_3&FkZQNZ*-WHHJosP9hry!EI z?dQl5n(5?WC&gwaS4XTm{-1mgfkOn2#Sq(T9LRJhZWDhl{57olH=e@!y)PvQfZD4K z+uq5O&myZvv?jhRpH4@)IJNgwI`GeW(MlGp%>f6t7-9FRM=_1&R$M&%kHEZv&-Wa; zCEjAC&i3k4h^G>8K5zVI0-l5b3}c5B$sJgA1FGyIS;sV&p#%oh@X&}M;Q^3ZAiqa_ z`j_8nz)G0_1To@cf5c^P6PAn=ards`hT<==-PjCckh+!nw@Bu6!q}0Z`n(84?I8Gw z*1Lk&g5#el6b7#cC4a&Z%8k$&*F}g#?yJ72Y$KFB-B?omz>>3A?nUDy@UhSHB$!xIG07YXOE^%wbF_$xIK^_6~P{t?_>_-du`nSFTlYoCcu>0 zFS(2Kbj$I7Q=Y8Y zxwd?Wb!R1h6In2VQfsif%k1#&e!8FBMZw>pe*uB9Rm!1p>4y2r0MlND2LO1p-6MQ0v=KE0q4AYlccTkN%i&7(+Z+CM!& z`Y9-c_DX?h4M1BbyV2xBcigf)c4r*{Jl8Ekn_HUq&9-5}C#`3E?7Ux2>p2N+Uv9o> zXveFmuAoudMk;U7UEIj;&PbkSk&A^P>mTiVtQ>x^=Z2oX@NKR|=pZIRt(4%8mpI8l zK&`jIK(Xs7%W#2$+(!)j82L!CrKqZy#AiXK)x`YR2>B7Tqc=?QcHRu!MkmKYd9;NQ z(+jw_!;53Ghjc-G|ylac` zsUyJE*GdER%`R%hCI)PsvaVY-_jOZA!jz#L=%F6e%8N^CEeD4;bzmly)=wpLTD(>% zd!qaUr(>rQx`nAx7k+TTQNi0rSDD{Q7P47p^Vm4}D=lj*v*6dxYTqX&_`UW=;RezhLp-!-RJCbILzGQ4 
zRJ(S$+r}96Og09k^tSXIsxeJW5*y?i=kkh5;qF<==HuGd-_oQCm7cQ7HT=`y0rc^_ydm_7ni-8y&c zBGS32A?{>x0^O{g6Ru4NiJXl7-x8LPJ4gkOwZc5@EEW>!Qt+J2>hWeu>t*tmM1;>j z@vYw)XUY-q4Ov;5dO7I1H3b5H#qKgi$!9k>7SQ-@(Pt&SA#-(2d5TAyBN2i|z;I^{ zsa}x9q2g9mnqSnK*v}jbKL(;+T5gG^Znjjl;Ub7-OjI zD2wCesjV2H>IytL9^v|6a9a-6gpMC`HnVT{VtlrPB8ULo#Fm<>(57MvxJ%T;@3V00 zCkH~iCm!+~e2Cz!q!aAt?dWFKlsF-thUlOMUBMapr6n$@_`yR7KqZV|%bwy#>gT8p5>YDU;}MpZlxp%?Q1(<4+CV^3m96^d%H79WDeZblh?%1j z*IV`!!S!^dr=!@>bN0OC?Je0n8Py3OAlhf}S_Qew!C^)-{HZ( z`04mSeO0R;mdj1%0zJcCH~sh9`-H@?yz}v|XXZ@UyY1REFtg=Qhwxj!^V9^lEK6D; zfU611fYK~p8@qiSUBDB(D*v5aL#%1*M;4U!_1?LPWcI$_%``QQxtdubD)SL4swqDu z091~YT>I0v$FxtuK^%p#!OPI6N+BnlA~K?iDkDU!>QO?pt`A6GGYp7`=|)l)9x<4E zjUeQB8$#Lknr=om;C!SMMDw(&7+nUZw4dS{+%&+U<0htUkHS0{Yutw*29Qn)v;BaN zh-QJ%;dd?E zSRUA6f=nQuv%Qew*{H98Cx)y{3-0h&Imso{2!877r?FubQA)vtDn1FLfM$Z|8ES^l ze0ot^6}e1?v^e}*eMS$R7VQP1)@O?=8g&+Y0z)v5T7Xx=C0!XqevV@ncx?ggZQ$ zV~YU1)?%qvbAA&xqdSOH=XlA11LGRLY$-Y5u`>Xqscd%AbXE161f65k!L~u8RoSDX z^tuw3B1z>3NqsSQn<||=sdMOEE%5$sA@XnXJbe2TtBhw==Z1kvzs-(bsP2d^08Fo7 ztWqQk`oP+GwR2M52;R1Cl6f%ny3Z0Xa<`beTQ`Danr@Imn%trdj7Nj)5pve5SN@?V zNw}wP%;wsMP{+jqK;?brKI$J>U`Dt=Xx-{yi@}`Acmz%G}-9Q8I2~3Nf99fTd z6e@T9uFtH&Y|!3wSJ_h^2xwC}{arGuh|$O8lG_Oob1Zw+tOc4(VKt*lLB9}gZH?MA zZGfjPfe6h*6N_aw1P);b|A_*vKu}`PssQ}V@vT(r>wK>sZh8k~ZKNPhZ`}D3`Nz8* zkQ@mxh799^`IkIOIK-=8E#8G~KeY^B7J{5>iGlmMX<`bTlUfK@=$ie8FkB>K-`I0QS!jI9D>j8WxOZH`Fnyc7T8~pF1LzC znSl#XHOXHDk9-z&%ps1o)c*^{VEpeO=~6A_Xl#8zRV^?cTd2{a21Ixmq2+qcN0!|~G3gzNuvnk1036TC} z;CWlMY*zE3HDb4mIBnv_=J*rUQt^SQ(h?#!RX%0PBqR$S@NyA4tdEl~q>Rt6@m zLYuvgIlkphJ&JqpFF+d+=+F-iZNUTjbS$r_Bp0%I>9X4@4zMYo!AcG%;-jGN6XQ_r z&2JX`jEVerU%~>>+;Fkax(*=`2E*GIw)shqNc(V1)^Q9B?i+fpqRgZV(?#MXHA1@@ zy(R=t#+6`>6s<@xTT(YKqb?A+O&{ocsC8!d+H$nwLC3-AYzMLnv+QvAd`rQ-w3)N# zZsy%89^s$LgA-(7YZ7IEHe_a=uRm+LnrN`G$3fI&|NeqAX%iDUd716TLo7sXwFZ@r zc*Q?8e;(Wli^)jh;n=by(}7zYnX*G6w&s;bd!`0eH(JXw;X{8{d;)iU=a>>i3^TLz z_uQ_UWw-qPH2ZmYuTL7B z?RjT6Jr)7r;Gt}uU(jy!Yp8`?{aDDMXuJA>DrglmY|xkoCbN^c;*uD`6WA*y&Ug(x 
zkxT1Q5M|LcCCO2K7vfi>(}cX$|ENA|t76!Sypd@ZES_ON)+M7x$6}cAlXA8N;+tQb z)j2AbK;Dx9tE}QAaUc@XvYRa~WyeMYo;-PvcK%HamqK}(4$=I#2WLZ0-IO1f6@h%y+8vHE>-lV2RBl}p12E*cq-+NE+uNhuDVQ0EMPmToiH;U?qB(3#udh-R z@t^A=y1@4o(cM@AcA-oNIR|^RITl$QWpH}@U`uY(jN35?rd3# zYTisO_FIs2*;juS^lAnLKZX~lhG-G1QGK_3d^QT)>J0u_Wz!*pekxNRGO0(*;nOn@ zgArAK3G5BkF*NH&mUqjB_)7h+Yx)jA5X%Ag#Vd z;>38Cw5h71h?+Ct(wudP)D60zH{~~a!PI2bHho=;=hbsJo~+dpE`z))Jxl9c8 zV1uWHqAmQPgO$#GQHaJRa~G8V4^kBak72WWoZ7LVzd(WCbY2ffM!{gQtIp}RRn(VK zex5elCOxFeqE+xN3wPaz>KHe3uE#HY4w?EYv}VKpB&x-P;+BopJSxG4y{<}YtUT75 zK~Z_th-=E-ZvSWFa72ed(p5&~Ta|=a@w%I2jdFn*ISRbpja&@>(vp~k+V=-AnKnN2 zoKWJLGT`#N1CV$k1dI%~;*M*(hE*7b`e6+rd}ZR}Z&+A8pny*-b>R{I|5NJZKqcU1 zBJ;c=dB>o1N~71~L6n{$Vi&f7PmoXPV;yRrp>n4g3B46^KONtSI(znZ0BjxQjTO&`!^5URF!{)ed3t?ni{>eA`MiL}-U~f8mPZzt*HbI6 zJYrjIi6K2DVj*Aylh7;3p%~QcO3{&yO=oM*ZJ7zdo#GDmSFo?wH-Q}6GSAVe`*cg+ z3KW9p*D|RCQp~JEmwsCJ6UCOSU0t5uibHgcGbjv%_|Xd*1>yO-f~o1eh--)=M7x(dTjJn!FEc%yY(#r@CcB(@JAKW?z8zi+&Yu?0{6YzBkPh5m{yBW+9TYlBejaLEQ~u&Uxv)~7GxG8G$mjbR z3$%$RegLU1v&~)oE33Ah{yzpyEd(Dre@u?q5So_;LBf6SE)A!V-f|!WMeane{?jBP zO#xh(FvPVOkMc3il%=ju90+vnk)voDvL#wE$ zcM0ZZgm{o};I}Iqhu&q7*>fL2#aJy+)m}V4Dajj5?jO(S5baQuk+}P%Y+oJY+Zdro z>4WMnL29}TE1{=OAFBsXFY@GaGkH~)Jj0|gB9gu8ik^pzUVZXf0W>P4YX@v2=ktOH zGTq*YJ_SK;VA&!8ow4y&C*9*^Yv+ytfh(c^7&Wnpj%QamYDtns(0oX6LL~7G78Kiv zhr4fQ%+I{%vK1m`MMOi3vcbMxMj)REm$2?Gq)PW>51{bQ=YG8YRTD31#-a81KF=P6 zE>jQqdrfKi?0f=IwaMWnazb!4f2 zepx9IMIY|)H_;bTLv!D~05HMFv1ks#3VErPvL0PaCm*b=l^t@1;`J~Pe^km}dgBBN zkdd7iB0df6;nu^bKPQ2U3q!&ABOYNLIgF4kbQzom5tw?`F;GBvBjS;f*jhM)fm4B1}_((bsw zJ82Lt*r11mHMI2Or|kdp$4M-krAZuc;J>MlJthY*BV=zvDHZflWI1ToP{QVU7>kr` za+?a8^=rCM2ElbK{&UC{(zSpwI&G7>)|ORquHad#X}n|bv;g&$3)-`K&V&n4jZ*@# zHIc<3iGT=%*+9obe>GM6^cqOj)%W~~(NLZ1adGWx`v>NilpH8`=CofU7y4RC@o>s? 
zkKZMxKLvnIw8w@v<~hUS5ghOZZdM4xLdJap{Ygk}R4@Seq_BXjgrFpV#nJkDwe>@W z81p3#EJ)%#>#gsL$I&LfQn32PMizzz3Xf+YY>y1vJVs8;e-p{MtN71bH1MnFEGm;NyuI$g!E0@*R zI7v2^2g7PogKysubzEZITr;O*!NIvE;k}WUoIBJEJgzJh_F0=11^Zoxs4P>FO7#cTJ*A0Y0@llJN>dXQ{$i$g8YINps~ zbWOXEkhmHnm2rkJe~bgmttu)f%(`67+pjES{G}vAuZ`+q&iZqr`yh$Vno!5gGD_($XmNCaw~zeNEwAAiSR0S}#E-@~e|mHVAguvoL!@HSa!p7lF}vu58ZbwowX#E9fhyK&k#UgP&eUT? zfOG_bLbmZSEn^YZkuK^1HCs#c%7mXbam(LZ0!D6)35Ia%lnoY`y>4mDn0g-fHtp*G4VBV)MUoAaG=Dga(JL_IQmgR3@R+RpEn()TKH(&nU0IlV{D6*CMteO1u^9weFY&$4%gM>CRSAdR^0bm_awd zk2jcB6KN+k2viO@xtBWTrIy7qbz4I-sfuhGqQDSD*y*&rSF$EU*VgVN@jKb{KoFlL zLcTP!H(T8lD?}N0y@!yMio?w@Gq)Jd@f_5ebMrE-dj7;KST}U z2%Us`#^Nkxg{BfGrbC8$K`ibPYA5+FxfJ?IhhQlh)N_0j6#dEb>-hMZE7P9wGP`A# m+j=@WHfaAa+(%TMj9~+C#cRBelVBd-{u}XZ;c+bkBQx6e!_Phd literal 0 HcmV?d00001 diff --git a/pkg/mod/cache/download/sumdb/sum.golang.org/tile/8/0/x170/113.p/24 b/pkg/mod/cache/download/sumdb/sum.golang.org/tile/8/0/x170/113.p/24 new file mode 100644 index 0000000000000000000000000000000000000000..01d9b6eed1150f85bb8aec986eb6f27e09f803b3 GIT binary patch literal 768 zcmV+b1ONPTHX67i26i+QwVq9z#G{ZL&Y=NJsJWeIxr`p7Jkmv9YARXIRR@>k=?2I9z7e(f@QAuAruFR%^g>%H_?M(GGMQ4ClZv>sy<>#~^R^iMO3G-)3cVf$PPO zR3F5=DM{+X$1}7W-m^ zN@1H0?Uq@N-fXU4j-pGrcCI7J*k$Tm$Wb9^EkH6P&?4! z?47Ax+)ns*f8(jY{*eH+ygL2n#$&~o_U0$P@$ zE=OA=sZNI7fr(rE?LOusJkB~@N~RP1H9_EmNiy?&*=8 zh<|3g%J)9IVV7_8d+dX*62Gy>Ry2rX$w|cI6qPH4P05^7pvgNoRjtRUQOD1-mCQ%T zJgt>NmPH^Yal;dJ=h@rFTdrTwo;5p2@+sxtN(XyX%s>lB>663S$ofhm=<{`>D29-p yD52Bvmf7!(-@21ssKXvOBT;YrJFy+WAIx;VTu4cMVz6`Lw1AG)sN3PgL+}rYn1)pV literal 0 HcmV?d00001 diff --git a/pkg/mod/cache/download/sumdb/sum.golang.org/tile/8/1/630 b/pkg/mod/cache/download/sumdb/sum.golang.org/tile/8/1/630 new file mode 100644 index 0000000000000000000000000000000000000000..e2c434fd07f5707f4fc4fed134f94062f8bf9970 GIT binary patch literal 8192 zcmV+bAphU99s1R--on}fcVbnECc0V3R9wTNuo0jYxBhS~QSgRShVtXkfvll-^-y&! 
z;w)V;IX02`)u)}62y#4x9#yDEZv>%9Y|JB_j1y?*o6D=dWf?Lt2CyG_s!RiAlN!Oh z7m7mX+%R(=E|%poSM&J_JM%vXsumZ5FQDVgsO_ySegy`Iqp1Q1j~@U{6mB7S*$22? z{;+(6L z!orvLbW%PJ7P>#i=?V~{U#n4Czl!Sv)xV2=)q@2_YfhA*ln&57UA(-|p$lw{p6gVi z-!jMtEoFIoE+ly|vv?^%$CKe#x)4TJDIbagNL%MmT$9dWQ0;hU8K-<-H5?IL=h(ht zFO58}b*h%B$tU#Iiv1A>3PZW^jig&DS>po&GG&BVuJ1i_hXB7T|@Vq z5+qNXi8IpB>&J)a483b9K{YGaf4zSKC(YorqqHNC*2Caetnv4vnH?KBgMGK z|3%W!3ls+9mv@{LgOMK{`ZFu1(bu#kpQ=8?61Fr~%QhPV9rqY%ZN*_rmTJd1t;@({ zlJTpM++bVw{Gk8nR)CLJfh=(c!d~7=>R8!kRVe$|MP1q6<^FF`qmba1;yX3F4)QD` z^Kqp@o(;156}eEgk>ZPH3v+(|s8s8s6QW*--kJ1|YkSH1Do%kgF^pDlgl-dNXv*)l zr6e-L@{Iv`^@8VL@>`&5E1CoqC9`5fQ@0M+s482lTEoLHAn{p?qPp%X;I_%)oUe8_ z6Vq>mYsWyIj$ybDc0QL^05Vb^89wJHwU}(5q4fLD_8z+bvM-LsNDJVYH?Y$LR*yfe zc;{3$ivsX<^9B2-Z^SR+9aglllC+hiq9g1HP;>4*{(y)Pb@`nF*6SIbsGKx47aaaCz4OouQ~Dqy3im*&M2v>(E^OBrkv%UJ_^d2 z3+Yx#yv~6ZI1_$Y5D5dMJ9>YnA|?Nn+$fH>P@X}U13M$%}VH>h`r<*SXn z^4_N=vW~wh-%duJRF-C92?uQq*(ObzoLVTDP3D(LwpAehfd|}v7N`T&Usw1}0Gc4F zvH*-rM>D#fmm%fwz~$)6@psS}w7+Sr3ZdSb`O0K{C;U~a$7YJJ#D9`@h1kT#_jhc} zYi@gx8KKzpUyY&Py9@DsF+W&=et=HyHmE72()zN|rbmA7HqHTb&sG$Dp-qGAP-5VF zi26bs|L|={E8W&n-FBixSL`>FnYzAnfppcHUP8_5sS2H#7?*3!n)jzN=#PHA-FbI@jc`b>>NMU5Pskdj~7KsxaE&n{>vTDPK!`HE;z>_+rUkF-Zr zxFU88_zPW!2UYB6$&en*TRlhN2H-WTjaF`(`x9tn8kzDsI8pIkFV!W3$Oc#@aCL5> zH%=oGGeISso;=!SP|_q8r~r8Tjd(c5E_x<=u`E#ynt&b>D5^lZI)$PwWPza} zH;(l|fKKJoC3kn^;BK>_wuQw6(6;;KY+Hs#_XxdoYptg=+IzrO?UJI(tGVwR0Je61 z(<>$oKp$%rH*y_ml6NMedBs`s&Hqdx0gNkUYcaFxH;})#_-}jnXh_Dh-qS{3)};&) z%?rw>A5Bvhokfn_n&PT3LyCE1a{EzXqm^2=Fbup3A7L2%rO~qUgQ%>B0I@TJI%y^g zmi-)I@~GQzFD(yd#EQ0-H=>Qpa4G7xtuU$!LcM-npiGkcULh^it-p#8eB z?@ny}N%mk|1#33Tb+?Na* z@|nQSCid|;{er`uYDKyW#CLK6;b!^1zaDmF-EJXB^jDqHvvP*IZvR=`OLI)nk@t-a zPMrT*jV=ez^m&F+83w$4NaydHa%pPp5}we$S20x^mrcCApTr7eh11L{>lslOhD&Y$ z<(Qr84Al=FT)<)!)eP6r$WhdD_i^K=(oQW_k&j2mNp-G=TS{pa_;GU@8iOm1Fl6Q| zf9Y<_R@?jGq_ul_wzkt?32MppP;hL{5l_d;s>kV9`lMkCSoWojqvMi$Wtq4|M~mm5 zU0up`aJS58~B16UP>c|TCZ8-ef4DOVEG>rF~Rkg z?$~tt0I`Ii{xzoodHsdSHj6#)=?ttEn_G_~NwgdKmko&R4SBX8%h@dLKfRCIO!(L5 
z;(H~=#t;{cD5ZnFzNzt#rwdyD#wG4g^9NSgvHyz3RWxT{bI=g)vmCH$b-OCWg*c(S z3&`?pGzj;w5C9yvIXRuv1?ZIbHz!Les*G8Tlq7j*)H{D|HW4L8Uz0euVRg(zhg z4-vpMyPysq8XP||BqkjYE9tkc1?JSOv>+tOSQGp@3n2{sEml8yn=Km{ zCNYF29nIE!+80#2E5KVf(r&-h;&EZ@5fXA~XCu~q5X zuZROS*>LI9O1G4)T4VXgwrVlsohrX#+Sc|y!y@`t`3rG1_X{wo=65wfCZB?{0MDo@ zoy?QESQO4fc+0-SE~KVVaM`Ol)Pv41zs4;h9_pqbNWdw$yNbDmX`X08YRT^+iFg%& zjF~!SkcP&ObJ9XI1#Sp<+jbA6KNJQt4kKpPs;FjLQ)PY>P@UT^`SV~GGS>YU)bUHS zX^XEL<&KK|+X%bYts#z)kn1j*^jC5>K`4NkLrPId;dS(u2#cTA`uLqKcF6(^tOk~0 z%+5wEB2?D&p2y%A_W}(8tG|JCoD?brSa1}$NY`dgtan_4*?5?m<}$3E9DT{tMM_yB z6?!q#>LQXRyyoEl!#fGZ4i17!xD?8%SP*brJt0{t}%>=~e zyiNYeZ{?Jw*~gbT^h#`u@Sn}3La7<^O=`-5ir--KVha}fO0yYhNk(dw6V{v!p&Z@g}`45I+@eT%gpO?={sh+ka=Q?EHFxM`W;>! zNGDpI2n(2ycW#Y0K`hoSH?CiHH|7iqRm+R|+u@WRq-PJ8iEx=UFKU@(=Tkl1H7;wq zQX9I+s<)3XEiDf;$~g{dU=<3bH0&icbfMgcW`^1n+@xEruQtlr+|JfD0WAuINaOzsig_8mQ^viGGp+dY7;V{9M5BY zpbEXP&#Ms;R2!{0GA!t5=u91(`u-KC4xBx^#MW8C7M6y;LOlpF##rlTVy{Tg@o*F0*i4;P4r1V9*6&F%NauZ(+AS_In{mM?x z_<7DDO)*0&zh!-ywpje`&8s;vRPfYHf>Y$*?$EjTIt_p;*&0UgpmN97)BK zjZJUfVy1p*=&kcH3JJsVv@HqIw3LfsBhe}GRAM>(h+yw1Uu~d`k(+K4T~LDkU|Agb ztTiqQ{0w43CBAP6(jWC%m}fl1bT;~6sx4i+vFI*N(XK_6SDxc5dppXIc+}d{4@4Q) z62CZ+y?^?)NUW+;>q_e#E<@?d>r$8ZeamrX!56Xaa1{KX3}Yys{H{aOJ#h*F@gXYEEhgms(kVKI=7~nliVE(FRtp4(8(@C2hhUY<@U5$GSy=6*jjF(^T)1+$IsT+Yv!B+#9)6B-NK{oc~nw; z9zw8+a~8b9SK!-<-nTZo!T^D0hiNP+4`u7I(W|@8Z>h5!yB$&2{*)f}CySZVc@419 z`tONZ>>+2!|C%-zwE-ZRjWw>M?PyJeI?+gv5y>zEg}yVV>O_9D{Q_&FRXe893Vlrb z^ccI@w7;MFidpC~p4R_JSG*+9%G;#a6Iu}}tnXB;E!wWOyj#t)}_a z(dPC#_2=oLmf4kAp7GKhT$D6ILwmpNH(LGtPpoOW?}qEZ0Zr{VcVDW6?^Yvz*(YF4 z9byf{#6OUYANGO-oEh(nc-RYjtO)2$01izU@(qFwt(E1~U0P<)Zn@0hx2@t9_(v75 z1I*HZUq)c5bqUT@vN_0_YN6~L9$on;j9q?!LJO3y1Z2sEs^joifLlfTr{8wzcP_6)V4- z5UPs=!BR(JCVimh#%$v2qu=!gkukG-q%Hd~U)K*clil)6?hZcQE%7_OTvG2ebFFka z=By6mohKb?zsj*L;n7DJ`^X2GLH_j47gd3sJ@EGh`-BtobysV~ajMxXa33Yq;Joc< zFPQF6HdMk#$XT%mVbqfPZVKDJ2h zWE0j~u3s55PImNM-nbH-PngKOU=%1|f<%+}I|`F`rlUXY?HV#uTl;BA#@6O@jZ+&` 
z)5mWpS)0<&#MQN4`O3GA*%L2)mYB}4(M(*NqPxmuHJ+HYlN$tg0E1?&3mJh)0QBh# zhKhFPDJH#(R_^ODe_8*J>L4_);Zxhwv9X6cHQNHF@C z@!NO)-J*HBJx!ox zoM@AI$i83@jB?2SqpcYms-_c~X>SZz-{N?*N7KQ#^*7&39Jg9Dfcf< zI|V2bG3T8B?QbM>`Yx45m%Z@1KH{PI3CLX^m9W5$ahUc27%?Xik(y`eAlNqZm>rIi zfbB&*gemus79epvV2=pK|4bJDD}8fo4Uli_yzyt1kRGW6qf#%SYOnvmx-oq8n?KGbw2}c6V!+lvP=0 zH<+OZaIjEqKcF39Nl_!JjR^2+v~i#om2DoGe>dBG% zIAlRUtMe+NCS6pzN)xG=IpaQa`N~J*&m&H3KX@Kb`unT=+dE^E561(jMC_2y+FNjp z=ixCFVe6q&^M%e@ zh9QX@C-_b-abgkmO_x$R&6KL?_^SBxe|7=Qp!OcX{d$qsikbVL7q}h8J)#A(Zq}SO5F*v7?hzNPsvLfUpl9V8?}i>!iIBYoh6r_PivWt(ph`RNyKrQ8 zNE2(zl6~m~BMKee8Bc=r#pMz?7OWYgd@a7~EMg)bPKySep}%FdnBL|P!~y+2#6O(4 ze%Qcv>FE9>oa(O)O`L?WHrueAiLe|B!AQFY^*;(Z!flcR%F{!RA+*B_^2xL~?@Ef&(q~hFyB})%@<7hbA8; z?*%5(@_CS>2-LJu!QAhKy7dg{=Aq8nG|77}by$9MY69^jHMgkZ4E79oI3BAl|Z1D{~wx61$eKWxeaM z$Rr}oqQYb)V=QbzYqZ2f!3R}tDWCSqB0h+FVyF3U64Fw!-A~moRU-llCKfp_uPIIx zihyDwGdc^+pI=I5D`j<;m_I1VL@V3VMjlU4g_n{sk>9)NOvwr%M9tL4*B~0U<)3d6^ELbf&DmcY*_${?MAA9g66<^n&7IDRb`L6w} z(#LswaV>)H6L#Mci>IEh0DT^^Zz0*2luKox%`TFR(=YKkc%8)P0O;S^4#``3M0amN zP)ey+II^(cigZAmfs7cSEANpb zQbMAt(yev%ywG%~u=R|h*3K>;G2Y7E3qj;9q)X}A;}TaWdS|QCxJJT-bK`tIUje-A z3sMEYtTrr~>c|Mit#H0jonm0>`*UJOfrvfu-?g+RuJHm5+AM#tmwCSJdk%AcK=|b# z$wfNkDB)xca_)6(huDcKRXSmUEvl&aLci3mDrFR8sBtJot~o-A0bUn64%C`^(n3+O zg>={OkP4YwTH5G5d-ORJP*NY5)55Gg3V1$U;sr1nm5piLsb~yJ8Xh>+c~YwE z7SO8}@PDe7Y1zR&Dx*rSm;m$}qEcEvSRptJY&0Uz%OIy3S@8Aw&jEfuqao2>7 z-btL+SIi*HngT}SM3C?rG&EaED$WBOjtk3yQYZJ-?zL)OOo z{!dLNrMLi8WThY|P?)#F?;S zG^fmt1CWfCWph(4`$oq|C!kLQsu^@&(|RjMfcPh^jS97zNS4TLvg{}G8~yk=#=ALnL`?0$*drGR#^Na*C7{vP=ga$ ms+`}wu1yxKluQ@*KJsliU++pRSnA*k?x{Fz{nOQYl=th`AQNE# literal 0 HcmV?d00001 diff --git a/pkg/mod/cache/download/sumdb/sum.golang.org/tile/8/1/664.p/129 b/pkg/mod/cache/download/sumdb/sum.golang.org/tile/8/1/664.p/129 new file mode 100644 index 0000000000000000000000000000000000000000..ff565c04c66b068c0a3239c483c93f32814b69ce GIT binary patch literal 4128 zcmV+*5Z~`6oQ@6D!;`F&zBS-iS#gLRFQD@f?DLwvh8?+8sWt!x zZ&u)OJ59fY7ef03nmcMa 
zzM6yn0+EQXa_}14u|k2t4&`{?v;o2~>b*pHCNpNx5k1YTg^D?UJ5b_EK^^_Iy}iHv zslk(ua5@)A^~vNwv{W5R>{B)n(Cb{gAtXz_gRi!RGo@TcTtp$icKL>sHU16|!PY%3 z$RLDQ=Jk*gxn?sV8py;5z!P)gxeoYjBOLzFSJ@8OmxVYxFG^1gp;hPnOBq;Ubx<3Y z#z0sQwkMSgwWFJ{mdL6#^Me44>!B?gRy`C443@Z9M-scppsvg&Zg{YqDkMu$5%VL$ zZN98}Q^y?T)mvWQVuIt&C&vQuS3*z9gNZUFu%8l{I@RFDMbCcrx_{1XM;{;ETM86pva02JppT6%_MA-42w$mn+!DVkveW!cinWEy73jTB!v zBT=>8x}{{e{&8vMrb8_?>{o|UQ6^QnkL3^P-Y(54yb$SR74DtU+d&*l6qW}TSInoF zFGWP}cJ^csqK{zQEv6cRdkb^>55WubBRW!(pF8y95`vMfpw?624cTe6nXY^0MCoTi z0Lz4=HT!|C?_L=%bme#wWM-8bKLVpq%7uw0-1C<6R{Laj?SKVFgk+xDncFN!LszuX zoDor43*hd~)()+BXq?mnP^|x0o8Wd_Lyg=4zGR_3{s|1q-t*yYW$lj>x>`Vc%`b@qZqe)Arsein92lzQt1=&F- zM()|WEpJx!zBzL zP3F^i<+-*(QIo>bn(BUjHpMHB$OYD;wAP*ITxQV@q3b_!KK;&oS4>++rU z>>Yh)1*P#BGXo^j4vrMb>L;6j$(XfW=RzJgI}0;ZaUhV8{IWQwYIs^jNGanVn~BD5$zFEXdxy)0n;G|aO46P31l3&n?WdP9kaZ<5ZR>6|rFExeU?~x{ zUnm)+fQgs1NH#9EkObFVFQ(+l!>~k0ieF(6$R5$^ZK>_|?qgxW9}jd}NW!qVfM(^eKMDI6XGIOl@`BIw$q{R9^Rl;+h%*tOWyrO#Se$rlM#LGi(OPvxJXJwFyh&sN1>S_f_TWs;vB4at@auiePw@EX!I&&i$C zLH!qV4zSwK1Z+wd46n+KwCF#dL_Ly5hVxBXax?$03z;bZ92soW?UK3OW~Ee({jzCT zIaS}KIm8MwU!^JwdP{|UJAK;W*__kxtglR*`F^hItbm32{vy$z-lpv@7jKzA5n$*_ z3FYfixMX1p0&kur0)}wTv>0tkRO{h+3xjeP&nFE872j(m4{VJBcYj3(;vp8PT1y8J zOaI4OjL8MyQZ=s8r)uw05CE=f`8`};(po#w{Vg>Zc0~6pC$bS&S~n%fnm#%d5P{~a z3OoJ;pOAjhx%H1VNqtMIHx$i24j+7^alOirvsi!fq=Kd;3z5bOmj7J(2M!L~N7M)x zvj6>g^?7e*1uA##wq2TF6zmh(B=jl`YhAmGM9j}4JprH|aZ0CT0-XsWo@LWFB!P*_ zNEXID1S%pH4wz}<9%pqtVm^gwzQ(56mtBpfTf;val;GQYU!}!;svCF}wyojQhC~&- zcE}0#MIixhk~jTETmAl4&C_uL(&;nel8M)6%i!>}C?#uaoWloW6 zZlK+bo+mkRvawRZLO$YQ`*c~s8Zx}bYFhm0JJ+D)loMxj@I^P!o*{YP@W{c7{~xz{ z*xKeS%K!T5!soBMi0*LKK0btMciYdn9H|YQuDKwF2Z)1=?%?4ke6d3ulo3w#v^7hu zG(erqi^E287`qr`fQ1pn1E_+VF`f2yT>?Xv}t5z1$hP?7$Rl!t+`Q z0ZKxN!%YbMTvA6<$5c5NcEJxPm>l80N#$B8zQKpyVgbK_&X$m>7=xcJnA8Lg&){xM zydm*m;5ac&q|f`g2PgY_?2)3n&f_ptag}7)yFy+biC4gpILd3Y$V#Eor;qKW= z0Oh2o?(jr3rRX=5Oh2AG-g(W0P5NR3ZS?+5uz26p!5!K4UtSrNS;O-={n?C4FiWl9| zP8JJ1JY4&!Ow?6%k$&Jp)dgnMJqqrL; 
zJh3%bTUS4}LphWiR9nJ+7O663k*UEK=~5rj;W3w<4Z^>@srfKN1OO7z5 zZ|bEjHuvZ@J_PRp57Jv?wM;cNCoPx0DIEgG{B(=b=4R( z#5xA{pFzg4A@@^2^0r2PWn|z)RYrs;A_Q*b7_Z&(M`_yOBaxCg47^?ampzQm3iN!N z$(MtJRXa=i@Zd-TlOwY3g)=(Et!ybnkc2JK;A!mu&plmWCRJBI!12OcWRa1nqFPr` zKhypcn`HO?hknj*ZJhZSlD_p}!4^OXx>MBEo{h^JL(O#Eb~6BL_JdLb5)~%nfZyLz zBv|0d>qWoh%R3$plWjaQ-+Q$@E~8*8suNK%F*#LuUYWtyaZe;*xKq=xxfA+NQ5X#S zyp`DAIdU(u=OQgDt?kO&_u80=II#Z z!L~HbLg-7QT|s>DtWRc8jJt4&a&4Su7#SJ=$uL@my4V^CMZ4KU;ae&5;}aKO=Mm>5 zCC9+KhdEQ@J+ArT;8a;kGQ;LJ92CS)9HP*8dR6&yg^B2waN%Y<;=$3I6nAv#)|YIH}9^HFXsY)C!CX5u_Lvm@!hDFH}}{uA_8Wy|!K$jXop|SM~k2S?#Oz^ zg0dfIG09o#(V>SE(uFfxtb14X*k)9u;$Pv0p>vu=BlV!?Sta}SX1LpS@%dJ$yL{?UIHK^MVZ;1-0siI&a1GdU)^-=N(y-_E*z;8Md z^1ur1H;G8Rd~&rXfszc|=MO>#CrCU{1czVgOaR9szCBUp+dLfOj~#fi^Y_1(t68~4 zIhYzer#i2CSqaZ|VPzZJtTtXvPRMK+C?7?HpE0onUQd@TAHao%+GVdUWb>Y;FwK{a zjylCxpL#u>_K@`zqW6N1dJ?SXAK4;gK=UncvS&@9Ye%au>XyGQO$%|~s(MAQ!a}#{ ek8In@;nouRs!s`Y+*)=(BTvNnY0_PlctaHRKod>? literal 0 HcmV?d00001 diff --git a/pkg/mod/cache/download/sumdb/sum.golang.org/tile/8/2/002.p/152 b/pkg/mod/cache/download/sumdb/sum.golang.org/tile/8/2/002.p/152 new file mode 100644 index 0000000000000000000000000000000000000000..fbfb0682ee5866c7c077d19ed4e75f1de6de8864 GIT binary patch literal 4864 zcmV+b6aVZL`qPqs`0#MW!MyH5rQBgeZ7Q_Qgp2kULqCYNJUNvkw0kZ=wm?SA3l~ZK zO2?s(muSyYq=(BmpHpL$CN$m_ITeJ|$^Mmm~JY&@kul*Zwx z<{iu>nP+uSEHsks^+14<68H-#5 z1Dk*ko*{=NRr!T*Km4Ag9d|N6D61$$I_4<1pgOOR03Ryq{3eK9p-0dc9A1p~DbGA*b{d z!%GB#cacqx?CK5#n~Sxe{Ux==`+==B4Rj2AvBw~}pbxOivr{$W1zCcpsc8K;!V^aN zS>C3mYG=@J;gkyhaiy`H7ikY0o{Ca$lf$Xxr9pCj2^R-0w>Xn#iqN0bZhqP?=$|Iw z3KL=~>LY}-4=(Q{{4+J&dayG)n5wgwo@-UYCY{+=hIzj?_U&?l{F_YIs9uH>sfr5L zX&;}I75#W1p~s73><8RtfYD-E+#(!qWngEX0r9Qkn3;^X=PKP16g=%gkQPu``unNo zVW*~4rsue_4K=xAg(K45Su~^Z-Vg=86k35q$Ul%djQWibXA1GWuV-J_>f=vUFQA#4 zBy{7~OoYr-uF+#eakkH8`|yc>ePUNdqrSoCB{r!PDy!@3rSqmXlV)-7`Us2f zaFl_VA#3)>Qw;*7*LBFjzlfYcVUx>zH(vAXoTwsverzM%p7Q+yb;JX--~MLUZ@m@4 zj1d1R%?#dZ$AjF*9h-NRa*ecAw%W70eru!C85MkU5|vSbTT`hHw4$S;b^}X91y`6~ 
z;@C()*}V@9_iQhJVjsk#67TynDXiFVQZIha&X#ckD4;)ZGJrvjsnoHZ!!OP?^a5vW z-L=D&R-oyn)vZ!bmS~u{3{n^9-oQn;^QtBMqncadZSeZ7xOrs-7TuirCR&`T6RpBv z|8vXAr3Is;+`M7yQIE_3BVLQ?2;XH_&p+^Kn}Q#AZTL{+$&$0Cd&E)V95rb-TbNgl zcR`WU$8hU{;DoJ(Mei)Qr=y{`MnM^>fIow$w4}u0b}ND<$hd7zUK?oSBU%OSLLiqC zq_y!s+B_8yNXJ+yk;|pz4JN{Q4CAfOBYc@_*Wp*m6fiTfeVp<10_;GC2EVZ?B3^s3 zP2nVp1P((z;bOvpHYKUX!~GaEqaz#283z-$DgmhdKIefplOyEnH_ip*)^j44jeYIR z!pciD?HA2=#1u41T*%^|pqxR}Qjq@|PYQF$I1jMeHQ`oMao|1)F-j2@J~sywwSwYN zYy+>B4RmlKD;lD%5Lc=HZY!k*mMtFGvW&;Qz#EC6bekyoJwNU@1Pobl8bfd=M9J>*>TOgX6W+g()18IrB&TV(XPWu1m$KHxKaiHgY@O`UY7ec18aAO( z*+o*`pyVhn8x!%W!%dL`Y5iQ|`>Np_(wfUJyeeKIMQ?+xw~**H_Ls!&qypzy0!ZdP zC>jqiJ8%70&nS~AO`oHlu^XbZ)w(U8BX&$aCEu`*w7wMSfLZiwBH-fT7kZCuR{Jr# zDt=)dpOzq(N%`HS7jyQME;+D|@1fhikSaJ$$wXT7F8Y1qROmq`s;9G3iaoG!&5BVkGabiFY(TVJc6nzF5&Kur^W)luxV&n9dr5m z3x;JWo>Jz|lX7T2YoP*g#@@V939=+nIQS3BO(Rx84qh~c5rIV}CviZcF@QtY85Zc7 zczDEGHHfKl&Qm7zn6n|PvT-%0IJY2>OG%-EX311ca$iTRLzbF3!@xFF6jY44FB?Sh z=k8xf1dJ9sZ<#OzEDSVu`_$3=@IEO}lXBi*_*yHLs+6-|R{;8%ymLCqGNef8k5rZ3eV>?^EE75)cN({A+~s z30^HanFp|ijg;kcjh<9MHP2`&?ckp=_JenXav@O3uC)RR+89-K1~I)|uQnQ2f`^7V zc;=hA)VwT3c5%L5ITpVXjikUT$Hx-aWZ!Wpm&>mX%_|RkaA|vHoehgW`8gnPoJ;gd zI4D>3%XAYh6#(y#j86S~saE9~ur~V!h@z7>{jiNDBUYxtyGET(;bjEBmI_WlSijr5 zwZJFp#%%HMaG*iVFI1{Kv1ns&6`5Z8=qVi|Z`au03Kt$`B9pO3asNYLv_23z`%BR< zIM_l9c&%%6&y&P~0yCVm@8cT_GLyxc**1A8^bw#jS|NG3pTZ!Q+SwL4VAn>g_|HuTuf~)L2s59dP#IK@`SG-m2UvqeC_+j#kP2a{aM+vA1BU5&yL;` zNX$r@)nLsi@a32q5$E@izOj2ta9P!kJ6T9j33_S}OCF2s8=5ig<)%SEzs7OYhdDPc18baq-^f5zGS9T(ZcQv)dw}6)%9-yBgO}XA2#fT`c0IhX+F*a ze1!+NYJdj4A4r6{uzI#vv8H)Nq?c%%TkmdsO7=9vGDZ|e25Bz&BYtnB?410DDmckP zMajT!Z&_PVcKADl6u!upYsk&WYeEsliG(RI@y7(YFQ4_~=5Jv#b1TG4tiTq#2X<&r zQ5PfRghXXDTYw?bw9xwcGoEt!51b6wY>(v?hWL=i0Y!_>GMrzLES3gmTSE7DQE{Uz zQX~_8EZAnlCrtzk*cBS3)0%snWb|~#$U2UsHP;=F${e0e@^hBxuDoqY=;RqKkon)6 z2_eY*2o~37cr`UZ;Tp~_ZM1y-RqUz5+M@Y9Ay=3&Pthk|5O@NZrvV13=M=Gbg0|{MxA9{=hc7D#rMg~aeoO~9IV4cpB zs&0LN>jX%UQ}bgJ>>QWjsp>-cfMGI-v^c*00M`8uPJ;M++=!p;P#!-o_a&ku?kLy7 
zqp#?@6L#QE`uyw0XKe|jr9LS4=m0n_Ac~J=R&RXw8|U9om2m1b8Dk)(=$VD~k<90F z{k`$2K?@abpvl!^*rwMYg<7-B44Cb_GFcsObnc>l=E)jHEcPKAmYCpu0hhw<1k+@P)88$@lb*waQ@c$Ub$qI$EJ<(pNf1o5aoi8a z@T99M$G$fFko2!8V}K3|dUGUo?(n%_4m=AUIDt*v4blJhU`dNOnAg3R;Yf#IKIZm= z_ixSc#aPv?O*?QKW+dOCg=0ZLbB>a)p{i0n4o1tJvkDf2`H=%ARPpBOeP|*y*`aL< z70KJ=5+(xzgB-qH$3B=fMi4+JpZmp<>_Yr(H*LI@fe?X^v;&d$`fdqjq|i~|Z_f?Y&dD2t$@QH4l#7q)T|xN```8+so(I_f%MR~@Fn6{3b}xiKF?DBUfrhIT?idk}0GI21SB8)b>sAGNlH zR^8@=R_%+I0(6%-&gPsk$$T5a(d~U?A z+ML*gG2+@M(tr+NR6ep!oWN@RpqHO`$Qz|>wP8<#NaT1f!63pU~B&9m^es7-t9RB)dAA%*g)MsR8@YY#f2&+Pg~+dvzzWV6N8PY z{LRRUU&Sh#`XdPTlqO2*&*?~dHs)^kk?@3=ZL?b(wX8HaT-dj8r{{X;eLKaB#rKI* zLvi4m@)Si0^)zoE9JxqVG}}zJLJ=Rdnme z8|8ELO4=($DwW7ph`lg^3F?<{tn39nULAj9#57`>C2ZWXDuS+DxWF0Ul8{x^Mz-4d z3}(0MjVJKy-{5k-hW>yYc;J;9eE23mU|-ldKSBfrER7karY*T@i6~Xdr*3K2{PeuP zQokx|ME=a7&*fiK{ZF2qhs_6@HX#sQU5GJw(mjj zJY`xI*RhV_4bKkK6%@D}DT6y#dW?W}ugj@Njali}jjyjz7RW8|zcy4>396ag_BZ4) zIb08LE09 zQ#h<%JG(#>$$4jj(D*4KDh655oYs00Low|Z(O}HJoP-n(85d^3LJCFm(gx~B=+6DE zy2-T93ir)rnGM1t3}6a=!y=(3+&e;ElxC5(q{KHM^;hx3XCEeb->W1&bp^0gZTwfv zYt3qiFsztjm-u5N)LO?ruI3U3z9t2Pw(L-~>T<#0H939nh&PNo;!a&F6k)l&p3TJB zy1jgibBIl=I89{{<3iBX#HsyG#rtGsgec~8#Xm2?-|ywHM&iTwIF%2S1O|aJRZ7ij zjledx?abqaTFozpoMVC^s%ps&QvbuH9-&}jGneDgK-9^SqKLfD0YKs;UGQh!>mYnX zms!FeL%GaLD9I>ycgS8n+0;H9sVVC`wf}m*L?*gJA0EARybKNav5CNffrddLx}6j~ zRr=4Jtu38j_?iu1YBVQy8h6xFPQehaK8+mUe^9?Nvb(VRF13h}c* zhuD2Egb$Q*R8RAG<_#rmR7P;8(Q6t}$uoy8ysJ2H#5mVU;^`1s32*UgDEeB!VThnJ z)0_acd8tTn89h7o?g7kC+?vVCb)X_B6>~&i$+#5WRus(4(`?DjHz;vh;N--9b72ri z5W^ Date: Sat, 20 Sep 2025 16:15:28 +0300 Subject: [PATCH 14/19] doc: added README for how-to with the new contexts, removed SetGate public method --- bindings/go/README.md | 149 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 147 insertions(+), 2 deletions(-) diff --git a/bindings/go/README.md b/bindings/go/README.md index 9d832096512..c796b77f0c7 100644 --- a/bindings/go/README.md +++ b/bindings/go/README.md @@ -7,8 +7,12 @@ This package provides Go 
bindings for whisper.cpp. They have been tested on: * Fedora Linux on x86_64 The "low level" bindings are in the `bindings/go` directory and there is a more -Go-style package in the `bindings/go/pkg/whisper` directory. The most simple usage -is as follows: +Go-style package in the `bindings/go/pkg/whisper` directory. + +Legacy stateless example (single worker). For the recommended stateful API and +concurrency-safe usage, see "New high-level API" below. Note: `Model.NewContext()` +returns a stateless context for backward compatibility and is not safe for parallel +`Process` calls (may return `ErrStatelessBusy`). ```go import ( @@ -100,6 +104,147 @@ Getting help: * Follow the discussion for the go bindings [here](https://github.com/ggml-org/whisper.cpp/discussions/312) +## New high-level API (stateful and stateless contexts) + +The `pkg/whisper` package now exposes two context kinds: + +- StatefulContext: recommended for concurrency. Each context owns its own whisper_state. +- StatelessContext: shares the model context. Simpler, but not suitable for parallel `Process` calls. + +### Quick start: stateful context (recommended) + +```go +package main + +import ( + "fmt" + whisper "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper" +) + +func main() { + // Load model + model, err := whisper.NewModelContext("./models/ggml-small.en.bin") + if err != nil { + panic(err) + } + defer model.Close() + + // Configure parameters (optional: provide a config func) + params, err := whisper.NewParameters(model, whisper.SAMPLING_GREEDY, func(p *whisper.Parameters) { + p.SetThreads(4) + p.SetLanguage("en") // or "auto" + p.SetTranslate(false) + }) + if err != nil { + panic(err) + } + + // Create stateful context (safe for running in parallel goroutines) + ctx, err := whisper.NewStatefulContext(model, params) + if err != nil { + panic(err) + } + defer ctx.Close() + + // Your 16-bit mono PCM at 16kHz as float32 samples + var samples []float32 + + // Process. 
Callbacks are optional. + if err := ctx.Process(samples, nil, nil, nil); err != nil { + panic(err) + } + + // Read segments + for { + seg, err := ctx.NextSegment() + if err != nil { + break + } + fmt.Printf("[%v -> %v] %s\n", seg.Start, seg.End, seg.Text) + } +} +``` + +### Quick start: stateless context (single worker) + +```go +// Load model as above +model, _ := whisper.NewModelContext("./models/ggml-small.en.bin") +defer model.Close() + +params, _ := whisper.NewParameters(model, whisper.SAMPLING_GREEDY, nil) +ctx, _ := whisper.NewStatelessContext(model, params) +defer ctx.Close() + +if err := ctx.Process(samples, nil, nil, nil); err != nil { panic(err) } +for { + seg, err := ctx.NextSegment() + if err != nil { break } + fmt.Println(seg.Text) +} +``` + +### Deprecations and migration notes + +- The `Context` interface setters are deprecated (SetThreads, SetLanguage, etc.). Use `Parameters` via `NewParameters` and pass it when creating a context. +- `Model.NewContext()` remains for backward compatibility and returns a stateless context by default. Prefer `NewStatefulContext` for concurrency. +- Stateless contexts share the model context. A concurrency gate prevents overlapping `Process` calls and will return `ErrStatelessBusy` if another `Process` is in flight. +- For parallel processing, create one `StatefulContext` per goroutine. + +## Benchmarks + +Benchmarks live in `pkg/whisper` and compare CPU vs GPU, stateful vs stateless, threads, and callback modes. + +### Prerequisites + +- Model: `models/ggml-small.en.bin` (or your choice). +- Sample: `samples/jfk.wav`. +- Build the C libs once (also downloads a model for examples): + +```bash +cd bindings/go +make examples +# optionally: ./build/go-model-download -out models +``` + +### Run CPU benchmarks + +```bash +cd bindings/go/pkg/whisper +go test -bench=ContextProcessCPU -benchmem -run=^$ . +go test -bench=ContextProcessBigCPU -benchmem -run=^$ . +# or run all +go test -bench=. -benchmem -run=^$ . 
+``` + +### Run GPU benchmarks + +GPU runs toggle `UseGPU` in model params. Ensure your build has GPU backends enabled: + +- CUDA: build the C libs with CUDA + +```bash +cd bindings/go +GGML_CUDA=1 make whisper +``` + +- macOS Metal: supported by default in the build flags on Darwin. + +Then run: + +```bash +cd bindings/go/pkg/whisper +go test -bench=ContextProcessGPU -benchmem -run=^$ . +go test -bench=ContextProcessBigGPU -benchmem -run=^$ . +``` + +### What the benchmarks measure + +- Variants: device (cpu/gpu) x context kind (stateless/stateful) x threads {1,2,4, NumCPU} x callback mode (NoCallback, WithSegmentCallback). +- Standard Go benchmark outputs: ns/op, B/op, allocs/op. We also set bytes per op to sample bytes. +- Custom metric `ms_process`: wall time per `Process` iteration, reported via `b.ReportMetric`. +- When `printTimings` is enabled, model-level timings are printed for NoCallback runs using `model.PrintTimings()`. + ## License The license for the Go bindings is the same as the license for the rest of the whisper.cpp project, which is the MIT License. See the `LICENSE` file for more details. 
From feaba63935b798a034d922a99d7ba5c39f8a814b Mon Sep 17 00:00:00 2001 From: ciricc Date: Sat, 20 Sep 2025 16:20:06 +0300 Subject: [PATCH 15/19] chore: remove pkg/mod --- .../download/golang.org/x/tools/gopls/@v/list | 1 - .../golang.org/x/tools/gopls/@v/v0.20.0.info | 1 - .../golang.org/x/tools/gopls/@v/v0.20.0.mod | 32 ------------------ .../lookup/golang.org/x/tools/gopls@v0.20.0 | 9 ----- .../sumdb/sum.golang.org/tile/8/0/x161/441 | Bin 8192 -> 0 bytes .../sum.golang.org/tile/8/0/x170/113.p/24 | Bin 768 -> 0 bytes .../sumdb/sum.golang.org/tile/8/1/630 | Bin 8192 -> 0 bytes .../sumdb/sum.golang.org/tile/8/1/664.p/129 | Bin 4128 -> 0 bytes .../sumdb/sum.golang.org/tile/8/2/002.p/152 | Bin 4864 -> 0 bytes .../sumdb/sum.golang.org/tile/8/3/000.p/2 | 3 -- 10 files changed, 46 deletions(-) delete mode 100644 pkg/mod/cache/download/golang.org/x/tools/gopls/@v/list delete mode 100644 pkg/mod/cache/download/golang.org/x/tools/gopls/@v/v0.20.0.info delete mode 100644 pkg/mod/cache/download/golang.org/x/tools/gopls/@v/v0.20.0.mod delete mode 100644 pkg/mod/cache/download/sumdb/sum.golang.org/lookup/golang.org/x/tools/gopls@v0.20.0 delete mode 100644 pkg/mod/cache/download/sumdb/sum.golang.org/tile/8/0/x161/441 delete mode 100644 pkg/mod/cache/download/sumdb/sum.golang.org/tile/8/0/x170/113.p/24 delete mode 100644 pkg/mod/cache/download/sumdb/sum.golang.org/tile/8/1/630 delete mode 100644 pkg/mod/cache/download/sumdb/sum.golang.org/tile/8/1/664.p/129 delete mode 100644 pkg/mod/cache/download/sumdb/sum.golang.org/tile/8/2/002.p/152 delete mode 100644 pkg/mod/cache/download/sumdb/sum.golang.org/tile/8/3/000.p/2 diff --git a/pkg/mod/cache/download/golang.org/x/tools/gopls/@v/list b/pkg/mod/cache/download/golang.org/x/tools/gopls/@v/list deleted file mode 100644 index 1847373e96d..00000000000 --- a/pkg/mod/cache/download/golang.org/x/tools/gopls/@v/list +++ /dev/null @@ -1 +0,0 @@ -v0.20.0 diff --git a/pkg/mod/cache/download/golang.org/x/tools/gopls/@v/v0.20.0.info 
b/pkg/mod/cache/download/golang.org/x/tools/gopls/@v/v0.20.0.info deleted file mode 100644 index 08057762fab..00000000000 --- a/pkg/mod/cache/download/golang.org/x/tools/gopls/@v/v0.20.0.info +++ /dev/null @@ -1 +0,0 @@ -{"Version":"v0.20.0","Time":"2025-07-28T18:28:48Z","Origin":{"VCS":"git","URL":"https://go.googlesource.com/tools","Subdir":"gopls","Hash":"2e31135b736b96cd609904370c71563ce5447826","Ref":"refs/tags/gopls/v0.20.0"}} \ No newline at end of file diff --git a/pkg/mod/cache/download/golang.org/x/tools/gopls/@v/v0.20.0.mod b/pkg/mod/cache/download/golang.org/x/tools/gopls/@v/v0.20.0.mod deleted file mode 100644 index 47caff989be..00000000000 --- a/pkg/mod/cache/download/golang.org/x/tools/gopls/@v/v0.20.0.mod +++ /dev/null @@ -1,32 +0,0 @@ -module golang.org/x/tools/gopls - -go 1.24.2 - -require ( - github.com/fatih/gomodifytags v1.17.1-0.20250423142747-f3939df9aa3c - github.com/fsnotify/fsnotify v1.9.0 - github.com/google/go-cmp v0.7.0 - github.com/jba/templatecheck v0.7.1 - golang.org/x/mod v0.26.0 - golang.org/x/sync v0.16.0 - golang.org/x/telemetry v0.0.0-20250710130107-8d8967aff50b - golang.org/x/text v0.27.0 - golang.org/x/tools v0.35.1-0.20250728180453-01a3475a31bc - golang.org/x/vuln v1.1.4 - gopkg.in/yaml.v3 v3.0.1 - honnef.co/go/tools v0.7.0-0.dev.0.20250523013057-bbc2f4dd71ea - mvdan.cc/gofumpt v0.8.0 - mvdan.cc/xurls/v2 v2.6.0 -) - -require ( - github.com/BurntSushi/toml v1.5.0 // indirect - github.com/fatih/camelcase v1.0.0 // indirect - github.com/fatih/structtag v1.2.0 // indirect - github.com/google/safehtml v0.1.0 // indirect - golang.org/x/exp/typeparams v0.0.0-20250620022241-b7579e27df2b // indirect - golang.org/x/sys v0.34.0 // indirect - golang.org/x/tools/go/expect v0.1.1-deprecated // indirect - golang.org/x/tools/go/packages/packagestest v0.1.1-deprecated // indirect - gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 // indirect -) diff --git 
a/pkg/mod/cache/download/sumdb/sum.golang.org/lookup/golang.org/x/tools/gopls@v0.20.0 b/pkg/mod/cache/download/sumdb/sum.golang.org/lookup/golang.org/x/tools/gopls@v0.20.0 deleted file mode 100644 index fd63ef44621..00000000000 --- a/pkg/mod/cache/download/sumdb/sum.golang.org/lookup/golang.org/x/tools/gopls@v0.20.0 +++ /dev/null @@ -1,9 +0,0 @@ -41328958 -golang.org/x/tools/gopls v0.20.0 h1:fxOYZXKl6IsOTKIh6IgjDbIDHlr5btOtOUkrGOgFDB4= -golang.org/x/tools/gopls v0.20.0/go.mod h1:vxYUZ8l4swjbvTQJJONmVfbHsd1ovixCwB7sodBbTYI= - -go.sum database tree -43548952 -nX6jrsdthQ8kDPrwxKP2h/3CAC+o/Tzl00DK+QUiDxE= - -— sum.golang.org Az3grtVCRqi+V2+TLDpRvXhgZDzixz81eDxCTse8HVQFKkxvm3+CBHWwrkincl2+LzuJetgKkMzjLg5M1SI/XmJT7AQ= diff --git a/pkg/mod/cache/download/sumdb/sum.golang.org/tile/8/0/x161/441 b/pkg/mod/cache/download/sumdb/sum.golang.org/tile/8/0/x161/441 deleted file mode 100644 index 20eff0ee6d9284c82c0e98d3804f9791854c6d3b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 8192 zcmV+bAphSdj%vR;)I5Uxx2s1R{E`}CgGoWpa$z1`_VHr5Xo*bqDD!60P{&nKyEaOu}g9`1-<@zKxWDndZ?G6<8GoIJOl_BDJ zTw!bs>AeP8*Y?43ZPk*$@=g_E$jHzPp2-L?wGq0Uw}A>Ml2eoF1AZr(#KT**d)Gif zn&U}6$UP_32NE`cNbzhYJHg#|x>ql(bq1*B3Tf|E*ZgwXHsbrGcvSiB%Xy4Ze5Xa- zwLc;-1LxZcMrNis++#OA@f(jD>8&^F&)wI&s zxetlaT&wmppG;EA zH;?U7MA~=M#J0bH!Z_Z`tp0i>SK^!<-j0{($J;5q_3&FkZQNZ*-WHHJosP9hry!EI z?dQl5n(5?WC&gwaS4XTm{-1mgfkOn2#Sq(T9LRJhZWDhl{57olH=e@!y)PvQfZD4K z+uq5O&myZvv?jhRpH4@)IJNgwI`GeW(MlGp%>f6t7-9FRM=_1&R$M&%kHEZv&-Wa; zCEjAC&i3k4h^G>8K5zVI0-l5b3}c5B$sJgA1FGyIS;sV&p#%oh@X&}M;Q^3ZAiqa_ z`j_8nz)G0_1To@cf5c^P6PAn=ards`hT<==-PjCckh+!nw@Bu6!q}0Z`n(84?I8Gw z*1Lk&g5#el6b7#cC4a&Z%8k$&*F}g#?yJ72Y$KFB-B?omz>>3A?nUDy@UhSHB$!xIG07YXOE^%wbF_$xIK^_6~P{t?_>_-du`nSFTlYoCcu>0 zFS(2Kbj$I7Q=Y8Y zxwd?Wb!R1h6In2VQfsif%k1#&e!8FBMZw>pe*uB9Rm!1p>4y2r0MlND2LO1p-6MQ0v=KE0q4AYlccTkN%i&7(+Z+CM!& z`Y9-c_DX?h4M1BbyV2xBcigf)c4r*{Jl8Ekn_HUq&9-5}C#`3E?7Ux2>p2N+Uv9o> 
zXveFmuAoudMk;U7UEIj;&PbkSk&A^P>mTiVtQ>x^=Z2oX@NKR|=pZIRt(4%8mpI8l zK&`jIK(Xs7%W#2$+(!)j82L!CrKqZy#AiXK)x`YR2>B7Tqc=?QcHRu!MkmKYd9;NQ z(+jw_!;53Ghjc-G|ylac` zsUyJE*GdER%`R%hCI)PsvaVY-_jOZA!jz#L=%F6e%8N^CEeD4;bzmly)=wpLTD(>% zd!qaUr(>rQx`nAx7k+TTQNi0rSDD{Q7P47p^Vm4}D=lj*v*6dxYTqX&_`UW=;RezhLp-!-RJCbILzGQ4 zRJ(S$+r}96Og09k^tSXIsxeJW5*y?i=kkh5;qF<==HuGd-_oQCm7cQ7HT=`y0rc^_ydm_7ni-8y&c zBGS32A?{>x0^O{g6Ru4NiJXl7-x8LPJ4gkOwZc5@EEW>!Qt+J2>hWeu>t*tmM1;>j z@vYw)XUY-q4Ov;5dO7I1H3b5H#qKgi$!9k>7SQ-@(Pt&SA#-(2d5TAyBN2i|z;I^{ zsa}x9q2g9mnqSnK*v}jbKL(;+T5gG^Znjjl;Ub7-OjI zD2wCesjV2H>IytL9^v|6a9a-6gpMC`HnVT{VtlrPB8ULo#Fm<>(57MvxJ%T;@3V00 zCkH~iCm!+~e2Cz!q!aAt?dWFKlsF-thUlOMUBMapr6n$@_`yR7KqZV|%bwy#>gT8p5>YDU;}MpZlxp%?Q1(<4+CV^3m96^d%H79WDeZblh?%1j z*IV`!!S!^dr=!@>bN0OC?Je0n8Py3OAlhf}S_Qew!C^)-{HZ( z`04mSeO0R;mdj1%0zJcCH~sh9`-H@?yz}v|XXZ@UyY1REFtg=Qhwxj!^V9^lEK6D; zfU611fYK~p8@qiSUBDB(D*v5aL#%1*M;4U!_1?LPWcI$_%``QQxtdubD)SL4swqDu z091~YT>I0v$FxtuK^%p#!OPI6N+BnlA~K?iDkDU!>QO?pt`A6GGYp7`=|)l)9x<4E zjUeQB8$#Lknr=om;C!SMMDw(&7+nUZw4dS{+%&+U<0htUkHS0{Yutw*29Qn)v;BaN zh-QJ%;dd?E zSRUA6f=nQuv%Qew*{H98Cx)y{3-0h&Imso{2!877r?FubQA)vtDn1FLfM$Z|8ES^l ze0ot^6}e1?v^e}*eMS$R7VQP1)@O?=8g&+Y0z)v5T7Xx=C0!XqevV@ncx?ggZQ$ zV~YU1)?%qvbAA&xqdSOH=XlA11LGRLY$-Y5u`>Xqscd%AbXE161f65k!L~u8RoSDX z^tuw3B1z>3NqsSQn<||=sdMOEE%5$sA@XnXJbe2TtBhw==Z1kvzs-(bsP2d^08Fo7 ztWqQk`oP+GwR2M52;R1Cl6f%ny3Z0Xa<`beTQ`Danr@Imn%trdj7Nj)5pve5SN@?V zNw}wP%;wsMP{+jqK;?brKI$J>U`Dt=Xx-{yi@}`Acmz%G}-9Q8I2~3Nf99fTd z6e@T9uFtH&Y|!3wSJ_h^2xwC}{arGuh|$O8lG_Oob1Zw+tOc4(VKt*lLB9}gZH?MA zZGfjPfe6h*6N_aw1P);b|A_*vKu}`PssQ}V@vT(r>wK>sZh8k~ZKNPhZ`}D3`Nz8* zkQ@mxh799^`IkIOIK-=8E#8G~KeY^B7J{5>iGlmMX<`bTlUfK@=$ie8FkB>K-`I0QS!jI9D>j8WxOZH`Fnyc7T8~pF1LzC znSl#XHOXHDk9-z&%ps1o)c*^{VEpeO=~6A_Xl#8zRV^?cTd2{a21Ixmq2+qcN0!|~G3gzNuvnk1036TC} z;CWlMY*zE3HDb4mIBnv_=J*rUQt^SQ(h?#!RX%0PBqR$S@NyA4tdEl~q>Rt6@m zLYuvgIlkphJ&JqpFF+d+=+F-iZNUTjbS$r_Bp0%I>9X4@4zMYo!AcG%;-jGN6XQ_r z&2JX`jEVerU%~>>+;Fkax(*=`2E*GIw)shqNc(V1)^Q9B?i+fpqRgZV(?#MXHA1@@ 
zy(R=t#+6`>6s<@xTT(YKqb?A+O&{ocsC8!d+H$nwLC3-AYzMLnv+QvAd`rQ-w3)N# zZsy%89^s$LgA-(7YZ7IEHe_a=uRm+LnrN`G$3fI&|NeqAX%iDUd716TLo7sXwFZ@r zc*Q?8e;(Wli^)jh;n=by(}7zYnX*G6w&s;bd!`0eH(JXw;X{8{d;)iU=a>>i3^TLz z_uQ_UWw-qPH2ZmYuTL7B z?RjT6Jr)7r;Gt}uU(jy!Yp8`?{aDDMXuJA>DrglmY|xkoCbN^c;*uD`6WA*y&Ug(x zkxT1Q5M|LcCCO2K7vfi>(}cX$|ENA|t76!Sypd@ZES_ON)+M7x$6}cAlXA8N;+tQb z)j2AbK;Dx9tE}QAaUc@XvYRa~WyeMYo;-PvcK%HamqK}(4$=I#2WLZ0-IO1f6@h%y+8vHE>-lV2RBl}p12E*cq-+NE+uNhuDVQ0EMPmToiH;U?qB(3#udh-R z@t^A=y1@4o(cM@AcA-oNIR|^RITl$QWpH}@U`uY(jN35?rd3# zYTisO_FIs2*;juS^lAnLKZX~lhG-G1QGK_3d^QT)>J0u_Wz!*pekxNRGO0(*;nOn@ zgArAK3G5BkF*NH&mUqjB_)7h+Yx)jA5X%Ag#Vd z;>38Cw5h71h?+Ct(wudP)D60zH{~~a!PI2bHho=;=hbsJo~+dpE`z))Jxl9c8 zV1uWHqAmQPgO$#GQHaJRa~G8V4^kBak72WWoZ7LVzd(WCbY2ffM!{gQtIp}RRn(VK zex5elCOxFeqE+xN3wPaz>KHe3uE#HY4w?EYv}VKpB&x-P;+BopJSxG4y{<}YtUT75 zK~Z_th-=E-ZvSWFa72ed(p5&~Ta|=a@w%I2jdFn*ISRbpja&@>(vp~k+V=-AnKnN2 zoKWJLGT`#N1CV$k1dI%~;*M*(hE*7b`e6+rd}ZR}Z&+A8pny*-b>R{I|5NJZKqcU1 zBJ;c=dB>o1N~71~L6n{$Vi&f7PmoXPV;yRrp>n4g3B46^KONtSI(znZ0BjxQjTO&`!^5URF!{)ed3t?ni{>eA`MiL}-U~f8mPZzt*HbI6 zJYrjIi6K2DVj*Aylh7;3p%~QcO3{&yO=oM*ZJ7zdo#GDmSFo?wH-Q}6GSAVe`*cg+ z3KW9p*D|RCQp~JEmwsCJ6UCOSU0t5uibHgcGbjv%_|Xd*1>yO-f~o1eh--)=M7x(dTjJn!FEc%yY(#r@CcB(@JAKW?z8zi+&Yu?0{6YzBkPh5m{yBW+9TYlBejaLEQ~u&Uxv)~7GxG8G$mjbR z3$%$RegLU1v&~)oE33Ah{yzpyEd(Dre@u?q5So_;LBf6SE)A!V-f|!WMeane{?jBP zO#xh(FvPVOkMc3il%=ju90+vnk)voDvL#wE$ zcM0ZZgm{o};I}Iqhu&q7*>fL2#aJy+)m}V4Dajj5?jO(S5baQuk+}P%Y+oJY+Zdro z>4WMnL29}TE1{=OAFBsXFY@GaGkH~)Jj0|gB9gu8ik^pzUVZXf0W>P4YX@v2=ktOH zGTq*YJ_SK;VA&!8ow4y&C*9*^Yv+ytfh(c^7&Wnpj%QamYDtns(0oX6LL~7G78Kiv zhr4fQ%+I{%vK1m`MMOi3vcbMxMj)REm$2?Gq)PW>51{bQ=YG8YRTD31#-a81KF=P6 zE>jQqdrfKi?0f=IwaMWnazb!4f2 zepx9IMIY|)H_;bTLv!D~05HMFv1ks#3VErPvL0PaCm*b=l^t@1;`J~Pe^km}dgBBN zkdd7iB0df6;nu^bKPQ2U3q!&ABOYNLIgF4kbQzom5tw?`F;GBvBjS;f*jhM)fm4B1}_((bsw zJ82Lt*r11mHMI2Or|kdp$4M-krAZuc;J>MlJthY*BV=zvDHZflWI1ToP{QVU7>kr` za+?a8^=rCM2ElbK{&UC{(zSpwI&G7>)|ORquHad#X}n|bv;g&$3)-`K&V&n4jZ*@# 
zHIc<3iGT=%*+9obe>GM6^cqOj)%W~~(NLZ1adGWx`v>NilpH8`=CofU7y4RC@o>s? zkKZMxKLvnIw8w@v<~hUS5ghOZZdM4xLdJap{Ygk}R4@Seq_BXjgrFpV#nJkDwe>@W z81p3#EJ)%#>#gsL$I&LfQn32PMizzz3Xf+YY>y1vJVs8;e-p{MtN71bH1MnFEGm;NyuI$g!E0@*R zI7v2^2g7PogKysubzEZITr;O*!NIvE;k}WUoIBJEJgzJh_F0=11^Zoxs4P>FO7#cTJ*A0Y0@llJN>dXQ{$i$g8YINps~ zbWOXEkhmHnm2rkJe~bgmttu)f%(`67+pjES{G}vAuZ`+q&iZqr`yh$Vno!5gGD_($XmNCaw~zeNEwAAiSR0S}#E-@~e|mHVAguvoL!@HSa!p7lF}vu58ZbwowX#E9fhyK&k#UgP&eUT? zfOG_bLbmZSEn^YZkuK^1HCs#c%7mXbam(LZ0!D6)35Ia%lnoY`y>4mDn0g-fHtp*G4VBV)MUoAaG=Dga(JL_IQmgR3@R+RpEn()TKH(&nU0IlV{D6*CMteO1u^9weFY&$4%gM>CRSAdR^0bm_awd zk2jcB6KN+k2viO@xtBWTrIy7qbz4I-sfuhGqQDSD*y*&rSF$EU*VgVN@jKb{KoFlL zLcTP!H(T8lD?}N0y@!yMio?w@Gq)Jd@f_5ebMrE-dj7;KST}U z2%Us`#^Nkxg{BfGrbC8$K`ibPYA5+FxfJ?IhhQlh)N_0j6#dEb>-hMZE7P9wGP`A# m+j=@WHfaAa+(%TMj9~+C#cRBelVBd-{u}XZ;c+bkBQx6e!_Phd diff --git a/pkg/mod/cache/download/sumdb/sum.golang.org/tile/8/0/x170/113.p/24 b/pkg/mod/cache/download/sumdb/sum.golang.org/tile/8/0/x170/113.p/24 deleted file mode 100644 index 01d9b6eed1150f85bb8aec986eb6f27e09f803b3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 768 zcmV+b1ONPTHX67i26i+QwVq9z#G{ZL&Y=NJsJWeIxr`p7Jkmv9YARXIRR@>k=?2I9z7e(f@QAuAruFR%^g>%H_?M(GGMQ4ClZv>sy<>#~^R^iMO3G-)3cVf$PPO zR3F5=DM{+X$1}7W-m^ zN@1H0?Uq@N-fXU4j-pGrcCI7J*k$Tm$Wb9^EkH6P&?4! z?47Ax+)ns*f8(jY{*eH+ygL2n#$&~o_U0$P@$ zE=OA=sZNI7fr(rE?LOusJkB~@N~RP1H9_EmNiy?&*=8 zh<|3g%J)9IVV7_8d+dX*62Gy>Ry2rX$w|cI6qPH4P05^7pvgNoRjtRUQOD1-mCQ%T zJgt>NmPH^Yal;dJ=h@rFTdrTwo;5p2@+sxtN(XyX%s>lB>663S$ofhm=<{`>D29-p yD52Bvmf7!(-@21ssKXvOBT;YrJFy+WAIx;VTu4cMVz6`Lw1AG)sN3PgL+}rYn1)pV diff --git a/pkg/mod/cache/download/sumdb/sum.golang.org/tile/8/1/630 b/pkg/mod/cache/download/sumdb/sum.golang.org/tile/8/1/630 deleted file mode 100644 index e2c434fd07f5707f4fc4fed134f94062f8bf9970..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 8192 zcmV+bAphU99s1R--on}fcVbnECc0V3R9wTNuo0jYxBhS~QSgRShVtXkfvll-^-y&! 
z;w)V;IX02`)u)}62y#4x9#yDEZv>%9Y|JB_j1y?*o6D=dWf?Lt2CyG_s!RiAlN!Oh z7m7mX+%R(=E|%poSM&J_JM%vXsumZ5FQDVgsO_ySegy`Iqp1Q1j~@U{6mB7S*$22? z{;+(6L z!orvLbW%PJ7P>#i=?V~{U#n4Czl!Sv)xV2=)q@2_YfhA*ln&57UA(-|p$lw{p6gVi z-!jMtEoFIoE+ly|vv?^%$CKe#x)4TJDIbagNL%MmT$9dWQ0;hU8K-<-H5?IL=h(ht zFO58}b*h%B$tU#Iiv1A>3PZW^jig&DS>po&GG&BVuJ1i_hXB7T|@Vq z5+qNXi8IpB>&J)a483b9K{YGaf4zSKC(YorqqHNC*2Caetnv4vnH?KBgMGK z|3%W!3ls+9mv@{LgOMK{`ZFu1(bu#kpQ=8?61Fr~%QhPV9rqY%ZN*_rmTJd1t;@({ zlJTpM++bVw{Gk8nR)CLJfh=(c!d~7=>R8!kRVe$|MP1q6<^FF`qmba1;yX3F4)QD` z^Kqp@o(;156}eEgk>ZPH3v+(|s8s8s6QW*--kJ1|YkSH1Do%kgF^pDlgl-dNXv*)l zr6e-L@{Iv`^@8VL@>`&5E1CoqC9`5fQ@0M+s482lTEoLHAn{p?qPp%X;I_%)oUe8_ z6Vq>mYsWyIj$ybDc0QL^05Vb^89wJHwU}(5q4fLD_8z+bvM-LsNDJVYH?Y$LR*yfe zc;{3$ivsX<^9B2-Z^SR+9aglllC+hiq9g1HP;>4*{(y)Pb@`nF*6SIbsGKx47aaaCz4OouQ~Dqy3im*&M2v>(E^OBrkv%UJ_^d2 z3+Yx#yv~6ZI1_$Y5D5dMJ9>YnA|?Nn+$fH>P@X}U13M$%}VH>h`r<*SXn z^4_N=vW~wh-%duJRF-C92?uQq*(ObzoLVTDP3D(LwpAehfd|}v7N`T&Usw1}0Gc4F zvH*-rM>D#fmm%fwz~$)6@psS}w7+Sr3ZdSb`O0K{C;U~a$7YJJ#D9`@h1kT#_jhc} zYi@gx8KKzpUyY&Py9@DsF+W&=et=HyHmE72()zN|rbmA7HqHTb&sG$Dp-qGAP-5VF zi26bs|L|={E8W&n-FBixSL`>FnYzAnfppcHUP8_5sS2H#7?*3!n)jzN=#PHA-FbI@jc`b>>NMU5Pskdj~7KsxaE&n{>vTDPK!`HE;z>_+rUkF-Zr zxFU88_zPW!2UYB6$&en*TRlhN2H-WTjaF`(`x9tn8kzDsI8pIkFV!W3$Oc#@aCL5> zH%=oGGeISso;=!SP|_q8r~r8Tjd(c5E_x<=u`E#ynt&b>D5^lZI)$PwWPza} zH;(l|fKKJoC3kn^;BK>_wuQw6(6;;KY+Hs#_XxdoYptg=+IzrO?UJI(tGVwR0Je61 z(<>$oKp$%rH*y_ml6NMedBs`s&Hqdx0gNkUYcaFxH;})#_-}jnXh_Dh-qS{3)};&) z%?rw>A5Bvhokfn_n&PT3LyCE1a{EzXqm^2=Fbup3A7L2%rO~qUgQ%>B0I@TJI%y^g zmi-)I@~GQzFD(yd#EQ0-H=>Qpa4G7xtuU$!LcM-npiGkcULh^it-p#8eB z?@ny}N%mk|1#33Tb+?Na* z@|nQSCid|;{er`uYDKyW#CLK6;b!^1zaDmF-EJXB^jDqHvvP*IZvR=`OLI)nk@t-a zPMrT*jV=ez^m&F+83w$4NaydHa%pPp5}we$S20x^mrcCApTr7eh11L{>lslOhD&Y$ z<(Qr84Al=FT)<)!)eP6r$WhdD_i^K=(oQW_k&j2mNp-G=TS{pa_;GU@8iOm1Fl6Q| zf9Y<_R@?jGq_ul_wzkt?32MppP;hL{5l_d;s>kV9`lMkCSoWojqvMi$Wtq4|M~mm5 zU0up`aJS58~B16UP>c|TCZ8-ef4DOVEG>rF~Rkg z?$~tt0I`Ii{xzoodHsdSHj6#)=?ttEn_G_~NwgdKmko&R4SBX8%h@dLKfRCIO!(L5 
z;(H~=#t;{cD5ZnFzNzt#rwdyD#wG4g^9NSgvHyz3RWxT{bI=g)vmCH$b-OCWg*c(S z3&`?pGzj;w5C9yvIXRuv1?ZIbHz!Les*G8Tlq7j*)H{D|HW4L8Uz0euVRg(zhg z4-vpMyPysq8XP||BqkjYE9tkc1?JSOv>+tOSQGp@3n2{sEml8yn=Km{ zCNYF29nIE!+80#2E5KVf(r&-h;&EZ@5fXA~XCu~q5X zuZROS*>LI9O1G4)T4VXgwrVlsohrX#+Sc|y!y@`t`3rG1_X{wo=65wfCZB?{0MDo@ zoy?QESQO4fc+0-SE~KVVaM`Ol)Pv41zs4;h9_pqbNWdw$yNbDmX`X08YRT^+iFg%& zjF~!SkcP&ObJ9XI1#Sp<+jbA6KNJQt4kKpPs;FjLQ)PY>P@UT^`SV~GGS>YU)bUHS zX^XEL<&KK|+X%bYts#z)kn1j*^jC5>K`4NkLrPId;dS(u2#cTA`uLqKcF6(^tOk~0 z%+5wEB2?D&p2y%A_W}(8tG|JCoD?brSa1}$NY`dgtan_4*?5?m<}$3E9DT{tMM_yB z6?!q#>LQXRyyoEl!#fGZ4i17!xD?8%SP*brJt0{t}%>=~e zyiNYeZ{?Jw*~gbT^h#`u@Sn}3La7<^O=`-5ir--KVha}fO0yYhNk(dw6V{v!p&Z@g}`45I+@eT%gpO?={sh+ka=Q?EHFxM`W;>! zNGDpI2n(2ycW#Y0K`hoSH?CiHH|7iqRm+R|+u@WRq-PJ8iEx=UFKU@(=Tkl1H7;wq zQX9I+s<)3XEiDf;$~g{dU=<3bH0&icbfMgcW`^1n+@xEruQtlr+|JfD0WAuINaOzsig_8mQ^viGGp+dY7;V{9M5BY zpbEXP&#Ms;R2!{0GA!t5=u91(`u-KC4xBx^#MW8C7M6y;LOlpF##rlTVy{Tg@o*F0*i4;P4r1V9*6&F%NauZ(+AS_In{mM?x z_<7DDO)*0&zh!-ywpje`&8s;vRPfYHf>Y$*?$EjTIt_p;*&0UgpmN97)BK zjZJUfVy1p*=&kcH3JJsVv@HqIw3LfsBhe}GRAM>(h+yw1Uu~d`k(+K4T~LDkU|Agb ztTiqQ{0w43CBAP6(jWC%m}fl1bT;~6sx4i+vFI*N(XK_6SDxc5dppXIc+}d{4@4Q) z62CZ+y?^?)NUW+;>q_e#E<@?d>r$8ZeamrX!56Xaa1{KX3}Yys{H{aOJ#h*F@gXYEEhgms(kVKI=7~nliVE(FRtp4(8(@C2hhUY<@U5$GSy=6*jjF(^T)1+$IsT+Yv!B+#9)6B-NK{oc~nw; z9zw8+a~8b9SK!-<-nTZo!T^D0hiNP+4`u7I(W|@8Z>h5!yB$&2{*)f}CySZVc@419 z`tONZ>>+2!|C%-zwE-ZRjWw>M?PyJeI?+gv5y>zEg}yVV>O_9D{Q_&FRXe893Vlrb z^ccI@w7;MFidpC~p4R_JSG*+9%G;#a6Iu}}tnXB;E!wWOyj#t)}_a z(dPC#_2=oLmf4kAp7GKhT$D6ILwmpNH(LGtPpoOW?}qEZ0Zr{VcVDW6?^Yvz*(YF4 z9byf{#6OUYANGO-oEh(nc-RYjtO)2$01izU@(qFwt(E1~U0P<)Zn@0hx2@t9_(v75 z1I*HZUq)c5bqUT@vN_0_YN6~L9$on;j9q?!LJO3y1Z2sEs^joifLlfTr{8wzcP_6)V4- z5UPs=!BR(JCVimh#%$v2qu=!gkukG-q%Hd~U)K*clil)6?hZcQE%7_OTvG2ebFFka z=By6mohKb?zsj*L;n7DJ`^X2GLH_j47gd3sJ@EGh`-BtobysV~ajMxXa33Yq;Joc< zFPQF6HdMk#$XT%mVbqfPZVKDJ2h zWE0j~u3s55PImNM-nbH-PngKOU=%1|f<%+}I|`F`rlUXY?HV#uTl;BA#@6O@jZ+&` 
z)5mWpS)0<&#MQN4`O3GA*%L2)mYB}4(M(*NqPxmuHJ+HYlN$tg0E1?&3mJh)0QBh# zhKhFPDJH#(R_^ODe_8*J>L4_);Zxhwv9X6cHQNHF@C z@!NO)-J*HBJx!ox zoM@AI$i83@jB?2SqpcYms-_c~X>SZz-{N?*N7KQ#^*7&39Jg9Dfcf< zI|V2bG3T8B?QbM>`Yx45m%Z@1KH{PI3CLX^m9W5$ahUc27%?Xik(y`eAlNqZm>rIi zfbB&*gemus79epvV2=pK|4bJDD}8fo4Uli_yzyt1kRGW6qf#%SYOnvmx-oq8n?KGbw2}c6V!+lvP=0 zH<+OZaIjEqKcF39Nl_!JjR^2+v~i#om2DoGe>dBG% zIAlRUtMe+NCS6pzN)xG=IpaQa`N~J*&m&H3KX@Kb`unT=+dE^E561(jMC_2y+FNjp z=ixCFVe6q&^M%e@ zh9QX@C-_b-abgkmO_x$R&6KL?_^SBxe|7=Qp!OcX{d$qsikbVL7q}h8J)#A(Zq}SO5F*v7?hzNPsvLfUpl9V8?}i>!iIBYoh6r_PivWt(ph`RNyKrQ8 zNE2(zl6~m~BMKee8Bc=r#pMz?7OWYgd@a7~EMg)bPKySep}%FdnBL|P!~y+2#6O(4 ze%Qcv>FE9>oa(O)O`L?WHrueAiLe|B!AQFY^*;(Z!flcR%F{!RA+*B_^2xL~?@Ef&(q~hFyB})%@<7hbA8; z?*%5(@_CS>2-LJu!QAhKy7dg{=Aq8nG|77}by$9MY69^jHMgkZ4E79oI3BAl|Z1D{~wx61$eKWxeaM z$Rr}oqQYb)V=QbzYqZ2f!3R}tDWCSqB0h+FVyF3U64Fw!-A~moRU-llCKfp_uPIIx zihyDwGdc^+pI=I5D`j<;m_I1VL@V3VMjlU4g_n{sk>9)NOvwr%M9tL4*B~0U<)3d6^ELbf&DmcY*_${?MAA9g66<^n&7IDRb`L6w} z(#LswaV>)H6L#Mci>IEh0DT^^Zz0*2luKox%`TFR(=YKkc%8)P0O;S^4#``3M0amN zP)ey+II^(cigZAmfs7cSEANpb zQbMAt(yev%ywG%~u=R|h*3K>;G2Y7E3qj;9q)X}A;}TaWdS|QCxJJT-bK`tIUje-A z3sMEYtTrr~>c|Mit#H0jonm0>`*UJOfrvfu-?g+RuJHm5+AM#tmwCSJdk%AcK=|b# z$wfNkDB)xca_)6(huDcKRXSmUEvl&aLci3mDrFR8sBtJot~o-A0bUn64%C`^(n3+O zg>={OkP4YwTH5G5d-ORJP*NY5)55Gg3V1$U;sr1nm5piLsb~yJ8Xh>+c~YwE z7SO8}@PDe7Y1zR&Dx*rSm;m$}qEcEvSRptJY&0Uz%OIy3S@8Aw&jEfuqao2>7 z-btL+SIi*HngT}SM3C?rG&EaED$WBOjtk3yQYZJ-?zL)OOo z{!dLNrMLi8WThY|P?)#F?;S zG^fmt1CWfCWph(4`$oq|C!kLQsu^@&(|RjMfcPh^jS97zNS4TLvg{}G8~yk=#=ALnL`?0$*drGR#^Na*C7{vP=ga$ ms+`}wu1yxKluQ@*KJsliU++pRSnA*k?x{Fz{nOQYl=th`AQNE# diff --git a/pkg/mod/cache/download/sumdb/sum.golang.org/tile/8/1/664.p/129 b/pkg/mod/cache/download/sumdb/sum.golang.org/tile/8/1/664.p/129 deleted file mode 100644 index ff565c04c66b068c0a3239c483c93f32814b69ce..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 4128 zcmV+*5Z~`6oQ@6D!;`F&zBS-iS#gLRFQD@f?DLwvh8?+8sWt!x zZ&u)OJ59fY7ef03nmcMa 
zzM6yn0+EQXa_}14u|k2t4&`{?v;o2~>b*pHCNpNx5k1YTg^D?UJ5b_EK^^_Iy}iHv zslk(ua5@)A^~vNwv{W5R>{B)n(Cb{gAtXz_gRi!RGo@TcTtp$icKL>sHU16|!PY%3 z$RLDQ=Jk*gxn?sV8py;5z!P)gxeoYjBOLzFSJ@8OmxVYxFG^1gp;hPnOBq;Ubx<3Y z#z0sQwkMSgwWFJ{mdL6#^Me44>!B?gRy`C443@Z9M-scppsvg&Zg{YqDkMu$5%VL$ zZN98}Q^y?T)mvWQVuIt&C&vQuS3*z9gNZUFu%8l{I@RFDMbCcrx_{1XM;{;ETM86pva02JppT6%_MA-42w$mn+!DVkveW!cinWEy73jTB!v zBT=>8x}{{e{&8vMrb8_?>{o|UQ6^QnkL3^P-Y(54yb$SR74DtU+d&*l6qW}TSInoF zFGWP}cJ^csqK{zQEv6cRdkb^>55WubBRW!(pF8y95`vMfpw?624cTe6nXY^0MCoTi z0Lz4=HT!|C?_L=%bme#wWM-8bKLVpq%7uw0-1C<6R{Laj?SKVFgk+xDncFN!LszuX zoDor43*hd~)()+BXq?mnP^|x0o8Wd_Lyg=4zGR_3{s|1q-t*yYW$lj>x>`Vc%`b@qZqe)Arsein92lzQt1=&F- zM()|WEpJx!zBzL zP3F^i<+-*(QIo>bn(BUjHpMHB$OYD;wAP*ITxQV@q3b_!KK;&oS4>++rU z>>Yh)1*P#BGXo^j4vrMb>L;6j$(XfW=RzJgI}0;ZaUhV8{IWQwYIs^jNGanVn~BD5$zFEXdxy)0n;G|aO46P31l3&n?WdP9kaZ<5ZR>6|rFExeU?~x{ zUnm)+fQgs1NH#9EkObFVFQ(+l!>~k0ieF(6$R5$^ZK>_|?qgxW9}jd}NW!qVfM(^eKMDI6XGIOl@`BIw$q{R9^Rl;+h%*tOWyrO#Se$rlM#LGi(OPvxJXJwFyh&sN1>S_f_TWs;vB4at@auiePw@EX!I&&i$C zLH!qV4zSwK1Z+wd46n+KwCF#dL_Ly5hVxBXax?$03z;bZ92soW?UK3OW~Ee({jzCT zIaS}KIm8MwU!^JwdP{|UJAK;W*__kxtglR*`F^hItbm32{vy$z-lpv@7jKzA5n$*_ z3FYfixMX1p0&kur0)}wTv>0tkRO{h+3xjeP&nFE872j(m4{VJBcYj3(;vp8PT1y8J zOaI4OjL8MyQZ=s8r)uw05CE=f`8`};(po#w{Vg>Zc0~6pC$bS&S~n%fnm#%d5P{~a z3OoJ;pOAjhx%H1VNqtMIHx$i24j+7^alOirvsi!fq=Kd;3z5bOmj7J(2M!L~N7M)x zvj6>g^?7e*1uA##wq2TF6zmh(B=jl`YhAmGM9j}4JprH|aZ0CT0-XsWo@LWFB!P*_ zNEXID1S%pH4wz}<9%pqtVm^gwzQ(56mtBpfTf;val;GQYU!}!;svCF}wyojQhC~&- zcE}0#MIixhk~jTETmAl4&C_uL(&;nel8M)6%i!>}C?#uaoWloW6 zZlK+bo+mkRvawRZLO$YQ`*c~s8Zx}bYFhm0JJ+D)loMxj@I^P!o*{YP@W{c7{~xz{ z*xKeS%K!T5!soBMi0*LKK0btMciYdn9H|YQuDKwF2Z)1=?%?4ke6d3ulo3w#v^7hu zG(erqi^E287`qr`fQ1pn1E_+VF`f2yT>?Xv}t5z1$hP?7$Rl!t+`Q z0ZKxN!%YbMTvA6<$5c5NcEJxPm>l80N#$B8zQKpyVgbK_&X$m>7=xcJnA8Lg&){xM zydm*m;5ac&q|f`g2PgY_?2)3n&f_ptag}7)yFy+biC4gpILd3Y$V#Eor;qKW= z0Oh2o?(jr3rRX=5Oh2AG-g(W0P5NR3ZS?+5uz26p!5!K4UtSrNS;O-={n?C4FiWl9| zP8JJ1JY4&!Ow?6%k$&Jp)dgnMJqqrL; 
zJh3%bTUS4}LphWiR9nJ+7O663k*UEK=~5rj;W3w<4Z^>@srfKN1OO7z5 zZ|bEjHuvZ@J_PRp57Jv?wM;cNCoPx0DIEgG{B(=b=4R( z#5xA{pFzg4A@@^2^0r2PWn|z)RYrs;A_Q*b7_Z&(M`_yOBaxCg47^?ampzQm3iN!N z$(MtJRXa=i@Zd-TlOwY3g)=(Et!ybnkc2JK;A!mu&plmWCRJBI!12OcWRa1nqFPr` zKhypcn`HO?hknj*ZJhZSlD_p}!4^OXx>MBEo{h^JL(O#Eb~6BL_JdLb5)~%nfZyLz zBv|0d>qWoh%R3$plWjaQ-+Q$@E~8*8suNK%F*#LuUYWtyaZe;*xKq=xxfA+NQ5X#S zyp`DAIdU(u=OQgDt?kO&_u80=II#Z z!L~HbLg-7QT|s>DtWRc8jJt4&a&4Su7#SJ=$uL@my4V^CMZ4KU;ae&5;}aKO=Mm>5 zCC9+KhdEQ@J+ArT;8a;kGQ;LJ92CS)9HP*8dR6&yg^B2waN%Y<;=$3I6nAv#)|YIH}9^HFXsY)C!CX5u_Lvm@!hDFH}}{uA_8Wy|!K$jXop|SM~k2S?#Oz^ zg0dfIG09o#(V>SE(uFfxtb14X*k)9u;$Pv0p>vu=BlV!?Sta}SX1LpS@%dJ$yL{?UIHK^MVZ;1-0siI&a1GdU)^-=N(y-_E*z;8Md z^1ur1H;G8Rd~&rXfszc|=MO>#CrCU{1czVgOaR9szCBUp+dLfOj~#fi^Y_1(t68~4 zIhYzer#i2CSqaZ|VPzZJtTtXvPRMK+C?7?HpE0onUQd@TAHao%+GVdUWb>Y;FwK{a zjylCxpL#u>_K@`zqW6N1dJ?SXAK4;gK=UncvS&@9Ye%au>XyGQO$%|~s(MAQ!a}#{ ek8In@;nouRs!s`Y+*)=(BTvNnY0_PlctaHRKod>? diff --git a/pkg/mod/cache/download/sumdb/sum.golang.org/tile/8/2/002.p/152 b/pkg/mod/cache/download/sumdb/sum.golang.org/tile/8/2/002.p/152 deleted file mode 100644 index fbfb0682ee5866c7c077d19ed4e75f1de6de8864..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 4864 zcmV+b6aVZL`qPqs`0#MW!MyH5rQBgeZ7Q_Qgp2kULqCYNJUNvkw0kZ=wm?SA3l~ZK zO2?s(muSyYq=(BmpHpL$CN$m_ITeJ|$^Mmm~JY&@kul*Zwx z<{iu>nP+uSEHsks^+14<68H-#5 z1Dk*ko*{=NRr!T*Km4Ag9d|N6D61$$I_4<1pgOOR03Ryq{3eK9p-0dc9A1p~DbGA*b{d z!%GB#cacqx?CK5#n~Sxe{Ux==`+==B4Rj2AvBw~}pbxOivr{$W1zCcpsc8K;!V^aN zS>C3mYG=@J;gkyhaiy`H7ikY0o{Ca$lf$Xxr9pCj2^R-0w>Xn#iqN0bZhqP?=$|Iw z3KL=~>LY}-4=(Q{{4+J&dayG)n5wgwo@-UYCY{+=hIzj?_U&?l{F_YIs9uH>sfr5L zX&;}I75#W1p~s73><8RtfYD-E+#(!qWngEX0r9Qkn3;^X=PKP16g=%gkQPu``unNo zVW*~4rsue_4K=xAg(K45Su~^Z-Vg=86k35q$Ul%djQWibXA1GWuV-J_>f=vUFQA#4 zBy{7~OoYr-uF+#eakkH8`|yc>ePUNdqrSoCB{r!PDy!@3rSqmXlV)-7`Us2f zaFl_VA#3)>Qw;*7*LBFjzlfYcVUx>zH(vAXoTwsverzM%p7Q+yb;JX--~MLUZ@m@4 zj1d1R%?#dZ$AjF*9h-NRa*ecAw%W70eru!C85MkU5|vSbTT`hHw4$S;b^}X91y`6~ 
z;@C()*}V@9_iQhJVjsk#67TynDXiFVQZIha&X#ckD4;)ZGJrvjsnoHZ!!OP?^a5vW z-L=D&R-oyn)vZ!bmS~u{3{n^9-oQn;^QtBMqncadZSeZ7xOrs-7TuirCR&`T6RpBv z|8vXAr3Is;+`M7yQIE_3BVLQ?2;XH_&p+^Kn}Q#AZTL{+$&$0Cd&E)V95rb-TbNgl zcR`WU$8hU{;DoJ(Mei)Qr=y{`MnM^>fIow$w4}u0b}ND<$hd7zUK?oSBU%OSLLiqC zq_y!s+B_8yNXJ+yk;|pz4JN{Q4CAfOBYc@_*Wp*m6fiTfeVp<10_;GC2EVZ?B3^s3 zP2nVp1P((z;bOvpHYKUX!~GaEqaz#283z-$DgmhdKIefplOyEnH_ip*)^j44jeYIR z!pciD?HA2=#1u41T*%^|pqxR}Qjq@|PYQF$I1jMeHQ`oMao|1)F-j2@J~sywwSwYN zYy+>B4RmlKD;lD%5Lc=HZY!k*mMtFGvW&;Qz#EC6bekyoJwNU@1Pobl8bfd=M9J>*>TOgX6W+g()18IrB&TV(XPWu1m$KHxKaiHgY@O`UY7ec18aAO( z*+o*`pyVhn8x!%W!%dL`Y5iQ|`>Np_(wfUJyeeKIMQ?+xw~**H_Ls!&qypzy0!ZdP zC>jqiJ8%70&nS~AO`oHlu^XbZ)w(U8BX&$aCEu`*w7wMSfLZiwBH-fT7kZCuR{Jr# zDt=)dpOzq(N%`HS7jyQME;+D|@1fhikSaJ$$wXT7F8Y1qROmq`s;9G3iaoG!&5BVkGabiFY(TVJc6nzF5&Kur^W)luxV&n9dr5m z3x;JWo>Jz|lX7T2YoP*g#@@V939=+nIQS3BO(Rx84qh~c5rIV}CviZcF@QtY85Zc7 zczDEGHHfKl&Qm7zn6n|PvT-%0IJY2>OG%-EX311ca$iTRLzbF3!@xFF6jY44FB?Sh z=k8xf1dJ9sZ<#OzEDSVu`_$3=@IEO}lXBi*_*yHLs+6-|R{;8%ymLCqGNef8k5rZ3eV>?^EE75)cN({A+~s z30^HanFp|ijg;kcjh<9MHP2`&?ckp=_JenXav@O3uC)RR+89-K1~I)|uQnQ2f`^7V zc;=hA)VwT3c5%L5ITpVXjikUT$Hx-aWZ!Wpm&>mX%_|RkaA|vHoehgW`8gnPoJ;gd zI4D>3%XAYh6#(y#j86S~saE9~ur~V!h@z7>{jiNDBUYxtyGET(;bjEBmI_WlSijr5 zwZJFp#%%HMaG*iVFI1{Kv1ns&6`5Z8=qVi|Z`au03Kt$`B9pO3asNYLv_23z`%BR< zIM_l9c&%%6&y&P~0yCVm@8cT_GLyxc**1A8^bw#jS|NG3pTZ!Q+SwL4VAn>g_|HuTuf~)L2s59dP#IK@`SG-m2UvqeC_+j#kP2a{aM+vA1BU5&yL;` zNX$r@)nLsi@a32q5$E@izOj2ta9P!kJ6T9j33_S}OCF2s8=5ig<)%SEzs7OYhdDPc18baq-^f5zGS9T(ZcQv)dw}6)%9-yBgO}XA2#fT`c0IhX+F*a ze1!+NYJdj4A4r6{uzI#vv8H)Nq?c%%TkmdsO7=9vGDZ|e25Bz&BYtnB?410DDmckP zMajT!Z&_PVcKADl6u!upYsk&WYeEsliG(RI@y7(YFQ4_~=5Jv#b1TG4tiTq#2X<&r zQ5PfRghXXDTYw?bw9xwcGoEt!51b6wY>(v?hWL=i0Y!_>GMrzLES3gmTSE7DQE{Uz zQX~_8EZAnlCrtzk*cBS3)0%snWb|~#$U2UsHP;=F${e0e@^hBxuDoqY=;RqKkon)6 z2_eY*2o~37cr`UZ;Tp~_ZM1y-RqUz5+M@Y9Ay=3&Pthk|5O@NZrvV13=M=Gbg0|{MxA9{=hc7D#rMg~aeoO~9IV4cpB zs&0LN>jX%UQ}bgJ>>QWjsp>-cfMGI-v^c*00M`8uPJ;M++=!p;P#!-o_a&ku?kLy7 
zqp#?@6L#QE`uyw0XKe|jr9LS4=m0n_Ac~J=R&RXw8|U9om2m1b8Dk)(=$VD~k<90F z{k`$2K?@abpvl!^*rwMYg<7-B44Cb_GFcsObnc>l=E)jHEcPKAmYCpu0hhw<1k+@P)88$@lb*waQ@c$Ub$qI$EJ<(pNf1o5aoi8a z@T99M$G$fFko2!8V}K3|dUGUo?(n%_4m=AUIDt*v4blJhU`dNOnAg3R;Yf#IKIZm= z_ixSc#aPv?O*?QKW+dOCg=0ZLbB>a)p{i0n4o1tJvkDf2`H=%ARPpBOeP|*y*`aL< z70KJ=5+(xzgB-qH$3B=fMi4+JpZmp<>_Yr(H*LI@fe?X^v;&d$`fdqjq|i~|Z_f?Y&dD2t$@QH4l#7q)T|xN```8+so(I_f%MR~@Fn6{3b}xiKF?DBUfrhIT?idk}0GI21SB8)b>sAGNlH zR^8@=R_%+I0(6%-&gPsk$$T5a(d~U?A z+ML*gG2+@M(tr+NR6ep!oWN@RpqHO`$Qz|>wP8<#NaT1f!63pU~B&9m^es7-t9RB)dAA%*g)MsR8@YY#f2&+Pg~+dvzzWV6N8PY z{LRRUU&Sh#`XdPTlqO2*&*?~dHs)^kk?@3=ZL?b(wX8HaT-dj8r{{X;eLKaB#rKI* zLvi4m@)Si0^)zoE9JxqVG}}zJLJ=Rdnme z8|8ELO4=($DwW7ph`lg^3F?<{tn39nULAj9#57`>C2ZWXDuS+DxWF0Ul8{x^Mz-4d z3}(0MjVJKy-{5k-hW>yYc;J;9eE23mU|-ldKSBfrER7karY*T@i6~Xdr*3K2{PeuP zQokx|ME=a7&*fiK{ZF2qhs_6@HX#sQU5GJw(mjj zJY`xI*RhV_4bKkK6%@D}DT6y#dW?W}ugj@Njali}jjyjz7RW8|zcy4>396ag_BZ4) zIb08LE09 zQ#h<%JG(#>$$4jj(D*4KDh655oYs00Low|Z(O}HJoP-n(85d^3LJCFm(gx~B=+6DE zy2-T93ir)rnGM1t3}6a=!y=(3+&e;ElxC5(q{KHM^;hx3XCEeb->W1&bp^0gZTwfv zYt3qiFsztjm-u5N)LO?ruI3U3z9t2Pw(L-~>T<#0H939nh&PNo;!a&F6k)l&p3TJB zy1jgibBIl=I89{{<3iBX#HsyG#rtGsgec~8#Xm2?-|ywHM&iTwIF%2S1O|aJRZ7ij zjledx?abqaTFozpoMVC^s%ps&QvbuH9-&}jGneDgK-9^SqKLfD0YKs;UGQh!>mYnX zms!FeL%GaLD9I>ycgS8n+0;H9sVVC`wf}m*L?*gJA0EARybKNav5CNffrddLx}6j~ zRr=4Jtu38j_?iu1YBVQy8h6xFPQehaK8+mUe^9?Nvb(VRF13h}c* zhuD2Egb$Q*R8RAG<_#rmR7P;8(Q6t}$uoy8ysJ2H#5mVU;^`1s32*UgDEeB!VThnJ z)0_acd8tTn89h7o?g7kC+?vVCb)X_B6>~&i$+#5WRus(4(`?DjHz;vh;N--9b72ri z5W^ Date: Sat, 20 Sep 2025 16:20:54 +0300 Subject: [PATCH 16/19] chor: remove pkg/sumdb --- pkg/sumdb/sum.golang.org/latest | 5 ----- 1 file changed, 5 deletions(-) delete mode 100644 pkg/sumdb/sum.golang.org/latest diff --git a/pkg/sumdb/sum.golang.org/latest b/pkg/sumdb/sum.golang.org/latest deleted file mode 100644 index 65fabf8ca4a..00000000000 --- a/pkg/sumdb/sum.golang.org/latest +++ /dev/null @@ -1,5 +0,0 @@ -go.sum database tree -43548952 
-nX6jrsdthQ8kDPrwxKP2h/3CAC+o/Tzl00DK+QUiDxE= - -— sum.golang.org Az3grtVCRqi+V2+TLDpRvXhgZDzixz81eDxCTse8HVQFKkxvm3+CBHWwrkincl2+LzuJetgKkMzjLg5M1SI/XmJT7AQ= From 50caee3467aada77a4b442a0da8336c6937d6c98 Mon Sep 17 00:00:00 2001 From: ciricc Date: Sat, 20 Sep 2025 16:31:45 +0300 Subject: [PATCH 17/19] doc: benchmark step by step doc --- bindings/go/README.md | 28 ++-------------------------- 1 file changed, 2 insertions(+), 26 deletions(-) diff --git a/bindings/go/README.md b/bindings/go/README.md index c796b77f0c7..c6de0b9790a 100644 --- a/bindings/go/README.md +++ b/bindings/go/README.md @@ -207,35 +207,11 @@ make examples # optionally: ./build/go-model-download -out models ``` -### Run CPU benchmarks +### Run benchmarks ```bash cd bindings/go/pkg/whisper -go test -bench=ContextProcessCPU -benchmem -run=^$ . -go test -bench=ContextProcessBigCPU -benchmem -run=^$ . -# or run all -go test -bench=. -benchmem -run=^$ . -``` - -### Run GPU benchmarks - -GPU runs toggle `UseGPU` in model params. Ensure your build has GPU backends enabled: - -- CUDA: build the C libs with CUDA - -```bash -cd bindings/go -GGML_CUDA=1 make whisper -``` - -- macOS Metal: supported by default in the build flags on Darwin. - -Then run: - -```bash -cd bindings/go/pkg/whisper -go test -bench=ContextProcessGPU -benchmem -run=^$ . -go test -bench=ContextProcessBigGPU -benchmem -run=^$ . 
+make benchmark ``` ### What the benchmarks measure From 91f5cc12f77f925c9b8a0fe8f577a0e9012faf18 Mon Sep 17 00:00:00 2001 From: ciricc Date: Sat, 20 Sep 2025 17:03:25 +0300 Subject: [PATCH 18/19] refactor: added segment formatiing, fixed benchmark printings --- .../go/pkg/whisper/context_benchmark_test.go | 66 ++++++++++++++++--- bindings/go/pkg/whisper/interface.go | 6 ++ 2 files changed, 62 insertions(+), 10 deletions(-) diff --git a/bindings/go/pkg/whisper/context_benchmark_test.go b/bindings/go/pkg/whisper/context_benchmark_test.go index 04cf5c8977f..8cc6e5d30a6 100644 --- a/bindings/go/pkg/whisper/context_benchmark_test.go +++ b/bindings/go/pkg/whisper/context_benchmark_test.go @@ -2,6 +2,7 @@ package whisper_test import ( "fmt" + "io" "math" "os" "runtime" @@ -13,6 +14,38 @@ import ( wav "github.com/go-audio/wav" ) +func processAndExtractSegmentsSequentially(ctx whisper.Context, samples []float32) ([]whisper.Segment, error) { + if err := ctx.Process(samples, nil, nil, nil); err != nil { + return nil, err + } + + var segments []whisper.Segment + for { + seg, err := ctx.NextSegment() + if err == io.EOF { + break + } else if err != nil { + return nil, err + } + + segments = append(segments, seg) + } + + return segments, nil +} + +func processAndExtractSegmentsWithCallback(ctx whisper.Context, samples []float32) ([]whisper.Segment, error) { + segments := make([]whisper.Segment, 0) + + if err := ctx.Process(samples, nil, func(seg whisper.Segment) { + segments = append(segments, seg) + }, nil); err != nil { + return nil, err + } + + return segments, nil +} + // benchProcessVariants runs the common benchmark matrix across context kinds, // thread sets, and callback modes, for given samples. If singleIteration is true // it runs only one iteration regardless of b.N. 
If printTimings is true, @@ -87,18 +120,23 @@ func benchProcessVariants( b.ResetTimer() for i := 0; i < iters; i++ { - if printTimings { - model.ResetTimings() - } + model.ResetTimings() start := time.Now() - if err := ctx.Process(samples, nil, nil, nil); err != nil { - b.Fatalf("process: %v", err) + + segments, err := processAndExtractSegmentsSequentially(ctx, samples) + if err != nil { + b.Fatalf("process and extract segments sequentially: %v", err) } + + b.Logf("segments: %+v", segments) + + elapsed := time.Since(start) + if printTimings { - elapsed := time.Since(start) model.PrintTimings() - b.ReportMetric(float64(elapsed.Milliseconds()), "ms_process") } + + b.ReportMetric(float64(elapsed.Milliseconds()), "ms_process") } }) @@ -120,14 +158,22 @@ func benchProcessVariants( b.ResetTimer() for i := 0; i < iters; i++ { start := time.Now() + model.ResetTimings() + // Passing a segment callback forces single-segment mode and exercises token extraction - if err := ctx.Process(samples, nil, func(seg whisper.Segment) {}, nil); err != nil { + segments, err := processAndExtractSegmentsWithCallback(ctx, samples) + if err != nil { b.Fatalf("process with callback: %v", err) } + + b.Logf("segments: %+v", segments) + + elapsed := time.Since(start) if printTimings { - elapsed := time.Since(start) - b.ReportMetric(float64(elapsed.Milliseconds()), "ms_process") + model.PrintTimings() } + + b.ReportMetric(float64(elapsed.Milliseconds()), "ms_process") } }) } diff --git a/bindings/go/pkg/whisper/interface.go b/bindings/go/pkg/whisper/interface.go index 4bd0262be06..eabdb2db097 100644 --- a/bindings/go/pkg/whisper/interface.go +++ b/bindings/go/pkg/whisper/interface.go @@ -1,6 +1,7 @@ package whisper import ( + "fmt" "io" "time" ) @@ -175,6 +176,11 @@ type Segment struct { SpeakerTurnNext bool } +func (s Segment) String() string { + // foramt: [00:01:39.000 --> 00:01:50.000] And so, my fellow Americans, ask not what your country can do for you, ask what you can do for your country. 
+ return fmt.Sprintf("[%s --> %s] %s", s.Start.Truncate(time.Millisecond), s.End.Truncate(time.Millisecond), s.Text) +} + // Token is a text or special token type Token struct { // ID of the token From 5711a71d0f71f3e2817e6ee2f9c76310789ff8f1 Mon Sep 17 00:00:00 2001 From: ciricc Date: Sat, 20 Sep 2025 20:23:19 +0300 Subject: [PATCH 19/19] fix: fix sampling type --- bindings/go/pkg/whisper/consts.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bindings/go/pkg/whisper/consts.go b/bindings/go/pkg/whisper/consts.go index fbdd9c310a5..90ca20664c2 100644 --- a/bindings/go/pkg/whisper/consts.go +++ b/bindings/go/pkg/whisper/consts.go @@ -37,7 +37,7 @@ const SampleRate = whisper.SampleRate // SampleBits is the number of bytes per sample. const SampleBits = whisper.SampleBits -type SamplingStrategy whisper.SamplingStrategy +type SamplingStrategy uint32 const ( SAMPLING_GREEDY SamplingStrategy = SamplingStrategy(whisper.SAMPLING_GREEDY)