13 changes: 12 additions & 1 deletion README.md
@@ -62,4 +62,15 @@ cat README.md | gh models run gpt-4o-mini "summarize this text"
 
 ### Building
 
-Run `script/build`.
+Run `script/build`. Now you can run the binary locally, e.g. `./gh-models list`.
+
+### Releasing
+
+`gh extension upgrade github/gh-models` or `gh extension install github/gh-models` will pull the latest release, not the latest commit, so all changes require cutting a new release:
+
+```shell
+git tag v0.0.x main
+git push origin tag v0.0.x
+```
+
+This will trigger the `release` action that runs the actual production build.
25 changes: 14 additions & 11 deletions cmd/run/run.go
@@ -391,18 +391,21 @@ func NewRunCommand() *cobra.Command {
 	sp.Stop()
 
 	for _, choice := range completion.Choices {
-		if choice.Delta != nil {
-			if choice.Delta.Content == nil {
-				continue
-			}
-
-			messageBuilder.WriteString(*choice.Delta.Content)
-			io.WriteString(out, *choice.Delta.Content)
+		// Streamed responses from the OpenAI API have their data in `.Delta`, while
+		// non-streamed responses use `.Message`, so let's support both
+		if choice.Delta != nil && choice.Delta.Content != nil {
+			content := choice.Delta.Content
+			messageBuilder.WriteString(*content)
+			io.WriteString(out, *content)
+		} else if choice.Message != nil && choice.Message.Content != nil {
+			content := choice.Message.Content
+			messageBuilder.WriteString(*content)
+			io.WriteString(out, *content)
+		}
 
-			// Introduce a small delay in between response tokens to better simulate a conversation
-			if terminal.IsTerminalOutput() {
-				time.Sleep(10 * time.Millisecond)
-			}
+		// Introduce a small delay in between response tokens to better simulate a conversation
+		if terminal.IsTerminalOutput() {
+			time.Sleep(10 * time.Millisecond)
 		}
 	}
 }
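For reference, the two branches above correspond to the two payload shapes a choice can carry. A rough sketch of the structs this code assumes, inferred only from the usage above (the real definitions live in `internal/azure_models/types.go` and may carry more fields):

```go
// Sketch only: field shapes inferred from how run.go dereferences
// choice.Delta.Content and choice.Message.Content above.
type chatChoiceDelta struct {
	Content *string `json:"content,omitempty"` // set on streamed chunks
}

type chatChoiceMessage struct {
	Content *string `json:"content,omitempty"` // set on non-streamed responses
}

type chatChoice struct {
	Delta   *chatChoiceDelta   `json:"delta,omitempty"`
	Message *chatChoiceMessage `json:"message,omitempty"`
}
```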
22 changes: 20 additions & 2 deletions internal/azure_models/client.go
@@ -31,7 +31,12 @@ func NewClient(authToken string) *Client {
 }
 
 func (c *Client) GetChatCompletionStream(req ChatCompletionOptions) (*ChatCompletionResponse, error) {
-	req.Stream = true
+	// Check if the model name is `o1-mini` or `o1-preview`
+	if req.Model == "o1-mini" || req.Model == "o1-preview" {
+		req.Stream = false
+	} else {
+		req.Stream = true
+	}
 
 	bodyBytes, err := json.Marshal(req)
 	if err != nil {
@@ -60,7 +65,20 @@ func (c *Client) GetChatCompletionStream(req ChatCompletionOptions) (*ChatComple
 	}
 
 	var chatCompletionResponse ChatCompletionResponse
-	chatCompletionResponse.Reader = sse.NewEventReader[ChatCompletion](resp.Body)
+
+	if req.Stream {
+		// Handle streamed response
+		chatCompletionResponse.Reader = sse.NewEventReader[ChatCompletion](resp.Body)
+	} else {
+		var completion ChatCompletion
+		if err := json.NewDecoder(resp.Body).Decode(&completion); err != nil {
+			return nil, err
+		}
+
+		// Create a mock reader that returns the decoded completion
+		mockReader := sse.NewMockEventReader([]ChatCompletion{completion})
+		chatCompletionResponse.Reader = mockReader
+	}
 
 	return &chatCompletionResponse, nil
 }
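Whichever branch runs, the caller gets a `ChatCompletionResponse` whose `Reader` can be drained the same way. A minimal sketch of that consuming loop, assuming the module path `github.com/github/gh-models`, that the reader reports `io.EOF` once drained, and a hypothetical helper name (the real loop lives in `cmd/run/run.go`):

```go
package example

import (
	"errors"
	"fmt"
	"io"

	"github.com/github/gh-models/internal/azure_models"
)

// consumeCompletions drains a response reader until io.EOF and prints whatever
// content each choice carries. Hypothetical helper; mirrors cmd/run/run.go.
func consumeCompletions(resp *azure_models.ChatCompletionResponse, out io.Writer) error {
	defer resp.Reader.Close()
	for {
		completion, err := resp.Reader.Read()
		if errors.Is(err, io.EOF) {
			return nil // the real stream or the mock reader is exhausted
		}
		if err != nil {
			return err
		}
		for _, choice := range completion.Choices {
			if choice.Delta != nil && choice.Delta.Content != nil {
				fmt.Fprint(out, *choice.Delta.Content) // streamed chunk
			} else if choice.Message != nil && choice.Message.Content != nil {
				fmt.Fprint(out, *choice.Message.Content) // non-streamed completion
			}
		}
	}
}
```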
2 changes: 1 addition & 1 deletion internal/azure_models/types.go
@@ -50,7 +50,7 @@ type ChatCompletion struct {
 }
 
 type ChatCompletionResponse struct {
-	Reader *sse.EventReader[ChatCompletion]
+	Reader sse.Reader[ChatCompletion]
 }
 
 type modelCatalogSearchResponse struct {
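The new field type implies a small `sse.Reader` interface that both the real `EventReader` and the `MockEventReader` below satisfy. Its definition is not part of this diff, but given the two implementations it presumably looks something like:

```go
package sse

// Reader is the presumed common interface behind ChatCompletionResponse.Reader:
// something that yields decoded events one at a time and can be closed.
// Not shown in this diff; the actual definition may differ.
type Reader[T any] interface {
	Read() (T, error) // returns io.EOF once no more events are available
	Close() error
}
```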
37 changes: 37 additions & 0 deletions internal/sse/mockeventreader.go
@@ -0,0 +1,37 @@
+package sse
+
+import (
+	"bufio"
+	"bytes"
+	"io"
+)
+
+// MockEventReader is a mock implementation of the sse.EventReader. This lets us use EventReader as a common interface
+// for models that support streaming (like gpt-4o) and models that do not (like the o1 class of models)
+type MockEventReader[T any] struct {
+	reader  io.ReadCloser
+	scanner *bufio.Scanner
+	events  []T
+	index   int
+}
+
+func NewMockEventReader[T any](events []T) *MockEventReader[T] {
+	data := []byte{}
+	reader := io.NopCloser(bytes.NewReader(data))
+	scanner := bufio.NewScanner(reader)
+	return &MockEventReader[T]{reader: reader, scanner: scanner, events: events, index: 0}
+}
+
+func (mer *MockEventReader[T]) Read() (T, error) {
+	if mer.index >= len(mer.events) {
+		var zero T
+		return zero, io.EOF
+	}
+	event := mer.events[mer.index]
+	mer.index++
+	return event, nil
+}
+
+func (mer *MockEventReader[T]) Close() error {
+	return mer.reader.Close()
+}
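
A quick illustration of the mock's contract, written as a test it could plausibly have (illustrative only, not part of the diff): it returns its events in order and then `io.EOF`.

```go
package sse

import (
	"io"
	"testing"
)

// Illustrative test: NewMockEventReader yields each event once, then io.EOF.
func TestMockEventReaderReturnsEventsThenEOF(t *testing.T) {
	reader := NewMockEventReader([]string{"first", "second"})

	for _, want := range []string{"first", "second"} {
		got, err := reader.Read()
		if err != nil || got != want {
			t.Fatalf("Read() = %q, %v; want %q, <nil>", got, err, want)
		}
	}

	if _, err := reader.Read(); err != io.EOF {
		t.Fatalf("expected io.EOF after all events, got %v", err)
	}

	if err := reader.Close(); err != nil {
		t.Fatalf("Close() returned %v", err)
	}
}
```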