Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 28 additions & 3 deletions pkg/evaluation/save.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ func SessionFromEvents(events []map[string]any, title, question string) *session
var currentModel string
var currentUsage *chat.Usage
var currentCost float64
var currentTimestamp string

// Helper to flush current assistant message
flushAssistantMessage := func() {
Expand All @@ -87,7 +88,7 @@ func SessionFromEvents(events []map[string]any, title, question string) *session
ReasoningContent: currentReasoningContent.String(),
ToolCalls: currentToolCalls,
ToolDefinitions: currentToolDefinitions,
CreatedAt: time.Now().Format(time.RFC3339),
CreatedAt: currentTimestamp,
Model: currentModel,
Usage: currentUsage,
Cost: currentCost,
Expand All @@ -101,11 +102,13 @@ func SessionFromEvents(events []map[string]any, title, question string) *session
currentModel = ""
currentUsage = nil
currentCost = 0
currentTimestamp = ""
}
}

for _, event := range events {
eventType, _ := event["type"].(string)
eventTimestamp := parseEventTimestamp(event)

switch eventType {
case "agent_choice":
Expand All @@ -116,6 +119,9 @@ func SessionFromEvents(events []map[string]any, title, question string) *session
if agentName, ok := event["agent_name"].(string); ok && agentName != "" {
currentAgentName = agentName
}
if eventTimestamp != "" {
currentTimestamp = eventTimestamp
}

case "agent_choice_reasoning":
// Accumulate reasoning content (for models like DeepSeek, Claude with extended thinking)
Expand All @@ -125,6 +131,9 @@ func SessionFromEvents(events []map[string]any, title, question string) *session
if agentName, ok := event["agent_name"].(string); ok && agentName != "" {
currentAgentName = agentName
}
if eventTimestamp != "" {
currentTimestamp = eventTimestamp
}

case "tool_call":
// Parse tool call and add to current message
Expand All @@ -143,6 +152,9 @@ func SessionFromEvents(events []map[string]any, title, question string) *session
if agentName, ok := event["agent_name"].(string); ok && agentName != "" {
currentAgentName = agentName
}
if eventTimestamp != "" {
currentTimestamp = eventTimestamp
}

case "tool_call_response":
// Flush any pending assistant message before adding tool response
Expand All @@ -158,7 +170,7 @@ func SessionFromEvents(events []map[string]any, title, question string) *session
Role: chat.MessageRoleTool,
Content: response,
ToolCallID: toolCallID,
CreatedAt: time.Now().Format(time.RFC3339),
CreatedAt: eventTimestamp,
},
}
sess.AddMessage(msg)
Expand Down Expand Up @@ -198,7 +210,7 @@ func SessionFromEvents(events []map[string]any, title, question string) *session
Message: chat.Message{
Role: chat.MessageRoleSystem,
Content: "Error: " + errorMsg,
CreatedAt: time.Now().Format(time.RFC3339),
CreatedAt: eventTimestamp,
},
}
sess.AddMessage(msg)
Expand Down Expand Up @@ -301,6 +313,19 @@ func parseMessageUsage(m map[string]any) *chat.Usage {
return usage
}

// parseEventTimestamp extracts the timestamp from an event map.
// Returns the timestamp string, falling back to current time if not present or invalid.
func parseEventTimestamp(event map[string]any) string {
if ts, ok := event["timestamp"].(string); ok && ts != "" {
// Validate RFC3339 format
if _, err := time.Parse(time.RFC3339, ts); err == nil {
return ts
}
// Invalid timestamp format - fall back to current time
}
return time.Now().Format(time.RFC3339)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could this mess up the order of things? 🤔

Copy link
Member Author

@dgageot dgageot Feb 4, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not really. This is for evals. Not super dangerous even if it did

}

// SaveRunJSON saves the eval run results to a JSON file.
// This is kept for backward compatibility and debugging purposes.
func SaveRunJSON(run *EvalRun, outputDir string) (string, error) {
Expand Down
Loading
Loading