diff --git a/DevProxy.Abstractions/LanguageModel/OpenAIModels.cs b/DevProxy.Abstractions/LanguageModel/OpenAIModels.cs index 861a1d1f..3d2496b6 100644 --- a/DevProxy.Abstractions/LanguageModel/OpenAIModels.cs +++ b/DevProxy.Abstractions/LanguageModel/OpenAIModels.cs @@ -2,6 +2,9 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. +using DevProxy.Abstractions.Utils; +using Microsoft.Extensions.Logging; +using System.Text.Json; using System.Text.Json.Serialization; namespace DevProxy.Abstractions.LanguageModel; @@ -20,6 +23,95 @@ public class OpenAIRequest public double? Temperature { get; set; } [JsonPropertyName("top_p")] public double? TopP { get; set; } + + public static bool TryGetOpenAIRequest(string content, ILogger logger, out OpenAIRequest? request) + { + logger.LogTrace("{Method} called", nameof(TryGetOpenAIRequest)); + + request = null; + + if (string.IsNullOrEmpty(content)) + { + logger.LogDebug("Request content is empty or null"); + return false; + } + + try + { + logger.LogDebug("Checking if the request is an OpenAI request..."); + + var rawRequest = JsonSerializer.Deserialize(content, ProxyUtils.JsonSerializerOptions); + + // Check for completion request (has "prompt", but not specific to image) + if (rawRequest.TryGetProperty("prompt", out _) && + !rawRequest.TryGetProperty("size", out _) && + !rawRequest.TryGetProperty("n", out _)) + { + logger.LogDebug("Request is a completion request"); + request = JsonSerializer.Deserialize(content, ProxyUtils.JsonSerializerOptions); + return true; + } + + // Chat completion request + if (rawRequest.TryGetProperty("messages", out _)) + { + logger.LogDebug("Request is a chat completion request"); + request = JsonSerializer.Deserialize(content, ProxyUtils.JsonSerializerOptions); + return true; + } + + // Embedding request + if (rawRequest.TryGetProperty("input", out _) && + rawRequest.TryGetProperty("model", out _) && + !rawRequest.TryGetProperty("voice", out _)) + { + logger.LogDebug("Request is an embedding request"); + request = JsonSerializer.Deserialize(content, ProxyUtils.JsonSerializerOptions); + return true; + } + + // Image generation request + if (rawRequest.TryGetProperty("prompt", out _) && + (rawRequest.TryGetProperty("size", out _) || rawRequest.TryGetProperty("n", out _))) + { + logger.LogDebug("Request is an image generation request"); + request = JsonSerializer.Deserialize(content, ProxyUtils.JsonSerializerOptions); + return true; + } + + // Audio transcription request + if (rawRequest.TryGetProperty("file", out _)) + { + logger.LogDebug("Request is an audio transcription request"); + request = JsonSerializer.Deserialize(content, ProxyUtils.JsonSerializerOptions); + return true; + } + + // Audio speech synthesis request + if (rawRequest.TryGetProperty("input", out _) && rawRequest.TryGetProperty("voice", out _)) + { + logger.LogDebug("Request is an audio speech synthesis request"); + request = JsonSerializer.Deserialize(content, ProxyUtils.JsonSerializerOptions); + return true; + } + + // Fine-tuning request + if (rawRequest.TryGetProperty("training_file", out _)) + { + logger.LogDebug("Request is a fine-tuning request"); + request = JsonSerializer.Deserialize(content, ProxyUtils.JsonSerializerOptions); + return true; + } + + logger.LogDebug("Request is not an OpenAI request."); + return false; + } + catch (JsonException ex) + { + logger.LogDebug(ex, "Failed to deserialize OpenAI request."); + return false; + } + } } public class OpenAIResponse : ILanguageModelCompletionResponse @@ -82,10 +174,18 @@ public class OpenAIResponseUsage public long CompletionTokens { get; set; } [JsonPropertyName("prompt_tokens")] public long PromptTokens { get; set; } + [JsonPropertyName("prompt_tokens_details")] + public PromptTokenDetails? PromptTokensDetails { get; set; } [JsonPropertyName("total_tokens")] public long TotalTokens { get; set; } } +public class PromptTokenDetails +{ + [JsonPropertyName("cached_tokens")] + public long CachedTokens { get; set; } +} + public class OpenAIResponsePromptFilterResult { [JsonPropertyName("content_filter_results")] diff --git a/DevProxy.Plugins/Inspection/OpenAITelemetryPlugin.cs b/DevProxy.Plugins/Inspection/OpenAITelemetryPlugin.cs index 7707cdf5..1233dc0b 100644 --- a/DevProxy.Plugins/Inspection/OpenAITelemetryPlugin.cs +++ b/DevProxy.Plugins/Inspection/OpenAITelemetryPlugin.cs @@ -7,6 +7,7 @@ using DevProxy.Abstractions.Plugins; using DevProxy.Abstractions.Proxy; using DevProxy.Abstractions.Utils; +using DevProxy.Plugins.Utils; using Microsoft.Extensions.Configuration; using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.Logging; @@ -19,7 +20,6 @@ using System.Diagnostics; using System.Diagnostics.Metrics; using System.Text.Json; -using Titanium.Web.Proxy.Http; namespace DevProxy.Plugins.Inspection; @@ -108,7 +108,7 @@ public override Task BeforeRequestAsync(ProxyRequestArgs e, CancellationToken ca return Task.CompletedTask; } - if (!TryGetOpenAIRequest(request.BodyString, out var openAiRequest) || openAiRequest is null) + if (!OpenAIRequest.TryGetOpenAIRequest(request.BodyString, Logger, out var openAiRequest) || openAiRequest is null) { Logger.LogRequest("Skipping non-OpenAI request", MessageType.Skipped, new(e.Session)); return Task.CompletedTask; @@ -323,9 +323,9 @@ private void ProcessSuccessResponse(Activity activity, ProxyResponseArgs e) } var bodyString = response.BodyString; - if (IsStreamingResponse(response)) + if (HttpUtils.IsStreamingResponse(response, Logger)) { - bodyString = GetBodyFromStreamingResponse(response); + bodyString = HttpUtils.GetBodyFromStreamingResponse(response, Logger); } AddResponseTypeSpecificTags(activity, openAiRequest, bodyString); @@ -895,95 +895,6 @@ private void RecordUsageMetrics(Activity activity, OpenAIRequest request, OpenAI Logger.LogTrace("RecordUsageMetrics() finished"); } - private bool TryGetOpenAIRequest(string content, out OpenAIRequest? request) - { - Logger.LogTrace("TryGetOpenAIRequest() called"); - - request = null; - - if (string.IsNullOrEmpty(content)) - { - Logger.LogDebug("Request content is empty or null"); - return false; - } - - try - { - Logger.LogDebug("Checking if the request is an OpenAI request..."); - - var rawRequest = JsonSerializer.Deserialize(content, ProxyUtils.JsonSerializerOptions); - - // Check for completion request (has "prompt", but not specific to image) - if (rawRequest.TryGetProperty("prompt", out _) && - !rawRequest.TryGetProperty("size", out _) && - !rawRequest.TryGetProperty("n", out _)) - { - Logger.LogDebug("Request is a completion request"); - request = JsonSerializer.Deserialize(content, ProxyUtils.JsonSerializerOptions); - return true; - } - - // Chat completion request - if (rawRequest.TryGetProperty("messages", out _)) - { - Logger.LogDebug("Request is a chat completion request"); - request = JsonSerializer.Deserialize(content, ProxyUtils.JsonSerializerOptions); - return true; - } - - // Embedding request - if (rawRequest.TryGetProperty("input", out _) && - rawRequest.TryGetProperty("model", out _) && - !rawRequest.TryGetProperty("voice", out _)) - { - Logger.LogDebug("Request is an embedding request"); - request = JsonSerializer.Deserialize(content, ProxyUtils.JsonSerializerOptions); - return true; - } - - // Image generation request - if (rawRequest.TryGetProperty("prompt", out _) && - (rawRequest.TryGetProperty("size", out _) || rawRequest.TryGetProperty("n", out _))) - { - Logger.LogDebug("Request is an image generation request"); - request = JsonSerializer.Deserialize(content, ProxyUtils.JsonSerializerOptions); - return true; - } - - // Audio transcription request - if (rawRequest.TryGetProperty("file", out _)) - { - Logger.LogDebug("Request is an audio transcription request"); - request = JsonSerializer.Deserialize(content, ProxyUtils.JsonSerializerOptions); - return true; - } - - // Audio speech synthesis request - if (rawRequest.TryGetProperty("input", out _) && rawRequest.TryGetProperty("voice", out _)) - { - Logger.LogDebug("Request is an audio speech synthesis request"); - request = JsonSerializer.Deserialize(content, ProxyUtils.JsonSerializerOptions); - return true; - } - - // Fine-tuning request - if (rawRequest.TryGetProperty("training_file", out _)) - { - Logger.LogDebug("Request is a fine-tuning request"); - request = JsonSerializer.Deserialize(content, ProxyUtils.JsonSerializerOptions); - return true; - } - - Logger.LogDebug("Request is not an OpenAI request."); - return false; - } - catch (JsonException ex) - { - Logger.LogDebug(ex, "Failed to deserialize OpenAI request."); - return false; - } - } - private static string GetOperationName(OpenAIRequest request) { if (request == null) @@ -1004,63 +915,6 @@ private static string GetOperationName(OpenAIRequest request) }; } - private bool IsStreamingResponse(Response response) - { - Logger.LogTrace("{Method} called", nameof(IsStreamingResponse)); - var contentType = response.Headers.FirstOrDefault(h => h.Name.Equals("content-type", StringComparison.OrdinalIgnoreCase))?.Value; - if (string.IsNullOrEmpty(contentType)) - { - Logger.LogDebug("No content-type header found"); - return false; - } - - var isStreamingResponse = contentType.Contains("text/event-stream", StringComparison.OrdinalIgnoreCase); - Logger.LogDebug("IsStreamingResponse: {IsStreamingResponse}", isStreamingResponse); - - Logger.LogTrace("{Method} finished", nameof(IsStreamingResponse)); - return isStreamingResponse; - } - - private string GetBodyFromStreamingResponse(Response response) - { - Logger.LogTrace("{Method} called", nameof(GetBodyFromStreamingResponse)); - - // default to the whole body - var bodyString = response.BodyString; - - var chunks = bodyString.Split("\n\n", StringSplitOptions.RemoveEmptyEntries); - if (chunks.Length == 0) - { - Logger.LogDebug("No chunks found in the response body"); - return bodyString; - } - - // check if the last chunk is `data: [DONE]` - var lastChunk = chunks.Last().Trim(); - if (lastChunk.Equals("data: [DONE]", StringComparison.OrdinalIgnoreCase)) - { - // get next to last chunk - var chunk = chunks.Length > 1 ? chunks[^2].Trim() : string.Empty; - if (chunk.StartsWith("data: ", StringComparison.OrdinalIgnoreCase)) - { - // remove the "data: " prefix - bodyString = chunk["data: ".Length..].Trim(); - Logger.LogDebug("Last chunk starts with 'data: ', using the last chunk as the body: {BodyString}", bodyString); - } - else - { - Logger.LogDebug("Last chunk does not start with 'data: ', using the whole body"); - } - } - else - { - Logger.LogDebug("Last chunk is not `data: [DONE]`, using the whole body"); - } - - Logger.LogTrace("{Method} finished", nameof(GetBodyFromStreamingResponse)); - return bodyString; - } - public void Dispose() { _loader?.Dispose(); diff --git a/DevProxy.Plugins/Inspection/OpenAIUsageDebuggingPlugin.cs b/DevProxy.Plugins/Inspection/OpenAIUsageDebuggingPlugin.cs new file mode 100644 index 00000000..d3a3f83f --- /dev/null +++ b/DevProxy.Plugins/Inspection/OpenAIUsageDebuggingPlugin.cs @@ -0,0 +1,176 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using DevProxy.Abstractions.LanguageModel; +using DevProxy.Abstractions.Plugins; +using DevProxy.Abstractions.Proxy; +using DevProxy.Abstractions.Utils; +using DevProxy.Plugins.Utils; +using Microsoft.Extensions.Logging; +using System.Globalization; +using System.Text.Json; +using Titanium.Web.Proxy.Http; + +namespace DevProxy.Plugins.Inspection; + +sealed class UsageRecord +{ + public DateTime Time { get; set; } + public int Status { get; set; } + public string? RetryAfter { get; set; } + public string? Policy { get; set; } + public long? PromptTokens { get; set; } + public long? CompletionTokens { get; set; } + public long? CachedTokens { get; set; } + public long? TotalTokens { get; set; } + public long? RemainingTokens { get; set; } + public long? RemainingRequests { get; set; } + + public override string ToString() + { + return $"{Time:O},{Status},{RetryAfter},{Policy},{PromptTokens},{CompletionTokens},{CachedTokens},{TotalTokens},{RemainingTokens},{RemainingRequests}"; + } + + public static string GetHeader() + { + return "time,status,retry-after,policy,prompt tokens,completion tokens,cached tokens,total tokens,remaining tokens,remaining requests"; + } +} + +public sealed class OpenAIUsageDebuggingPlugin( + ILogger logger, + ISet urlsToWatch) : + BaseReportingPlugin( + logger, + urlsToWatch) +{ + public override string Name => nameof(OpenAIUsageDebuggingPlugin); + + private readonly string outputFileName = $"devproxy_llmusage_{DateTime.Now:yyyyMMddHHmmss}.csv"; + + public override async Task InitializeAsync(InitArgs e, CancellationToken cancellationToken) + { + Logger.LogTrace("{Method} called", nameof(InitializeAsync)); + + ArgumentNullException.ThrowIfNull(e); + + if (!File.Exists(outputFileName)) + { + await File.AppendAllLinesAsync(outputFileName, [UsageRecord.GetHeader()], cancellationToken); + } + + Logger.LogTrace("{Method} finished", nameof(InitializeAsync)); + } + + public override async Task AfterResponseAsync(ProxyResponseArgs e, CancellationToken cancellationToken) + { + Logger.LogTrace("{Method} called", nameof(AfterResponseAsync)); + + ArgumentNullException.ThrowIfNull(e); + + if (!e.HasRequestUrlMatch(UrlsToWatch)) + { + Logger.LogRequest("URL not matched", MessageType.Skipped, new LoggingContext(e.Session)); + return; + } + + var request = e.Session.HttpClient.Request; + if (request.Method is null || + !request.Method.Equals("POST", StringComparison.OrdinalIgnoreCase) || + !request.HasBody) + { + Logger.LogRequest("Request is not a POST request with a body", MessageType.Skipped, new(e.Session)); + return; + } + + if (!OpenAIRequest.TryGetOpenAIRequest(request.BodyString, Logger, out var openAiRequest) || openAiRequest is null) + { + Logger.LogRequest("Skipping non-OpenAI request", MessageType.Skipped, new(e.Session)); + return; + } + + var response = e.Session.HttpClient.Response; + var bodyString = response.BodyString; + if (HttpUtils.IsStreamingResponse(response, Logger)) + { + bodyString = HttpUtils.GetBodyFromStreamingResponse(response, Logger); + } + + var usage = new UsageRecord + { + Time = DateTime.TryParse( + e.Session.HttpClient.Response.Headers.FirstOrDefault(h => h.Name.Equals("date", StringComparison.OrdinalIgnoreCase))?.Value, + CultureInfo.InvariantCulture, + DateTimeStyles.None, + out var parsedDate) + ? parsedDate + : DateTime.Now, + Status = e.Session.HttpClient.Response.StatusCode + }; + +#pragma warning disable IDE0010 + switch (response.StatusCode) +#pragma warning restore IDE0010 + { + case int code when code is >= 200 and < 300: + ProcessSuccessResponse(bodyString, usage, e); + break; + case int code when code >= 400: + ProcessErrorResponse(usage, e); + break; + } + + await File.AppendAllLinesAsync(outputFileName, [usage.ToString()], cancellationToken); + Logger.LogRequest("Processed OpenAI request", MessageType.Processed, new(e.Session)); + + Logger.LogTrace("Left {Name}", nameof(AfterResponseAsync)); + } + + private void ProcessSuccessResponse(string responseBody, UsageRecord usage, ProxyResponseArgs e) + { + Logger.LogTrace("{Method} called", nameof(ProcessSuccessResponse)); + + var oaiResponse = JsonSerializer.Deserialize(responseBody, ProxyUtils.JsonSerializerOptions); + if (oaiResponse is null) + { + return; + } + + var response = e.Session.HttpClient.Response; + + usage.PromptTokens = oaiResponse.Usage?.PromptTokens; + usage.CompletionTokens = oaiResponse.Usage?.CompletionTokens; + usage.CachedTokens = oaiResponse.Usage?.PromptTokensDetails?.CachedTokens; + usage.TotalTokens = oaiResponse.Usage?.TotalTokens; + usage.RemainingTokens = TryParseHeaderAsLong(response, "x-ratelimit-remaining-tokens", out var remainingTokens) ? remainingTokens : null; + usage.RemainingRequests = TryParseHeaderAsLong(response, "x-ratelimit-remaining-requests", out var remainingRequests) ? remainingRequests : null; + + Logger.LogTrace("Left {Name}", nameof(ProcessSuccessResponse)); + } + private void ProcessErrorResponse(UsageRecord usage, ProxyResponseArgs e) + { + Logger.LogTrace("{Method} called", nameof(ProcessErrorResponse)); + + var response = e.Session.HttpClient.Response; + + usage.RetryAfter = response.Headers.FirstOrDefault(h => h.Name.Equals("retry-after", StringComparison.OrdinalIgnoreCase))?.Value; + usage.Policy = response.Headers.FirstOrDefault(h => h.Name.Equals("policy-id", StringComparison.OrdinalIgnoreCase))?.Value; + + Logger.LogTrace("Left {Name}", nameof(ProcessErrorResponse)); + } + + private static bool TryParseHeaderAsLong(Response response, string headerName, out long? value) + { + value = null; + var header = response.Headers.FirstOrDefault(h => h.Name.Equals(headerName, StringComparison.OrdinalIgnoreCase))?.Value; + + if (header is not null && long.TryParse(header, out var parsedValue)) + { + value = parsedValue; + return true; + } + + return false; + } +} \ No newline at end of file diff --git a/DevProxy.Plugins/Utils/HttpUtils.cs b/DevProxy.Plugins/Utils/HttpUtils.cs new file mode 100644 index 00000000..14b9b37a --- /dev/null +++ b/DevProxy.Plugins/Utils/HttpUtils.cs @@ -0,0 +1,70 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using Microsoft.Extensions.Logging; +using Titanium.Web.Proxy.Http; + +namespace DevProxy.Plugins.Utils; + +internal sealed class HttpUtils +{ + public static string GetBodyFromStreamingResponse(Response response, ILogger logger) + { + logger.LogTrace("{Method} called", nameof(GetBodyFromStreamingResponse)); + + ArgumentNullException.ThrowIfNull(response); + + // default to the whole body + var bodyString = response.BodyString; + + var chunks = bodyString.Split("\n\n", StringSplitOptions.RemoveEmptyEntries); + if (chunks.Length == 0) + { + logger.LogDebug("No chunks found in the response body"); + return bodyString; + } + + // check if the last chunk is `data: [DONE]` + var lastChunk = chunks.Last().Trim(); + if (lastChunk.Equals("data: [DONE]", StringComparison.OrdinalIgnoreCase)) + { + // get next to last chunk + var chunk = chunks.Length > 1 ? chunks[^2].Trim() : string.Empty; + if (chunk.StartsWith("data: ", StringComparison.OrdinalIgnoreCase)) + { + // remove the "data: " prefix + bodyString = chunk["data: ".Length..].Trim(); + logger.LogDebug("Last chunk starts with 'data: ', using the last chunk as the body: {BodyString}", bodyString); + } + else + { + logger.LogDebug("Last chunk does not start with 'data: ', using the whole body"); + } + } + else + { + logger.LogDebug("Last chunk is not `data: [DONE]`, using the whole body"); + } + + logger.LogTrace("{Method} finished", nameof(GetBodyFromStreamingResponse)); + return bodyString; + } + + public static bool IsStreamingResponse(Response response, ILogger logger) + { + logger.LogTrace("{Method} called", nameof(IsStreamingResponse)); + var contentType = response.Headers.FirstOrDefault(h => h.Name.Equals("content-type", StringComparison.OrdinalIgnoreCase))?.Value; + if (string.IsNullOrEmpty(contentType)) + { + logger.LogDebug("No content-type header found"); + return false; + } + + var isStreamingResponse = contentType.Contains("text/event-stream", StringComparison.OrdinalIgnoreCase); + logger.LogDebug("IsStreamingResponse: {IsStreamingResponse}", isStreamingResponse); + + logger.LogTrace("{Method} finished", nameof(IsStreamingResponse)); + return isStreamingResponse; + } +} \ No newline at end of file