From 31e55543820394dc0ded16372bb3b4f9ca0ac6cf Mon Sep 17 00:00:00 2001 From: Fesenko-A <111450737+Fesenko-A@users.noreply.github.com> Date: Thu, 14 May 2026 11:39:30 +0300 Subject: [PATCH 1/2] update TranscriptionResponse output to list of words and segments instead of JSONs --- Apps.OpenAI/Actions/AudioActions.cs | 9 ++++----- Apps.OpenAI/Apps.OpenAI.csproj | 2 +- .../Responses/Audio/TranscriptionResponse.cs | 11 ++++++----- Tests.OpenAI/AudioServiceTests.cs | 17 +++++------------ 4 files changed, 16 insertions(+), 23 deletions(-) diff --git a/Apps.OpenAI/Actions/AudioActions.cs b/Apps.OpenAI/Actions/AudioActions.cs index bee9c58..8a29f82 100644 --- a/Apps.OpenAI/Actions/AudioActions.cs +++ b/Apps.OpenAI/Actions/AudioActions.cs @@ -12,7 +12,6 @@ using Blackbird.Applications.Sdk.Common.Invocation; using Blackbird.Applications.Sdk.Utils.Extensions.Files; using Blackbird.Applications.SDK.Extensions.FileManagement.Interfaces; -using Newtonsoft.Json; using RestSharp; using System; using System.Collections.Generic; @@ -77,14 +76,14 @@ public async Task CreateTranscription( } var response = await UniversalClient.ExecuteWithErrorHandling(request); - var words = response.Words?.Select(x => new WordResponse(x)).ToList() ?? new List(); - var segments = response.Segments?.Select(x => new SegmentResponse(x)).ToList() ?? new List(); + var words = response.Words?.Select(x => new WordResponse(x)).ToList() ?? []; + var segments = response.Segments?.Select(x => new SegmentResponse(x)).ToList() ?? []; return new() { Transcription = BuildTranscription(response, isDiarizationModel), - Words = JsonConvert.SerializeObject(words), - Segments = JsonConvert.SerializeObject(segments) + Words = words, + Segments = segments }; static string GetResponseFormat(string model) => model switch diff --git a/Apps.OpenAI/Apps.OpenAI.csproj b/Apps.OpenAI/Apps.OpenAI.csproj index 604aebd..470f43b 100644 --- a/Apps.OpenAI/Apps.OpenAI.csproj +++ b/Apps.OpenAI/Apps.OpenAI.csproj @@ -4,7 +4,7 @@ net8.0 OpenAI Creating safe artificial general intelligence that benefits all of humanity - 2.8.42 + 2.8.43 Apps.OpenAI diff --git a/Apps.OpenAI/Models/Responses/Audio/TranscriptionResponse.cs b/Apps.OpenAI/Models/Responses/Audio/TranscriptionResponse.cs index 955cd32..76443c5 100644 --- a/Apps.OpenAI/Models/Responses/Audio/TranscriptionResponse.cs +++ b/Apps.OpenAI/Models/Responses/Audio/TranscriptionResponse.cs @@ -1,4 +1,5 @@ -using Apps.OpenAI.Dtos; +using System.Collections.Generic; +using Apps.OpenAI.Dtos; using Blackbird.Applications.Sdk.Common; namespace Apps.OpenAI.Models.Responses.Audio; @@ -8,13 +9,13 @@ public class TranscriptionResponse public string Transcription { get; set; } [Display("Words (serialized)")] - public string Words { get; set; } + public List Words { get; set; } [Display("Segments (serialized)")] - public string Segments { get; set; } + public List Segments { get; set; } } -public record WordResponse(WordDto dto) +public class WordResponse(WordDto dto) { public string Word { get; set; } = dto.Word; @@ -23,7 +24,7 @@ public record WordResponse(WordDto dto) public double End { get; set; } = dto.End; } -public record SegmentResponse(SegmentDto dto) +public class SegmentResponse(SegmentDto dto) { [Display("Segment ID")] public string Id { get; set; } = dto.Id; diff --git a/Tests.OpenAI/AudioServiceTests.cs b/Tests.OpenAI/AudioServiceTests.cs index f7e42d3..5b789fb 100644 --- a/Tests.OpenAI/AudioServiceTests.cs +++ b/Tests.OpenAI/AudioServiceTests.cs @@ -7,35 +7,30 @@ using Blackbird.Applications.Sdk.Common.Files; using Blackbird.Applications.Sdk.Common.Exceptions; using Blackbird.Applications.Sdk.Common.Invocation; -using Newtonsoft.Json.Linq; namespace Tests.OpenAI; [TestClass] public class AudioServiceTests : TestBaseWithContext { - [TestMethod, ContextDataSource(ConnectionTypes.OpenAiEmbedded, ConnectionTypes.OpenAi)] - public async Task CreateTranscription_OpenAi_ReturnsTranscription_DiarizedJsonFormat(InvocationContext context) + [TestMethod, ContextDataSource(ConnectionTypes.OpenAi)] + public async Task CreateTranscription_OpenAi_ReturnsTranscription(InvocationContext context) { // Arrange var handler = new AudioActions(context, FileManagementClient); var model = new AudioModelIdentifier { ModelId = "gpt-4o-transcribe-diarize" }; var request = new TranscriptionRequest { - File = new FileReference { Name = "Transcription sample short.mp3" }, - Language = "pt", + File = new FileReference { Name = "tts delorean.mp3" }, + Language = "en", }; // Act var result = await handler.CreateTranscription(model, request); - var segments = JArray.Parse(result.Segments); // Assert - TestContext.WriteLine(result.Transcription); - TestContext.WriteLine(result.Segments); + PrintResult(result); Assert.IsNotNull(result); - Assert.IsTrue(segments.Count > 0); - Assert.IsTrue(segments.Any(x => x["Speaker"] != null)); } [TestMethod, ContextDataSource(ConnectionTypes.OpenAiEmbedded, ConnectionTypes.OpenAi)] @@ -75,7 +70,6 @@ public async Task CreateTranscription_OpenAi_StandardModel_ReturnsSingleBlobText // Assert TestContext.WriteLine(result.Transcription); - TestContext.WriteLine(result.Segments); Assert.IsNotNull(result); Assert.IsFalse(result.Transcription.Contains("A:")); } @@ -96,7 +90,6 @@ public async Task CreateTranscription_AzureOpenAi_ReturnsTranscription(Invocatio // Assert TestContext.WriteLine(result.Transcription); - TestContext.WriteLine(result.Segments); Assert.IsNotNull(result); } From 027504eafbd410f8552570aa8c86da19f0d75878 Mon Sep 17 00:00:00 2001 From: Fesenko-A <111450737+Fesenko-A@users.noreply.github.com> Date: Thu, 14 May 2026 11:45:07 +0300 Subject: [PATCH 2/2] update TranscriptionResponse display names --- Apps.OpenAI/Models/Responses/Audio/TranscriptionResponse.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Apps.OpenAI/Models/Responses/Audio/TranscriptionResponse.cs b/Apps.OpenAI/Models/Responses/Audio/TranscriptionResponse.cs index 76443c5..bc47c58 100644 --- a/Apps.OpenAI/Models/Responses/Audio/TranscriptionResponse.cs +++ b/Apps.OpenAI/Models/Responses/Audio/TranscriptionResponse.cs @@ -8,10 +8,10 @@ public class TranscriptionResponse { public string Transcription { get; set; } - [Display("Words (serialized)")] + [Display("Words")] public List Words { get; set; } - [Display("Segments (serialized)")] + [Display("Segments")] public List Segments { get; set; } }