diff --git a/config/pipeline_configs/SpeechmaticsStreamingOrchestrationPipeline.yaml b/config/pipeline_configs/SpeechmaticsStreamingOrchestrationPipeline.yaml new file mode 100644 index 0000000..32ae005 --- /dev/null +++ b/config/pipeline_configs/SpeechmaticsStreamingOrchestrationPipeline.yaml @@ -0,0 +1,9 @@ +SpeechmaticsStreamingOrchestrationPipeline: + pipeline_config: + sample_rate: 16000 + language: "en" + operating_point: "enhanced" + max_delay: 1 + enable_partials: true + enable_diarization: true + diff --git a/config/pipeline_configs/SpeechmaticsStreamingPipeline.yaml b/config/pipeline_configs/SpeechmaticsStreamingPipeline.yaml new file mode 100644 index 0000000..929ca2f --- /dev/null +++ b/config/pipeline_configs/SpeechmaticsStreamingPipeline.yaml @@ -0,0 +1,9 @@ +SpeechmaticsStreamingPipeline: + pipeline_config: + sample_rate: 16000 + language: "en" + operating_point: "enhanced" + max_delay: 1 + enable_partials: true + endpoint_url: "wss://eu2.rt.speechmatics.com/v2" + diff --git a/pyproject.toml b/pyproject.toml index 936c0ab..4c4aead 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,6 +38,7 @@ dependencies = [ "texterrors==0.5.1", "nemo-toolkit[asr]>=2.5.0", "openai>=2.7.1", + "speechmatics-python>=5.0.0", ] [project.scripts] diff --git a/src/openbench/metric/word_error_metrics/word_error_metrics.py b/src/openbench/metric/word_error_metrics/word_error_metrics.py index 5b45d75..9935d47 100644 --- a/src/openbench/metric/word_error_metrics/word_error_metrics.py +++ b/src/openbench/metric/word_error_metrics/word_error_metrics.py @@ -296,10 +296,8 @@ def compute_metric(self, detail: Details) -> float: return (S + D + I) / N if N > 0 else 0.0 -@MetricRegistry.register_metric( - (PipelineType.ORCHESTRATION, PipelineType.STREAMING_DIARIZATION), - MetricOptions.CPWER, -) +@MetricRegistry.register_metric(PipelineType.ORCHESTRATION, + MetricOptions.CPWER) class ConcatenatedMinimumPermutationWER(BaseWordErrorMetric): """Concatenated minimum-Permutation Word 
Error Rate (cpWER) implementation. diff --git a/src/openbench/pipeline/orchestration/__init__.py b/src/openbench/pipeline/orchestration/__init__.py index c450180..7150196 100644 --- a/src/openbench/pipeline/orchestration/__init__.py +++ b/src/openbench/pipeline/orchestration/__init__.py @@ -1,13 +1,26 @@ # For licensing see accompanying LICENSE.md file. # Copyright (C) 2025 Argmax, Inc. All Rights Reserved. -from .orchestration_deepgram import DeepgramOrchestrationPipeline, DeepgramOrchestrationPipelineConfig +from .orchestration_deepgram import ( + DeepgramOrchestrationPipeline, + DeepgramOrchestrationPipelineConfig, +) from .orchestration_deepgram_streaming import ( DeepgramStreamingOrchestrationPipeline, DeepgramStreamingOrchestrationPipelineConfig, ) -from .orchestration_openai import OpenAIOrchestrationPipeline, OpenAIOrchestrationPipelineConfig -from .orchestration_whisperkitpro import WhisperKitProOrchestrationConfig, WhisperKitProOrchestrationPipeline +from .orchestration_openai import ( + OpenAIOrchestrationPipeline, + OpenAIOrchestrationPipelineConfig, +) +from .orchestration_speechmatics_streaming import ( + SpeechmaticsStreamingOrchestrationPipeline, + SpeechmaticsStreamingOrchestrationPipelineConfig, +) +from .orchestration_whisperkitpro import ( + WhisperKitProOrchestrationConfig, + WhisperKitProOrchestrationPipeline, +) from .whisperx import WhisperXPipeline, WhisperXPipelineConfig @@ -16,6 +29,8 @@ "DeepgramOrchestrationPipelineConfig", "DeepgramStreamingOrchestrationPipeline", "DeepgramStreamingOrchestrationPipelineConfig", + "SpeechmaticsStreamingOrchestrationPipeline", + "SpeechmaticsStreamingOrchestrationPipelineConfig", "WhisperXPipeline", "WhisperXPipelineConfig", "WhisperKitProOrchestrationPipeline", diff --git a/src/openbench/pipeline/orchestration/orchestration_speechmatics_streaming.py b/src/openbench/pipeline/orchestration/orchestration_speechmatics_streaming.py new file mode 100644 index 0000000..bf8c58d --- /dev/null +++ 
# For licensing see accompanying LICENSE.md file.
# Copyright (C) 2025 Argmax, Inc. All Rights Reserved.

import numpy as np
from pydantic import Field

from ...dataset import OrchestrationSample
from ...pipeline import Pipeline, PipelineConfig, register_pipeline
from ...pipeline_prediction import Transcript, Word
from ...types import PipelineType
from ..streaming_transcription.speechmatics import SpeechmaticsApi
from .common import OrchestrationOutput


class SpeechmaticsStreamingOrchestrationPipelineConfig(PipelineConfig):
    """Settings for the Speechmatics streaming orchestration pipeline."""

    sample_rate: int = Field(
        default=16000,
        description="Sample rate of the audio",
    )
    language: str = Field(
        default="en",
        description="Language code for transcription",
    )
    operating_point: str = Field(
        default="enhanced",
        description="Operating point (standard or enhanced)",
    )
    # Typed as float so fractional delays (e.g. 1.5) are accepted; integer
    # values such as the default keep working.
    max_delay: float = Field(
        default=1.0,
        description="Maximum delay in seconds",
    )
    enable_partials: bool = Field(
        default=True,
        description="Enable partial transcripts",
    )
    enable_diarization: bool = Field(
        default=True,
        description="Whether to enable speaker diarization",
    )


@register_pipeline
class SpeechmaticsStreamingOrchestrationPipeline(Pipeline):
    """Orchestration pipeline backed by the Speechmatics streaming API.

    Audio is streamed through ``SpeechmaticsApi`` and the resulting words
    (speaker-attributed when diarization produced speaker labels) are
    returned as a single ``Transcript``.
    """

    _config_class = SpeechmaticsStreamingOrchestrationPipelineConfig
    pipeline_type = PipelineType.ORCHESTRATION

    def build_pipeline(self):
        """Build the Speechmatics streaming client from this pipeline's config.

        ``SpeechmaticsApi`` only reads attributes from the object it is
        given, so the config is passed through directly, as the streaming
        transcription pipeline does.
        """
        return SpeechmaticsApi(self.config)

    def parse_input(self, input_sample: OrchestrationSample):
        """Convert the sample's waveform to raw 16-bit PCM bytes.

        Samples are clipped to [-1.0, 1.0] before scaling so out-of-range
        values saturate instead of wrapping around in the int16 cast. The
        little-endian dtype matches the ``pcm_s16le`` encoding announced to
        the API regardless of host byte order.
        """
        # NOTE(review): assumes the waveform is float audio nominally in
        # [-1, 1] -- confirm against OrchestrationSample.
        clipped = np.clip(input_sample.waveform, -1.0, 1.0)
        return (clipped * 32767).astype("<i2").tobytes()

    def parse_output(self, output) -> OrchestrationOutput:
        """Turn the raw API result dict into an ``OrchestrationOutput``.

        Speaker-attributed words are preferred. When none are present the
        confirmed word timestamps are paired, in order, with the
        whitespace-split transcript tokens and emitted without speakers.
        """
        diarized_words = output.get("words_with_speakers")
        if diarized_words:
            # Produced when diarization is enabled and speakers were returned.
            words = [
                Word(
                    word=info.get("word", ""),
                    start=info.get("start"),
                    end=info.get("end"),
                    speaker=info.get("speaker"),
                )
                for info in diarized_words
            ]
        else:
            # Fallback: no speaker labels. Flatten the per-message timestamp
            # groups and pair them with transcript tokens; zip stops at the
            # shorter sequence, so surplus timestamps are ignored.
            timestamp_groups = output.get("model_timestamps_confirmed") or []
            tokens = output.get("transcript", "").split()
            flat_timestamps = (
                info for group in timestamp_groups for info in group
            )
            words = [
                Word(
                    word=token,
                    start=info.get("start"),
                    end=info.get("end"),
                    speaker=None,
                )
                for token, info in zip(tokens, flat_timestamps)
            ]

        return OrchestrationOutput(
            prediction=Transcript(words=words),
            transcription_output=None,
            diarization_output=None,
        )
GladiaStreamingPipeline, OpenAIStreamingPipeline, + SpeechmaticsStreamingPipeline, ) from .transcription import ( AssemblyAITranscriptionPipeline, @@ -186,6 +188,25 @@ def register_pipeline_aliases() -> None: "Deepgram streaming orchestration pipeline with diarization enabled." ), ) + + PipelineRegistry.register_alias( + "speechmatics-streaming-orchestration", + SpeechmaticsStreamingOrchestrationPipeline, + default_config={ + "sample_rate": 16000, + "language": "en", + "operating_point": "enhanced", + "max_delay": 1, + "enable_partials": True, + "enable_diarization": True, + }, + description=( + "Speechmatics streaming orchestration pipeline with " + "diarization. Requires API key from " + "https://www.speechmatics.com/. Set " + "`SPEECHMATICS_API_KEY` env var." + ), + ) PipelineRegistry.register_alias( "whisperkitpro-orchestration-tiny", @@ -684,5 +705,23 @@ def register_pipeline_aliases() -> None: description="AssemblyAI streaming transcription pipeline. Requires API key from https://www.assemblyai.com/. Set `ASSEMBLYAI_API_KEY` env var.", ) + PipelineRegistry.register_alias( + "speechmatics-streaming", + SpeechmaticsStreamingPipeline, + default_config={ + "sample_rate": 16000, + "language": "en", + "operating_point": "enhanced", + "max_delay": 1, + "enable_partials": True, + "endpoint_url": "wss://eu2.rt.speechmatics.com/v2", + }, + description=( + "Speechmatics streaming transcription pipeline. " + "Requires API key from https://www.speechmatics.com/. " + "Set `SPEECHMATICS_API_KEY` env var." + ), + ) + register_pipeline_aliases() diff --git a/src/openbench/pipeline/streaming_transcription/__init__.py b/src/openbench/pipeline/streaming_transcription/__init__.py index c152587..d8b5dc7 100644 --- a/src/openbench/pipeline/streaming_transcription/__init__.py +++ b/src/openbench/pipeline/streaming_transcription/__init__.py @@ -1,8 +1,27 @@ # For licensing see accompanying LICENSE.md file. # Copyright (C) 2025 Argmax, Inc. All Rights Reserved. 
-from .assemblyai import AssemblyAIStreamingPipeline, AssemblyAIStreamingPipelineConfig -from .deepgram import DeepgramStreamingPipeline, DeepgramStreamingPipelineConfig -from .fireworks import FireworksStreamingPipeline, FireworksStreamingPipelineConfig -from .gladia import GladiaStreamingPipeline, GladiaStreamingPipelineConfig -from .openai import OpenAIStreamingPipeline, OpenAIStreamingPipelineConfig +from .assemblyai import ( + AssemblyAIStreamingPipeline, + AssemblyAIStreamingPipelineConfig, +) +from .deepgram import ( + DeepgramStreamingPipeline, + DeepgramStreamingPipelineConfig, +) +from .fireworks import ( + FireworksStreamingPipeline, + FireworksStreamingPipelineConfig, +) +from .gladia import ( + GladiaStreamingPipeline, + GladiaStreamingPipelineConfig, +) +from .openai import ( + OpenAIStreamingPipeline, + OpenAIStreamingPipelineConfig, +) +from .speechmatics import ( + SpeechmaticsStreamingPipeline, + SpeechmaticsStreamingPipelineConfig, +) diff --git a/src/openbench/pipeline/streaming_transcription/speechmatics.py b/src/openbench/pipeline/streaming_transcription/speechmatics.py new file mode 100644 index 0000000..b513796 --- /dev/null +++ b/src/openbench/pipeline/streaming_transcription/speechmatics.py @@ -0,0 +1,271 @@ +# For licensing see accompanying LICENSE.md file. +# Copyright (C) 2025 Argmax, Inc. All Rights Reserved. 
import io
import os

import numpy as np
import speechmatics
from argmaxtools.utils import get_logger
from pydantic import Field
from speechmatics.models import ServerMessageType

from openbench.dataset import StreamingSample

from ...pipeline import Pipeline, register_pipeline
from ...pipeline_prediction import StreamingTranscript
from ...types import PipelineType
from .common import StreamingTranscriptionConfig, StreamingTranscriptionOutput


logger = get_logger(__name__)

# Endpoint used when neither the environment nor the config provides one.
_DEFAULT_ENDPOINT_URL = "wss://eu2.rt.speechmatics.com/v2"
# Audio is sent as pcm_s16le, i.e. two bytes per sample.
_BYTES_PER_SAMPLE = 2


def _join_text(*parts):
    """Join transcript fragments with single spaces, skipping empty ones."""
    return " ".join(part.strip() for part in parts if part and part.strip())


def _waveform_to_pcm16(waveform):
    """Convert a float waveform to little-endian 16-bit PCM bytes.

    Values are clipped to [-1.0, 1.0] first so out-of-range samples saturate
    instead of wrapping around in the int16 cast.
    """
    # NOTE(review): assumes float audio nominally in [-1, 1] -- confirm
    # against StreamingSample.
    clipped = np.clip(waveform, -1.0, 1.0)
    return (clipped * 32767).astype("<i2").tobytes()


class SpeechmaticsApi:
    """Callable wrapper around the Speechmatics real-time websocket client.

    Calling an instance with raw PCM bytes streams them to the service and
    returns a dict holding the final transcript, partial/confirmed interim
    transcripts with the audio cursor at which each arrived, word timestamps
    and (when diarization is enabled) speaker-attributed words.
    """

    def __init__(self, cfg) -> None:
        """Read credentials and transcription settings.

        Args:
            cfg: Any object exposing ``sample_rate`` and, optionally,
                ``language``, ``operating_point``, ``max_delay``,
                ``enable_partials``, ``enable_diarization`` and
                ``endpoint_url`` attributes.

        Raises:
            ValueError: If ``SPEECHMATICS_API_KEY`` is not set.
        """
        self.api_key = os.getenv("SPEECHMATICS_API_KEY")
        if self.api_key is None:
            # Explicit raise: an ``assert`` disappears under ``python -O``.
            raise ValueError("Please set SPEECHMATICS_API_KEY in environment")
        self.language = getattr(cfg, "language", "en")
        self.operating_point = getattr(cfg, "operating_point", "enhanced")
        self.max_delay = getattr(cfg, "max_delay", 1)
        self.enable_partials = getattr(cfg, "enable_partials", True)
        self.sample_rate = cfg.sample_rate
        # The environment variable keeps precedence (existing behaviour);
        # otherwise honour the configured endpoint instead of ignoring it.
        self.connection_url = (
            os.getenv("SPEECHMATICS_URL")
            or getattr(cfg, "endpoint_url", None)
            or _DEFAULT_ENDPOINT_URL
        )
        self.enable_diarization = getattr(cfg, "enable_diarization", False)

    def _transcription_config(self):
        """Build the Speechmatics ``TranscriptionConfig`` for this instance."""
        options = {
            "operating_point": self.operating_point,
            "language": self.language,
            "enable_partials": self.enable_partials,
            "max_delay": self.max_delay,
        }
        if self.enable_diarization:
            options["diarization"] = "speaker"
        return speechmatics.models.TranscriptionConfig(**options)

    def __call__(self, sample):
        """Stream ``sample`` (raw pcm_s16le bytes) and collect the results.

        Returns:
            dict with keys ``transcript``, ``interim_transcripts``,
            ``audio_cursor``, ``confirmed_interim_transcripts``,
            ``confirmed_audio_cursor``, ``model_timestamps_hypothesis``,
            ``model_timestamps_confirmed`` and ``words_with_speakers``.

        Raises:
            Exception: Any error raised by the websocket client is logged
                and re-raised unchanged.
        """
        transcript = ""
        interim_transcripts = []
        audio_cursor_l = []
        confirmed_interim_transcripts = []
        confirmed_audio_cursor_l = []
        model_timestamps_hypothesis = []
        model_timestamps_confirmed = []
        words_with_speakers = []

        # Seconds of audio acknowledged by the server so far. A one-element
        # list so the handlers below can update it in place.
        audio_cursor = [0.0]

        settings = speechmatics.models.AudioSettings(
            sample_rate=self.sample_rate,
            encoding="pcm_s16le",
        )
        bytes_per_second = _BYTES_PER_SAMPLE * self.sample_rate
        total_bytes = len(sample)

        ws = speechmatics.client.WebsocketClient(
            speechmatics.models.ConnectionSettings(
                url=self.connection_url,
                auth_token=self.api_key,
            )
        )

        def handle_audio_added(msg):
            # Each AudioAdded message acknowledges one audio chunk; seq_no
            # counts the chunks received. The last chunk may be short, so
            # cap at the real payload size.
            acked_bytes = min(
                msg.get("seq_no", 0) * settings.chunk_size, total_bytes
            )
            audio_cursor[0] = acked_bytes / bytes_per_second

        def handle_partial_transcript(msg):
            partial_transcript = msg.get("metadata", {}).get("transcript", "")

            if partial_transcript:
                audio_cursor_l.append(audio_cursor[0])
                interim_transcripts.append(
                    _join_text(transcript, partial_transcript)
                )

            timestamps = [
                {
                    "start": result.get("start_time", 0),
                    "end": result.get("end_time", 0),
                }
                for result in msg.get("results", [])
                if result.get("type") == "word"
            ]
            if timestamps:
                model_timestamps_hypothesis.append(timestamps)

            logger.debug(f"[partial] {partial_transcript}")

        def handle_transcript(msg):
            # Only ``transcript`` is rebound; the lists are mutated in place.
            nonlocal transcript

            full_transcript = msg.get("metadata", {}).get("transcript", "")

            if full_transcript:
                confirmed_audio_cursor_l.append(audio_cursor[0])
                transcript = _join_text(transcript, full_transcript)
                confirmed_interim_transcripts.append(transcript)

            timestamps = []
            for result in msg.get("results", []):
                if result.get("type") != "word":
                    continue
                alternatives = result.get("alternatives", [])
                if not alternatives:
                    continue

                start = result.get("start_time", 0)
                end = result.get("end_time", 0)
                timestamps.append({"start": start, "end": end})

                if not self.enable_diarization:
                    continue
                # The first alternative is used for content and speaker.
                best = alternatives[0]
                speaker = best.get("speaker", None)
                if speaker is not None:
                    words_with_speakers.append({
                        "word": best.get("content", ""),
                        "speaker": f"SPEAKER_{speaker}",
                        "start": start,
                        "end": end,
                    })

            if timestamps:
                model_timestamps_confirmed.append(timestamps)

            logger.debug(f"[FULL] {full_transcript}")

        ws.add_event_handler(
            event_name=ServerMessageType.AudioAdded,
            event_handler=handle_audio_added,
        )
        ws.add_event_handler(
            event_name=ServerMessageType.AddPartialTranscript,
            event_handler=handle_partial_transcript,
        )
        ws.add_event_handler(
            event_name=ServerMessageType.AddTranscript,
            event_handler=handle_transcript,
        )

        conf = self._transcription_config()
        audio_stream = io.BytesIO(sample)

        try:
            ws.run_synchronously(audio_stream, conf, settings)
        except Exception as e:
            logger.error(f"Speechmatics transcription error: {e}")
            raise

        return {
            "transcript": transcript.strip(),
            "interim_transcripts": interim_transcripts,
            "audio_cursor": audio_cursor_l,
            "confirmed_interim_transcripts": confirmed_interim_transcripts,
            "confirmed_audio_cursor": confirmed_audio_cursor_l,
            "model_timestamps_hypothesis": model_timestamps_hypothesis,
            "model_timestamps_confirmed": model_timestamps_confirmed,
            "words_with_speakers": words_with_speakers,
        }


class SpeechmaticsStreamingPipelineConfig(StreamingTranscriptionConfig):
    """Settings for the Speechmatics streaming transcription pipeline."""

    # NOTE(review): the YAML/alias defaults also pass ``endpoint_url``;
    # presumably that field is declared on StreamingTranscriptionConfig --
    # confirm. SpeechmaticsApi reads it via getattr when present.
    sample_rate: int = Field(
        default=16000,
        description="Sample rate of the audio",
    )
    language: str = Field(
        default="en",
        description="Language code for transcription",
    )
    operating_point: str = Field(
        default="enhanced",
        description="Operating point (standard or enhanced)",
    )
    # Typed as float so fractional delays (e.g. 1.5) are accepted; integer
    # values such as the default keep working.
    max_delay: float = Field(
        default=1.0,
        description="Maximum delay in seconds",
    )
    enable_partials: bool = Field(
        default=True,
        description="Enable partial transcripts",
    )


@register_pipeline
class SpeechmaticsStreamingPipeline(Pipeline):
    """Streaming transcription pipeline backed by ``SpeechmaticsApi``."""

    _config_class = SpeechmaticsStreamingPipelineConfig
    pipeline_type = PipelineType.STREAMING_TRANSCRIPTION

    def parse_input(self, input_sample: StreamingSample):
        """Convert the sample's waveform to raw pcm_s16le bytes."""
        return _waveform_to_pcm16(input_sample.waveform)

    def parse_output(self, output) -> StreamingTranscriptionOutput:
        """Wrap the raw API result dict in a ``StreamingTranscriptionOutput``."""
        prediction = StreamingTranscript(
            transcript=output["transcript"],
            audio_cursor=output["audio_cursor"],
            interim_results=output["interim_transcripts"],
            confirmed_audio_cursor=output["confirmed_audio_cursor"],
            confirmed_interim_results=output["confirmed_interim_transcripts"],
            model_timestamps_hypothesis=output["model_timestamps_hypothesis"],
            model_timestamps_confirmed=output["model_timestamps_confirmed"],
        )
        return StreamingTranscriptionOutput(prediction=prediction)

    def build_pipeline(self):
        """Create the Speechmatics API client from this pipeline's config."""
        return SpeechmaticsApi(self.config)
"2025-08-23T18:12:19.778Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/69/b2/119f6e6dcbd96f9069ce9a2665e0146588dc9f88f29549711853645e736a/h2-4.3.0-py3-none-any.whl", hash = "sha256:c438f029a25f7945c69e0ccf0fb951dc3f73a5f6412981daee861431b70e2bdd", size = 61779, upload-time = "2025-08-23T18:12:17.779Z" }, +] + [[package]] name = "hdbscan" version = "0.8.40" @@ -1387,6 +1400,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9e/d3/0aaf279f4f3dea58e99401b92c31c0f752924ba0e6c7d7bb07b1dbd7f35e/hf_xet-1.1.8-cp37-abi3-win_amd64.whl", hash = "sha256:4171f31d87b13da4af1ed86c98cf763292e4720c088b4957cf9d564f92904ca9", size = 2801689, upload-time = "2025-08-18T22:01:04.81Z" }, ] +[[package]] +name = "hpack" +version = "4.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/2c/48/71de9ed269fdae9c8057e5a4c0aa7402e8bb16f2c6e90b3aa53327b113f8/hpack-4.1.0.tar.gz", hash = "sha256:ec5eca154f7056aa06f196a557655c5b009b382873ac8d1e66e79e87535f1dca", size = 51276, upload-time = "2025-01-22T21:44:58.347Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/07/c6/80c95b1b2b94682a72cbdbfb85b81ae2daffa4291fbfa1b1464502ede10d/hpack-4.1.0-py3-none-any.whl", hash = "sha256:157ac792668d995c657d93111f46b4535ed114f0c9c8d672271bbec7eae1b496", size = 34357, upload-time = "2025-01-22T21:44:56.92Z" }, +] + [[package]] name = "httpcore" version = "1.0.9" @@ -1415,6 +1437,11 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" }, ] +[package.optional-dependencies] +http2 = [ + { name = "h2" }, +] + [[package]] name = "huggingface-hub" version = "0.34.4" @@ -1460,6 +1487,15 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/c6/50/e0edd38dcd63fb26a8547f13d28f7a008bc4a3fd4eb4ff030673f22ad41a/hydra_core-1.3.2-py3-none-any.whl", hash = "sha256:fa0238a9e31df3373b35b0bfb672c34cc92718d21f81311d8996a16de1141d8b", size = 154547, upload-time = "2023-02-23T18:33:40.801Z" }, ] +[[package]] +name = "hyperframe" +version = "6.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/02/e7/94f8232d4a74cc99514c13a9f995811485a6903d48e5d952771ef6322e30/hyperframe-6.1.0.tar.gz", hash = "sha256:f630908a00854a7adeabd6382b43923a4c4cd4b821fcb527e6ab9e15382a3b08", size = 26566, upload-time = "2025-01-22T21:41:49.302Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/48/30/47d0bf6072f7252e6521f3447ccfa40b421b6824517f82854703d0f5a98b/hyperframe-6.1.0-py3-none-any.whl", hash = "sha256:b03380493a519fce58ea5af42e4a42317bf9bd425596f7a0835ffce80f1a42e5", size = 13007, upload-time = "2025-01-22T21:41:47.295Z" }, +] + [[package]] name = "hyperpyyaml" version = "1.2.2" @@ -3008,6 +3044,7 @@ dependencies = [ { name = "rich" }, { name = "scikit-learn" }, { name = "speechbrain" }, + { name = "speechmatics-python" }, { name = "texterrors" }, { name = "torch" }, { name = "typer" }, @@ -3059,6 +3096,7 @@ requires-dist = [ { name = "rich", specifier = ">=13.0.0,<14" }, { name = "scikit-learn", specifier = "==1.5.1" }, { name = "speechbrain", specifier = "==1.0.2" }, + { name = "speechmatics-python", specifier = ">=5.0.0" }, { name = "texterrors", specifier = "==0.5.1" }, { name = "torch", specifier = "==2.8" }, { name = "typer", specifier = ">=0.16.0" }, @@ -3291,6 +3329,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, ] +[[package]] +name = "polling2" +version = 
"0.5.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/04/9d/6a560ab95e1b92dfce97321d8ffc9f20d352fa4b12a91525d4c575df1c74/polling2-0.5.0.tar.gz", hash = "sha256:90b7da82cf7adbb48029724d3546af93f21ab6e592ec37c8c4619aedd010e342", size = 6549, upload-time = "2021-07-19T18:06:54.951Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a3/de/e5bf2556ebd6db12590788207575c7c75b1de62f5ddc8b4916b668e04e6b/polling2-0.5.0-py2.py3-none-any.whl", hash = "sha256:ad86d56fbd7502f0856cac2d0109d595c18fa6c7fb12c88cee5e5d16c17286c1", size = 6431, upload-time = "2021-07-19T18:06:53.681Z" }, +] + [[package]] name = "pooch" version = "1.8.2" @@ -4583,6 +4630,22 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/26/74/b63579a8f2bd0934a53ae13c10cc20539473cb29dbb911eefce88b59b43d/speechbrain-1.0.2-py3-none-any.whl", hash = "sha256:fe5328554c28bc8fe8bfef355144ee9de5cf569b9706cee2267e19c99b092578", size = 824842, upload-time = "2024-10-30T18:31:32.191Z" }, ] +[[package]] +name = "speechmatics-python" +version = "5.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "httpx", extra = ["http2"] }, + { name = "polling2" }, + { name = "tenacity" }, + { name = "toml" }, + { name = "websockets" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/61/fe/baa7da879f0b43d941cb9092bf68a8d92dbb9325a7aa614715ae8f0b0b32/speechmatics_python-5.0.0.tar.gz", hash = "sha256:bc8724aff604b13b00a83f7b4bd3ebc68e9f6c4ab713b3fb675b1d5bb0870053", size = 133087, upload-time = "2025-08-14T15:15:33.334Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2f/80/7e2a698437effd75be73e442b0e0a669e66c48d302bc318406a5bcef5345/speechmatics_python-5.0.0-py3-none-any.whl", hash = "sha256:9d42bf40e452dc20c1f15c9a16b712b2ab2e6c0585ed6943c2d0753269faa0da", size = 132185, upload-time = "2025-08-14T15:15:32.027Z" }, +] + [[package]] name = "sqlalchemy" version = "2.0.43" @@ -4657,11 +4720,11 
@@ wheels = [ [[package]] name = "tenacity" -version = "9.1.2" +version = "8.2.3" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/0a/d4/2b0cd0fe285e14b36db076e78c93766ff1d529d70408bd1d2a5a84f1d929/tenacity-9.1.2.tar.gz", hash = "sha256:1169d376c297e7de388d18b4481760d478b0e99a777cad3a9c86e556f4b697cb", size = 48036, upload-time = "2025-04-02T08:25:09.966Z" } +sdist = { url = "https://files.pythonhosted.org/packages/89/3c/253e1627262373784bf9355db9d6f20d2d8831d79f91e9cca48050cddcc2/tenacity-8.2.3.tar.gz", hash = "sha256:5398ef0d78e63f40007c1fb4c0bff96e1911394d2fa8d194f77619c05ff6cc8a", size = 40651, upload-time = "2023-08-14T13:22:50.869Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e5/30/643397144bfbfec6f6ef821f36f33e57d35946c44a2352d3c9f0ae847619/tenacity-9.1.2-py3-none-any.whl", hash = "sha256:f77bf36710d8b73a50b2dd155c97b870017ad21afe6ab300326b0371b3b05138", size = 28248, upload-time = "2025-04-02T08:25:07.678Z" }, + { url = "https://files.pythonhosted.org/packages/f4/f1/990741d5bb2487d529d20a433210ffa136a367751e454214013b441c4575/tenacity-8.2.3-py3-none-any.whl", hash = "sha256:ce510e327a630c9e1beaf17d42e6ffacc88185044ad85cf74c0a8887c6a0f88c", size = 24401, upload-time = "2023-08-14T13:22:49.265Z" }, ] [[package]] @@ -4814,6 +4877,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/41/f2/fd673d979185f5dcbac4be7d09461cbb99751554ffb6718d0013af8604cb/tokenizers-0.21.4-cp39-abi3-win_amd64.whl", hash = "sha256:475d807a5c3eb72c59ad9b5fcdb254f6e17f53dfcbb9903233b0dfa9c943b597", size = 2507568, upload-time = "2025-07-28T15:48:55.456Z" }, ] +[[package]] +name = "toml" +version = "0.10.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/be/ba/1f744cdc819428fc6b5084ec34d9b30660f6f9daaf70eead706e3203ec3c/toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f", size = 22253, 
upload-time = "2020-11-01T01:40:22.204Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/44/6f/7120676b6d73228c96e17f1f794d8ab046fc910d781c8d151120c3f1569e/toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b", size = 16588, upload-time = "2020-11-01T01:40:20.672Z" }, +] + [[package]] name = "tomli" version = "2.2.1"