# Setup
To set up this notebook, run the following cell first. It imports the general libraries, instantiates the speech-to-text backends that are enabled via the `SENDING_TO_*` flags, and redirects `stdout` to the logger from the utils package.

In [None]:
import time
import sys
import json
import torch
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
from utils.logger_handler import Logger

# Feature flags: select which speech-to-text backends this run talks to.
# Every later cell checks its flag before touching the matching instance,
# because only the enabled backends are imported and instantiated below.
SENDING_TO_RECAPP = False
SENDING_TO_WHISPER = False
SENDING_TO_SPEECHBRAIN = False
SENDING_TO_VOSK = True

# The backend imports are conditional on purpose (presumably so that the
# dependencies of disabled backends do not have to be installed - TODO confirm).
if SENDING_TO_RECAPP:
    from technologies.stt.recapp.recapp import TTSRecapp
    # Initialize TTSRecapp Instance
    recapp_inst = TTSRecapp()
if SENDING_TO_WHISPER:
    from technologies.stt.whisper.whisper import TTSWhisper
    # Initialize TTSWhisper Instance
    whisper_inst = TTSWhisper()
if SENDING_TO_SPEECHBRAIN:
    from technologies.stt.speechbrain.speechbrain import TTSSpeechBrain
    # Initialize TTSSpeechBrain Instance
    speechbrain_inst = TTSSpeechBrain()
if SENDING_TO_VOSK:
    from technologies.stt.vosk.vosk import TTSVosk
    # Initialize TTSVosk Instance
    vosk_inst = TTSVosk()

# Setup Logger
# Capture the real stdout only when it is not already redirected. Re-running
# this cell would otherwise store the previous Logger in `original_stdout`,
# and the final cell could no longer restore the notebook's real stdout.
if not isinstance(sys.stdout, Logger):
    original_stdout = sys.stdout  # Save the original stdout
logger = Logger()
sys.stdout = logger  # Redirect stdout to logger

# Recapp
Sending Requests to Recapp

In [2]:
# Upload every WAV file found in the Recapp source folder ("final_audio").
# Important: Make sure only 345 files are inside, and none are longer than 16min
if SENDING_TO_RECAPP:
    pause_between_uploads_s = 120   # 2 minutes between uploads, to prevent overload on the server
    wait_before_polling_s = 3600    # 60 minutes before looking for finished transcripts

    source_dir = Path(recapp_inst.getSourceFolderPath())
    for entry in source_dir.iterdir():
        # Skip sub-folders and anything that is not a .wav file.
        if not entry.is_file() or entry.suffix.lower() != ".wav":
            continue
        print("-----------------------------------")
        print(f"Sending Request for dialog file: {entry}")
        recapp_inst.sendTranscripitionTask(entry, "jobs")
        time.sleep(pause_between_uploads_s)

    # Give the server time to work through the queue, then check for
    # updates and pending transcript downloads.
    time.sleep(wait_before_polling_s)
    recapp_inst.checkForUpdatesOnServer()
    recapp_inst.checkForPendingTranscriptDownload()

Check the jobs on the server manually and download the transcripts if needed. Wait about 10 minutes between executions to give the server time to process more jobs.

In [3]:
# Manual re-check: ask the Recapp client to look for updates on the server.
# Does nothing when the Recapp backend is disabled.
if SENDING_TO_RECAPP:
    recapp_inst.checkForUpdatesOnServer()

Show all jobs on the Recapp server as a bar chart, grouped by status, to get a quick overview.

In [4]:
if SENDING_TO_RECAPP:
    # Fetch the job list from the server; the response is parsed as JSON.
    # Assumes the payload is a list of job records with a 'status' field -
    # TODO confirm against the Recapp client.
    jobs = recapp_inst.getAllJobsOnServer("jobs")
    data = json.loads(jobs)

    df = pd.DataFrame(data)

    if df.empty or "status" not in df.columns:
        # Without jobs there is no 'status' column, and indexing it would
        # raise a KeyError - report the situation instead of crashing.
        print("No jobs found on Recapp server, nothing to plot.")
    else:
        # Group by 'status' and count elements
        status_counts = df["status"].value_counts()

        # Plot the bar chart on an explicit Axes object
        fig, ax = plt.subplots(figsize=(8, 6))
        sns.barplot(
            x=status_counts.index,
            y=status_counts.values,
            hue=status_counts.index,
            palette="viridis",
            legend=False,
            ax=ax,
        )

        # Labels and title
        ax.set_xlabel('Status')
        ax.set_ylabel('Count')
        ax.set_title('Jobs on Recapp Server, grouped by Status')

        # Show the plot
        plt.show()

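Once all files have been transcribed, the transcripts need to be merged and transferred to MongoDB to prepare them for the metrics. Set `recappDone = True` in the cell below once all jobs have finished; until then the cell does nothing.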

In [5]:
# Manual switch: set to True once all Recapp files have been transcribed.
recappDone = False

# The transfer only runs when the flag is set and the Recapp backend is enabled.
if recappDone and SENDING_TO_RECAPP:
    recapp_inst.transferTranscriptsFilesToMongoDB()

# Whisper

In [6]:
if SENDING_TO_WHISPER:
    # Add models to be used here
    models = ['turbo', 'large', 'medium']
    lng = "de"

    # Use the GPU when CUDA is available, otherwise fall back to the CPU.
    cuda_ready = torch.cuda.is_available()
    device = "cuda" if cuda_ready else "cpu"
    if cuda_ready:
        print(f"CUDA available, using GPU: {torch.cuda.get_device_name(0)}")
    else:
        print("CUDA not available, using CPU.")

    # Create the output folder if it is missing.
    output_dir = whisper_inst.getOutputDirectory()
    output_path = Path(output_dir)
    if not output_path.exists():
        print(f"Transcription-Folder not found. Creating Folder 'transcription' at {output_dir} with 'whisper' in it.")
        output_path.mkdir(parents=True, exist_ok=True)

    # Run one transcription pass per configured model.
    for whisper_model in models:
        whisper_inst.transcribeFiles(whisper_model, device, lng)

    # Add Transcriptions to Transcript Collection
    whisper_inst.transferJSONFilesToMongoDB()

# SpeechBrain

In [7]:
if SENDING_TO_SPEECHBRAIN:
    model = "whisper_rescuespeech"
    lng = "de"

    # Use the GPU when CUDA is available, otherwise fall back to the CPU.
    cuda_ready = torch.cuda.is_available()
    device = "cuda" if cuda_ready else "cpu"
    if cuda_ready:
        print(f"CUDA available, using GPU: {torch.cuda.get_device_name(0)}")
    else:
        print("CUDA not available, using CPU.")

    # NOTE(review): unlike the Whisper and Vosk cells, no MongoDB transfer is
    # called here - confirm whether that is intentional.
    speechbrain_inst.transcribeFiles(model, device, lng)

# Vosk

In [None]:
# Runs only when the Vosk backend is enabled.
if SENDING_TO_VOSK:
    # Transcribe first, then hand the JSON results over to MongoDB.
    vosk_inst.transcribeFiles()
    vosk_inst.transferJSONFilesToMongoDB()

Restore `sys.stdout` to its original value before closing the logger.

In [9]:
# Put back the stdout object that the setup cell saved before it redirected
# output to the logger. This happens first, so nothing is printed through
# the logger after it has been closed.
sys.stdout = original_stdout
# The logger is no longer installed as sys.stdout and can now be closed.
logger.close()