## AssemblyAI (closed-source ASR)

Prediction using the Python SDK provided by AssemblyAI, which wraps the API calls.

In [None]:
# Install the `assemblyai` package (imported below as `aai`).
% pip install assemblyai

In [None]:
import os
import time
import assemblyai as aai
import json
from utils import names
from dotenv import load_dotenv

# Load variables from a .env file into the process environment
# (presumably where ASSEMBLYAI_API_KEY is defined — confirm).
load_dotenv()

# Entries come from the project's utils.names helper; each one is later used
# as the base name (no extension) of a .wav file and of the output files.
files = names.get_file_names()

In [None]:
# Authenticate the SDK with the key read from the environment.
aai.settings.api_key = os.getenv("ASSEMBLYAI_API_KEY")

# Shared transcription settings: "universal" speech model, language code "it".
config = aai.TranscriptionConfig(
    speech_model=aai.SpeechModel.universal,
    language_code="it",
)

# One transcriber instance, reused for every file.
transcriber = aai.Transcriber()

In [3]:
def assemblyai_transcribe(audio_file_path):
    """Transcribe one audio file with the module-level AssemblyAI transcriber.

    Args:
        audio_file_path: Location of the audio file, passed as ``data`` to
            ``transcriber.transcribe`` together with the shared ``config``.

    Returns:
        The transcript object returned by the SDK.

    Raises:
        RuntimeError: If the returned transcript has status ``error``.
    """
    transcript = transcriber.transcribe(data=audio_file_path, config=config)

    if transcript.status == aai.TranscriptStatus.error:
        # Raise instead of calling exit(): exit() is a helper intended for the
        # interactive interpreter, and inside a notebook kernel it is not
        # guaranteed to stop execution, which could let a failed transcript
        # reach the caller.
        raise RuntimeError(f"Transcription failed: {transcript.error}")

    return transcript

In [None]:
def save_transcript_text(file_name, text):
    """Write *text* to ``../data/assemblyai/text/<file_name>.txt`` as UTF-8.

    An existing file with the same name is overwritten.
    """
    destination = f"../data/assemblyai/text/{file_name}.txt"
    with open(destination, "w", encoding="utf-8") as text_file:
        text_file.write(text)

def save_json(file_name, json_content):
    """Dump *json_content* to ``../data/assemblyai/json/<file_name>.json``.

    The file is written as UTF-8 with a 2-space indent, and non-ASCII
    characters are kept as-is rather than escaped.
    """
    destination = f"../data/assemblyai/json/{file_name}.json"
    with open(destination, "w", encoding="utf-8") as json_file:
        json.dump(json_content, json_file, ensure_ascii=False, indent=2)

def save_automatically_generated_srt(file_name, srt_content):
    """Write *srt_content* to ``../data/assemblyai/srt/<file_name>.srt`` as UTF-8.

    An existing file with the same name is overwritten.
    """
    destination = f"../data/assemblyai/srt/{file_name}.srt"
    with open(destination, "w", encoding="utf-8") as srt_file:
        srt_file.write(srt_content)

def build_json(transcript):
    """Convert a transcript into a JSON-serializable list of sentence records.

    Args:
        transcript: Object exposing ``get_sentences()``. Each sentence must
            provide ``start``, ``end``, ``text``, ``confidence`` and ``words``;
            each word must provide ``text``, ``start``, ``end`` and
            ``confidence``.

    Returns:
        A list with one dict per sentence, with keys ``start``, ``end``,
        ``text``, ``confidence`` and ``words``. ``words`` is a list of dicts
        with keys ``word``, ``start``, ``end`` and ``confidence``.
    """
    return [
        {
            "start": sentence.start,
            "end": sentence.end,
            "text": sentence.text,
            "confidence": sentence.confidence,
            "words": [
                {
                    "word": token.text,
                    "start": token.start,
                    "end": token.end,
                    "confidence": token.confidence,
                }
                for token in sentence.words
            ],
        }
        for sentence in transcript.get_sentences()
    ]

In [None]:
# Transcribe every file, record how long each call took, and store the
# transcript as plain text, SRT subtitles and sentence/word-level JSON.
for file in files:
    print(f"Starting prediction of {file}")

    # Time only the transcription call; perf_counter is a monotonic clock,
    # so the measurement is not affected by system clock adjustments.
    start = time.perf_counter()
    _transcript = assemblyai_transcribe(f"../data/audio/full_audio/{file}.wav")
    elapsed_time = time.perf_counter() - start

    # Append one "<file> : <seconds>" record per line. Without the trailing
    # newline all records would run together on a single line.
    with open("../data/assemblyai/times.txt", "a", encoding="utf-8") as f:
        f.write(f"{file} : {elapsed_time}\n")

    save_transcript_text(file_name=file, text=_transcript.text)
    save_automatically_generated_srt(
        file_name=file,
        srt_content=_transcript.export_subtitles_srt(),
    )

    json_content = build_json(transcript=_transcript)
    save_json(file_name=file, json_content=json_content)