# Initial setup

In [None]:
import logging
import json
import time
import os
from os import listdir
from os.path import join, dirname, isfile
from ibm_watson import SpeechToTextV1
from ibm_watson.websocket import RecognizeCallback, AudioSource
import threading
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator

# Folder the source recordings are read from (a Windows UNC network path).
# NOTE(review): the last path component looks like a date (YYYYMMDD) — confirm
# before reusing this notebook for another day's recordings.
mypath = r'\\192.9.100.44\grabaciones\CAT\20210427'
# Base folder for everything this notebook writes (debug log and transcripts).
myoutput = 'Voicebot'
# Debug helper: uncomment to list what is already in the output folder.
# listdir(f'{myoutput}')

In [None]:
# Configure the root logger to write timestamped records to a log file.
# The file location can be overridden with the LOGFILE environment variable and
# the verbosity with LOGLEVEL (defaults: "<myoutput>/debug.log" and "INFO").

# `import logging` alone does not load the `handlers` submodule, so import it
# explicitly; otherwise `logging.handlers` may raise AttributeError.
import logging.handlers

log_path = os.environ.get("LOGFILE", f"{myoutput}/debug.log")
# The handler opens the file immediately, so its folder must already exist.
# A bare file name has an empty dirname, which os.makedirs would reject.
log_dir = os.path.dirname(log_path)
if log_dir:
    os.makedirs(log_dir, exist_ok=True)

handler = logging.handlers.WatchedFileHandler(log_path)
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)
root = logging.getLogger()
root.setLevel(os.environ.get("LOGLEVEL", "INFO"))
root.addHandler(handler)
logging.info("INICIO PROCESO '1.speech_to_text.ipynb'")

In [None]:
# Regular files directly inside the recordings folder (without subdirectories).
onlyfiles = [
    entry
    for entry in listdir(mypath)
    if isfile(join(mypath, entry))
]
logging.info(f"{len(onlyfiles)} files inside '{mypath}'")

# Keep only the voicebot recordings: '.ogg' files whose name contains
# 'taisagap'. The extension is stripped, so the list holds base names.
audiofiles = [
    stem
    for name, (stem, extension) in ((n, os.path.splitext(n)) for n in onlyfiles)
    if extension == '.ogg' and 'taisagap' in name
]
logging.info(f"{len(audiofiles)} audio files .ogg and from 'taisagap' inside '{mypath}'")

# Log a short sample so the selection can be checked by eye.
logging.info("The first 5 files are:")
for file in audiofiles[:5]:
    logging.info("%s", file)

In [None]:
# Build the Speech to Text client. The API key and the service URL are read
# from the API_KEY and URL environment variables; a missing variable raises
# KeyError here.
authenticator = IAMAuthenticator(os.environ['API_KEY'])
stt_service = SpeechToTextV1(authenticator=authenticator)
stt_service.set_service_url(os.environ['URL'])

# Model identifiers that can be passed as `model=` to the recognize call.
model_US_Bb = 'en-US_BroadbandModel'
model_AR_Nb = 'es-AR_NarrowbandModel'
model_CL_Nb = 'es-CL_NarrowbandModel'
model_PE_Nb = 'es-PE_NarrowbandModel'

# Model used by the recognition loop further down.
model_selected = model_CL_Nb
# Debug snippet for inspecting a model's details.
# NOTE(review): it references `model_AR_Bb` and `model`, neither of which is
# defined in this file — fix the names before uncommenting.
# model_selected = stt_service.get_model(model_AR_Bb)
# print(model_selected.get_result())
# print(json.dumps(model, indent=2))

# Multipart speech recognition

In [None]:
# Fresh accumulators for the recognition loop: seconds spent per file and the
# result collected for each file. Re-running this cell resets both.
times, transcripts = [], []
len(times)

In [None]:
# Transcribe the first 100 selected recordings with the Speech to Text service.
# For every file this records the elapsed time, keeps the result in memory and,
# when recognition succeeded, writes it to
# "<myoutput>/transcripts/json/<name>.json".
print('Starting Process STT with IBM API...')
inicio = time.time()
# exist_ok=True replaces the separate exists() check and is safe on re-runs.
os.makedirs(f"{myoutput}/transcripts/json", exist_ok=True)
os.makedirs(f"{myoutput}/transcripts/info", exist_ok=True)
for i, audiofile in enumerate(audiofiles[:100]):
    start = time.time()
    # Reset per file: without this a failed request would leave `transcript`
    # unbound (first file) or still holding the previous file's result.
    transcript = None
    with open(f'{mypath}/{audiofile}.ogg', 'rb') as audio_file:
        try:
            transcript = stt_service.recognize(
                audio=audio_file,
                content_type='audio/ogg',
                model=model_selected,
                timestamps=True,
                speaker_labels=True,
                smart_formatting=True,
                audio_metrics=True,
                inactivity_timeout=-1,
                word_confidence=True).get_result()
            status = 'OK'
        except Exception:
            # Best effort: keep processing the remaining files, but record why
            # this one failed. `Exception` (not a bare except) lets
            # KeyboardInterrupt stop the batch.
            logging.exception("Speech to text failed for '%s.ogg'", audiofile)
            status = 'Error'
    end = time.time()
    tiempo = end - start
    # Failed files contribute None so `transcripts` stays aligned with `times`.
    transcripts.append(transcript)
    times.append(tiempo)

    # Never write a result file for a failed request; otherwise another file's
    # transcript would be saved under this file's name.
    if transcript is not None:
        with open(f"{myoutput}/transcripts/json/{audiofile}.json", 'w', encoding='utf8') as json_file:
            json.dump(transcript, json_file, ensure_ascii=False)

    # Rewritten on every iteration so the timings survive an interrupted run.
    with open(f"{myoutput}/transcripts/info/times.json", 'w', encoding='utf8') as json_file:
        json.dump(times, json_file, ensure_ascii=False)

    print(f'File {i+1:02} completed:  Status: {status}\tStart: {start-inicio:.0f}s\t End: {end-inicio:.0f}s\t Total: {end-start:.0f}s')
print('Process Completed')

In [None]:
# Summary: number of results collected, then the total time spent in seconds.
print(len(transcripts))
sum(times)