<a href="https://colab.research.google.com/github/nsmq-ai/nsmqai/blob/kojomensahonums-add-stt-inference-notebook/STT_DEMO.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Import and install the required libraries

%%capture
!pip install git+https://github.com/openai/whisper.git
!pip install jiwer
!pip install tabulate
!pip install pydub
import torch
import numpy as np
import whisper
import jiwer
import time
import pandas as pd
from tabulate import tabulate
from pydub import AudioSegment
import os

In [None]:
# Load whisper model

%%capture
whisper.load_model("medium.en")


In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Select whisper model size (tiny, base, small, medium, large)
WHISPER_MODEL_NAME = "medium.en"

# Module-level model instance used by transcribe()
model = whisper.load_model(WHISPER_MODEL_NAME, device=DEVICE)

def transcribe(path_to_audio):
  """Return the transcript text for the audio file at ``path_to_audio``.

  The file is read with ``whisper.load_audio`` and handed to the
  module-level ``model``; the ``"text"`` entry of the result is returned.
  """
  waveform = whisper.load_audio(path_to_audio)
  transcription = model.transcribe(waveform)
  return transcription["text"]

In [None]:
# Install required libraries for web api
!pip -q install fastapi
!pip -q install pyngrok
!pip -q install uvicorn
!pip -q install nest_asyncio
!pip -q install python-multipart

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m65.7/65.7 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.0/67.0 kB[0m [31m6.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m681.2/681.2 kB[0m [31m11.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for pyngrok (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.3/58.3 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.3/58.3 kB[0m [31m6.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.7/45.7 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
# Import libraries
import uvicorn
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from pyngrok import ngrok
from pydantic import BaseModel
import nest_asyncio

import shutil

nest_asyncio.apply()

In [None]:
# Imports for the request model and audio serialisation/deserialisation
from pydantic import BaseModel
import base64
import io
import wave


class AudioBytes(BaseModel):
  # Request body model for the /get-transcript endpoint.

  # Base64-encoded audio content (the endpoint passes it to base64.b64decode)
  data: bytes

  # Client-supplied file name for the uploaded audio
  filename: str


app = FastAPI()

# Allow browser clients from any origin to call the API.
# Credentials are disabled: per the FastAPI CORS documentation, wildcard
# origins/methods/headers cannot be combined with allow_credentials=True, and
# the endpoints defined in this notebook read no cookies or auth headers.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# NOTE(review): a GET request carrying a JSON body is non-standard and some
# clients/proxies drop it; the route is kept as GET so existing callers keep
# working -- consider exposing it as POST once callers can migrate.
@app.get("/get-transcript")
async def get_transcript(audio: AudioBytes):
  """Transcribe the base64-encoded audio file sent in the request body.

  Args:
    audio: Request body. ``audio.data`` holds the base64-encoded content of
      an audio file; ``audio.filename`` is the client's name for that file
      (only its extension is reused, see below).

  Returns:
    A dict of the form ``{"transcript": <text>}``.

  Raises:
    HTTPException: status 400 when ``audio.data`` cannot be base64-decoded.
  """
  # Local imports so this cell does not depend on edits to other cells.
  import tempfile
  from fastapi import HTTPException

  try:
    decoded_data = base64.b64decode(audio.data)
  except ValueError as err:  # binascii.Error is a ValueError subclass
    raise HTTPException(status_code=400, detail="audio.data is not valid base64") from err

  # The client-supplied name must never be used as a path: it could point
  # anywhere on disk (and the file is deleted afterwards). Keep only a short,
  # alphanumeric extension and let tempfile pick a unique, safe location.
  suffix = os.path.splitext(os.path.basename(audio.filename))[1]
  if not suffix[1:].isalnum() or len(suffix) > 16:
    suffix = ""

  # delete=False: the file has to be closed before transcribe() reopens it by
  # path, so it is removed explicitly in the finally clause instead.
  tmp_file = tempfile.NamedTemporaryFile(suffix=suffix, delete=False)
  try:
    with tmp_file:
      tmp_file.write(decoded_data)
    # NOTE(review): transcribe() is synchronous, so it blocks the event loop
    # for the duration of the transcription.
    transcript = transcribe(tmp_file.name)
  finally:
    # Always clean up, even when writing or transcription fails
    os.remove(tmp_file.name)

  return {"transcript": transcript}

@app.get("/")
async def home():
  # Root route: responds with a fixed greeting message
  greeting = {"msg": "Hello from STT."}
  return greeting



In [None]:
# Attach personal token
# The ngrok authtoken is a secret and must not be stored in the notebook: it is
# read from the NGROK_AUTHTOKEN environment variable, or prompted for when unset.
# SECURITY: the token that used to be hardcoded in this cell has been published
# with the notebook and should be revoked in the ngrok dashboard.
import getpass

ngrok_authtoken = os.environ.get("NGROK_AUTHTOKEN") or getpass.getpass("ngrok authtoken: ")
ngrok.set_auth_token(ngrok_authtoken)

Authtoken saved to configuration file: /root/.ngrok2/ngrok.yml


In [None]:
# Expose the local API through a public ngrok tunnel and start the server
PORT = 8000  # single source of truth for both the tunnel and uvicorn

ngrok_tunnel = ngrok.connect(PORT)
print("Public URL:", ngrok_tunnel.public_url)
try:
  # Blocks until the server is stopped (e.g. by interrupting the cell)
  uvicorn.run(app, port=PORT)
finally:
  # Close the tunnel once the server stops so that re-running this cell does
  # not accumulate open tunnels
  ngrok.disconnect(ngrok_tunnel.public_url)

INFO:     Started server process [1010]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit)


Public URL: https://63d0-35-236-147-215.ngrok-free.app
INFO:     154.160.22.198:0 - "GET /get-transcript HTTP/1.1" 200 OK
INFO:     154.160.22.198:0 - "GET /get-transcript HTTP/1.1" 200 OK
