<a href="https://colab.research.google.com/github/git-pi-e/voicebot_backend/blob/main/Customer_Success_Voicebot_Backend.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Backend for Voicebot

## Instructions
- Add your ngrok auth token (the code reads it from the secret named `NGROK_AUTHTOKEN`) and your HuggingFace token to the Google Colab secrets.
- Run all the cells.
- Copy the ngrok public URL printed by the last cell and use it as the backend URL for the frontend: set it as an environment variable for the deployed frontend, or as `NEXT_PUBLIC_BACKEND_URL` in `.env.local` for a local server.

In [5]:
# API framework, ASGI server, and the HuggingFace pipelines used for STT / text generation
!pip install fastapi uvicorn transformers
# Lets uvicorn run inside the notebook and exposes the local port through an ngrok tunnel
!pip install nest-asyncio pyngrok
# Required by FastAPI to parse multipart uploads (UploadFile / File)
!pip install python-multipart
# Audio libraries imported by the server cell (soundfile validates uploaded WAV data)
!pip install soundfile pydub

Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1


In [None]:
from fastapi import FastAPI, UploadFile, File, Body, HTTPException
from fastapi.responses import StreamingResponse
from fastapi.middleware.cors import CORSMiddleware
from transformers import pipeline, AutoModelForSeq2SeqLM
import soundfile as sf
from pydub import AudioSegment
import io
import requests
import uvicorn
import nest_asyncio
from pyngrok import ngrok
import logging
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
from google.colab import userdata

# Logging: emit INFO and above, and create this module's logger
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# The FastAPI application that every route below is registered on
app = FastAPI()

# CORS: accept any origin, method and header.
# NOTE(review): a wildcard origin together with allow_credentials=True is very
# permissive; narrow `origins` to the frontend's URL once it is known.
origins = ["*"]  # Adjust this as needed
_cors_settings = {
    "allow_origins": origins,
    "allow_credentials": True,
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_cors_settings)

# Load models
def _load_pipeline(task, model_name, label):
    """Create a transformers pipeline, returning None (and logging) on failure.

    Args:
        task: Pipeline task name passed to ``pipeline``.
        model_name: Model identifier passed as ``model=``.
        label: Short tag ("STT", "LLM") used in the log messages.
    """
    try:
        loaded = pipeline(task, model=model_name)
        logger.info(f"{label} model loaded successfully")
        return loaded
    except Exception as e:
        # A failed load must not abort the cell; the routes check for None
        # and answer with an error instead.
        logger.error(f"Error loading {label} model: {e}")
        return None


# Speech-to-text model used by /transcribe
stt_model = _load_pipeline("automatic-speech-recognition", "openai/whisper-tiny.en", "STT")

# Text-generation model used by /generate
llm_model = _load_pipeline("text-generation", "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "LLM")

# try:
#     model = AutoModelForSeq2SeqLM.from_pretrained("parler-tts/parler-tts-mini-v1")
#     tts_model = ParlerTTSPipeline(model=model)
#     logger.info("TTS model loaded successfully")
# except Exception as e:
#     logger.error(f"Error loading TTS model: {e}")
#     tts_model = None

# Utility function to create a retryable session
def create_retryable_session(retries=3, backoff_factor=0.3, status_forcelist=(500, 502, 504)):
    """Build a ``requests.Session`` that retries both HTTP and HTTPS requests.

    Args:
        retries: Used as the total, read and connect retry count.
        backoff_factor: Forwarded to ``Retry`` as ``backoff_factor``.
        status_forcelist: Forwarded to ``Retry`` as ``status_forcelist``.

    Returns:
        A session with one retrying ``HTTPAdapter`` mounted for both schemes.
    """
    retry_policy = Retry(
        total=retries,
        read=retries,
        connect=retries,
        backoff_factor=backoff_factor,
        status_forcelist=status_forcelist,
    )
    retrying_adapter = HTTPAdapter(max_retries=retry_policy)

    session = requests.Session()
    # The same adapter instance serves both URL schemes.
    for scheme in ('http://', 'https://'):
        session.mount(scheme, retrying_adapter)
    return session

# Test route to ensure the API is running
@app.get("/")
async def root():
    """Liveness check: log the access and return a fixed welcome message."""
    logger.info("Root endpoint accessed")
    welcome = "Welcome to the Colab-hosted Voice Interaction API"
    return dict(message=welcome)

# Transcription route with detailed error handling
@app.post("/transcribe")
async def transcribe_audio(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file with the loaded speech-to-text pipeline.

    Args:
        file: The uploaded audio file.

    Returns:
        A dict of the form ``{"transcript": <text>}``.

    Raises:
        HTTPException: 500 when the STT model is unavailable or inference
            fails; 400 when ``soundfile`` cannot read the uploaded bytes.
    """
    if stt_model is None:
        logger.error("STT model not available")
        raise HTTPException(status_code=500, detail="STT model not available")

    try:
        logger.info("Starting transcription process...")
        logger.info(f"Received file: {file.filename}, Content Type: {file.content_type}")

        # Read the WAV file content
        audio_content = await file.read()
        logger.info(f"Audio content size: {len(audio_content)} bytes")

        # Ensure the audio content is readable; the decoded samples themselves
        # are not needed because the pipeline is given the raw bytes below.
        try:
            sf.read(io.BytesIO(audio_content))
        except Exception as file_error:
            logger.error(f"Error reading WAV file: {file_error}")
            raise HTTPException(
                status_code=400,
                detail="Invalid WAV file format. Please upload a valid WAV file.",
            ) from file_error

        # Transcribe audio using the loaded model
        try:
            transcript = stt_model(audio_content)["text"]
            logger.info(f"Transcription result: {transcript}")
            return {"transcript": transcript}
        except ValueError as model_error:
            logger.error(f"Model inference error: {model_error}")
            raise HTTPException(status_code=500, detail="Error during transcription. The audio may be malformed or corrupted.")
        except Exception as unknown_model_error:
            logger.error(f"Unexpected model error: {unknown_model_error}", exc_info=True)
            raise HTTPException(status_code=500, detail="An unexpected error occurred during transcription.")

    except HTTPException:
        # Deliberate HTTP errors raised above (e.g. the 400 for a bad upload)
        # must reach the client unchanged instead of being rewrapped as a 500.
        raise
    except Exception as e:
        logger.error(f"Unhandled exception: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Error transcribing audio: {e}")


# Text generation route with detailed error handling
@app.post("/generate")
async def generate_response(prompt: str = Body(...)):
    """Generate a text continuation for ``prompt`` with the loaded LLM pipeline.

    Args:
        prompt: The prompt text. It is declared as a lone ``Body(...)``
            parameter, so FastAPI expects the request body to be a bare JSON
            string rather than an object such as ``{"prompt": "..."}``.

    Returns:
        A dict of the form ``{"response": <generated text>}``.

    Raises:
        HTTPException: 500 when the LLM is unavailable or generation fails
            unexpectedly; 502 when the pipeline returns no usable
            ``generated_text``; 503 on a ``requests`` transport error.
    """
    if llm_model is None:
        logger.error("LLM model not available")
        raise HTTPException(status_code=500, detail="LLM model not available")
    try:
        logger.info(f"Generating response for prompt: {prompt}")

        response = llm_model(prompt, max_length=100, num_return_sequences=1)
        if not response or "generated_text" not in response[0]:
            logger.error("Invalid response format from LLM model")
            raise HTTPException(status_code=502, detail="Failed to generate valid response")

        response_text = response[0]['generated_text']
        logger.info(f"Generated response: {response_text}")
        return {"response": response_text}
    except HTTPException:
        # Keep the 502 raised above intact; without this clause the generic
        # handler below would turn it into a 500.
        raise
    except requests.exceptions.RequestException as req_err:
        logger.error(f"RequestException: {req_err}")
        if isinstance(req_err, requests.exceptions.ConnectionError):
            error_message = "Connection to the LLM server failed. Please try again later."
        elif isinstance(req_err, requests.exceptions.Timeout):
            error_message = "The request to the LLM server timed out. Please try again."
        else:
            error_message = "An error occurred while communicating with the LLM server."
        raise HTTPException(status_code=503, detail=error_message)
    except Exception as e:
        logger.error(f"Unhandled exception during response generation: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Error generating response: {e}")

# Speech synthesis route with detailed error handling
# @app.post("/synthesize")
# async def synthesize_speech(text: str = Body(...)):
#     if tts_model is None:
#         logger.error("TTS model not available")
#         raise HTTPException(status_code=500, detail="TTS model not available")
#     try:
#         logger.info(f"Synthesizing speech for text: {text}")
#         audio_output = tts_model(text)
#         if not audio_output:
#             logger.error("Failed to generate audio output")
#             raise HTTPException(status_code=502, detail="Failed to synthesize audio")

#         logger.info("Speech synthesis successful")
#         return StreamingResponse(audio_output, media_type="audio/wav")
#     except requests.exceptions.RequestException as re:
#         logger.error(f"RequestException: {re}")
#         if isinstance(re, requests.exceptions.ConnectionError):
#             error_message = "Connection to the TTS server failed. Please try again later."
#         elif isinstance(re, requests.exceptions.Timeout):
#             error_message = "The request to the TTS server timed out. Please try again."
#         else:
#             error_message = "An error occurred while communicating with the TTS server."
#         raise HTTPException(status_code=503, detail=error_message)
#     except Exception as e:
#         logger.error(f"Unhandled exception during speech synthesis: {e}", exc_info=True)
#         raise HTTPException(status_code=500, detail=f"Error synthesizing speech: {e}")

# Authenticate ngrok with the token stored in the Colab secret NGROK_AUTHTOKEN
ngrok_token = userdata.get('NGROK_AUTHTOKEN')
ngrok.set_auth_token(ngrok_token)

# Lets uvicorn run the FastAPI app from inside the notebook
nest_asyncio.apply()

# Port shared by the ngrok tunnel and the uvicorn server
server_port = 8000

# Open the public tunnel; a failure is only logged, so the server still starts
try:
    public_url = ngrok.connect(server_port)
    url_line = f"Public URL: {public_url}"
    logger.info(url_line)
    logger.info("Colab-hosted Voice Interaction API is now running!")
    print(url_line)
except Exception as e:
    logger.error(f"Error connecting to ngrok: {e}")

# Start serving; this call blocks until the server is stopped
uvicorn.run(app, host="0.0.0.0", port=server_port)



ERROR:asyncio:Task exception was never retrieved
future: <Task finished name='Task-34' coro=<Server.serve() done, defined at /usr/local/lib/python3.10/dist-packages/uvicorn/server.py:67> exception=KeyboardInterrupt()>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/uvicorn/main.py", line 577, in run
    server.run()
  File "/usr/local/lib/python3.10/dist-packages/uvicorn/server.py", line 65, in run
    return asyncio.run(self.serve(sockets=sockets))
  File "/usr/local/lib/python3.10/dist-packages/nest_asyncio.py", line 30, in run
    return loop.run_until_complete(task)
  File "/usr/local/lib/python3.10/dist-packages/nest_asyncio.py", line 92, in run_until_complete
    self._run_once()
  File "/usr/local/lib/python3.10/dist-packages/nest_asyncio.py", line 133, in _run_once
    handle._run()
  File "/usr/lib/python3.10/asyncio/events.py", line 80, in _run
    self._context.run(self._callback, *self._args)
  File "/usr/lib/python3.10/asyncio/tasks.py", 

Public URL: NgrokTunnel: "https://069f-34-105-43-87.ngrok-free.app" -> "http://localhost:8000"


INFO:     Started server process [444]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)


INFO:     49.207.193.200:0 - "POST /transcribe HTTP/1.1" 200 OK
INFO:     49.207.193.200:0 - "OPTIONS /generate HTTP/1.1" 200 OK
INFO:     49.207.193.200:0 - "POST /generate HTTP/1.1" 422 Unprocessable Entity


Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


INFO:     49.207.193.200:0 - "POST /generate HTTP/1.1" 200 OK




INFO:     49.207.193.200:0 - "POST /transcribe HTTP/1.1" 200 OK
INFO:     49.207.193.200:0 - "POST /generate HTTP/1.1" 422 Unprocessable Entity
