# VARIABLES

In [None]:
from google.colab import userdata

# populate the values in the secrets section
# Values read with userdata.get(...) come from the Colab "Secrets" panel; the model names are set inline.
STT_WHISPER_MODEL = "medium.en"  # Whisper checkpoint name passed to whisper.load_model
STT_BERT_MODEL = "prajjwal1/bert-tiny"  # base BERT checkpoint used for the tokenizer and the clue classifier backbone
STT_BERT_MODEL_DRIVE_LOCATION = userdata.get('STT_BERT_MODEL_DRIVE_LOCATION')  # path of the fine-tuned classifier state dict
QA_MODEL= "mistralai/Mistral-7B-Instruct-v0.1"  # Hugging Face model id of the QA language model
QA_LOG_DRIVE_LOCATION = userdata.get('QA_LOG_DRIVE_LOCATION')  # directory where the QA JSON logs are written
OPENAI_API_KEY = userdata.get('OPENAI_API_KEY')  # OpenAI credential for the ChatGPT-based answerer
NGROK_TOKEN = userdata.get('NGROK_TOKEN')  # presumably the ngrok auth token for exposing the API — not used in the visible cells


# TTS

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive so Drive-hosted files can be read and written from this runtime.
drive.mount('/content/drive')

In [None]:
%%capture
!pip install TTS
!sudo apt-get install espeak-ng
!pip install fastapi uvicorn pydantic pyngrok nest_asyncio
!pip install python-multipart
!pip install onnx
!pip install onnxruntime

import IPython
import tempfile
import subprocess
from fastapi import FastAPI,Response
from fastapi.middleware.cors import CORSMiddleware
from starlette.middleware.gzip import GZipMiddleware
from fastapi.responses import JSONResponse
from fastapi.responses import StreamingResponse,FileResponse
from fastapi import FastAPI, UploadFile, File
import shutil
from pydantic import BaseModel
from IPython.display import Audio
import uvicorn
import nest_asyncio
from pyngrok import ngrok
import base64
import time
from TTS.tts.configs.vits_config import VitsConfig
from TTS.tts.models.vits import Vits
from TTS.utils.audio.numpy_transforms import save_wav
import numpy as np

nest_asyncio.apply()

In [None]:
class OutputTTSText(BaseModel):
  """Response schema with a single text field; used as the response model of `/tts-test`."""
  answer: str

class DemoText(BaseModel):
  """Request payload of the `/demo_tts` endpoint."""
  # Text to synthesize.
  text: str
  # Voice selector forwarded to demo_audio: "1" picks the demo voice, any other value the live voice.
  voice: str

class LiveText(BaseModel):
  """Request payload of the `/live_tts` endpoint: the text to synthesize."""
  text: str

In [None]:
def demo_audio(text:str,voice:str):
  """Synthesize `text` into a temporary WAV file using the voice chosen by `voice`.

  Args:
    text: Text to speak; it is tokenized with the selected model's tokenizer
      (language "en").
    voice: "1" selects `demo_vits`; any other value selects `live_vits`.

  Returns:
    Path of the WAV file that was written (sample rate 22050). The file is
    created with delete=False, so it stays on disk until the caller removes it.
  """
  # Both voices run the exact same steps; only the model object differs.
  vits_model = demo_vits if voice=="1" else live_vits

  text_inputs = np.asarray(
      vits_model.tokenizer.text_to_ids(text, language="en"),
      dtype=np.int64,
  )[None, :]  # prepend a batch axis
  audio = vits_model.inference_onnx(text_inputs)

  # Reserve a unique path, then close the handle before save_wav writes to it.
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
    out_path = temp_file.name
  save_wav(wav=audio[0], path=out_path,sample_rate=22050)
  return out_path

def live_audio(text:str):
  """Synthesize `text` with `live_vits` (speaker id 0) and return the WAV file path.

  Args:
    text: Text to speak; tokenized with the live model's tokenizer (language "en").

  Returns:
    Path of a temporary WAV file (sample rate 22050). The file is not deleted
    automatically.
  """
  token_ids = live_vits.tokenizer.text_to_ids(text, language="en")
  batch = np.asarray(token_ids, dtype=np.int64)[None, :]  # prepend a batch axis
  audio = live_vits.inference_onnx(batch,speaker_id=0)

  # Create the file only to obtain a unique name; it is closed before being written.
  wav_file = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
  wav_file.close()
  out_path = wav_file.name

  save_wav(wav=audio[0], path=out_path,sample_rate=22050)
  return out_path

# QA

In [None]:
# Model
!pip install -q einops
!pip install -q langchain
!pip install -q bitsandbytes

!pip install -q -U transformers accelerate
# update or install the necessary libraries
!pip install -q openai
!pip install -q langchain-community
!pip install -q python-dotenv

# For API
!pip -q install fastapi
!pip -q install pyngrok
!pip -q install uvicorn
!pip -q install nest_asyncio

In [None]:
import pandas as pd
import os

import openai
import os
import IPython
from langchain.chat_models import ChatOpenAI
from dotenv import load_dotenv


load_dotenv()

False

In [None]:
import torch
from transformers import BitsAndBytesConfig
from langchain import HuggingFacePipeline
from langchain import PromptTemplate, LLMChain
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

In [None]:
# Take the model id from the VARIABLES cell so the QA model is configured in one place.
qa_model_id = QA_MODEL

# Load the weights in half precision at load time. The pipeline below receives an
# already-instantiated model, so its own `torch_dtype` argument cannot be relied on
# to change the precision of weights that are already loaded.
# NOTE(review): confirm float16 is the intended precision for this deployment.
qa_model = AutoModelForCausalLM.from_pretrained(
    qa_model_id,
    device_map="auto",
    torch_dtype=torch.float16,
)
qa_tokenizer = AutoTokenizer.from_pretrained(qa_model_id)


# Shared text-generation pipeline used by both LangChain wrappers below.
# top_k=1 keeps only the most likely token at each step, so sampling is effectively greedy.
# NOTE(review): max_length is documented as prompt + generated tokens; the QA prompts
# are long, so verify 500 leaves enough room for the answer.
pipe = pipeline(
        "text-generation",
        model=qa_model,
        tokenizer=qa_tokenizer,
        use_cache=True,
        device_map="auto",
        max_length=500,
        temperature = 1,
        do_sample=True,
        top_k=1,
        num_return_sequences=1,
        eos_token_id=qa_tokenizer.eos_token_id,
        pad_token_id=qa_tokenizer.eos_token_id,
        torch_dtype=torch.float16,
)

In [None]:
# LangChain wrapper around the shared text-generation pipeline, used by the live QA path.
llm = HuggingFacePipeline(pipeline=pipe, model_kwargs={"temperature": 1.0})

def answer_with_mistral(riddle):
  """Ask the Mistral pipeline for three candidate answers to `riddle`.

  The prompt contains one worked example and asks for a JSON object with the
  keys answer1, answer2 and answer3.

  Args:
    riddle: Riddle text substituted for the `{riddle}` placeholder.

  Returns:
    The value returned by `LLMChain.run` for the filled-in prompt; it is not
    post-processed here.
  """
  template = """  <s>[INST]   You are a science prodigy currently competing in a National Science competition. You are now in the fifth round, where you must first reason through the clues of the given riddle and then provide a short answer. Remember, your answer should consist of just the term the riddle is pointing to, and nothing else. Adding additional text will result in point deductions.
      Here's an example to guide you:
      Riddle: You might think i am a rather unstable character because i never stay at one place. However my motion obeys strict rules and i always return to where i started and even if i have to leave that spot again i do it in strict accordance to time. I can be named in electrical and mechanical contexts in all cases i obey the same mathematical rules. In order to fully analyse me you would think about a stiffness or force constant restoring force and angular frequency.
      Answer: oscillator

      Read the riddle below and provide the three possible correct answers as a json with keys: answer1, answer2, answer3

      NOTE: You are allowed to include an answer multiple times if your reasoning shows that it is likely the correct answer. Do not provide any explanations.

      Riddle: {riddle}

      [/INST] </s>

  """

  # A fresh prompt/chain pair is built on every call, exactly as before.
  riddle_prompt = PromptTemplate(template=template, input_variables=["riddle"])
  chain = LLMChain(prompt=riddle_prompt, llm=llm)
  return chain.run({"riddle":riddle})

In [None]:
# LangChain wrapper around the same pipeline, used by the demo QA path.
demo_llm = HuggingFacePipeline(pipeline=pipe, model_kwargs={"temperature": 0.0})

def demo_qa_mistral_answer(riddle_content):
  """Ask the Mistral pipeline for a single answer to `riddle_content`.

  Args:
    riddle_content: Riddle text substituted for the `{riddle}` placeholder.

  Returns:
    The chain output with leading and trailing whitespace stripped.
  """
  template = """ <s>[INST] You are a science prodigy currently competing in a National Science competition. You are now in the fifth round, where you must provide a short answer to a riddle. Remember, your answer should consist of just the term the riddle is pointing to, and nothing else. Adding additional text will result in point deductions.
      Here's an example to guide you:
      Riddle: you might think i am a rather unstable character because i never stay at one place, however my motion obeys strict rules and i always return to where i started and even if i have to leave that spot again i do it in strict accordance to time, i can be named in electrical and mechanical contexts in all cases i obey the same mathematical rules, in order to fully analyse me you would think about a stiffness or force constant restoring force and angular frequency,
      Answer: oscillator

      Read the riddle below and provide the correct answer.

     Riddle: {riddle}

      [/INST] </s>
  """

  riddle_prompt = PromptTemplate(template=template, input_variables=["riddle"])
  mistral_chain = LLMChain(prompt=riddle_prompt, llm=demo_llm)
  raw_answer = mistral_chain.run({"riddle":riddle_content})
  return raw_answer.strip()

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain import PromptTemplate, LLMChain
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    AIMessagePromptTemplate,
    HumanMessagePromptTemplate,
)
from langchain.schema import (
    AIMessage,
    HumanMessage,
    SystemMessage
)

# chat mode instance
# Pass the key read from Colab secrets explicitly: nothing in this notebook exports
# OPENAI_API_KEY to the environment, so the client would otherwise have no credential.
chat = ChatOpenAI(temperature=1.0, openai_api_key=OPENAI_API_KEY)

In [None]:
# ChatGPT
def live_qa_chatgpt_answer(riddle):
  """Ask the ChatGPT client for three candidate answers to `riddle`.

  The prompt contains one worked example and asks for a JSON object with the
  keys answer1, answer2 and answer3.

  Args:
    riddle: Riddle text substituted for the `{riddle}` placeholder.

  Returns:
    The `content` of the chat model's reply, unparsed.
  """
  template = """You are a science prodigy currently competing in a National Science competition. You are now in the fifth round, where you must first reason through the clues of the given riddle and then provide a short answer. Remember, your answer should consist of just the term the riddle is pointing to, and nothing else. Adding additional text will result in point deductions.
      Here's an example to guide you:
      Riddle: You might think i am a rather unstable character because i never stay at one place. However my motion obeys strict rules and i always return to where i started and even if i have to leave that spot again i do it in strict accordance to time. I can be named in electrical and mechanical contexts in all cases i obey the same mathematical rules. In order to fully analyse me you would think about a stiffness or force constant restoring force and angular frequency.
      Answer: oscillator

      Read the riddle below and provide the three possible correct answers as a json with keys: answer1, answer2, answer3

      NOTE: You are allowed to include an answer multiple times if your reasoning shows that it is likely the correct answer. Do not provide any explanations.

      Riddle: {riddle}

  """

  # The whole prompt is sent as a single human message.
  filled_prompt = template.format(riddle=riddle)
  reply = chat([HumanMessage(content=filled_prompt)])
  return reply.content

In [None]:
def demo_qa_chatgpt_answer(riddle_content):
  """Ask the ChatGPT client for a single answer to `riddle_content`.

  Args:
    riddle_content: Riddle text substituted for the `{riddle}` placeholder.

  Returns:
    The `content` of the chat model's reply.
  """
  template = """
      You are a science prodigy currently competing in a National Science competition. You are now in the fifth round, where you must provide a short answer to a riddle. Remember, your answer should consist of just the term the riddle is pointing to, and nothing else. Adding additional text will result in point deductions.
      Here's an example to guide you:
      Riddle: you might think i am a rather unstable character because i never stay at one place, however my motion obeys strict rules and i always return to where i started and even if i have to leave that spot again i do it in strict accordance to time, i can be named in electrical and mechanical contexts in all cases i obey the same mathematical rules, in order to fully analyse me you would think about a stiffness or force constant restoring force and angular frequency,
      Answer: oscillator

      Read the riddle below and provide the correct answer.
      Riddle: {riddle}

      Answer:"""

  # The whole prompt is sent as a single human message.
  filled_prompt = template.format(riddle=riddle_content)
  reply = chat([HumanMessage(content=filled_prompt)])
  return reply.content

In [None]:
import uvicorn
import fastapi
from pyngrok import ngrok
from pydantic import BaseModel
import nest_asyncio

nest_asyncio.apply()

In [None]:
class DemoInputText(BaseModel):
  """Request schema with a single `text` field (not referenced by the endpoints visible in this part of the notebook)."""
  text: str

class LiveInputText(BaseModel):
  """Request payload of the `/live_qa` endpoint."""
  # Riddle clues sent to the QA models.
  clues: str
  # Passed on to the answer-log loader and the confidence model as the new-riddle flag.
  is_start_of_riddle: bool = False
  # When true, filter_answers returns the current top answer regardless of confidence.
  is_end_of_riddle: bool = False
  # Used by the endpoint as the chunk number; 0 makes it skip the model calls.
  clue_count: int = 0

class LiveDemoInputText(BaseModel):
  """Request payload of the `/live_demo_qa` endpoint.

  Same fields as the live payload plus a `threshold` value.
  """
  # Riddle clues sent to the QA models.
  clues: str
  # Flags describing where this chunk sits within the riddle.
  is_start_of_riddle: bool = False
  is_end_of_riddle: bool = False
  # Used by the endpoint as the chunk number.
  clue_count: int = 0
  # Explicitly annotated: pydantic v2 raises on a non-annotated class attribute,
  # and pydantic v1 inferred `int` from the default anyway.
  # NOTE(review): presumably a confidence threshold — confirm how the endpoint uses it.
  threshold: int = 4

class OutputText(BaseModel):
  """Response schema of the QA endpoints: one answer string per model."""
  # Answer produced by the Mistral path.
  mistral: str
  # Answer produced by the ChatGPT path.
  # NOTE(review): annotated `str` but defaults to None — presumably meant to be Optional[str]; confirm.
  chatGPT: str = None

# STT

In [None]:
# Import and install the required libraries for asr

%%capture
!pip install git+https://github.com/openai/whisper.git
!pip install jiwer
!pip install tabulate
!pip install pydub
!pip install transformers
import torch
import numpy as np
import whisper
import jiwer
import time
import pandas as pd
from tabulate import tabulate
from pydub import AudioSegment
import os
import joblib
import re
from transformers import BertTokenizer, BertModel
import torch
import torch.nn.functional as F
from torch import nn, Tensor

In [None]:
%%capture
# Install required libraries for web api
!pip -q install fastapi
!pip -q install pyngrok
!pip -q install uvicorn
!pip -q install nest_asyncio
!pip -q install python-multipart

In [None]:
# Import libraries
import uvicorn
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from pyngrok import ngrok
from pydantic import BaseModel
import nest_asyncio
import shutil

# # # Import models for serialisation/ deserialisation
from pydantic import BaseModel
import base64
import io
import wave

In [None]:
nest_asyncio.apply()

In [None]:
# Load whisper model
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# STT_WHISPER_MODEL selects the Whisper model size (tiny, base, small, medium, large).
model = whisper.load_model(STT_WHISPER_MODEL, device=DEVICE)

In [None]:
def transcribe(path_to_audio):
  """Transcribe an audio file with the already-loaded Whisper `model`.

  Args:
    path_to_audio: Path of the audio file, as accepted by `whisper.load_audio`.

  Returns:
    The "text" entry of the result returned by `model.transcribe`.
  """
  waveform = whisper.load_audio(path_to_audio)
  transcription = model.transcribe(waveform)
  return transcription["text"]

In [None]:
def detect_start_point(transcribed_text):
  """Return the first known riddle-start cue contained in a transcript.

  Matching is a case-insensitive substring search; cues are tried in the order
  listed below and the first one found wins.

  Args:
    transcribed_text: Transcript text to scan.

  Returns:
    The matching cue phrase (lower case), or None when no cue is present.
  """
  sample_start_points = [
      "we begin", "i begin", "let's begin",
      "first riddle", "1st riddle", "riddle number one", "riddle number 1",
      "second riddle", "2nd riddle", "riddle number two", "riddle number 2",
      "third riddle", "3rd riddle", "riddle number three", "riddle number 3",
      "fourth riddle", "4th riddle", "riddle number four", "riddle number 4",
      "fifth riddle", "5th riddle", "riddle number five", "riddle number 5",
      "last riddle", "final riddle", "last one", "next one", "first one",
      "second one", "third one", "fourth one", "fifth one",
      "first redo", "second redo", "third redo", "fourth redo", "last redo",
      "final redo", "fifth redo", "best one", "fast riddle", "test riddle",
  ]

  lowered = transcribed_text.lower()
  return next((cue for cue in sample_start_points if cue in lowered), None)

In [None]:
def detect_end_point(transcribed_text):
  """Return the riddle-end cue contained in a transcript, if any.

  Matching is a case-insensitive substring search over the known end cues.

  Args:
    transcribed_text: Transcript text to scan.

  Returns:
    The matching cue phrase, or None when the transcript contains none.
  """
  end_points = ["who am i"]

  lowered = transcribed_text.lower()
  return next((cue for cue in end_points if cue in lowered), None)

In [None]:
class BertClassifier(nn.Module):
  """BERT encoder followed by a small fully connected classification head.

  Args:
    pretrained_bert: Encoder module; it must expose `config.hidden_size` and
      return an object with `last_hidden_state` when called.
    num_classes: Number of output classes.
  """

  def __init__(self, pretrained_bert, num_classes):
    super().__init__()
    self.model = pretrained_bert
    self.input_size = self.model.config.hidden_size
    # Fully connected classifier: hidden_size -> 256 -> num_classes
    hidden_units = 256
    self.classifier = nn.Sequential(
        nn.Linear(self.input_size, hidden_units),
        nn.ReLU(),
        nn.Linear(hidden_units, num_classes),
    )

  def forward(self, input_ids, attention_mask,labels=None):
    """Return the logits, or the cross-entropy loss when `labels` is given."""
    encoded = self.model(input_ids=input_ids, attention_mask=attention_mask)
    # Mean pooling over dim 1 of the last hidden state.
    pooled = encoded.last_hidden_state.mean(dim=1)
    logits = self.classifier(pooled)

    if labels is None:
      return logits
    return nn.CrossEntropyLoss()(logits, labels)

def preprocess_bert_features(sentence,tokenizer):
  """Tokenize one sentence for the BERT classifier.

  The sentence is padded/truncated to 512 tokens and returned as PyTorch tensors.

  Args:
    sentence: Text to encode.
    tokenizer: Tokenizer exposing `encode_plus`.

  Returns:
    Tuple `(input_ids, attention_mask)` taken from the tokenizer output.
  """
  encoding = tokenizer.encode_plus(
      sentence,
      padding='max_length',
      max_length=512,
      truncation=True,
      return_tensors='pt',
  )
  return encoding["input_ids"], encoding["attention_mask"]

def predict_clue(sentence, model, tokenizer):
    """Classify a single sentence and return the predicted class index.

    Args:
        sentence: Text to classify.
        model: Classifier called with `input_ids` and `attention_mask`, returning logits.
        tokenizer: Tokenizer handed to `preprocess_bert_features`.

    Returns:
        The index of the highest logit as a Python int.
    """
    input_ids, attention_mask = preprocess_bert_features(sentence, tokenizer)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        scores = model(input_ids=input_ids, attention_mask=attention_mask)
        return torch.argmax(scores, dim=1).item()

# Tokenizer and encoder come from the base checkpoint; the fine-tuned classifier
# weights are then loaded on top from the configured Drive location.
tokenizer = BertTokenizer.from_pretrained(STT_BERT_MODEL)
pretrained_bert_model = BertModel.from_pretrained(STT_BERT_MODEL)
bert_model = BertClassifier(pretrained_bert=pretrained_bert_model, num_classes=2)
# The classifier is never moved to another device in this notebook, so map the
# checkpoint to the CPU; this also lets a GPU-saved checkpoint load on a CPU-only runtime.
# NOTE(review): torch.load unpickles the file — only load checkpoints from a trusted location.
bert_model.load_state_dict(torch.load(STT_BERT_MODEL_DRIVE_LOCATION, map_location="cpu"))
#bert_model.load_state_dict(torch.load('/content/drive/MyDrive/Text classification/Models/bert_classifier_model.pth'))
bert_model.eval()  # Set the model to evaluation mode

In [None]:
# Module-level state shared across calls for the riddle currently in progress.
accumulated_clues = []  # every clue sentence collected so far
store_to_count = []     # one entry per audio chunk that contained at least one clue

def process_audio_chunk(audio_filename):
  """Transcribe one audio chunk and fold its riddle clues into the running state.

  A start cue in the transcript resets the accumulated state and discards the
  text up to and including the cue. The remaining text is split after each
  '.', ',' or '?' and every non-empty fragment is classified; fragments
  predicted as class 1 are kept as clues.

  Args:
    audio_filename: Path of the audio chunk to transcribe.

  Returns:
    Tuple `(chunk_transcript, clues, clue_count, is_new_riddle, end_of_clues)`.
    When the chunk contains clues, `clues` is all accumulated clues joined by
    spaces and `clue_count` is the number of clue-bearing chunks so far;
    otherwise they are `" "` and `0`.
  """
  previous_end_index = 0  # where the usable text starts within this chunk
  is_new_riddle = False
  end_of_clues = False

  # Transcribe the chunk and look for start / end cues.
  chunk_transcript = transcribe(audio_filename)
  start_point = detect_start_point(chunk_transcript)
  end_point = detect_end_point(chunk_transcript)

  # A start cue begins a new riddle: reset state and skip past the cue phrase.
  if start_point:
    is_new_riddle = True
    accumulated_clues.clear()
    store_to_count.clear()
    start_index = chunk_transcript.lower().find(start_point.lower())
    previous_end_index = start_index + len(start_point)

  if end_point:
    end_of_clues = True

  transcribed_text = chunk_transcript[previous_end_index:].strip()

  # Split after each sentence-ending mark, keeping the mark with its sentence.
  sentences = re.split(r'(?<=[.,?])', transcribed_text)
  clues_found = []

  for sentence in sentences:
    stripped = sentence.strip()
    # The split leaves empty fragments (e.g. after a trailing '.'); they carry
    # no text, so do not spend a classifier call on them or count them as clues.
    if not stripped:
      continue
    if predict_clue(stripped,bert_model,tokenizer) == 1:
      accumulated_clues.append(sentence)
      clues_found.append(sentence)

  if not clues_found:
    return chunk_transcript, " ", 0, is_new_riddle,  end_of_clues

  store_to_count.append(" ".join(clues_found))
  clue_counter = len(store_to_count)  # number of clue-bearing chunks so far
  return chunk_transcript, " ".join(accumulated_clues), clue_counter, is_new_riddle, end_of_clues

# TTS API Call

In [None]:
from starlette.middleware.gzip import GZipMiddleware
# Single FastAPI application; the endpoints defined in the following cells register on it.
app = FastAPI()
# CORS: accept requests from any origin, with any method and any header.
# NOTE(review): wildcard origins together with allow_credentials=True is very permissive — confirm this is intended.
app.add_middleware(
    CORSMiddleware,
    allow_origins=['*'],
    allow_credentials=True,
    allow_methods=['*'],
    allow_headers=['*'],
)
# Gzip-compress responses; minimum_size=50 is the size threshold passed to the middleware.
app.add_middleware(GZipMiddleware, minimum_size=50)

In [None]:
# @app.on_event("startup")
# def load_vits():
# demo_config = VitsConfig()
# live_config=VitsConfig()
# demo_config.load_json("/content/drive/MyDrive/NSMQ AI Project/Technical/TTS/Prof Elsie Kauffmann/VITS model/vits-elsie/traineroutput/vits_vctk-May-24-2023_11+05PM-23a7a9a3/config.json")
# live_config.load_json("/content/drive/MyDrive/NSMQ AI Project/Technical/TTS/Isaac sesi/VITS model/sesi_vits/traineroutput/tts_multispeaker-October-13-2023_11+03AM-0000000/config.json")
# demo_vits = Vits.init_from_config(demo_config)
# demo_vits.load_onnx("/content/drive/MyDrive/NSMQ AI Project/Technical/TTS/Prof Elsie Kauffmann/VITS model/vits-elsie/elsie.onnx")
# live_vits = Vits.init_from_config(live_config)
# live_vits.load_onnx("/content/drive/MyDrive/NSMQ AI Project/Technical/TTS/Isaac sesi/VITS model/sesi.onnx")

# Demo voice: build a VITS model from its JSON config and load the exported ONNX weights.
# The path strings below are placeholders and must be replaced with real locations.
demo_config = VitsConfig()
demo_config.load_json("/path/to/onnx/config")
demo_vits = Vits.init_from_config(demo_config)
demo_vits.load_onnx("path/to/onnx/model")

# Live voice: same procedure with its own config and ONNX file.
live_config=VitsConfig()
live_config.load_json("path/to/onnx/config")
live_vits = Vits.init_from_config(live_config)
live_vits.load_onnx("/path/to/onnx/model")

@app.get('/demo_tts')
def demo_onnx_audio(payload:DemoText):
  """Synthesize `payload.text` with the requested voice and return it as a WAV file.

  The handler has its own name so it is not shadowed by the `/live_tts` handler;
  the route itself is unchanged.

  Args:
    payload: Request payload carrying `text` and `voice`.

  Returns:
    A `FileResponse` with media type "audio/wav".
  """
  import os
  from starlette.background import BackgroundTask

  out_path=demo_audio(payload.text,payload.voice)
  # demo_audio leaves a temporary file behind; remove it once the response has been sent.
  return FileResponse(
      out_path,
      media_type="audio/wav",
      background=BackgroundTask(os.remove, out_path),
  )

@app.get('/live_tts')
def onnx_audio(payload:LiveText):
  """Synthesize `payload.text` with the live voice and return it as a WAV file.

  Args:
    payload: Request payload carrying `text`.

  Returns:
    A `FileResponse` with media type "audio/wav".
  """
  import os
  from starlette.background import BackgroundTask

  out_path=live_audio(payload.text)
  # live_audio leaves a temporary file behind; remove it once the response has been sent.
  return FileResponse(
      out_path,
      media_type="audio/wav",
      background=BackgroundTask(os.remove, out_path),
  )

@app.get("/tts-test", response_model=OutputTTSText)
async def tts_test():
    """Connectivity check: always returns a fixed greeting in the `answer` field."""
    return {"answer": "Hello from TTS"}

# QA API Call

In [None]:
import os
import csv
import json
import random
from glob import glob
import time
import string
import re

# JSON file holding the answers already committed for the riddle in progress.
RIDDLE_ANSWERED_FILE_LIVE = os.path.join(QA_LOG_DRIVE_LOCATION, "ans_live_logs.json")
# Start from an empty log. The keys must be exactly the ones the endpoints read
# back ('mistral' and 'chatGPT'); any other spelling raises KeyError on the first
# request that does not start a new riddle.
with open(RIDDLE_ANSWERED_FILE_LIVE, 'w') as f:
    json.dump({"mistral": '', "chatGPT": ''}, f)

def remove_articles(text):
    """
    Replace each standalone lower-case article (a, an, the) in `text` with a single space.

    Matching is case-sensitive and whole-word only; the surrounding whitespace is
    left untouched.

    Args:
        text: str

    Returns:
        text with the articles replaced by spaces: str
    """
    return re.sub(r"\b(a|an|the)\b", " ", text, flags=re.UNICODE)

def normalize_text(s):
    """
    Lower-case `s`, drop punctuation (except "/") and collapse runs of whitespace.

    Args:
        s: (str) string to normalize

    Returns:
        normalized string: str
    """
    # Every ASCII punctuation character is removed except the forward slash.
    punctuation_to_drop = string.punctuation.replace("/", "")
    without_punctuation = s.lower().translate(str.maketrans("", "", punctuation_to_drop))
    # split() with no arguments also trims leading and trailing whitespace.
    return " ".join(without_punctuation.split())



def model_answer_confidence(model, threshold, chunk_num, model_answer, is_start_of_riddle):
    """Accumulate per-answer scores across chunks and pick the current top answer.

    Scores live in a JSON log file inside QA_LOG_DRIVE_LOCATION. A new log is
    started for a new riddle (or chunk 1); otherwise the most recently modified
    log of this model is loaded and updated. Each answer in `model_answer` adds
    `chunk_num` to its score, so later chunks weigh more.

    Args:
        model: Model label used in the log file name (e.g. "Mistral").
        threshold: Minimum score the top answer needs in order to be returned.
        chunk_num: Chunk / clue number; also the weight added per answer.
        model_answer: Iterable of candidate answers. Values that are not strings
            (numbers, None, ...) are converted to text before normalization.
        is_start_of_riddle: True when this chunk starts a new riddle.

    Returns:
        Tuple `(top_answer, top_count)`. `top_answer` is "" when the best score is
        below `threshold`; ties are broken at random.
    """
    cur_time = time.strftime("%Y-%m-%d_%H-%M-%S")  # Get the current time

    if is_start_of_riddle or chunk_num == 1:
        # Create a new JSON log file if it is a new riddle.
        print(f"Is Start of Riddle: {is_start_of_riddle}\t Clue Count: {chunk_num}")
        filename = os.path.join(QA_LOG_DRIVE_LOCATION, f"{model}_log_{cur_time}.json")
        answer_counts = {}
    else:
        # Find the most recent generated JSON log file, if it is not a new riddle.
        log_files = glob(os.path.join(QA_LOG_DRIVE_LOCATION, f"{model}_log_*.json"))
        if log_files:
            # Sort the log files by modification time to get the most recent one.
            log_files.sort(key=os.path.getmtime, reverse=True)
            filename = log_files[0]
            with open(filename, 'r') as f:
                logged_data = json.load(f)
                answer_counts = logged_data["answer_counts"]
                print("Loaded Answer Counts Dictionary:", answer_counts)
        else:
            # If no log files exist, create a new one.
            filename = os.path.join(QA_LOG_DRIVE_LOCATION, f"{model}_log_{cur_time}.json")
            answer_counts = {}

    # Update answer_counts based on the model answer and chunk number
    for ans in model_answer:
        # Parsed JSON values are not guaranteed to be strings; coerce them so the
        # text normalization below cannot fail on numbers or null.
        ans = "" if ans is None else str(ans)
        ans = remove_articles(normalize_text(ans).replace('"', '')).strip()
        answer_counts[ans] = answer_counts.get(ans, 0) + int(chunk_num)

    # The empty answer never accumulates score.
    answer_counts[''] = 0

    print("Answer Counts Dictionary:", answer_counts)

    # Find the top answers and write answer_counts to the JSON log file
    top_count = max(answer_counts.values())
    top_answers = [ans for ans, count in answer_counts.items() if count == top_count]

    top_answer = ""

    if top_count >= threshold:
        top_answer = random.choice(top_answers)

    with open(filename, 'w') as f:
        data_to_save = {
            "answer_counts": answer_counts,
            "top_answer": (top_answer, top_count)
        }
        json.dump(data_to_save, f)

        print("Saved Data:", data_to_save)

    return top_answer, top_count


In [None]:
import ast
import json


def preprocess_model_output(model_output):
    """Extract the first JSON object from a model reply and parse it.

    Args:
        model_output: Either an already-parsed dict (returned unchanged) or the raw
            model reply, which may wrap the JSON in code fences or extra text.

    Returns:
        The parsed dict, with JSON null values replaced by '' (treated as "no
        answer"), or None when the reply contains no parsable JSON object.
    """
    if isinstance(model_output, dict):
        return model_output

    # Flatten to one line so the pattern below can span the whole object.
    flattened = str(model_output).replace("\n", '').strip()

    # Grab the first {...} span; this also leaves code fences and prose outside it.
    match = re.search(r'{.*?}', flattened)
    if match is None:
        # No JSON object at all: report failure the same way as a parse error
        # instead of raising on `None.group`.
        print("SOMETHING WENT WRONG!")
        return None

    candidate = match.group(0)
    print("State of Model Output:", candidate)

    # Try converting answer to json
    try:
        json_data = json.loads(candidate)
    except (SyntaxError, ValueError):
        print("SOMETHING WENT WRONG!")
        return None

    # A null answer means "no answer": map it to '' so the valid answers next to it
    # are kept and downstream text handling only ever sees non-null values.
    return {key: ('' if value is None else value) for key, value in json_data.items()}


In [None]:
import time


def filter_answers(ans_data, confidence_threshold, is_end_of_riddle):
    """Return the answer only when it is confident enough or the riddle has ended.

    Args:
        ans_data: Pair `(answer, confidence)`.
        confidence_threshold: Minimum confidence (compared after `int()` conversion).
        is_end_of_riddle: When true, the answer is returned regardless of confidence.

    Returns:
        `ans_data[0]` when accepted, otherwise ''.
    """
    meets_threshold = int(ans_data[1]) >= confidence_threshold
    return ans_data[0] if (meets_threshold or is_end_of_riddle) else ''


def load_riddle_answered_log(is_start_of_riddle=True, chunk_num=1):
    """Load the answers already committed for the riddle in progress.

    Args:
        is_start_of_riddle: When equal to True, an empty log is returned without
            reading the file.
        chunk_num: Accepted for the callers' convenience; not used.

    Returns:
        Dict with the keys "mistral" and "chatGPT" — empty strings for a new riddle
        or when the log file does not exist, otherwise the file's JSON content.
    """
    if is_start_of_riddle == True or not os.path.exists(RIDDLE_ANSWERED_FILE_LIVE):
        return {"mistral": '', "chatGPT": ''}

    with open(RIDDLE_ANSWERED_FILE_LIVE, "r") as log_file:
        return json.load(log_file)

def save_riddle_answered_log(data):
    """Overwrite the riddle answer log with `data` serialized as JSON.

    Args:
        data: JSON-serializable object to store.
    """
    with open(RIDDLE_ANSWERED_FILE_LIVE, "w") as log_file:
        json.dump(data, log_file)


@app.get("/live_qa", response_model=OutputText)
def live_answer(input_data: LiveInputText):
    """Answer the riddle in progress from the clues received so far.

    For each model ("mistral", "chatGPT") an answer is computed only while no
    answer has been committed for the current riddle and the clue count is not 0;
    otherwise the stored answer is returned as is. The computed answers are
    written back to the answer log before returning.

    Args:
        input_data: Clues plus the riddle start/end flags and the clue count.

    Returns:
        Dict with the keys "mistral" and "chatGPT"; a value is '' while the model
        has no sufficiently confident answer.
    """
    ct = 10.0  # ct represents the confidence threshold.
    chunk_num = input_data.clue_count
    is_start_of_riddle = input_data.is_start_of_riddle
    is_end_of_riddle = input_data.is_end_of_riddle

    print()
    print("Chunk Num:", chunk_num)
    print("="*70)
    print(input_data.clues)
    print()

    # Load data previously computed answers
    ans_file_st = time.time()
    answer_file = load_riddle_answered_log(is_start_of_riddle, chunk_num)
    ans_file_et = time.time()
    print("Time to load answers file:", (ans_file_et-ans_file_st))

    if answer_file['mistral'] == '' and chunk_num != 0:
        # Send clues to Mistral and get answer
        mistral_start_time = time.time()
        mistral_output = answer_with_mistral(riddle=input_data.clues)
        mistral_end_time = time.time()
        print("\nMistral Inference Time:", mistral_end_time-mistral_start_time)
        #print(f"Mistral Output: {mistral_output}")
        p_st = time.time()
        mistral_output = preprocess_model_output(mistral_output)
        p_et = time.time()
        print("Time to process Mistral Output:", p_et - p_st)

        #Put answers in a list
        if mistral_output is not None:
            mistral_output = [mistral_output[key] for key in mistral_output.keys()]
        else:
          # Parsing failed: fall back to a single empty answer.
          mistral_output = ['']
          print("Failed to convert Mistral response to dict/json")
        mistral_conf_st = time.time()
        mistral_ans_data = model_answer_confidence("Mistral", ct, chunk_num, mistral_output, is_start_of_riddle)
        mistral_conf_et = time.time()
        print("\nMistral Confidence Modelling Time:", mistral_conf_et-mistral_conf_st)
        filter_start_time = time.time()
        mistral_final_ans = filter_answers(mistral_ans_data, ct, is_end_of_riddle)
        filter_end_time = time.time()
        print("\nMistral Time Elapsed for Filtering:", (filter_end_time-filter_start_time))
        #print(mistral_ans_data)
    else:
      # An answer is already stored (or there are no clues yet): reuse it.
      mistral_final_ans = answer_file['mistral']

    if answer_file['chatGPT'] == '' and chunk_num != 0:
        # Send clues to ChatGPT and get answer
        chatgpt_start_time = time.time()
        # The real ChatGPT call is disabled; a fixed empty answer is used instead.
        #chatGPT_output = live_qa_chatgpt_answer(input_data.clues)
        chatGPT_output = {"answer1": ''}
        chatgpt_end_time = time.time()
        print("\nChatGPT Inference Time:", chatgpt_end_time-chatgpt_start_time)
        #print(f"ChatGPT Output: {chatGPT_output}")
        cp_st = time.time()
        chatGPT_output = preprocess_model_output(chatGPT_output)
        cp_et = time.time()
        print("Time to preprocess ChatGPT output:", (cp_et - cp_st))
        # Put answers in a list
        if chatGPT_output is not None:
            chatGPT_output = [chatGPT_output[key] for key in chatGPT_output.keys()]
        else:
          # Parsing failed: fall back to a single empty answer.
          chatGPT_output = ['',]
          print("Failed to convert ChatGPT response to dict/json")
        c_conf_st = time.time()
        chatGPT_ans_data = model_answer_confidence("ChatGPT", ct, chunk_num, chatGPT_output, is_start_of_riddle)
        c_conf_et = time.time()
        print("\nChatGPT Confidence Modelling Time:", c_conf_et-c_conf_st)
        filter_start_time = time.time()
        chatgpt_final_ans = filter_answers(chatGPT_ans_data, ct, is_end_of_riddle)
        filter_end_time = time.time()
        print("\nChatGPT Time Elapsed for Filtering:", (filter_end_time-filter_start_time))
        #print(chatGPT_ans_data)
    else:
      # An answer is already stored (or there are no clues yet): reuse it.
      chatgpt_final_ans = answer_file['chatGPT']


    answers = {
        "mistral": mistral_final_ans,
        "chatGPT": chatgpt_final_ans
    }

    # Write answers to file
    save_st = time.time()
    save_riddle_answered_log(answers)
    save_et = time.time()
    print("\nTime to save file:", (save_et-save_st))

    return answers


def _run_model_answer_pipeline(model_label, preprocess_log_label, cached_answer, run_inference,
                               ct, chunk_num, is_start_of_riddle, is_end_of_riddle):
    """Return one model's answer for the current chunk, reusing the cached answer when possible.

    Args:
        model_label: Display name of the model ("Mistral" / "ChatGPT"); used in the timing
            logs and passed as the first argument to ``model_answer_confidence``.
        preprocess_log_label: Exact log prefix printed with the preprocessing time.
        cached_answer: Answer already stored in the riddle log for this model.
        run_inference: Zero-argument callable that returns the model's raw output.
        ct: Confidence threshold forwarded to ``model_answer_confidence`` and ``filter_answers``.
        chunk_num: Clue count reported for this chunk.
        is_start_of_riddle: Forwarded to ``model_answer_confidence``.
        is_end_of_riddle: Forwarded to ``filter_answers``.

    Returns:
        ``cached_answer`` unchanged when it is non-empty or ``chunk_num`` is 0; otherwise the
        value returned by ``filter_answers`` for the freshly generated candidates.
    """
    # The model is only queried when nothing is cached yet and at least one clue was counted.
    if not (cached_answer == '' and chunk_num != 0):
        return cached_answer

    inference_start = time.time()
    raw_output = run_inference()
    inference_end = time.time()
    print(f"\n{model_label} Inference Time:", inference_end - inference_start)

    preprocess_start = time.time()
    parsed_output = preprocess_model_output(raw_output)
    preprocess_end = time.time()
    print(preprocess_log_label, preprocess_end - preprocess_start)

    # preprocess_model_output yields None when the response could not be turned into a dict;
    # fall back to a single empty candidate so the downstream steps still run.
    if parsed_output is not None:
        candidate_answers = list(parsed_output.values())
    else:
        candidate_answers = ['']
        print(f"Failed to convert {model_label} response to dict/json")

    confidence_start = time.time()
    answer_data = model_answer_confidence(model_label, ct, chunk_num, candidate_answers, is_start_of_riddle)
    confidence_end = time.time()
    print(f"\n{model_label} Confidence Modelling Time:", confidence_end - confidence_start)

    filter_start = time.time()
    final_answer = filter_answers(answer_data, ct, is_end_of_riddle)
    filter_end = time.time()
    print(f"\n{model_label} Time Elapsed for Filtering:", filter_end - filter_start)

    return final_answer


@app.get("/live_demo_qa", response_model=OutputText)
def live_demo_answer(input_data: LiveDemoInputText):
    """Answer the clues received so far in live-demo mode.

    Loads the answers already logged for the current riddle, runs the Mistral pipeline and
    the (currently stubbed) ChatGPT pipeline for any model without a logged answer, writes
    the resulting answers back to the log and returns them.

    Args:
        input_data: Request payload; ``clues``, ``clue_count``, ``is_start_of_riddle`` and
            ``is_end_of_riddle`` are read from it.

    Returns:
        dict with the keys ``"mistral"`` and ``"chatGPT"``.
    """
    ct = 3  # ct represents the confidence threshold.
    chunk_num = input_data.clue_count
    is_start_of_riddle = input_data.is_start_of_riddle
    is_end_of_riddle = input_data.is_end_of_riddle

    print()
    print("Chunk Num:", chunk_num)
    print("="*70)
    print(input_data.clues)
    print()

    # Load previously computed answers
    load_start = time.time()
    answer_file = load_riddle_answered_log(is_start_of_riddle, chunk_num)
    load_end = time.time()
    print("Time to load answers file:", (load_end - load_start))

    # Send clues to Mistral and get answer
    mistral_final_ans = _run_model_answer_pipeline(
        "Mistral",
        "Time to process Mistral Output:",
        answer_file['mistral'],
        lambda: answer_with_mistral(riddle=input_data.clues),
        ct, chunk_num, is_start_of_riddle, is_end_of_riddle,
    )

    # The live ChatGPT call (live_qa_chatgpt_answer(input_data.clues)) is disabled; an empty
    # placeholder answer is pushed through the same pipeline instead.
    chatgpt_final_ans = _run_model_answer_pipeline(
        "ChatGPT",
        "Time to preprocess ChatGPT output:",
        answer_file['chatGPT'],
        lambda: {"answer1": ''},
        ct, chunk_num, is_start_of_riddle, is_end_of_riddle,
    )

    answers = {
        "mistral": mistral_final_ans,
        "chatGPT": chatgpt_final_ans
    }

    # Write answers to file
    save_start = time.time()
    save_riddle_answered_log(answers)
    save_end = time.time()
    print("\nTime to save file:", (save_end - save_start))

    return answers


@app.get('/demo_qa', response_model=OutputText)
def demo_answer(input_data: DemoInputText):
    """Answer a riddle in demo mode using the Mistral demo helper only.

    Args:
        input_data: Request payload; its ``text`` attribute holds the riddle.

    Returns:
        dict with a single ``"mistral"`` key holding the value returned by
        ``demo_qa_mistral_answer``.
    """
    print("Demo mode")
    print(input_data)

    riddle_text = input_data.text
    print(riddle_text)

    mistral_answer = demo_qa_mistral_answer(riddle_text)
    print("Mistral:", mistral_answer)

    return {"mistral": mistral_answer}


@app.get("/qa-test", response_model=OutputText)
async def qa_test():
    """Connectivity check for the QA routes: returns a fixed greeting per model key."""
    canned_reply = dict(
        mistral="Mistral Says Hello!",
        chatGPT="ChatGPT Says Hello!",
    )
    return canned_reply


# STT API CALL

In [None]:
class AudioBytes(BaseModel):
  """Request body for /get-transcript: one audio chunk and the file name to store it under."""
  # Base64-encoded audio payload; the endpoint passes it to base64.b64decode.
  data: bytes
  # Name of the temporary file the decoded audio is written to before transcription.
  filename: str

@app.get("/get-transcript")
async def get_transcript(audio: AudioBytes):
  """Transcribe one base64-encoded audio chunk and report the riddle state.

  The decoded bytes are written to a temporary file in the working directory, handed to
  ``process_audio_chunk`` and removed again, even when processing fails.

  Args:
    audio: Payload with the base64 ``data`` and the ``filename`` to store it under. Only
      the final path component of ``filename`` is used.

  Returns:
    On success a dict with ``transcript``, ``clues``, ``clue_count``,
    ``is_start_of_riddle`` and ``is_end_of_riddle``; on any failure ``{"error": <message>}``.
  """
  try:
    decoded_data = base64.b64decode(audio.data)

    # NOTE(review): an earlier version also wrapped decoded_data in an in-memory WAV
    # (1 channel, 16-bit, 16 kHz) and then discarded that buffer, so the file on disk was
    # always the decoded bytes verbatim. That no-op step is dropped; if clients actually
    # send header-less PCM, the WAV wrapping must be written to the file instead - confirm.

    # The name comes from the client: keep only its last path component so a request
    # cannot write to (or delete) a file outside the working directory.
    audio_filename = os.path.basename(audio.filename)
    if audio_filename in ("", ".", ".."):
      raise ValueError(f"Invalid audio filename: {audio.filename!r}")

    try:
      # Save the audio file with the custom name
      with open(audio_filename, "wb") as file:
        file.write(decoded_data)

      # current_clues contains previous + recently identified clues
      chunk_transcript, current_clues, clue_counter, is_new_riddle, end_of_clues = process_audio_chunk(audio_filename)
    finally:
      # Always delete the temporary audio file, including when transcription raised.
      if os.path.exists(audio_filename):
        os.remove(audio_filename)

    print("transcript:", chunk_transcript)
    return {"transcript": chunk_transcript, "clues": current_clues, "clue_count":clue_counter, "is_start_of_riddle":is_new_riddle, "is_end_of_riddle":end_of_clues}
  except Exception as e:
    # Deliberate best-effort: errors are reported to the client in the response body.
    return {"error":str(e)}

@app.get("/stt-test")
async def stt_test():
  """Connectivity check for the STT routes: fixed transcript, every other field empty."""
  payload = {"transcript": "Hello from STT."}
  # Same keys as a real /get-transcript response, left blank.
  for blank_field in ("clues", "clue_count", "is_start_of_riddle", "is_end_of_riddle"):
    payload[blank_field] = ""
  return payload

# Setup required API Dependencies

In [None]:
!pip install fastapi uvicorn pydantic pyngrok nest_asyncio

# GENERIC API CALL

In [None]:
@app.get("/")
async def root():
    """Landing route: returns a fixed greeting so clients can verify the API is reachable."""
    greeting = "Hello from NSMQ AI"
    return dict(response=greeting)

# API SERVER

Run the cells below to start the API server and expose it through a public ngrok URL.

In [None]:
# Register the ngrok auth token stored in the NGROK_TOKEN variable. A plain
# `!ngrok config add-authtoken NGROK_TOKEN` hands the shell the literal text "NGROK_TOKEN"
# (IPython only substitutes {var} / $var), so the token is set through pyngrok instead.
ngrok.set_auth_token(NGROK_TOKEN)

In [None]:
# One port value shared by the tunnel and the server so the two cannot drift apart.
API_PORT = 8000

# Open the tunnel and show its address before starting the server.
ngrok_tunnel = ngrok.connect(API_PORT)
print("Public URL:", ngrok_tunnel.public_url)

uvicorn.run(app, port=API_PORT)