In [1]:
import keyring
import os 
import openai
import streamlit as st 
from audio_recorder_streamlit import audio_recorder
from elevenlabs import ElevenLabs
from langchain.chains import RetrievalQA
from langchain.chat_models.openai import ChatOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import DeepLake
from streamlit_chat import message

# Constants
TEMP_AUDIO_PATH = './temp/temp_audio.wav'  # scratch file for the recorded clip
AUDIO_FORMAT = 'audio/wav'                 # MIME type handed to st.audio

# API keys are read from the OS keyring so they never appear in the notebook.
# keyring.get_password returns None when no matching entry is stored.
OPENAI_API_KEY = keyring.get_password('openai', 'key_for_windows')
ELEVENLABS_API_KEY = keyring.get_password('elevenlabs', 'key_for_windows')
ACTIVELOOP_TOKEN = keyring.get_password('activeloop', 'key_for_windows')

# Export each key under the environment variable name its client library reads.
# Missing keys are skipped with a warning: assigning None to os.environ raises
# an opaque "str expected, not NoneType" TypeError that hides which key is absent.
# NOTE(review): ACTIVELOOP_TOKEN is exported on the assumption that Deep Lake
# picks it up from the environment - confirm against the installed version.
for _env_name, _secret in (
    ('OPENAI_API_KEY', OPENAI_API_KEY),
    ('ELEVEN_API_KEY', ELEVENLABS_API_KEY),
    ('ACTIVELOOP_TOKEN', ACTIVELOOP_TOKEN),
):
    if _secret:
        os.environ[_env_name] = _secret
    else:
        print(f"Warning: no keyring entry found for {_env_name}; it was not exported.")
del _env_name, _secret




In [2]:
def load_embeddings_and_database(active_loop_data_set_path):
    """Open the Deep Lake vector store at the given path in read-only mode.

    Args:
        active_loop_data_set_path: Dataset location passed to ``DeepLake`` as
            its ``dataset_path``.

    Returns:
        The ``DeepLake`` instance, created with a default-constructed
        ``OpenAIEmbeddings`` object as its ``embedding_function``.
    """
    store_options = {
        'dataset_path': active_loop_data_set_path,
        'read_only': True,
        'embedding_function': OpenAIEmbeddings(),
    }
    return DeepLake(**store_options)

In [None]:
# def transcribe_audio(audio_file_path, openai_key):
#     """
#     Transcribe audio using OpenAI Whisper API (legacy openai<1.0.0 interface: openai.Audio.transcribe).

#     Args:
#         audio_file_path (str): Path to the audio file.
#         openai_key (str): Your OpenAI API key.

#     Returns:
#         str: Transcribed text if successful, None otherwise.
#     """
#     # Set the OpenAI API key
#     openai.api_key = openai_key

#     try:
#         with open(audio_file_path, "rb") as audio_file:
#             # Use the updated method to transcribe
#             response = openai.Audio.transcribe(
#                 model="whisper-1",
#                 file=audio_file,
#             )
#         return response["text"]
#     except Exception as e:
#         print(f"Error calling Whisper API: {str(e)}")
#         return None



In [7]:
import whisper

# Whisper models already loaded in this session, keyed by model name.
# Re-running this cell resets the cache.
_WHISPER_MODELS = {}


def _get_whisper_model(model_name):
    """Return the Whisper model called ``model_name``, loading it on first use only."""
    if model_name not in _WHISPER_MODELS:
        _WHISPER_MODELS[model_name] = whisper.load_model(model_name)
    return _WHISPER_MODELS[model_name]


def transcribe_audio(audio_file_path, model_name="base"):
    """Transcribe an audio file with a locally loaded Whisper model.

    Args:
        audio_file_path (str): Path to the audio file to transcribe.
        model_name (str): Name passed to ``whisper.load_model``. Defaults to
            ``"base"``, the model this function always used before.

    Returns:
        str | None: The ``'text'`` entry of the transcription result, or
        ``None`` when the path is empty, is not an existing file, or loading
        the model / transcribing raises. Failures are printed, never raised.
    """
    # Fail fast on a missing file. The path is handed straight to Whisper, so
    # there is no need to hold our own open handle during transcription.
    if not audio_file_path or not os.path.isfile(audio_file_path):
        print(f"Error calling Whisper API: audio file not found: {audio_file_path}")
        return None

    try:
        # The model is loaded once per name and reused on subsequent calls.
        result = _get_whisper_model(model_name).transcribe(audio_file_path)
        return result['text']
    except Exception as e:
        print(f"Error calling Whisper API: {str(e)}")
        return None

In [14]:
# Low-level Whisper walkthrough: detect the language spoken in a sample clip.
model = whisper.load_model("base")

# Read the sample clip, then pad or trim it so it fits Whisper's 30-second window.
audio = whisper.pad_or_trim(whisper.load_audio('./download/harvard.wav'))

# Build the log-mel spectrogram and place it on the same device as the model.
mel = whisper.log_mel_spectrogram(audio).to(model.device)

# detect_language returns a pair; only the second item (probs) is used here.
# The key of probs with the highest value is reported as the detected language.
_, probs = model.detect_language(mel)
most_likely_language = max(probs, key=probs.get)
print(f"Detected language: {most_likely_language}")

  checkpoint = torch.load(fp, map_location=device)


Detected language: en


In [15]:
# display the transcription of the audio on the app
def display_transcription(transcription, output_path="/data/transcription.txt"):
    """Show the transcription in the Streamlit app and save it to a text file.

    Args:
        transcription (str | None): Transcribed text. Any falsy value
            (``None`` or an empty string) is treated as a failed transcription.
        output_path (str): File the transcription is written to. Defaults to
            ``"/data/transcription.txt"``, the location used before this
            parameter existed.

    Returns:
        None

    Raises:
        OSError: If the output directory cannot be created or the file cannot
            be written.
    """
    if not transcription:
        st.write("Error transcribing audio.")
        return

    st.write(f"Transcription: {transcription}")

    # Create the target directory on first use instead of failing on open().
    parent_dir = os.path.dirname(output_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # Explicit UTF-8: the platform default encoding (e.g. cp1252 on Windows)
    # cannot represent every character a transcript may contain.
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(transcription)

# get user input from Streamlit text input field
def get_user_input(transcription):
    """Render the query text box, pre-filled with the transcription when there is one.

    Args:
        transcription: Text to pre-populate the field with. Any falsy value
            (``None``, empty string) results in an empty field.

    Returns:
        Whatever ``st.text_input`` returns for the widget keyed ``"input"``.
    """
    prefill = transcription or ""
    return st.text_input("", value=prefill, key="input")


# record audio using audio_recorder and transcribe it using transcribe_audio
def record_and_transribe_audio():
    """Record a clip in the app and transcribe it when the user asks for it.

    The recorded bytes are played back with ``st.audio``. Only after the
    "Transcribe" button is pressed are they written to ``TEMP_AUDIO_PATH``,
    passed to ``transcribe_audio``, and the result shown through
    ``display_transcription``. The temporary file is removed afterwards,
    even when writing or transcribing raises.

    The misspelled function name is kept as-is so existing callers still work.

    Returns:
        str | None: The transcription, or ``None`` when nothing was recorded,
        the button was not pressed, or transcription failed.
    """
    audio_bytes = audio_recorder()
    if not audio_bytes:
        return None

    st.audio(audio_bytes, format=AUDIO_FORMAT)

    # Nothing to report until the user asks for a transcription. Calling
    # display_transcription here would show a spurious error message.
    if not st.button("Transcribe"):
        return None

    # Make sure the scratch directory exists before writing the clip.
    temp_dir = os.path.dirname(TEMP_AUDIO_PATH)
    if temp_dir:
        os.makedirs(temp_dir, exist_ok=True)

    try:
        with open(TEMP_AUDIO_PATH, 'wb') as f:
            f.write(audio_bytes)
        transcription = transcribe_audio(TEMP_AUDIO_PATH)
    finally:
        # Never leave the recording behind, whatever happened above.
        if os.path.exists(TEMP_AUDIO_PATH):
            os.remove(TEMP_AUDIO_PATH)

    display_transcription(transcription)
    return transcription
        

In [16]:
# search the database for a response based on the user's query

def search_db(user_input, db):
    """Answer a query with a RetrievalQA chain built over the given vector store.

    Args:
        user_input: The query text. It is also printed to stdout.
        db: Vector store exposing ``as_retriever()``.

    Returns:
        The result of calling the chain with ``{'query': user_input}``. The
        chain is created with ``return_source_documents=True``.
    """
    print(user_input)

    # Configure the retriever's search options in a single step.
    retriever = db.as_retriever()
    retriever.search_kwargs.update({
        'distance_metric': 'cos',
        'fetch_k': 100,
        'maximal_marginal_relevance': True,
        'k': 4,
    })

    llm = ChatOpenAI(model_name='gpt-3.5-turbo')
    qa_chain = RetrievalQA.from_llm(llm, retriever=retriever, return_source_documents=True)
    return qa_chain({'query': user_input})