In [None]:
#install ffmpeg
# brew install ffmpeg -> if needed
# conda install -c conda-forge ffmpeg

%pip install langchain_community
%pip install gradio
%pip install git+https://github.com/openai/whisper.git
%pip install whisper

In [1]:
from langchain_ollama import OllamaLLM
import gradio as gr
import json
import whisper
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Sanity check: report where ffmpeg lives on PATH (prints None when it cannot be found).

import shutil
print(shutil.which(cmd="ffmpeg"))

/opt/anaconda3/envs/Travel_assistance/bin/ffmpeg


In [3]:
def get_language(country, file):
    """Look up the entry stored for ``country`` in a JSON mapping file.

    Args:
        country: Key to look up in the JSON object.
        file: Path to a JSON file whose top-level value is an object.

    Returns:
        The value stored under ``country``.

    Raises:
        KeyError: If ``country`` is not a key of the mapping (this includes
            ``None``, e.g. when nothing was selected). The message names the
            missing key and the file.
    """
    # Decode as UTF-8 explicitly so the result does not depend on the
    # platform's default encoding.
    with open(file, 'r', encoding='utf-8') as f:
        data = json.load(f)
    try:
        return data[country]
    except KeyError:
        raise KeyError(
            f"No language configured for country {country!r} in {file}"
        ) from None

In [4]:
def get_examples(file):
    """Return the entire text content of ``file``.

    Args:
        file: Path to the text file to read.

    Returns:
        The file content as a single string.
    """
    # Decode as UTF-8 explicitly so accented characters are read the same way
    # on every platform instead of depending on the locale's default encoding.
    with open(file, 'r', encoding='utf-8') as f:
        return f.read()

In [5]:
def llm(text, audio, country, number):
    """Ask the ``mistral-nemo`` model (via ``OllamaLLM``) for useful travel phrases.

    Args:
        text: Typed task description; forwarded to ``match_task``.
        audio: Recorded audio; forwarded to ``match_task``.
        country: Destination country. Used in the prompt and as the lookup key
            in ``utils/country_to_language.json``.
        number: How many phrases to request.

    Returns:
        The result of ``OllamaLLM.invoke`` for the assembled prompt.

    Raises:
        gr.Error: If no country is chosen, fewer than one phrase is requested,
            or no usable task description was supplied.
    """
    # Validate the cheap inputs before doing any transcription or model work.
    if not country:
        raise gr.Error("Please choose a country.")
    count = int(number)  # normalise in case the UI delivers e.g. 5.0
    if count < 1:
        raise gr.Error("Please request at least one phrase.")

    task = match_task(text, audio)
    # Guard against an exception object being handed back as the "task";
    # interpolating it into the prompt would produce a nonsensical query.
    if isinstance(task, Exception):
        raise gr.Error(str(task))
    task = str(task).strip()
    if not task:
        raise gr.Error("No input provided.")

    model = OllamaLLM(model="mistral-nemo")
    lang = get_language(country, file="utils/country_to_language.json")
    few_shot = get_examples(file="utils/fewshot_learning.txt")

    context = f"You are a helpful assistant. You give an enumerated list of phrases. You answer concisely and only in {lang}."
    icl = f"For example, {few_shot}"
    query = f"I'm travelling to {country}. Which {count} most popular phrases should I learn to {task}?"

    # Separate the sections explicitly; plain concatenation glued the end of
    # one sentence to the start of the next.
    prompt = "\n\n".join((context, icl, query))
    return model.invoke(prompt)

In [6]:
def match_task(text, audio):
    """Choose the task description from the typed text or, failing that, the recording.

    Typed text wins when both inputs are present. Text that is empty or only
    whitespace counts as missing, so a recording is still used in that case.

    Args:
        text: Typed task description (may be ``None`` or empty).
        audio: Audio input handed to ``transcribe_audio`` when no text is
            available (may be ``None``).

    Returns:
        ``text`` unchanged when it has content, otherwise the result of
        ``transcribe_audio(audio)``.

    Raises:
        ValueError: If neither input is provided. Earlier this function
            *returned* an exception object, which callers then treated as the
            task string.
    """
    if text and text.strip():
        return text
    if audio:
        return transcribe_audio(audio)
    raise ValueError("No input provided.")

In [7]:
# Whisper models already loaded in this process, keyed by model name.
_WHISPER_MODEL_CACHE = {}


def _get_whisper_model(name="base"):
    """Return the Whisper model ``name``, loading it only on first use."""
    if name not in _WHISPER_MODEL_CACHE:
        _WHISPER_MODEL_CACHE[name] = whisper.load_model(name)
    return _WHISPER_MODEL_CACHE[name]


def transcribe_audio(audio_file):
    """Transcribe an audio file to text with the Whisper ``base`` model.

    Args:
        audio_file: Audio input passed to ``whisper.load_audio``
            (presumably a file path, given how the UI supplies it — verify
            against the caller).

    Returns:
        The ``'text'`` entry of the transcription result.
    """
    # Reuse the loaded model across calls instead of reloading the checkpoint
    # for every request.
    model = _get_whisper_model("base")
    audio = whisper.load_audio(audio_file, sr=16000)
    audio_tensor = torch.from_numpy(audio).to(torch.float32)
    # fp16=False requests full-precision inference.
    result = model.transcribe(audio_tensor, fp16=False)['text']
    return result

In [None]:
# Build the Gradio interface and launch the application.
# (`demo` is bound by the `with` statement below; no separate gr.Blocks() is needed.)
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# Assistant for Travelers")
    gr.Markdown("### What do you want to do: order food in the restaurant, ask for direction, buy tickets?")
    gr.Markdown("### Record audio or enter text.")

    # Input row: typed text on the left, microphone recording on the right.
    with gr.Row():
        with gr.Column():
            text = gr.Textbox(label="Enter text", placeholder="order food, ask for directions, etc.")
        with gr.Column():
            audio = gr.Audio(sources=["microphone"], label="Record your voice", type="filepath", max_length=20)

    # Options row: destination country and number of phrases.
    with gr.Row():
        country = gr.Radio(["France", "Germany", "Italy", "Spain"], label="Location", info="Where are you travelling?")
        # Start at 1: requesting zero phrases is not a meaningful query.
        num = gr.Slider(1, 10, value=5, step=1, info="How many phrases?", label="Number of phrases")

    # Output row: the model's response.
    with gr.Row():
        out = gr.Textbox(label="Response")

    # Action row: generate and clear buttons.
    with gr.Row():
        with gr.Column():
            response = gr.Button("Generate response", variant="primary")
        with gr.Column():
            clear = gr.ClearButton([text, audio, country, out])

    # Wire the primary button to the phrase generator.
    response.click(fn=llm, inputs=[text, audio, country, num], outputs=out)

demo.launch(share=False, debug=True)

* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


  checkpoint = torch.load(fp, map_location=device)
