### Wikipedia article list (`wiki_list`)

In [None]:
# Wikipedia topics
# wiki_list = [
#     'Colorado_Rockies',
#     'List_of_Colorado_Rockies_seasons',
#     'Major_League_Baseball_uniforms',
#     'History_of_the_Colorado_Rockies',
#     'List_of_Major_League_Baseball_awards',
#     'Baseball_awards',
#     'List of Colorado Rockies team records',
#     'List of Colorado Rockies minor league affiliates',
#     'List of Colorado Rockies broadcasters',
#     '2024_Colorado_Rockies_season',
#     '2024_Major_League_Baseball_draft',
#     'List_of_Colorado_Rockies_owners_and_executives',
#     "Monfort_brothers",
#     'List_of_Colorado_Rockies_seasons',
    # 'Baseball',
    # 'Baseball_rules',
    # 'Major_League_Baseball',
    # 'Origins_of_baseball',
    # '2024_Major_League_Baseball_season',
    # 'List_of_World_Series_champions',
    # 'Baseball_positioning',
    # 'Baseball_positions',
    # 'Batting_(baseball)',
    # 'Base_running']
    # 'The_Official_Professional_Baseball_Rules_Book',
    # 'Inside_baseball_(strategy)',
    # 'Pitch_(baseball)',
    # 'Pitcher',
    # 'Starting_pitcher',
    # 'Win_probability',
    # 'Batting_order_(baseball)',
    # 'Bunt_(baseball)',
    # 'Double_switch_(baseball)',
    # 'Lefty-righty_switch',
    # 'Pickoff',
    # 'Power_hitter',
    # 'Power_pitcher',
    # 'Pull_hitter',
    # 'Left-handed_specialist#Right-handed_specialist',
    # 'Small_ball_(baseball)',
    # 'Intentional_balk',
    # 'Infield_shift',
    # 'The_Hidden_Game_of_Baseball',
    # 'Wins_Above_Replacement'
    # ]

In [None]:
# Wikipedia topics
# wiki_list = [
#     'Colorado_Rockies',
#     'List_of_Colorado_Rockies_seasons',
#     'History_of_the_Colorado_Rockies',
#     'List of Colorado Rockies team records',
#     '2024_Colorado_Rockies_season',
#     'List_of_Colorado_Rockies_owners_and_executives',
#     'List_of_Colorado_Rockies_seasons',
#     'Baseball',
#     'Baseball_rules',
#     'Major_League_Baseball',
#     '2024_Major_League_Baseball_season']

In [2]:
# Wikipedia topics that are loaded as the chatbot's reference material.
# NOTE(review): the team-records entry uses spaces while the others use
# underscores -- confirm the loader resolves both forms to the intended page.
wiki_list = [
    "Colorado_Rockies",
    "List of Colorado Rockies team records",
    "2024_Colorado_Rockies_season",
    "List_of_Colorado_Rockies_owners_and_executives",
    "List_of_Colorado_Rockies_seasons",
]

# Chatbot

In [3]:
# load dependencies
import gradio as gr
from langchain_openai import ChatOpenAI
from langchain_community.document_loaders import WikipediaLoader
from langchain.chains.question_answering import load_qa_chain
from langchain.docstore.document import Document
import speech_recognition as sr
import os
from dotenv import load_dotenv

In [None]:
# Run python-dotenv's loader before reading the environment below, so values
# it provides (presumably from a local .env file) are visible to os.environ.
load_dotenv()

# Chat model used for every question, and the API key read from the
# environment. OPENAI_API_KEY is None when the variable is not set.
OPENAI_MODEL = "gpt-4"
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")


def recognize_speech(audio):
    """Transcribe a recorded audio file to text via ``recognize_google``.

    Args:
        audio: The audio source handed to ``sr.AudioFile`` (the UI below
            supplies a file path). ``None`` or an empty value means that
            nothing was recorded.

    Returns:
        The transcript on success. On any failure, a human-readable message
        that begins with "Could not"; ``speech_to_ai_chat`` relies on that
        prefix to detect a failed transcription.
    """
    # Submitting without a recording yields no file; opening it would raise
    # instead of producing the "Could not ..." message the caller expects.
    if not audio:
        return "Could not process audio: no recording was provided"

    recognizer = sr.Recognizer()

    # Read the whole file into memory before sending it for recognition.
    with sr.AudioFile(audio) as source:
        audio_data = recognizer.record(source)

    try:
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        # The service responded but could not make out any speech.
        return "Could not understand audio"
    except sr.RequestError as e:
        # The recognition request itself failed.
        return f"Could not request results; {e}"



# Combined Wikipedia text, keyed by the tuple of topics it was built from.
# Keying on the topics means an edited wiki_list triggers a fresh download.
_WIKI_CONTEXT_CACHE = {}


def _load_wiki_context(topics):
    """Return the concatenated page text for ``topics``, downloading it at most once."""
    cache_key = tuple(topics)
    if cache_key not in _WIKI_CONTEXT_CACHE:
        documents = []
        for topic in topics:
            wiki_docs = WikipediaLoader(
                query=topic, load_max_docs=1, load_all_available_meta=True
            ).load()
            documents.extend(wiki_docs)
        # Only stored after every topic loaded, so a failed download is retried
        # on the next call rather than caching a partial context.
        _WIKI_CONTEXT_CACHE[cache_key] = "\n\n".join(
            doc.page_content for doc in documents
        )
    return _WIKI_CONTEXT_CACHE[cache_key]


def chat_with_ai(text):
    """Answer a question using the Wikipedia articles listed in ``wiki_list``.

    The article text is fetched on the first call and reused afterwards, so
    later questions do not repeat the Wikipedia downloads.

    Args:
        text: The user's question.

    Returns:
        The ``"output_text"`` entry of the question-answering chain's result.
    """
    llm = ChatOpenAI(openai_api_key=OPENAI_API_KEY, model_name=OPENAI_MODEL, temperature=0.5)

    # All articles are passed to the chain as one combined document.
    combined_doc = Document(page_content=_load_wiki_context(wiki_list))

    chain = load_qa_chain(llm)
    result = chain.invoke({"input_documents": [combined_doc], "question": text})
    return result["output_text"]



def speech_to_ai_chat(audio, history):
    """Transcribe a recording, ask the AI about it, and extend the history.

    Args:
        audio: Audio input forwarded to ``recognize_speech``.
        history: List of ``(question, answer)`` tuples from earlier turns.

    Returns:
        A ``(transcript, ai_response, history)`` tuple. When transcription
        fails, the transcript slot carries the error message, the response is
        an empty string, and the history is returned unchanged.
    """
    transcript = recognize_speech(audio)

    # recognize_speech reports failures as messages that begin with "Could not".
    recognition_ok = not transcript.startswith("Could not")
    if recognition_ok:
        answer = chat_with_ai(transcript)
        # Build a new list rather than mutating the caller's history in place.
        return transcript, answer, history + [(transcript, answer)]

    return transcript, "", history



# Gradio UI: a microphone recording goes in, the transcript and the AI answer
# come out. The State components carry the conversation history between calls.
interface_inputs = [
    gr.Audio(sources="microphone", type="filepath"),
    gr.State([]),  # conversation history, starts empty
]
interface_outputs = [
    gr.Textbox(label="Audio Conversion"),
    gr.Textbox(label="AI Response"),
    gr.State(),  # receives the updated conversation history
]

iface = gr.Interface(
    fn=speech_to_ai_chat,
    inputs=interface_inputs,
    outputs=interface_outputs,
    title="Speech-to-Text Baseball Coach AI",
    description="Speak into your microphone to ask questions about the Colorado Rockies!",
)

# share=True also requests a public share URL in addition to the local one.
iface.launch(share=True)


* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://d84bc8f32f35396efb.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




stuff: https://python.langchain.com/docs/versions/migrating_chains/stuff_docs_chain
map_reduce: https://python.langchain.com/docs/versions/migrating_chains/map_reduce_chain
refine: https://python.langchain.com/docs/versions/migrating_chains/refine_chain
map_rerank: https://python.langchain.com/docs/versions/migrating_chains/map_rerank_docs_chain

See also guides on retrieval and question-answering here: https://python.langchain.com/docs/how_to/#qa-with-rag
  chain = load_qa_chain(llm)
