In [1]:
from langchain_community.document_loaders import WikipediaLoader
from langchain_community.llms import Ollama
from langchain_core.prompts import ChatPromptTemplate
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import OllamaEmbeddings
from langchain_chroma import Chroma
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain

In [2]:
# Wikipedia article titles for the clubs this notebook answers questions about.
wiki_article_title_list = [
    "Chelsea F.C.",
    "Liverpool F.C.",
    "Manchester United F.C.",
    "Manchester City F.C.",
    "Tottenham Hotspur F.C.",
    "Arsenal F.C.",
]

# One Document per club, in the same order as wiki_article_title_list.
wiki_documents = []

for team in wiki_article_title_list:
    # Fetch at most one article per title, with the content capped at 12000 characters.
    loader = WikipediaLoader(query=team, load_max_docs=1, doc_content_chars_max=12000)
    data = loader.load()
    # load() can return an empty list (no search hit / page could not be fetched);
    # fail with a message that names the team instead of an opaque IndexError on data[0].
    if not data:
        raise ValueError(f"WikipediaLoader returned no article for {team!r}")
    # NOTE(review): `query` is presumably a search, so the first hit may not be the
    # exact article named above - verify the loaded titles if answers look off.
    wiki_documents.append(data[0])

In [3]:
# Break the loaded articles into overlapping chunks so each piece can be
# embedded and retrieved on its own (size 2000, overlap 200 between neighbours).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=2000,
    chunk_overlap=200,
)
wiki_document_chunks = text_splitter.split_documents(wiki_documents)

In [4]:
# Embed every chunk with the Ollama "llama3" model and index the vectors in Chroma.
# NOTE(review): no persist directory is passed, so the store is presumably in-memory;
# re-running this cell in the same kernel may add the chunks a second time - confirm.
embeddings = OllamaEmbeddings(model="llama3")
db = Chroma.from_documents(
    documents=wiki_document_chunks,
    embedding=embeddings,
)

In [5]:
# Prompt template for the question-answering step.
# `{input}` matches the "input" key passed to retrieval_chain.invoke(...).
# `{context}` is presumably filled with the retrieved chunks by the
# stuff-documents chain built from this prompt - TODO confirm the variable name.
prompt = ChatPromptTemplate.from_template("""
                    Please answer this question about football based on the context provided: 
                    <context>
                    {context}
                    </context>
                    Question: {input}""")

# Retrieval side: expose the Chroma store through the retriever interface.
retriever = db.as_retriever()

# Generation side: the Ollama "llama3" model, fed the prompt with the
# retrieved documents combined into it.
chat_model = Ollama(model="llama3")
combine_docs_chain = create_stuff_documents_chain(llm=chat_model, prompt=prompt)

# Full pipeline: retrieve documents for the input, then answer from them.
retrieval_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=combine_docs_chain,
)


In [6]:
# Fixed demo question so the notebook runs unattended. For interactive use,
# read it instead with:
#   input("Enter your question about the top Premier League teams: ")
question = "When was Manchester United formed and what was it called?"

# The chain expects the question under the "input" key.
llm_response = retrieval_chain.invoke(dict(input=question))

In [7]:
# Display only the generated answer text from the chain's response dict.
llm_response["answer"]

'According to the context, Manchester United Football Club was founded as Newton Heath LYR Football Club in 1878. It changed its name to Manchester United in 1902.'