In [2]:
from langchain_community.document_loaders import TextLoader, UnstructuredXMLLoader
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_community.llms import Ollama
from langchain.llms import OpenAI
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain, RetrievalQA
import multiprocessing

from tqdm import tqdm
import streamlit as st
import os
from dotenv import load_dotenv

# Load key=value pairs from a dotenv file into os.environ so the API keys read
# below via os.getenv do not have to be hard-coded in the notebook.
load_dotenv()

True

In [3]:
def configure_api_environment(env=None):
    """Check the expected API credentials and switch on LangSmith tracing.

    The keys are expected to already be present in the environment (for
    example via ``load_dotenv()``), so they are only read, never written back.
    Assigning ``os.getenv(key)`` to ``os.environ[key]`` raises a ``TypeError``
    when the key is absent, because ``os.environ`` only accepts strings.

    Args:
        env: Mutable mapping to inspect and update. Defaults to ``os.environ``.

    Returns:
        list[str]: Names of the expected keys that are missing or empty, in the
        order ``OPENAI_API_KEY``, ``LANGCHAIN_API_KEY``. Empty when all are set.
    """
    if env is None:
        env = os.environ

    expected_keys = ("OPENAI_API_KEY", "LANGCHAIN_API_KEY")
    missing = [name for name in expected_keys if not env.get(name)]

    # Tracing needs the LangSmith key; enabling it without one is pointless.
    if "LANGCHAIN_API_KEY" not in missing:
        env["LANGCHAIN_TRACING_V2"] = 'true'

    return missing


missing_api_keys = configure_api_environment()
if missing_api_keys:
    # Report the problem here with a readable message rather than crashing
    # with a TypeError; cells that need the key will still fail later on.
    print(f"WARNING: environment variables not set: {', '.join(missing_api_keys)}")

In [4]:
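# One-time index build, kept commented out for reference (presumably so that
# re-running the notebook does not re-embed the corpus). Note that only the
# first 1000 chunks are embedded, and the XML path below is an absolute,
# machine-specific path that must be adjusted before this cell is re-enabled.
# xmlLoader = UnstructuredXMLLoader('/Users/harshmodi/Harsh/Academic/NLP/majorProject/dataPrep/enwikivoyage-latest-pages-articles-multistream.xml')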
# xmlText = xmlLoader.load()
# text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
# docs = text_splitter.split_documents(xmlText)

# persistDir = "./chromaDB/"
# vectorDB = Chroma.from_documents(docs[0:1000], OpenAIEmbeddings(), persist_directory = persistDir)
# vectorDB.persist()
# vectorDB = None

In [5]:
# Open the Chroma store persisted under ./chromaDB, using OpenAI embeddings
# as the embedding function for queries against it.
vectorDB = Chroma(
    embedding_function=OpenAIEmbeddings(),
    persist_directory="./chromaDB",
)

In [6]:
# Wrap the vector store in a retriever (default settings) for use by the QA chain.
retriever = vectorDB.as_retriever()

In [8]:
# Answer generation uses the Ollama "gemma:2b" model, while the embeddings for
# the vector store come from OpenAI.
# NOTE(review): presumably requires a running Ollama server with this model pulled — confirm.
llm = Ollama(model="gemma:2b")

In [14]:
# Retrieval-augmented QA pipeline: the retriever supplies passages from the
# vector store and the "stuff" chain type passes them to the LLM together.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    return_source_documents=True,  # keep the retrieved docs so sources can be cited
)

## Cite sources
def process_llm_response(llm_response):
    """Print the chain's answer followed by the source of each retrieved document.

    Args:
        llm_response: Mapping with a ``'result'`` entry (the answer) and,
            optionally, a ``'source_documents'`` entry holding objects whose
            ``metadata`` dict may carry a ``'source'`` value.

    Returns:
        str: The exact text that was printed, without the trailing newline, so
        the caller can keep or reuse the formatted answer.

    Raises:
        KeyError: If ``llm_response`` has no ``'result'`` entry.
    """
    answer = str(llm_response['result'])

    # Documents without a 'source' entry get a placeholder, so one incomplete
    # document does not abort the whole listing with a KeyError.
    sources = [
        str((doc.metadata or {}).get('source', '<unknown source>'))
        for doc in (llm_response.get('source_documents') or [])
    ]

    # Same layout as separate print() calls: answer, two blank lines,
    # the "Sources:" header, then one source per line.
    report = "\n".join([answer, '\n\nSources:', *sources])
    print(report)
    return report


# full example
# Trip parameters are separate variables so the prompt can be reused for
# another destination or trip style without editing the prompt text.
travel_location = "Miami"
trip_style = "Adventure"

# Adjacent string literals are joined with nothing in between, so every
# fragment except the last ends with an explicit space; without it the
# sentences run together (e.g. "Miami.Additional Context and Goals:Upon ...").
query = (
    "As the travel expert, imagine a user is preparing for a trip and asks you about a specific location and timing. "
    "Your task is to provide them with essential information, weather advice, and local recommendations for that location. "
    "The result should be in a structured format including 'Essentials, precautions, place to visit, recommended hotel and recommended restaurant'. "
    f"The Travel location is {travel_location}. "
    "Additional Context and Goals: "
    f"Upon subsequent user queries, ensure to include personal special needs of having a {trip_style} trip."
)

# Run the retrieval QA chain, then print the answer and its sources.
llm_response = qa_chain.invoke(input=query)
final_response = process_llm_response(llm_response)

## Miami Travel Guide

**Essentials:**

* **Location:** Miami, Florida
* **Time of year:** February (mild weather)
* **Season:** High season (March-May)
* **Price range:** $50-$200 per night
* **Accommodation:** The Betsy Hotel (luxury), The Plymouth Miami (mid-range), Freehand Miami Beach (budget)

**Precautions:**

* Miami is hot and humid. Stay hydrated and wear sunscreen.
* The water quality is not ideal for swimming or snorkeling.
* Be aware of the high number of street vendors and avoid walking alone at night.

**Things to do:**

* **Beach days:** Relax on the pristine beaches of South Beach, Ocean Drive, and Coconut Grove.
* **Art and culture:** Explore the Wynwood Walls, Vizcaya Museum & Gardens, and the Miami Design District.
* **Nightlife:** Catch a vibrant performance at The Box Theatre or enjoy live music at a rooftop bar.
* **Adventure activities:** Explore the Everglades by airboat, kayak, or take a nature hike.
* **Culture:** Visit the Perez Art Museum Miami and the Fros

In [30]:
# Sanity-check the vector store directly, without going through the LLM.
query = "Where is Den Bosch?"
results = vectorDB.similarity_search(query)

# The search can return no hits (for example if the persisted index is empty);
# show a message in that case instead of raising IndexError on results[0].
results[0].page_content if results else "No matching documents found in the vector store."

"''''s-Hertogenbosch''', commonly known as '''Den Bosch''', is a city in the south of the [[Netherlands]] and the capital of the province of [[North Brabant]]. Once a stronghold, vital in the protection of the young Dutch nation, Den Bosch has a charming and well-preserved medieval centre. Wander through the winding streets to see Saint John's Cathedral and then pick out a street terrace on the market square to relax with a chilling beer. Take a boat to see part of the unique Binnendieze, a"

In [31]:
# Bare expression: displays the repr of the Chroma vector store object.
vectorDB

<langchain_community.vectorstores.chroma.Chroma at 0x412735400>

In [46]:
# Travel-advice prompt with two placeholders: {context} sits inside the
# <context> tags and {input} carries the location being asked about.
# NOTE(review): these placeholder names look intended for a retrieval chain
# that fills in both — confirm against the cell that consumes `prompt`.
TRAVEL_PROMPT_TEMPLATE = """
    As the travel expert, imagine a user is preparing for a trip and asks you about a 
    specific location and timing.
    Your task is to provide them with essential information, 
    weather advice, and local recommendations for that location.
    The context for the location is this:
    <context>
    {context}
    </context>
    Here is the location - {input}"""

prompt = ChatPromptTemplate.from_template(TRAVEL_PROMPT_TEMPLATE)