# Chatbot with different vector store options (AstraDB and Neo4j)

## Install libraries

In [2]:
!pip install langchain openai tiktoken cassio neo4j

Collecting langchain
  Using cached langchain-0.0.295-py3-none-any.whl (1.7 MB)
Collecting openai
  Using cached openai-0.28.0-py3-none-any.whl (76 kB)
Collecting tiktoken
  Using cached tiktoken-0.5.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.0 MB)
Collecting cassio
  Downloading cassio-0.1.1-py3-none-any.whl (27 kB)
Collecting neo4j
  Downloading neo4j-5.12.0.tar.gz (190 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m190.9/190.9 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting dataclasses-json<0.6.0,>=0.5.7 (from langchain)
  Downloading dataclasses_json-0.5.14-py3-none-any.whl (26 kB)
Collecting langsmith<0.1.0,>=0.0.38 (from langchain)
  Downloading langsmith-0.0.38-py3-none-any.whl (38 kB)
Collecting cassa

## Setup OpenAI

In [4]:
import openai
import getpass
import os

os.environ['OPENAI_API_KEY']  = getpass.getpass('Your secret for LLM provider OpenAI: ')

Your secret for LLM provider OpenAI: ··········


## Setup OpenAI model

In [5]:
ft_model = "gpt-3.5-turbo-0613"

## Upload a text file that contains context

In [34]:
from google.colab import files

print('Please upload a txt file that contains context for the chatbot')
uploaded = files.upload()
if uploaded:
    contextFileTitle = list(uploaded.keys())[0]
    SAMPLEDATA_PATH = os.path.join(os.getcwd(), contextFileTitle)
else:
    raise ValueError(
        'Cannot proceed without a context txt file. Please re-run the cell.'
    )

Please upload a txt file that contains context for the chatbot


Saving usa.txt to usa (1).txt


## Astra DB connection

In [29]:
from cassandra.cluster import Cluster
from cassandra.auth import PlainTextAuthProvider

In [30]:
#upload secure connect bundle
print('Please upload your Secure Connect Bundle')
uploaded = files.upload()
if uploaded:
    astraBundleFileTitle = list(uploaded.keys())[0]
    SECURE_CONNECT_BUNDLE_PATH = os.path.join(os.getcwd(), astraBundleFileTitle)
else:
    raise ValueError(
        'Cannot proceed without Secure Connect Bundle. Please re-run the cell.'
    )

Please upload your Secure Connect Bundle


Saving secure-connect-vector-search-db.zip to secure-connect-vector-search-db (1).zip


In [31]:
ASTRA_DB_TOKEN_BASED_USERNAME = getpass.getpass('What Astra DB token username do you want to use? ')

What Astra DB token username do you want to use? ··········


In [32]:
ASTRA_DB_TOKEN_BASED_PASSWORD = getpass.getpass('What Astra DB token password do you want to use? ')

What Astra DB token password do you want to use? ··········


In [39]:
ASTRA_DB_KEYSPACE = input(f'What Astra DB keyspace do you want to use?')

What Astra DB keyspace do you want to use?finetuning


In [33]:
cloud_config = {
   'secure_connect_bundle': SECURE_CONNECT_BUNDLE_PATH
}
auth_provider = PlainTextAuthProvider(ASTRA_DB_TOKEN_BASED_USERNAME, ASTRA_DB_TOKEN_BASED_PASSWORD)
cluster = Cluster(cloud=cloud_config, auth_provider=auth_provider)
session = cluster.connect()

ERROR:cassandra.connection:Closing connection <AsyncoreConnection(135802358576416) 124d3bb1-d384-430d-9af5-cb6ee656fe28-us-east1.db.astra.datastax.com:29042:b76400fb-5d89-4041-8fac-032b8afcdffd> due to protocol error: Error from server: code=000a [Protocol error] message="Beta version of the protocol used (5/v5-beta), but USE_BETA flag is unset"


## Reading and chunking the provided context file

In [8]:
#Import the needed libraries and declare the LLM model
import langchain
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_loaders import TextLoader

# read the documents into a list called docs
from langchain.document_loaders import TextLoader
loader = TextLoader(SAMPLEDATA_PATH)
documents = loader.load()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(documents)

print(f'Docs created, it has {len(docs)} elements')




Docs created, it has 42 elements


## Configure the embedding function

In [12]:
from langchain.embeddings import OpenAIEmbeddings

# creation of the LLM resources
embedding_function = OpenAIEmbeddings()

## Store and index the context with Astra DB

In [45]:
from langchain.vectorstores import Cassandra

ASTRA_DB_TABLE_NAME = 'vdocuments'

astra_vector = Cassandra.from_documents(
    documents=docs,
    embedding=embedding_function,
    session=session,
    keyspace=ASTRA_DB_KEYSPACE,
    table_name=ASTRA_DB_TABLE_NAME,
)

## Store and index the context with Neo4j

In [13]:
NEO4J_URL = input(f'What is your Neo4j URL (neo4j+s://.databases.neo4j.io)?')
NEO4J_PASSWORD = getpass.getpass('What Neo4j password do you want to use? ')

What is your Neo4j URL (neo4j+s://.databases.neo4j.io)?neo4j+s://27828b0d.databases.neo4j.io
What Neo4j password do you want to use? ··········


In [15]:
from langchain.vectorstores.neo4j_vector import Neo4jVector

# Neo4j Aura credentials
url=NEO4J_URL
username="neo4j"
password=NEO4J_PASSWORD

# Instantiate Neo4j vector from documents
neo4j_vector = Neo4jVector.from_documents(
    documents=docs,
    embedding=embedding_function,
    url=url,
    username=username,
    password=password
)

## Similarity search with Astra DB and Neo4J

In [43]:
# similarity search:
prompt = "Which documents do Germans require to enter the USA?"

# matched_docs is a list with the found documents from the similarity search
matched_docs_astra = astra_vector.similarity_search(prompt, k=2)
matched_docs_neo4j = neo4j_vector.similarity_search(prompt, k=2)
# for each of the found documents, print the content
print(f"\n## Astra DB similarity search")
for i, d in enumerate(matched_docs_astra):
    print(f"\n## Document {i}\n")
    print(d.page_content)
print(f"\n\n## Neo4J similarity search")
for i, d in enumerate(matched_docs_neo4j):
    print(f"\n## Document {i}\n")
    print(d.page_content)


## Astra DB similarity search

## Document 0

Bitte erkundigen Sie sich ggf. vor Reiseantritt bei Ihrer Fluggesellschaft.
Führen Sie Ihren Reisepass bzw. eine Kopie Ihres Reisepasses mit Einreisestempel oder Visum ständig mit sich. In einigen Staaten (z.B. Louisiana) ist dies sogar Pflicht.
Bewahren Sie eine Kopie Ihrer Reisedokumente separat an einem sicheren Ort auf.
Visum
Als Teilnehmer am U.S.-Visa Waiver Programm können deutsche Staatsangehörige zu Zwecken des Tourismus, für Geschäftsreisen oder im Transit visafrei in die USA einreisen, sofern sie über einen elektronischen Reisepass (e-Pass mit Chip), eine gültige elektronische Einreisegenehmigung (ESTA) sowie ein gültiges Rück- oder Weiterflugticket verfügen. Es gibt folgende Ausnahmen von der Teilnahme am U.S.-Visa Waiver-Programm sowie Ausnahmen bei Reisenden mit Bezug zu Kuba, s. Reisen nach Kuba.

## Document 1

Bitte erkundigen Sie sich ggf. vor Reiseantritt bei Ihrer Fluggesellschaft.
Führen Sie Ihren Reisepass bzw. eine K

## Chatbot with Astra DB and Neo4J

In [23]:
from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory

In [24]:
def remove_before_colon(input_string):
    parts = input_string.split(":", 1)
    if len(parts) > 1:
        return parts[1].lstrip()
    else:
        return input_string

In [46]:
from langchain.prompts import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
)

systemTemplate = """
You are a chatbot supporting users with questions around traveling to foreign countries.

context:
{context}

chat history:
{chat_history}
"""
systemMessagePrompt = SystemMessagePromptTemplate.from_template(systemTemplate)
humanTemplate = "<tag>{question}<tag>"
humanMessagePrompt = HumanMessagePromptTemplate.from_template(humanTemplate)

cassChatPrompt = ChatPromptTemplate.from_messages(
    [systemMessagePrompt, humanMessagePrompt]
)

memoryAstra = ConversationBufferMemory(
    memory_key='chat_history',
    return_messages=True,
    output_key='answer'
)

memoryNeo4j = ConversationBufferMemory(
    memory_key='chat_history',
    return_messages=True,
    output_key='answer'
)

qaAstra = ConversationalRetrievalChain.from_llm(ChatOpenAI(temperature=0.8, model_name=ft_model), astra_vector.as_retriever(), memory=memoryAstra, return_source_documents=True, combine_docs_chain_kwargs={"prompt": cassChatPrompt})
qaNeo4j = ConversationalRetrievalChain.from_llm(ChatOpenAI(temperature=0.8, model_name=ft_model), neo4j_vector.as_retriever(), memory=memoryNeo4j, return_source_documents=True, combine_docs_chain_kwargs={"prompt": cassChatPrompt})

print(f"## Astra chat")
print("query='Which documents do Germans require to enter the USA?'")
query='Which documents do Germans require to enter the USA?'
result = qaAstra({"question": query})
print("Answer:")
print(remove_before_colon(result["answer"]))

print(f"\n\n## Neo4j chat")
print("query='Which documents do Germans require to enter the USA?'")
query='Which documents do Germans require to enter the USA?'
result = qaNeo4j({"question": query})
print("Answer:")
print(remove_before_colon(result["answer"]))



## Astra chat
query='Which documents do Germans require to enter the USA?'
Answer:
1. A valid passport: German citizens must have a valid passport to travel to the USA.
2. Electronic Passport (e-Pass): German citizens must have a passport with an electronic chip (e-Pass).
3. Electronic System for Travel Authorization (ESTA): German citizens traveling under the Visa Waiver Program (VWP) must obtain a valid ESTA before their trip. ESTA is an online system that determines eligibility to travel to the USA.
4. Return or onward ticket: German citizens must have a valid return or onward ticket to show proof of their planned departure from the USA.

Please note that there are exceptions to the Visa Waiver Program and additional requirements for travelers with connections to Cuba. It is advisable to check with your airline or the US embassy before your trip.


## Neo4j chat
query='Which documents do Germans require to enter the USA?'
Answer:
1. Valid passport: German citizens must have a valid 