## ChromaDB Vector Database Deployment on AWS
###### Create the vector database and deploy it on AWS

In [1]:
from chromadb.utils import embedding_functions

# Embedding function handed to Chroma when the collection is created.
# The LangChain wrapper further down is configured with the same model name.
sentence_transformer_model = "all-MiniLM-L6-v2"
chromadb_embeddings = embedding_functions.SentenceTransformerEmbeddingFunction(
    model_name=sentence_transformer_model
)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Set up LangChain's embedding model
from langchain.embeddings import SentenceTransformerEmbeddings

# LangChain-side embedder; uses the same model name as `chromadb_embeddings`.
langchain_embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

In [8]:
# Set up instance on AWS for ChromaDB
from chromadb.config import Settings
import chromadb


# HTTP client for the remote Chroma server (host and port are hard-coded here).
client = chromadb.HttpClient(
    host="15.206.208.236",
    port="8000",
)

In [13]:
# Settings object describing the same remote server as `client`;
# it is passed to the LangChain Chroma wrapper alongside the client.
client_settings = Settings(
    chroma_api_impl="rest",
    chroma_server_host="15.206.208.236",
    chroma_server_http_port="8000",
)

In [10]:
# Check if working: ping the Chroma server; getting a value back (shown below)
# means the remote instance answered.
client.heartbeat()

1689941189253264346

In [11]:
# Fetch the "mediData" collection, creating it only if it does not exist yet.
# `create_collection` raises once the collection is present on the server, which
# made this cell fail on every re-run / kernel restart against the same instance.
# NOTE: the ingestion cells further down append to whatever is already stored, so
# check `collection.count()` before re-running them against a populated collection.
collection = client.get_or_create_collection(
    name="mediData",
    embedding_function=chromadb_embeddings,
)

In [12]:
# Number of records currently stored in the collection.
collection.count()

0

In [14]:
from langchain.vectorstores.chroma import Chroma
# LangChain vector-store wrapper over the existing "mediData" collection,
# reusing the HTTP client and settings created above.
vectordb = Chroma(
    client=client,
    client_settings=client_settings,
    collection_name="mediData",
    embedding_function=langchain_embeddings,
)

## Create the vector database

##### Load the dataframe

In [16]:
import pandas as pd
# Load the source table. A forward slash keeps the relative path valid on every OS;
# the previous 'data\m...' literal relied on an invalid escape sequence and only
# resolved on Windows.
# NOTE(review): retrieved text later in this notebook shows mojibake
# ("youâ\x80\x99re"), so the CSV content may have been mis-decoded upstream - verify.
medical_data = pd.read_csv('data/medicalInformation.csv')

In [17]:
# Remove the first column in place; it is selected by position, not by name.
# Not idempotent: running this cell again removes the next column as well.
leading_column = medical_data.columns[0]
medical_data.drop(columns=leading_column, inplace=True)

In [18]:
# Shift every row label up by one, so the default 0-based index from read_csv
# becomes 1-based. Not idempotent: each re-run shifts the labels again.
medical_data.index = medical_data.index + 1

In [19]:
from langchain.schema import Document

# Function to convert one row into a Document
def create_docs(documents, row):
    """Build a ``Document`` from ``row`` and append it to ``documents``.

    Args:
        documents: List that is mutated in place; the new document is appended.
        row: Mapping-like object providing the keys ``'text'``, ``'source'``,
            ``'url'`` and ``'disease'``.

    Returns:
        None. The result is delivered through ``documents``.
    """
    body_text = row['text']
    # Copy the descriptive fields over unchanged, in a fixed order.
    row_metadata = {key: row[key] for key in ('source', 'url', 'disease')}
    # Every document gets the same hard-coded rating.
    row_metadata['rating'] = 5
    documents.append(Document(page_content=body_text, metadata=row_metadata))

In [20]:
# Convert every dataframe row into a Document, walking the index in order.
documents = []
for row_label in medical_data.index:
    create_docs(documents, medical_data.loc[row_label])

In [22]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter used to cut the documents into chunks.
splitter_options = dict(chunk_size=1000, chunk_overlap=20)
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

In [23]:
# Split the row-level documents into chunks with the splitter configured above.
docs = text_splitter.split_documents(documents)

In [24]:
# How many chunks the splitter produced.
len(docs)

142

##### Add documents to the vector DB

In [25]:
# To add data in vectordb
def add_document_to_vectordb(collection, doc, doc_id=None):
    """Add a single document to a Chroma collection.

    Args:
        collection: Object exposing ``count()`` and
            ``add(ids=..., documents=..., metadatas=...)``.
        doc: Object with ``page_content`` and ``metadata`` attributes.
        doc_id: Optional explicit ID for the record. When omitted, the ID is
            ``collection.count() + 1``, as before.

    Returns:
        None. The record is written through ``collection.add``.
    """
    if doc_id is None:
        # Count-based IDs stay unique only while nothing has been deleted from the
        # collection, and cost one extra server call per document; pass `doc_id`
        # to avoid both.
        doc_id = collection.count() + 1
    collection.add(
        ids=str(doc_id),
        documents=doc.page_content,
        metadatas=doc.metadata
    )

In [26]:
# Ping the server again before writing to it.
client.heartbeat()

1689942030953362503

In [30]:
# Record count before ingestion.
collection.count()

0

In [32]:
# Push every chunk to the remote collection, one helper call per chunk.
for chunk in docs:
    add_document_to_vectordb(collection, chunk)

In [33]:
# Record count after ingestion; it should match the number of chunks added.
collection.count()

142

In [34]:
# Show the settings object held by the LangChain wrapper (read from a private attribute).
print(vectordb._client_settings)

environment='' chroma_db_impl=None chroma_api_impl='rest' chroma_telemetry_impl='chromadb.telemetry.posthog.Posthog' chroma_sysdb_impl='chromadb.db.impl.sqlite.SqliteDB' chroma_producer_impl='chromadb.db.impl.sqlite.SqliteDB' chroma_consumer_impl='chromadb.db.impl.sqlite.SqliteDB' chroma_segment_manager_impl='chromadb.segment.impl.manager.local.LocalSegmentManager' tenant_id='default' topic_namespace='default' is_persistent=False persist_directory='./chroma' chroma_server_host='15.206.208.236' chroma_server_headers=None chroma_server_http_port='8000' chroma_server_ssl_enabled=False chroma_server_grpc_port=None chroma_server_cors_allow_origins=[] anonymized_telemetry=True allow_reset=False migrations='apply'


In [35]:
# Smoke-test retrieval with a sample question.
# NOTE: this rebinds `docs`, which previously held the chunks that were ingested.
sample_question = 'I think I have Covid. What should I do?'
docs = vectordb.similarity_search(sample_question)

In [36]:
# Text of the first search result.
docs[0].page_content



#### Use OpenAI to check the retrieval

In [37]:
# Set up OpenAI
from langchain.llms import OpenAI
from dotenv import load_dotenv, find_dotenv
# Locate a local .env file and load it.
dotenv_path = find_dotenv()
_ = load_dotenv(dotenv_path)

# No arguments are passed here; presumably the client picks up its API key from
# the environment loaded above - confirm.
llm = OpenAI()

In [38]:
# Retriever with default search settings.
# NOTE(review): `retriever` is not referenced again in the visible cells - the chain
# below builds its own retriever from `search_kwargs`.
retriever = vectordb.as_retriever()

In [39]:
# Retrieval options: ask for a single result (k=1) and require both metadata
# conditions - source equal to 'webmd' and rating greater than 4.
metadata_filter = {
    "$and": [
        {"source": 'webmd'},
        {"rating": {'$gt': 4}},
    ]
}
search_kwargs = {'k': 1, 'filter': metadata_filter}

In [40]:
from langchain.chains import ConversationalRetrievalChain

# Conversational QA chain over the vector store; the retriever applies
# `search_kwargs`, and the source documents are returned with each answer.
filtered_retriever = vectordb.as_retriever(search_kwargs=search_kwargs)
qa = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=filtered_retriever,
    return_source_documents=True,
)

In [41]:
# Ask one question with an empty conversation history.
chat_history = []
query = "“I have severe headaches, cold and cough. What should I do?”"
chain_inputs = {"question": query, "chat_history": chat_history}
result = qa(chain_inputs)

In [42]:
# The generated answer text.
result['answer']

' It is likely that you have a cold, and the best things to do are to rest at home, drink plenty of fluids, and avoid contact with other people. If you are at higher risk of complications, such as if you are very sick or have other medical conditions, it is recommended that you talk to your doctor about taking antiviral medications.'

In [43]:
# The documents the chain returned as sources for the answer.
result['source_documents']

[Document(page_content='more likely to have this with a cold than with the flu.Cough and chest discomfort. This is common with both but tends to be severe with the flu.Stuffy nose. This sometimes happens with the flu but is common with a cold.Sore throat. This can be common with the flu but is more likely with a cold.Headache. This is common when you have the flu but rare with a cold.How to Treat Flu SymptomsIn most cases, the best things to do when you have the flu are:Rest at home.Drink plenty of fluids.Avoid contact with other people.Your doctor might give you prescription medications called antiviral drugs if youâ\x80\x99re at higher risk of complications, such as if you:Are very sick or have other medical conditionsAre younger than 2 or older than 65Have a weak immune systemAre pregnantAre a Native American or an Alaska NativeThese medicines -- baloxavir marboxil (Xofluza), oseltamivir (Tamiflu), peramivir (Rapivab), and zanamivir (Relenza) -- work best when you take them 48 hours

In [44]:
# Keep the first source document for the ID lookup further down.
target_document = result['source_documents'][0]

In [45]:
# Display the selected source document.
target_document


Document(page_content='more likely to have this with a cold than with the flu.Cough and chest discomfort. This is common with both but tends to be severe with the flu.Stuffy nose. This sometimes happens with the flu but is common with a cold.Sore throat. This can be common with the flu but is more likely with a cold.Headache. This is common when you have the flu but rare with a cold.How to Treat Flu SymptomsIn most cases, the best things to do when you have the flu are:Rest at home.Drink plenty of fluids.Avoid contact with other people.Your doctor might give you prescription medications called antiviral drugs if youâ\x80\x99re at higher risk of complications, such as if you:Are very sick or have other medical conditionsAre younger than 2 or older than 65Have a weak immune systemAre pregnantAre a Native American or an Alaska NativeThese medicines -- baloxavir marboxil (Xofluza), oseltamivir (Tamiflu), peramivir (Rapivab), and zanamivir (Relenza) -- work best when you take them 48 hours 

In [46]:
# Query the collection with the chunk's own text and read the ID of the single
# closest match.
lookup = collection.query(
    query_texts=target_document.page_content,
    n_results=1,
)
lookup['ids'][0][0]

'33'