In [None]:
# Module Installation

!pip install langchain
!pip install openai
!pip install weaviate-client
!pip install tiktoken
!pip install faiss-gpu
!pip install langchain_experimental

In [None]:
import os

from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import TextLoader
from langchain_community.document_loaders import JSONLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, HuggingFaceInstructEmbeddings
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
import tiktoken

### How to Get **`WEAVIATE_API_KEY`** and **`WEAVIATE_CLUSTER`** ❓❓

1. Go to the [Weaviate console](https://console.weaviate.cloud/) and create an account.
2. Open the [Weaviate Dashboard](https://console.weaviate.cloud/dashboard), click "Create Index", and create a new index.
3. After the index is created, its **Cluster URL** is shown. Click the copy button to copy it; this is your **WEAVIATE_CLUSTER**.
4. The same page shows **Enabled (Authentication)**. Click the *API keys* button and copy the key; this is your **WEAVIATE_API_KEY**.

<br>
<br>

<img alt="weaviate_DB Image" src="./img/weaviate_DB.png">

<br>
<br>

In [None]:
# Credentials are looked up in environment variables first, so real secrets do
# not have to be typed into (and saved with) the notebook. When a variable is
# not set, the original placeholder string is used as the fallback value.
# Go on "https://platform.openai.com/api-keys" and get Your OPENAI_API_KEY
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "YOUR_OPENAI_API_KEY")
WEAVIATE_API_KEY = os.environ.get("WEAVIATE_API_KEY", "YOUR_WEAVIATE_API_KEY")
WEAVIATE_CLUSTER = os.environ.get("WEAVIATE_CLUSTER", "YOUR_WEAVIATE_CLUSTER")

In [None]:
# Name of the chat model; passed as `model_name` when ChatOpenAI is constructed.
llm_model = 'gpt-3.5-turbo'

## 📑 Data Reading

### TXT Loader

In [None]:
# The syllabus file has a .json extension but is read here through TextLoader,
# using UTF-8 decoding.
txt_file_path = './Data/SyllabusData.json'
loader = TextLoader(txt_file_path, encoding="utf-8")
data = loader.load()

In [None]:
# Display what loader.load() returned so the raw input can be inspected.
data

## ✂️ Text Splitting

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split the loaded documents with chunk_size=1000 and chunk_overlap=20;
# the resulting chunks are what gets stored in the vector database later.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=20,
)
docs = text_splitter.split_documents(documents=data)

In [None]:
# Display the chunks returned by text_splitter.split_documents(data).
docs

In [None]:
# Number of chunks the splitter produced.
len(docs)

14

## 👨‍💻 Embedding Conversion

In [None]:
from langchain.embeddings.openai import OpenAIEmbeddings

# OpenAI embedding client authenticated with the key configured above.
# NOTE(review): `embeddings` is not referenced by any other cell visible in
# this notebook — confirm whether it is still needed.
embeddings = OpenAIEmbeddings(
    openai_api_key=OPENAI_API_KEY,
)

  warn_deprecated(


# 📊 Text to Weaviate DB

In [None]:
import weaviate
from langchain.vectorstores import Weaviate

# Connect to the Weaviate cluster using API-key authentication.
WEAVIATE_URL = WEAVIATE_CLUSTER
auth_config = weaviate.auth.AuthApiKey(api_key=WEAVIATE_API_KEY)

# The OpenAI key travels as an extra request header — presumably so the
# text2vec-openai vectorizer named in the schema can call OpenAI; confirm
# against the Weaviate module documentation.
client = weaviate.Client(
    url=WEAVIATE_URL,
    auth_client_secret=auth_config,
    additional_headers={"X-OpenAI-Api-key": OPENAI_API_KEY},
    startup_period=10,
)

In [None]:
# Check that the Weaviate client reports the cluster as ready before the
# schema is created.
client.is_ready()

True

In [None]:
# Define the input structure: one "Chatbot" class with a single "content" text
# property, vectorized through the text2vec-openai module.
schema = {
    "classes": [
        {
            "class": "Chatbot",
            "description": "Documents for chatbot",
            "vectorizer": "text2vec-openai",
            "moduleConfig": {"text2vec-openai": {"model": "ada", "type": "text"}},
            "properties": [
                {
                    "dataType": ["text"],
                    "description": "The content of the paragraph",
                    "moduleConfig": {
                        "text2vec-openai": {
                            "skip": False,
                            "vectorizePropertyName": False,
                        }
                    },
                    "name": "content",
                },
            ],
        },
    ]
}

# Remove only the classes this notebook is about to (re)create. The previous
# `client.schema.delete_all()` erased every class in the cluster, including
# ones unrelated to this notebook. Checking `exists` first keeps the cell
# re-runnable on a fresh cluster.
for class_obj in schema["classes"]:
    if client.schema.exists(class_obj["class"]):
        client.schema.delete_class(class_obj["class"])

client.schema.create(schema)

# Vector store over the "Chatbot" class: "content" is the text key and the
# "source" attribute is requested alongside each result.
vectorstore = Weaviate(client, "Chatbot", "content", attributes=["source"])

In [None]:
# Load text into the vectorstore.
# Texts and metadata are collected separately instead of unzipping a list of
# pairs: `texts, meta = list(zip(*pairs))` raises ValueError when `docs` is
# empty because there is nothing to unpack.
texts = [doc.page_content for doc in docs]
meta = [doc.metadata for doc in docs]

# Last expression of the cell: shows the value returned by add_texts.
vectorstore.add_texts(texts, meta)

['ffacdcce-b312-4b0a-9fa2-96ca8032b17d',
 '69c4cd39-bb0f-4494-88e9-a40a6fe425ee',
 '3f4a99a9-2e91-44c8-8f7c-55c9b1c6e61a',
 '3c124afa-df23-421b-bb4b-df327f208b10',
 '58e49ac4-8f20-4a63-9c12-99c43b428dcc',
 '2198ab0b-eee2-4ef3-9ae1-1f27767b91e3',
 'ed06db8f-c9c3-4f3c-b106-b8cfc23f9eca',
 '515e9b5e-09be-4933-80df-d49815443157',
 'efbb402b-3054-494a-b907-d4a3aedfc499',
 '4127b29d-5724-44fa-80c3-0bca1e74e215',
 'ebadf4a6-298c-40f3-98fa-3c1fe60ba501',
 '8ec4ca57-f04a-4875-ba58-7405e9b7a404',
 '2ab35189-0202-4c76-91a1-e94fac25b864',
 'b89f7d05-a2eb-4282-8b31-85f715fb6f2e']

In [None]:
import weaviate

# Reuse the cluster URL and API key from the configuration cell (these are
# plain Python variables at this point, not environment lookups).
URL = WEAVIATE_CLUSTER
APIKEY = WEAVIATE_API_KEY

# Connect to a WCS instance.
# NOTE: this rebinds the name `client`; the `vectorstore` built earlier was
# constructed with the previous client object.
wcs_credentials = weaviate.auth.AuthApiKey(APIKEY)
client = weaviate.connect_to_wcs(
    cluster_url=URL,
    auth_credentials=wcs_credentials,
)

# 🔗 Create conversation chain

In [None]:
# Chat model used to generate answers.
llm = ChatOpenAI(
    model_name=llm_model,
    temperature=0.7,
    openai_api_key=OPENAI_API_KEY,
)

# Conversation memory kept under the 'chat_history' key, returned as messages.
memory = ConversationBufferMemory(
    return_messages=True,
    memory_key='chat_history',
)

# Retrieval chain: the vector store supplies the retriever, and the memory
# object carries the dialogue between calls.
conversation_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=vectorstore.as_retriever(),
    memory=memory,
    chain_type="stuff",
)

  warn_deprecated(


In [None]:
# Ask one question through the conversation chain and show the answer text.
query = "what is a history ?"

# `.invoke(...)` replaces calling the chain object directly, which is
# deprecated and emits a deprecation warning.
result = conversation_chain.invoke({"question": query})
answer = result["answer"]
answer

  warn_deprecated(


'History is the study of past events, particularly in human societies, through the examination of sources such as artifacts, documents, and records. Historians and archaeologists explore and interpret these sources to understand and explain the development of civilizations, cultures, and societies over time. It helps us learn about our ancestors, their way of life, customs, traditions, and the changes that have occurred in human life.'