In [None]:
import os
import json
# from fastapi import FastAPI
# from pydantic import BaseModel
from dotenv import load_dotenv
from langchain_openai import OpenAIEmbeddings, OpenAI
# from langchain_openai import ChatOpenAI
# from langchain.prompts import PromptTemplate
# from langchain.memory import ConversationBufferMemory
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter, RecursiveJsonSplitter
# from langchain.schema import Document
from langchain_community.document_loaders import TextLoader, JSONLoader
from langchain_core.vectorstores import VectorStoreRetriever
from langchain.chains import RetrievalQA
from langchain.schema import Document


In [None]:
# Load the workshop catalogue. The JSON file is expected to hold a list of
# workshop objects, each carrying "title", "duration" and "max_participants".
# The encoding is given explicitly so the read does not depend on the
# platform's default locale.
with open('data/data.json', 'r', encoding='utf-8') as file:
    workshops_data = json.load(file)

# One Document per workshop: the full record, pretty-printed as JSON, becomes
# the searchable text, while a few fields are duplicated into the metadata.
workshop_documents = []

for workshop in workshops_data:
    content = json.dumps(workshop, indent=2)

    doc = Document(
        page_content=content,
        metadata={
            "title": workshop["title"],
            "duration": workshop["duration"],
            "max_participants": workshop["max_participants"]
        }
    )
    workshop_documents.append(doc)

# Display the resulting documents for a quick visual check.
workshop_documents

[Document(metadata={'title': 'AI Chatbot Workshop', 'duration': '2 hours', 'max_participants': 20}, page_content='{\n  "title": "AI Chatbot Workshop",\n  "duration": "2 hours",\n  "max_participants": 20,\n  "topics": [\n    "What are chatbots and their purposes",\n    "How to start creating a chatbot",\n    "Practical chatbot development"\n  ]\n}'),
 Document(metadata={'title': 'comp science Workshop', 'duration': '5 hours', 'max_participants': 10}, page_content='{\n  "title": "comp science Workshop",\n  "duration": "5 hours",\n  "max_participants": 10,\n  "topics": [\n    "What are comp science subjects and their purposes",\n    "How to start comp science"\n  ]\n}')]

In [68]:

# Pull settings from a local .env file, then fail fast when the
# OpenAI key is missing or empty.
load_dotenv()
open_api_key = os.environ.get("OPENAI_API_KEY")
if open_api_key is None or open_api_key == "":
    raise ValueError("OPENAI_API_KEY is not set")

In [3]:
# Read the raw text file as LangChain documents.
loader = TextLoader(file_path="data/data.txt", encoding="utf-8")
documents = loader.load()

# Splitter measured in plain characters: chunks of at most 100, no overlap.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=0, length_function=len)



In [None]:
# Break the loaded text into chunks. They get their own name so they stay
# distinct from the `docs` corpus that is assigned and indexed further down.
text_chunks = text_splitter.split_documents(documents)
text_chunks

[Document(metadata={'source': 'data/data.txt'}, page_content='Azercell is the only company in Azerbaijan and CIS region which has been awarded Platinum'),
 Document(metadata={'source': 'data/data.txt'}, page_content='Certificate of International “Investors in People” Standard. The mobile operator is the only'),
 Document(metadata={'source': 'data/data.txt'}, page_content='company in the country to receive Gold Award in nominations of "Company of the Year" and “The Most'),
 Document(metadata={'source': 'data/data.txt'}, page_content='Innovative Company of the Year” from the International Business Award  STEVIE.')]

In [82]:
# Use the workshop documents as the corpus for the vector index built below.
docs = workshop_documents

In [83]:
# Show the documents that are about to be indexed.
docs

[Document(metadata={'title': 'AI Chatbot Workshop', 'duration': '2 hours', 'max_participants': 20}, page_content='{\n  "title": "AI Chatbot Workshop",\n  "duration": "2 hours",\n  "max_participants": 20,\n  "topics": [\n    "What are chatbots and their purposes",\n    "How to start creating a chatbot",\n    "Practical chatbot development"\n  ]\n}'),
 Document(metadata={'title': 'comp science Workshop', 'duration': '5 hours', 'max_participants': 10}, page_content='{\n  "title": "comp science Workshop",\n  "duration": "5 hours",\n  "max_participants": 10,\n  "topics": [\n    "What are comp science subjects and their purposes",\n    "How to start comp science"\n  ]\n}')]

In [84]:
# Embedding model with default settings (no key or model is passed explicitly here).
embeddings = OpenAIEmbeddings()

# Embed every document in `docs` and build a FAISS index over the vectors.
faiss_search = FAISS.from_documents(documents=docs, embedding=embeddings)

In [88]:
# Plain similarity lookup against the index; no LLM is involved at this step.
query1 = "what is the duration of the AI Chatbot Workshop?"
query1_answer = faiss_search.similarity_search(query=query1)

In [89]:
# Show every document returned for query1.
query1_answer
# To look at only the first hit's text instead: query1_answer[0].page_content

[Document(id='fb37440d-bdb5-4006-82c9-611071ce788e', metadata={'title': 'AI Chatbot Workshop', 'duration': '2 hours', 'max_participants': 20}, page_content='{\n  "title": "AI Chatbot Workshop",\n  "duration": "2 hours",\n  "max_participants": 20,\n  "topics": [\n    "What are chatbots and their purposes",\n    "How to start creating a chatbot",\n    "Practical chatbot development"\n  ]\n}'),
 Document(id='b12415b5-b82a-42b2-a71e-4127b95ecf6e', metadata={'title': 'comp science Workshop', 'duration': '5 hours', 'max_participants': 10}, page_content='{\n  "title": "comp science Workshop",\n  "duration": "5 hours",\n  "max_participants": 10,\n  "topics": [\n    "What are comp science subjects and their purposes",\n    "How to start comp science"\n  ]\n}')]

In [90]:
# Same lookup as before, but each hit is paired with its score.
# NOTE(review): the score is presumably a distance (lower = closer) - confirm
# against the FAISS vector store documentation.
docs_and_scores = faiss_search.similarity_search_with_score(query=query1)
docs_and_scores

[(Document(id='fb37440d-bdb5-4006-82c9-611071ce788e', metadata={'title': 'AI Chatbot Workshop', 'duration': '2 hours', 'max_participants': 20}, page_content='{\n  "title": "AI Chatbot Workshop",\n  "duration": "2 hours",\n  "max_participants": 20,\n  "topics": [\n    "What are chatbots and their purposes",\n    "How to start creating a chatbot",\n    "Practical chatbot development"\n  ]\n}'),
  np.float32(0.2670169)),
 (Document(id='b12415b5-b82a-42b2-a71e-4127b95ecf6e', metadata={'title': 'comp science Workshop', 'duration': '5 hours', 'max_participants': 10}, page_content='{\n  "title": "comp science Workshop",\n  "duration": "5 hours",\n  "max_participants": 10,\n  "topics": [\n    "What are comp science subjects and their purposes",\n    "How to start comp science"\n  ]\n}'),
  np.float32(0.3904156))]

In [91]:
# Expose the FAISS index through the retriever interface.
retreiver = faiss_search.as_retriever()

# Question-answering chain: "stuff" chain type, a default-configured OpenAI
# LLM, and the retriever created above.
qa = RetrievalQA.from_chain_type(
    retriever=retreiver,
    chain_type="stuff",
    llm=OpenAI(),
)

In [92]:
# Ask the chain a question; the text is passed under the chain's "query" input key.
retreiver_query = "what is the duration of the compscience workshop?"
results = qa.invoke({"query": retreiver_query})

In [93]:
# Display what the chain returned for the question.
results

{'query': 'what is the duration of the compscience workshop?',
 'result': ' The duration of the comp science workshop is 5 hours.'}