## Imports

In [1]:
import os
from dotenv import load_dotenv
import openai
from langchain import OpenAI
from langchain.llms import AzureOpenAI
from langchain.retrievers import AzureCognitiveSearchRetriever
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.llms import AzureOpenAI
from langchain.chains import RetrievalQA

## Connection Strings

In [2]:
# Azure
load_dotenv()

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
print(OPENAI_API_KEY)

OPENAI_DEPLOYMENT_ENDPOINT = os.getenv("OPENAI_DEPLOYMENT_ENDPOINT")
OPENAI_DEPLOYMENT_NAME = os.getenv("OPENAI_DEPLOYMENT_NAME")
OPENAI_MODEL_NAME = os.getenv("OPENAI_MODEL_NAME")
OPENAI_API_VERSION = os.getenv("OPENAI_API_VERSION")
OPENAI_EMBEDDING_DEPLOYMENT_NAME = os.getenv("OPENAI_EMBEDDING_DEPLOYMENT_NAME")
OPENAI_EMBEDDING_MODEL_NAME = os.getenv("OPENAI_EMBEDDING_MODEL_NAME")
OPENAI_DEPLOYMENT_VERSION = os.getenv("OPENAI_DEPLOYMENT_VERSION")
OPENAI_EMBEDDING_VERSION = os.getenv("OPENAI_EMBEDDING_VERSION")
OPENAI_SIMILARITY_DEPLOYMENT_NAME = os.getenv("OPENAI_SIMILARITY_DEPLOYMENT_NAME")

# cognitive service
vector_store_address = os.getenv("VECTOR_STORE_ADDRESS")
vector_store_password = os.getenv("VECTOR_STORE_PASSWORD")
AZURE_COGNITIVE_SEARCH_SERVICE_NAME = os.getenv("AZURE_COGNITIVE_SEARCH_SERVICE_NAME")
AZURE_COGNITIVE_SEARCH_INDEX_NAME = os.getenv("AZURE_COGNITIVE_SEARCH_INDEX_NAME")
AZURE_COGNITIVE_SEARCH_API_KEY = os.getenv("AZURE_COGNITIVE_SEARCH_API_KEY")

#init Azure OpenAI
openai.api_type = "azure"
openai.api_version = OPENAI_DEPLOYMENT_VERSION
openai.api_base = OPENAI_DEPLOYMENT_ENDPOINT
openai.api_key = OPENAI_API_KEY

load_dotenv()


8c81bcc4d96147ea87e99e7ff66528b1


True

## Testing Connectivity


In [3]:
# using model engine for testing the connectivity OpenAI
llm = OpenAI(engine=OPENAI_MODEL_NAME, temperature=0)
print(llm('tell me about yourself'))

                engine was transferred to model_kwargs.
                Please confirm that engine is what you intended.


” question. It’s a great way to start a conversation and get to know someone better. It’s also a great way to learn about someone’s interests, hobbies, and passions. So, if you’re looking for some fun and interesting questions to ask someone, here are 50 questions to get you started.

1. What’s your favorite thing to do in your free time?

2. What’s your favorite book?

3. What’s your favorite movie?

4. What’s your favorite TV show?

5. What’s your favorite song?

6. What’s your favorite band?

7. What’s your favorite food?

8. What’s your favorite restaurant?

9. What’s your favorite place to travel?

10. What’s your favorite thing about your job?

11. What’s your favorite thing about your family?

12. What’s your favorite thing about your friends?

13. What’s your favorite thing about yourself?

14. What’s your favorite thing to do on a weekend?

15. What’s your favorite thing to do on a vacation?

16. What’s your favorite thing to do on a rainy day?

17. What’s your favorite thing 

In [4]:
# using model engine for testing the connectivity AzureOpenAI

llm = AzureOpenAI(engine=OPENAI_DEPLOYMENT_NAME, temperature=0)
print(llm('define large language model'))

                engine was transferred to model_kwargs.
                Please confirm that engine is what you intended.


} (GPT-2) \cite{radford2019language} and \textit{BERT} \cite{devlin2018bert} have been shown to be effective in a wide range of NLP tasks. However, these models are computationally expensive and require large amounts of data to train. In addition, they are not designed to handle the complexity of scientific text, which often contains domain-specific terminology and complex sentence structures. 

To address these challenges, several recent studies have proposed domain-specific language models that are pre-trained on scientific text. For example, \textit{SciBERT} \cite{beltagy2019scibert} is a BERT-based model that is pre-trained on scientific text and has been shown to outperform BERT on several scientific NLP tasks. Similarly, \textit{BioBERT} \cite{lee2020biobert} is a BERT-based model that is pre-trained on biomedical text and has been shown to outperform BERT on several biomedical NLP tasks. 

In addition to pre-trained language models, several recent studies have proposed task-spec

## Load Documents

In [5]:
# load documents
from langchain.document_loaders import DirectoryLoader
loader = DirectoryLoader('data/', glob='*.pdf', show_progress=True)
documents = loader.load()

 33%|████████████████████████████                                                        | 1/3 [00:05<00:10,  5.48s/it]

ImportError: partition_pdf is not available. Install the pdf dependencies with pip install "unstructured[pdf]"

In [None]:
# split documents into chunks
from langchain.text_splitter import CharacterTextSplitter

text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(documents)	

## Create Embeddings

In [None]:
embeddings = OpenAIEmbeddings()
embeddings

## Create Vector Search and perform similarity search

In [None]:
# vector search
from langchain.vectorstores.azuresearch import AzureSearch


index_name: str = " azureblob-index"
embeddings: OpenAIEmbeddings = OpenAIEmbeddings(deployment=OPENAI_EMBEDDING_DEPLOYMENT_NAME, chunk_size=1)
vector_store: AzureSearch = AzureSearch(
     azure_search_endpoint = vector_store_address,
azure_search_key=vector_store_password,
index_name=index_name,
embedding_function=embeddings.embed_query,
)
list_of_docs = vector_store.add_documents(documents=docs)


In [None]:
# Perform a similarity search
docs_search = vector_search.similarity_search(
    query="What are some good places in goa to visit in December",
    k=3,
    search_type="similarity",
)
print(docs_search[0].page_content)


## Create Chain using llm - AzureOpenAI

In [None]:
chain = RetrievalQA.from_chain_type(llm=AzureOpenAI(deployment_name=OPENAI_DEPLOYMENT_NAME,model=OPENAI_DEPLOYMENT_NAME),chain_type="stuff", retriever=vector_search.as_retriever())
chain

## Querying Data

In [None]:
query = " What are some good places in goa to visit in December"
chain.run(query)