# GPT call

In [None]:
import os

from dotenv import load_dotenv
from langchain.document_loaders import PyPDFLoader
from langchain.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
# nbstripout is a tool to remove the output from Jupyter notebooks
!nbstripout --install
# NOTE(review): `!` commands run in a subshell, so this export presumably does
# not change the environment of the running kernel — confirm it has the
# intended effect (an in-process alternative may be needed).
!export PYTHONWARNINGS="ignore:NotOpenSSLWarning"
# Load variables from a local .env file; the code below reads OPENAI_API_KEY
# from os.environ, so it is presumably expected to be defined there.
load_dotenv()

In [None]:
from openai import OpenAI

def call_openai_api(path):
    """Extract natural-product metadata from a PDF via retrieval + chat model.

    The PDF at ``path`` is loaded and split into chunks, the chunks are
    embedded with ``OpenAIEmbeddings`` and indexed in an in-memory FAISS
    store. The chunks most similar to a fixed retrieval question are sent to
    the chat model as the user message, and the system prompt asks the model
    to answer in JSON.

    Args:
        path: Location of the PDF to analyse, as accepted by ``PyPDFLoader``.

    Returns:
        The streaming response of ``client.chat.completions.create`` (called
        with ``stream=True``); iterate it to receive the answer piece by piece.

    Raises:
        ValueError: If the PDF yields no chunks to index.
        KeyError: If ``OPENAI_API_KEY`` is not set in the environment.
    """
    pages = PyPDFLoader(path).load_and_split()
    if not pages:
        # Without any chunk there is nothing to index or to send as context,
        # so fail with an explicit message instead of a downstream error.
        raise ValueError(f"No text could be extracted from {path!r}")

    api_key = os.environ["OPENAI_API_KEY"]
    embeddings = OpenAIEmbeddings(openai_api_key=api_key)
    index = FAISS.from_documents(pages, embeddings)

    # NOTE(review): the question only drives retrieval; it is not included in
    # the messages sent to the model — confirm that this is intended.
    question = "What is the molecule of the paper?"
    hits = index.similarity_search(question, k=3)

    # Forward the text of every retrieved chunk. Previously only the first hit
    # was used, serialised with str(), which embeds the Document repr
    # (including metadata) rather than the plain page text.
    context = "\n\n".join(doc.page_content for doc in hits)

    # NOTE(review): the example in the prompt is a JSON array while
    # response_format requests a JSON object — confirm which shape the
    # consumers of this output expect.
    system_prompt = (
        "You are a chemist expert in natural products. "
        "You give the answer in JSON format: "
        "[{\"compoundName\": \"Example Compound Name\", "
        "\"bioactivity\": \"Example Bioactivity\", "
        "\"species\": \"Example Species\", "
        "\"collectionSite\": \"Example Collection Site\", "
        "\"isolationType\": \"Example Isolation Type\"}]. "
        "Answer user's questions utilizing your background knowledge or the "
        "information given below if its not specified leave it empty like \"\""
    )

    # Pass the key at construction time instead of patching the attribute on
    # an already-built client.
    client = OpenAI(api_key=api_key)
    return client.chat.completions.create(
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": context},
        ],
        stream=True,
        response_format={"type": "json_object"},
        model="gpt-4-1106-preview",
    )

# Run the extraction on one paper and echo the streamed answer as it arrives.
stream = call_openai_api("pdfs/10.1002@bscb.19810900913.pdf")
for chunk in stream:
    text = chunk.choices[0].delta.content
    if not text:
        # Chunks without content (None or empty) print as nothing.
        text = ""
    print(text, end="")