In [1]:
# Import Langchain modules
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain.vectorstores import Chroma
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field

# Other modules and packages
import os
import tempfile
import streamlit as st
import pandas as pd
from dotenv import load_dotenv


For example, replace imports like: `from langchain_core.pydantic_v1 import BaseModel`
with: `from pydantic import BaseModel`
or the v1 compatibility namespace if you are working in a code base that has not been fully upgraded to pydantic 2 yet. 	from pydantic.v1 import BaseModel

  exec(code_obj, self.user_global_ns, self.user_ns)


In [2]:
load_dotenv()

True

In [5]:
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

## Define our LLM

In [8]:
llm = ChatOpenAI(model="gpt-4o-mini", api_key=OPENAI_API_KEY)
llm.invoke("Tell me a joke about dogs")

AIMessage(content="Why did the dog sit in the shade?\n\nBecause he didn't want to become a hot dog!", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 19, 'prompt_tokens': 13, 'total_tokens': 32, 'completion_tokens_details': {'reasoning_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_1bb46167f9', 'finish_reason': 'stop', 'logprobs': None}, id='run-970cddc8-87e0-4b4a-9193-1210076976f6-0', usage_metadata={'input_tokens': 13, 'output_tokens': 19, 'total_tokens': 32})

## Process PDF document

### Load PDF document

In [19]:
loader = PyPDFLoader("data/carvalho2007.pdf")
pages = loader.load()

In [31]:
print(pages[0].page_content)

Optimal Dynamical Characterization of Entanglement
Andre ´R. R. Carvalho,1,2Marc Busse,1Olivier Brodier,1Carlos Viviescas,1and Andreas Buchleitner1
1Max-Planck-Institut fu ¨r Physik komplexer Systeme, No ¨thnitzer Strasse 38, 01187 Dresden, Germany
2Department of Physics, Faculty of Science, Australian National University, ACT 0200, Australia
(Received 2 April 2006; published 7 May 2007)
We show that, for experimentally relevant systems, there is an optimal measurement strategy to monitor
the time evolution of entanglement under open system dynamics. This suggests an efﬁcient, dynamical
characterization of the entanglement of composite, open quantum systems.
DOI: 10.1103/PhysRevLett.98.190501 PACS numbers: 03.67.Mn, 03.65.Ud, 03.65.Yz, 42.50.Lc
Quantum-information processing requires the ability to
produce entangled states and coherently perform opera-tions on them. Under realistic laboratory conditions, how-
ever, entanglement is degraded through uncontrolled cou-
pling to the environ

In [33]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1500,
    chunk_overlap=200,
    length_function=len,
    separators=["\n\n", "\n\n", " "],
)
chunks = text_splitter.split_documents(pages)

## Create embeddings

In [35]:
def get_embeddings_function():
    embeddings = OpenAIEmbeddings(
        model="text-embedding-ada-002",
        openai_api_key=OPENAI_API_KEY,
    )
    return embeddings

embedding_function = get_embeddings_function()
test_vector = embedding_function.embed_query("cat")

In [38]:
from langchain.evaluation import load_evaluator

evaluator = load_evaluator(evaluator="embedding_distance", embeddings=embedding_function)

evaluator.evaluate_strings(prediction="Amnsterdam", reference="coffeeshop")

{'score': 0.17885535636240357}