# Load

## Load Packages

In [1]:
import os
from dotenv import load_dotenv

from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.storage import InMemoryStore
from langchain.retrievers import ParentDocumentRetriever
from langchain.embeddings.openai import OpenAIEmbeddings


In [2]:
# Read a local .env file (if present) into the process environment.
# load_dotenv is imported at the top of the notebook but was never invoked,
# so a key stored only in .env was never picked up.
load_dotenv()

OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    # Without this guard, assigning None into os.environ below fails with an
    # unhelpful "str expected, not NoneType" TypeError.
    raise EnvironmentError(
        "OPENAI_API_KEY is not set; export it or add it to a .env file."
    )
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

# Retrieve The Full Document

## Document Loader

In [3]:
# One TextLoader per transcript file that should be indexed.
loaders = [
    TextLoader(path)
    for path in ("Amazon_Transcript.txt", "Aetna_TextSequence.txt")
]

# Flatten what every loader returns into a single list of documents.
document = [doc for loader in loaders for doc in loader.load()]

document

[Document(page_content="Hello, and welcome to our Q4 2016 financial results conference call\nJoining us today to answer your questions is Brian Olsavsky, our CFO\nAs you listen to today's conference call, we encourage you to have our press release in front of you, which includes our financial results as well as metrics and commentary on the quarter\nPlease note, unless otherwise stated, all comparisons in this call will be against our results for the comparable period of 2015. Our comments and responses to your questions reflect management's views as of today, February 2, 2017 only and will include forward-looking statements\nActual results may differ materially\nAdditional information about factors that could potentially impact our financial results is included in today's press release and our filings with the SEC, including our most recent Annual Report on Form 10-K and subsequent filings\nDuring this call, we may discuss certain non-GAAP financial measures\nIn our press release, sli

## Document Splitter

In [4]:
# Child splitter for this section, configured with a chunk size of 2000.
child_splitter = RecursiveCharacterTextSplitter(chunk_size=2000)



## Embedding And Vector Store

In [5]:
# Embedding function handed to the vector store.
embedding = OpenAIEmbeddings()

# Chroma collection named "full_documents", backed by the embedding above.
vectorstore = Chroma(embedding_function=embedding, collection_name="full_documents")

  warn_deprecated(


## Retriever

In [6]:
# In-memory docstore; this is where the parent documents are kept.
store = InMemoryStore()

# Only a child splitter is supplied here (no parent_splitter).
retriever = ParentDocumentRetriever(
    docstore=store,
    child_splitter=child_splitter,
    vectorstore=vectorstore,
)

# Register the loaded documents under explicit ids, one id per document.
retriever.add_documents(documents=document, ids=["text_1", "text_2"])



In [7]:
# Query the vector store directly, bypassing the retriever.
vectorstore.similarity_search(query="what is the plan of amazon")

[Document(page_content="Our strategy there has been one of bringing a trusted and authentic product to customers in China, both domestically and from international offers\nSo we'll continue to focus on the Global Store there\nAs you may know, we've launched the Prime program that's focused around the availability of international goods in China and that we're pleased with what we're seeing there\nHi, Mark\nOn the unit growth, we're very happy with the 24% unit growth that we saw in Q4 like you mentioned\nOur unit growth has been strong and that's primarily attributable to our Prime program and the customers and members that enjoy that program\nAs Brian mentioned, that 28% is only part of the story\nOur Amazon Fulfilled units, the amount going through our fulfillment centers and which essentially includes our first-party retail and our FBA sales and it grew nearly 40% over 2016. So we're very pleased with those results and happy with the fundamentals of the business from that perspectiv

In [8]:
# Same question, this time asked through the retriever.
retriever.get_relevant_documents(query="what is the plan of amazon")

[Document(page_content="Hello, and welcome to our Q4 2016 financial results conference call\nJoining us today to answer your questions is Brian Olsavsky, our CFO\nAs you listen to today's conference call, we encourage you to have our press release in front of you, which includes our financial results as well as metrics and commentary on the quarter\nPlease note, unless otherwise stated, all comparisons in this call will be against our results for the comparable period of 2015. Our comments and responses to your questions reflect management's views as of today, February 2, 2017 only and will include forward-looking statements\nActual results may differ materially\nAdditional information about factors that could potentially impact our financial results is included in today's press release and our filings with the SEC, including our most recent Annual Report on Form 10-K and subsequent filings\nDuring this call, we may discuss certain non-GAAP financial measures\nIn our press release, sli

In [10]:
# Keep the retriever's result in a variable so it can be inspected afterwards.
a = retriever.get_relevant_documents(query="what is the plan of amazon")

In [14]:
# Exploratory: list the attribute names of the first element returned by the retriever.
dir(a[0])

['Config',
 '__abstractmethods__',
 '__annotations__',
 '__class__',
 '__class_vars__',
 '__config__',
 '__custom_root_type__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__exclude_fields__',
 '__fields__',
 '__fields_set__',
 '__format__',
 '__ge__',
 '__get_validators__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__include_fields__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__json_encoder__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__post_root_validators__',
 '__pre_root_validators__',
 '__pretty__',
 '__private_attributes__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__repr_args__',
 '__repr_name__',
 '__repr_str__',
 '__rich_repr__',
 '__schema_cache__',
 '__setattr__',
 '__setstate__',
 '__signature__',
 '__sizeof__',
 '__slots__',
 '__str__',
 '__subclasshook__',
 '__try_update_forward_refs__',
 '__validators__',
 '_abc_impl',
 '_calculate_keys',
 '_copy_and_set_values',
 '_decompose_class',
 '_enfo

# Retrieve Larger Chunks

## Document Loader

In [None]:
# The two transcript files for this section, each wrapped in a TextLoader.
loaders = [TextLoader("Amazon_Transcript.txt"), TextLoader("Aetna_TextSequence.txt")]

# Accumulate whatever each loader returns into one list.
document = []
for loader in loaders:
    document += loader.load()

document

[Document(page_content="Hello, and welcome to our Q4 2016 financial results conference call\nJoining us today to answer your questions is Brian Olsavsky, our CFO\nAs you listen to today's conference call, we encourage you to have our press release in front of you, which includes our financial results as well as metrics and commentary on the quarter\nPlease note, unless otherwise stated, all comparisons in this call will be against our results for the comparable period of 2015. Our comments and responses to your questions reflect management's views as of today, February 2, 2017 only and will include forward-looking statements\nActual results may differ materially\nAdditional information about factors that could potentially impact our financial results is included in today's press release and our filings with the SEC, including our most recent Annual Report on Form 10-K and subsequent filings\nDuring this call, we may discuss certain non-GAAP financial measures\nIn our press release, sli

## Document Splitter

In [21]:
# Two splitters with different chunk sizes: 1000 for the child splitter,
# 4000 for the parent splitter.
child_splitter = RecursiveCharacterTextSplitter(chunk_size=1000)
parent_splitter = RecursiveCharacterTextSplitter(chunk_size=4000)


## Embedding And Vector Store

In [22]:
# Separate Chroma collection ("split_parents") with its own OpenAIEmbeddings instance.
vectorstore = Chroma(embedding_function=OpenAIEmbeddings(), collection_name="split_parents")


## Retriever

In [23]:
# New in-memory docstore for this section's retriever.
store = InMemoryStore()

# This retriever is given both a parent splitter and a child splitter.
retriever = ParentDocumentRetriever(
    docstore=store,
    vectorstore=vectorstore,
    child_splitter=child_splitter,
    parent_splitter=parent_splitter,
)



In [24]:
# Add the loaded documents to the retriever; no explicit ids are passed this time.
retriever.add_documents(documents=document)

In [25]:
# Ask the same question through the retriever built with both splitters.
retriever.get_relevant_documents(query="what is the plan of amazon")

[Document(page_content="As you may know, we've launched the Prime program that's focused around the availability of international goods in China and that we're pleased with what we're seeing there\nHi, Mark\nOn the unit growth, we're very happy with the 24% unit growth that we saw in Q4 like you mentioned\nOur unit growth has been strong and that's primarily attributable to our Prime program and the customers and members that enjoy that program\nAs Brian mentioned, that 28% is only part of the story\nOur Amazon Fulfilled units, the amount going through our fulfillment centers and which essentially includes our first-party retail and our FBA sales and it grew nearly 40% over 2016. So we're very pleased with those results and happy with the fundamentals of the business from that perspective\nCustomers continue to respond very well to the low prices\nThe vast selection which is helped by the FBA sellers and the strong convenience that we can offer through FREE Two-Day Shipping and the mul