In [None]:
!pip install virtualenv
!virtualenv ai_course_env

In [None]:
!pip install accelerate==0.33.0
!pip install apify-client==1.6.4
!pip install arxiv==2.1.0
!pip install duckduckgo_search==5.2.1
!pip install faiss-cpu==1.8.0
!pip install google-api-python-client==2.124.0
!pip install google-cloud-aiplatform==1.60.0
!pip install google-generativeai==0.4.1
!pip install gpt4all==2.3.2
!pip install gradio==3.50.2
!pip install huggingface-hub==0.24.5
!pip install jupyter
!pip install lanarky==0.8.6
!pip install langchain-anthropic==0.1.4
!pip install langchain-chroma==0.1.2
!pip install langchain-cli==0.0.21
!pip install langchain-cohere==0.1.1
!pip install langchain-core==0.1.42
!pip install langchain-experimental==0.0.55
!pip install langchain-google-genai==1.0.1
!pip install langchain-google-vertexai==0.1.2
!pip install langchain-openai==0.1.1
!pip install langchain[docarray]==0.1.13
!pip install langchainhub==0.1.20
!pip install langserve[all]==0.0.51
!pip install numexpr==2.10.0
!pip install opencv-python==4.9.0.80
!pip install pandoc==1.1.0  # needs system install as well!
!pip install pdf2image==1.17.0
!pip install pdfminer.six==20231228
!pip install pikepdf==8.15.1
!pip install pillow_heif==0.16.0
!pip install pymupdf==1.24.1
!pip install pypdf==4.1.0
!pip install replicate==0.30.1
!pip install ruff==0.3.4
!pip install sentence-transformers==2.6.1
!pip install streamlit==1.37.0
!pip install tiktoken==0.6.0
!pip install unstructured==0.15.0
!pip install wikipedia==1.4.0
!pip install wolframalpha==5.0.0

In [None]:
import os
OPENAI_API_KEY = "OPENAI_API_KEY"
# I'm omitting all other keys
def set_environment():
    variable_dict = globals().items()
    for key, value in variable_dict:
        if "API" in key or "ID" in key:
            os.environ[key] = value

In [1]:
# setting the environment variables
import sys
import os

sys.path.insert(0, os.path.abspath('..'))

# from config import set_environment
set_environment()

In [2]:
from langchain_community.document_loaders.wikipedia import WikipediaLoader
loader = WikipediaLoader("Terry Fox")
documents = loader.load()

In [3]:
len(documents)

25

In [4]:
from langchain_text_splitters import CharacterTextSplitter

splitter = CharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=25,
    separator="\nReport"
)
split_docs = splitter.split_documents(documents)

In [5]:
for doc in split_docs:
    print(len(doc.page_content))

3999
3025
4000
2479
4000
4000
4000
3999
4000
4000
4000
4000
3999
4000
4000
4000
4000
3999
4000
4000
4000
4000
2681
4000
4000


In [None]:
!python -m pip install pysqlite3

In [6]:
# setting up sqlite3 for Chroma vector store to use locally
__import__('pysqlite3')
import sys
sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')

# import Chroma vector store and HuggingFace embeddings
from langchain_community.vectorstores.chroma import Chroma
from langchain_community.embeddings import HuggingFaceBgeEmbeddings

# compute HF embeddings
model_name = "BAAI/bge-small-en"
model_kwargs = {"device": "cpu"}
encode_kwargs = {"normalize_embeddings": True}
hf = HuggingFaceBgeEmbeddings(
    model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs
)

# construct vector store
vectorstore = Chroma.from_documents(documents=split_docs, embedding=hf)

In [26]:
dir(vectorstore)

['_Chroma__query_collection',
 '_LANGCHAIN_DEFAULT_COLLECTION_NAME',
 '__abstractmethods__',
 '__annotations__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__len__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__slots__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_abc_impl',
 '_asimilarity_search_with_relevance_scores',
 '_client',
 '_client_settings',
 '_collection',
 '_cosine_relevance_score_fn',
 '_embedding_function',
 '_euclidean_relevance_score_fn',
 '_get_retriever_tags',
 '_max_inner_product_relevance_score_fn',
 '_persist_directory',
 '_select_relevance_score_fn',
 '_similarity_search_with_relevance_scores',
 'aadd_documents',
 'aadd_texts',
 'add_documents',
 'add_images',
 'add_texts',
 'adelete',
 'afrom_documents',
 'afrom_texts',

In [7]:
similar_vectors = vectorstore.from_texts("Obama", hf)
similar_vectors_content = similar_vectors.get()

In [8]:
similar_vectors_content.keys()

dict_keys(['ids', 'embeddings', 'metadatas', 'documents', 'uris', 'data', 'included'])

In [9]:
len(similar_vectors_content['documents'])

30

In [10]:
similar_vectors_content['documents']

["Richard Milhous Nixon (January 9, 1913 – April 22, 1994) was  the 37th president of the United States, serving from 1969 to 1974. A member of the Republican Party, he previously served as a representative and senator from California and as the 36th vice president from 1953 to 1961 under President Dwight D. Eisenhower. His presidency saw the reduction of U.S. involvement in the Vietnam War, détente with the Soviet Union and China, the Apollo 11 Moon landing, and the establishment of the Environmental Protection Agency and Occupational Safety and Health Administration. Nixon's second term ended early when he became the only U.S. president to resign from office, as a result of the Watergate scandal.\nNixon was born into a poor family of Quakers in Yorba Linda, Southern California. He graduated from Duke Law School in 1937, practiced law in California, and then moved with his wife Pat to Washington, D.C., in 1942 to work for the federal government. After serving active duty in the Naval 

In [11]:
similar_vectors_content = [item for item in similar_vectors_content['documents'] if len(item) > 5]
len(similar_vectors_content)

25

In [12]:
similar_vectors_content

["Richard Milhous Nixon (January 9, 1913 – April 22, 1994) was  the 37th president of the United States, serving from 1969 to 1974. A member of the Republican Party, he previously served as a representative and senator from California and as the 36th vice president from 1953 to 1961 under President Dwight D. Eisenhower. His presidency saw the reduction of U.S. involvement in the Vietnam War, détente with the Soviet Union and China, the Apollo 11 Moon landing, and the establishment of the Environmental Protection Agency and Occupational Safety and Health Administration. Nixon's second term ended early when he became the only U.S. president to resign from office, as a result of the Watergate scandal.\nNixon was born into a poor family of Quakers in Yorba Linda, Southern California. He graduated from Duke Law School in 1937, practiced law in California, and then moved with his wife Pat to Washington, D.C., in 1942 to work for the federal government. After serving active duty in the Naval 