In [None]:
# Install the CPU build of FAISS into the kernel's environment (quiet output).
%pip install faiss-cpu --quiet
# Alternative package for GPU machines, left disabled:
# %pip install faiss-gpu --quiet

In [None]:
%pip install dotenv langchain_community langchain langchain_openai --quiet --ignore-installed

In [None]:
import os
from dotenv import load_dotenv
# Pull variables from a local .env file (if one exists) into the process environment.
load_dotenv()

# Make sure every required secret is present, falling back to Colab's secret store.
for key in ['OPENAI_API_KEY']:
    if key not in os.environ:
        try:
            # google.colab is only importable inside Colab; elsewhere this raises
            # ImportError and we fall through to the hint below.
            from google.colab import userdata
            os.environ[key] = userdata.get(key)
        except Exception:
            # Catch `Exception` rather than using a bare `except:` so the fallback
            # stays best-effort while KeyboardInterrupt / SystemExit still propagate.
            print(f"You need to set the {key} key either in colab or in a .env var.")

In [None]:
from langchain_community.vectorstores.faiss import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# Tiny in-notebook corpus: each string is indexed as one retrievable text.
documents = [
    "James Phoenix worked at JustUnderstandingData.",
    "James phoenix currently is 31 years old.",
    (
        "Data engineering is the designing and building systems for "
        "collecting, storing, and analysing data at scale."
    ),
]

# Embed every text with OpenAI embeddings and index the vectors in a FAISS store.
vectorstore = FAISS.from_texts(documents, OpenAIEmbeddings())
# Expose the store through the retriever interface consumed by the chain.
retriever = vectorstore.as_retriever()

# Prompt text; the {context} and {question} placeholders are filled in when the
# template is formatted.
template = (
    "Answer the question based only on the following context:\n"
    "---\n"
    "Context: {context}\n"
    "---\n"
    "Question: {question}\n"
)
# Wrap the raw template text in a chat prompt object.
prompt = ChatPromptTemplate.from_template(template=template)

# Chat model created with the library's default settings.
model = ChatOpenAI()

In [None]:
def _format_docs(docs):
    """Join the text of the retrieved documents into a single context string.

    Args:
        docs: The documents returned by the retriever; each one exposes its
            text as ``page_content``.

    Returns:
        The documents' texts separated by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# The input to this chain is just a string, e.g. "What is data engineering?"
chain = (
    # Build the dictionary that feeds the prompt template
    {
        # 'context': the retrieved documents flattened to plain text, so the prompt
        # receives their content rather than the repr of a list of Document objects
        "context": retriever | _format_docs,
        # 'question': the chain input, passed through unchanged
        "question": RunnablePassthrough(),
    }
    | prompt
    | model
    | StrOutputParser()
)

In [None]:
import textwrap

# Ask about a topic covered by the indexed documents and print the wrapped answer.
res = chain.invoke("What is data engineering?")
print("\n".join(textwrap.wrap(res)))

In [None]:
# Ask about a person mentioned in the indexed documents and print the wrapped answer.
res = chain.invoke("Who is James Phoenix?")
print("\n".join(textwrap.wrap(res)))

In [None]:
# Testing for fake knowledge: the indexed documents do not cover this question.
res = chain.invoke("What is the president of the US?")
print("\n".join(textwrap.wrap(res)))