In [33]:
import os
from dotenv import load_dotenv
from langchain_openai.chat_models import ChatOpenAI
from langchain_community.llms import Ollama
from langchain_core.output_parsers import StrOutputParser
from langchain_community.document_loaders import PyPDFLoader
from langchain.prompts import PromptTemplate
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_community.embeddings import OllamaEmbeddings
from langchain_pinecone import PineconeVectorStore
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from operator import itemgetter

In [34]:
# Pull secrets from a local .env file into the process environment.
load_dotenv()
# Prefer the project's own variable name, but fall back to the conventional
# OPENAI_API_KEY (the name the OpenAI integrations look up by default) so a
# .env that only defines the standard name still yields a usable key here.
OPEN_API_KEY = os.getenv("OPEN_API_KEY") or os.getenv("OPENAI_API_KEY")
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
# MODEL selects the backend below: names containing "gpt" use OpenAI,
# anything else is served through Ollama.
MODEL = "gpt-4o" # chat model
# MODEL = "llama3" # completion model

In [35]:
# The output parser is used by every chain regardless of backend, so build it
# before branching. Creating it only inside the OpenAI branch left `parser`
# undefined (NameError) on a fresh kernel whenever MODEL pointed at Ollama.
parser = StrOutputParser()

if "gpt" in MODEL:
    # OpenAI backend: chat model and embeddings share the explicitly loaded key
    # instead of the embeddings silently depending on a different env variable.
    model = ChatOpenAI(model=MODEL, api_key=OPEN_API_KEY)
    embeddings = OpenAIEmbeddings(api_key=OPEN_API_KEY)
    INDEX = "open-source-rag-index"
else:
    # Ollama backend: the same model name serves generation and embeddings.
    # A separate index name is used for this backend.
    model = Ollama(model=MODEL)
    embeddings = OllamaEmbeddings(model=MODEL)
    INDEX = "open-source-rag-index-llama3"

# Quick smoke test that the selected model answers and the parser returns text.
chain = model | parser
chain.invoke("Tell me a joke")

"Here's one:\n\nWhy couldn't the bicycle stand up by itself?\n\n(Wait for it...)\n\nBecause it was two-tired!\n\nHope that made you smile! Do you want to hear another one?"

In [36]:
# Source document for the RAG demo, resolved relative to the notebook's
# working directory.
RESUME_PDF = "Pramita Data Analyst Resume.pdf"

# Load the PDF and split it into Document chunks for indexing.
pdf_loader = PyPDFLoader(RESUME_PDF)
pages = pdf_loader.load_and_split()

# Show the second chunk as a sanity check on the extracted text.
pages[1]

Document(page_content='● Leveraged Airflow DAGs  for creating seamless ETL pipeline to automate and optimize data extraction from 5 distinct AWS S3  \nsources  \n● Incorporated  data preprocessing techniques like  clean ing, normalization, and transformation for over 50,000 records into Azure \nCosmos DB  \n● Automated the generation of reports using Power  BI and optimized with DAX queries  to reduc e report preparation time by 30%  \nAdvanced Labor Data Analytics and Visualization  | Python,  Alteryx,  Grafana, MySQL                                                                Nov 2022  \n● Engineered  Python  scripts  for API integration, fetch ing 30+ years of labour statistics JSON data from BLS API to drive \ncomprehensive economic trend analysis  \n● Developed a custom Alteryx plugin in Python to adeptly import, cleanse, and manipulate over 100,000 rows of data  \n● Performed data analysis and visualization of labor statistics, highlighting economic trends and patterns using G

In [37]:
# Prompt skeleton with two placeholders, {context} and {question}.
# Written line by line so the exact whitespace (including the trailing space
# after "can't") is visible rather than hidden inside a triple-quoted block.
template = (
    "\n"
    "Answer the question based on the context below. If you can't \n"
    'answer the question, reply "I don\'t know".\n'
    "\n"
    "Context: {context}\n"
    "\n"
    "Question: {question}\n"
)

prompt = PromptTemplate.from_template(template)

# Render once with placeholder values to eyeball the final prompt text.
prompt.format(
    context="Here is some context",
    question="Here is a question",
)

'\nAnswer the question based on the context below. If you can\'t \nanswer the question, reply "I don\'t know".\n\nContext: Here is some context\n\nQuestion: Here is a question\n'

In [38]:
# Exercise prompt -> model -> parser with hand-written context. The context
# does not contain a last name, so the prompt's instruction asks for the
# "I don't know" fallback.
sample_inputs = {
    "context": "I am deril",
    "question": "what's my last name?",
}
chain = prompt | model | parser
chain.invoke(sample_inputs)

"I don't know"

In [39]:
# Embed the resume chunks and upsert them into the Pinecone index under
# stable, position-based IDs. Without explicit IDs each run of this cell
# stores another copy of every chunk under a newly generated ID, so re-running
# the notebook fills the index with duplicates and the retriever can return
# the same chunk several times. With fixed IDs a re-run overwrites in place.
chunk_ids = [f"resume-chunk-{i}" for i in range(len(pages))]
pinecone = PineconeVectorStore.from_documents(
    pages, embedding=embeddings, index_name=INDEX, ids=chunk_ids
)

In [40]:
# Retrieval-augmented chain driven by a plain question string: the string is
# sent to the retriever to fetch context and is also forwarded unchanged as
# the prompt's {question}.
retriever = pinecone.as_retriever()
retrieval_inputs = RunnableParallel(
    context=retriever,
    question=RunnablePassthrough(),
)
chain = retrieval_inputs | prompt | model | parser
chain.invoke("How many projects do i have?")

'I can answer that! According to the context, you have at least 4 academic projects:\n\n1. IMDb Movies Data Analysis\n2. Multiple Disease Prediction System - Modern Shaman\n3. Food Delivery Management System\n4. Advanced Labor Data Analytics and Visualization (this one seems to be a part of your professional experience rather than an academic project)\n\nThere might be more projects mentioned in the context, but these 4 are explicitly listed as academic projects.'

In [41]:
# Same retrieval chain, but taking a dict as input: the "question" entry is
# extracted once to query the retriever and once to fill the prompt.
pick_question = itemgetter("question")
chain = (
    RunnableParallel(
        context=pick_question | pinecone.as_retriever(),
        question=pick_question,
    )
    | prompt
    | model
    | parser
)
chain.invoke({"question": "How many projects do i have?"})

'Based on the context provided, I can see that you have mentioned several academic projects and professional experience. Here are the specific project mentions:\n\n1. IMDB Movies Data Analysis\n2. Multiple Disease Prediction System - Modern Shaman\n3. Food Delivery Management System\n\nThese three are explicitly mentioned as projects in your resume.'