In [None]:
!pip install python-dotenv

In [1]:
import os
from dotenv import load_dotenv
from pinecone import Pinecone

load_dotenv("../.env")

os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
os.environ["TAVILY_API_KEY"] = os.getenv("TAVILY_API_KEY")
os.environ["PINECONE_API_KEY"] = os.getenv("PINECONE_API_KEY")

pinecone_client = Pinecone(api_key=os.environ["PINECONE_API_KEY"])

In [2]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader

documents = SimpleDirectoryReader("../data").load_data()

In [3]:
import openai
from llama_index.embeddings.openai import OpenAIEmbedding
embed_model = OpenAIEmbedding()

# ingestion pipeline
from llama_index.core import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    StorageContext,
)

from llama_index.core.ingestion import IngestionPipeline
from llama_index.core.node_parser import SentenceSplitter
from llama_index.vector_stores.pinecone import PineconeVectorStore

pinecone_index = pinecone_client.Index("jobqa")
vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
storage_context = StorageContext.from_defaults(
    vector_store=vector_store
)

index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context
)

pipeline = IngestionPipeline(
    transformations=[
        SentenceSplitter(chunk_size=1024, chunk_overlap=100),
        embed_model
    ]
)

pipeline.run(documents=documents)

Upserted vectors:   0%|          | 0/1 [00:00<?, ?it/s]

[TextNode(id_='055f536c-b884-4b33-9222-ac1929f08e4c', embedding=[-0.022852640599012375, -0.008047681301832199, 0.01981394737958908, -0.014360949397087097, 0.004204219672828913, 0.015290595591068268, -0.035160042345523834, 0.004977768752723932, -0.023796161636710167, -0.020563215017318726, 0.030803196132183075, 0.0012106210924685001, 0.0009443885064683855, -0.0012002146104350686, 0.002488884376361966, -0.015013088472187519, 0.0021298606880009174, -0.021340232342481613, -0.005008988082408905, -0.02833339013159275, -0.02947116456925869, 0.0037047085352241993, -0.006889093201607466, 0.0023952259216457605, -0.0225751344114542, 0.017344141378998756, 0.0011507837334647775, -0.02603008784353733, -0.0038469305727630854, -0.006115544121712446, -0.002561729634180665, 0.00037983671063557267, -0.027958756312727928, -0.006049636751413345, -0.01635899394750595, 0.0012643878581002355, -0.004273596685379744, -0.004627416841685772, 0.03185771778225899, 0.007818738929927349, 0.02682098001241684, 0.013722

In [8]:
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine

retriever = VectorIndexRetriever(index=index, similarity_top_k=3)
query_engine = RetrieverQueryEngine(retriever=retriever)

response = query_engine.query("What is this person's job history?")
print(response)

This person's job history includes working as a Software Development Engineer at Amazon Web Services Pte. Ltd. from March 2022 to the present, and as a Software Engineer at PayPal Pte. Ltd. from June 2020 to February 2022. Additionally, they worked as a Software Engineer Intern at PayPal Pte. Ltd. from May 2019 to August 2019.


In [7]:
# Using retriever to fetch the most relevant context given a user query

pinecone_retriever = index.as_retriever(
    retriever_mode="llm",
)

In [8]:
nodes = pinecone_retriever.retrieve("What is this person's job history?")
nodes

[NodeWithScore(node=TextNode(id_='f29b8825-0a58-4d1f-9aca-b3ffc7c81a18', embedding=[-0.0228511691, -0.00808185, 0.0198542941, -0.0143322758, 0.00429760152, 0.0152757363, -0.0351577811, 0.00497397967, -0.0237946305, -0.0205618907, 0.0307734627, 0.00118279434, 0.00089576724, -0.00117238844, 0.00249045831, -0.0149566242, 0.00215053512, -0.0213249829, -0.00498438533, -0.0283454396, -0.0294415187, 0.00375649915, -0.0069233356, 0.00231182529, -0.0226291791, 0.0173014011, 0.00114984252, -0.0259867888, -0.00388136902, -0.00608393317, -0.00252861297, 0.000373958959, -0.0278875846, -0.00607699575, -0.0162746944, 0.00127644662, -0.00421435479, -0.00467914809, 0.0319111645, 0.00781823602, 0.0268053785, 0.0137287388, -0.00684355758, -0.0191328246, -0.0214359779, 0.0198126715, -0.00539714936, -0.0218105875, 0.00549427047, 0.0464238077, 0.00355185149, 0.00480054924, -0.0148872528, -0.00103017571, 0.0046097762, -0.000913110271, -0.00429760152, 0.0287755467, -0.0274713505, -0.0148595041, 0.0131113268, 

In [10]:
# Web Reader
from llama_index.core import SummaryIndex
from llama_index.readers.web import SimpleWebPageReader
from IPython.display import Markdown, display
import os

web_document = SimpleWebPageReader(html_to_text=True).load_data(
    ["https://www.amazon.jobs/en/jobs/2722768/associate-solutions-architect"]
)

In [11]:
web_document

[Document(id_='https://www.amazon.jobs/en/jobs/2722768/associate-solutions-architect', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text="Skip to main content\n\n  * [Home](/en)\n\n  * [Your job application](https://www.amazon.jobs/applicant/check_application_status)\n\n  * [Amazon culture & benefits](/en/landing_pages/working-at-amazon)\n\n  * [Diversity at Amazon](/en/diversity)\n\n  * [Locations](/en/locations)\n\n  * [Teams](/en/business_categories)\n\n  * [Job categories](/en/job_categories)\n\n  * Resources\n    * [Interview tips](/en/landing_pages/interviewing-at-amazon#onlineapp)\n    * [Disability accommodations](/en/disability/us)\n    * [About Amazon](https://blog.aboutamazon.com)\n\n  * [FAQ](/faqs)\n\n[×](javascript:void\\(0\\))\n\n# Associate Solutions Architect\n\nJob ID: 2722768 | Amazon Web Services Singapore Private Limited\n\n[Apply now](https://www.amazon.jobs/applicant/jobs/2722768/apply)\n\n## DESCR

In [16]:
# text document

from llama_index.core import Document
text = '''
DESCRIPTION
The AWS Solutions Architect (SA) team is a dynamic, collaborative group that partners with customers to design and implement cutting-edge cloud infrastructures and applications. As a diverse and inclusive organization, we're passionate about empowering all technologists to thrive and make a real impact.

Do you have a curious mind and a passion for problem-solving? Are you eager to dive deep into the latest cloud computing innovations and guide customers to capitalize on them? As an AWS Solutions Architect, you'll receive comprehensive technical training, immerse yourself in cloud concepts, and develop valuable business acumen.

#AUTA-sg-2025



Key job responsibilities
You'll have the opportunity to:

* Work closely with a supportive team of accomplished cloud experts, tapping into their knowledge and mentorship
* Take on stimulating technical challenges, using your creativity to deliver innovative customer solutions
* Develop strong communication and presentation skills as you evangelize AWS capabilities to a wide range of stakeholders
* Contribute to our diverse, inclusive culture where different perspectives are celebrated

If you're an aspiring technologist looking to accelerate your career in the cloud, we encourage you to join our team. Together, we can build the future of cloud computing and make a real difference for our customers.

At AWS, we believe the most innovative cloud solutions come from diverse teams with varied backgrounds and perspectives. We encourage applicants from all educational and professional paths to apply, as long as you have the technical aptitude and passion to learn. If this opportunity aligns with your skills and aspirations, we welcome you to join our dynamic team.

BASIC QUALIFICATIONS
*This is a full-time fresh graduate role for students who are graduating in 2024/2025
*Degree in Computer Science, Engineering, Mathematics, Technology, or a related scientific/technical field - OR - equivalent training, certifications, and/or experience, regardless of your academic background (0 - 1 years)
* Genuine curiosity and aptitude for learning about cloud-based solutions and how to deliver them to customers
* Familiarity with at least one programming language, or a strong interest and ability to quickly pick up coding skills

PREFERRED QUALIFICATIONS
* Demonstrated ability to adapt to new technologies and learn quickly, even if you don't have prior experience
* Understanding of IT fundamentals, including topics like security, networking, storage, databases (relational and/or NoSQL), and operating systems (Unix, Linux, and/or Windows)
* Knowledge of core AWS services such as EC2, ALB, RDS, VPC, Route53, and S3
* AWS Cloud Practitioner or/and Associate Solutions Architect Certificate
* Proficiency in any of the following languages: Malay-Bahasa, Indonesia-Bahasa, Thai, Vietnamese, Japanese, Korean - including the ability to speak, write and understand the language effectively'''

text_document = Document(text=text)

text_document

Document(id_='ca4cb320-8369-4c4d-abaf-f029db2aae48', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text="\nDESCRIPTION\nThe AWS Solutions Architect (SA) team is a dynamic, collaborative group that partners with customers to design and implement cutting-edge cloud infrastructures and applications. As a diverse and inclusive organization, we're passionate about empowering all technologists to thrive and make a real impact.\n\nDo you have a curious mind and a passion for problem-solving? Are you eager to dive deep into the latest cloud computing innovations and guide customers to capitalize on them? As an AWS Solutions Architect, you'll receive comprehensive technical training, immerse yourself in cloud concepts, and develop valuable business acumen.\n\n#AUTA-sg-2025\n\n\n\nKey job responsibilities\nYou'll have the opportunity to:\n\n* Work closely with a supportive team of accomplished cloud experts, tapping into their know

In [18]:
index = VectorStoreIndex.from_documents([text_document])

query_engine = index.as_query_engine()

response = query_engine.query("What are the preferred qualifications")
response

Response(response='The preferred qualifications include the ability to adapt to new technologies quickly, understanding of IT fundamentals such as security and networking, knowledge of core AWS services, possession of AWS Cloud Practitioner or Associate Solutions Architect Certificate, and proficiency in languages like Malay-Bahasa, Indonesia-Bahasa, Thai, Vietnamese, Japanese, or Korean.', source_nodes=[NodeWithScore(node=TextNode(id_='2325d843-2e82-4e75-b35c-2c5b72f137aa', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='ca4cb320-8369-4c4d-abaf-f029db2aae48', node_type=<ObjectType.DOCUMENT: '4'>, metadata={}, hash='9a58a5fa77dac1cad9dca971c3a9d12e64f04da1a57f8776c2deadb52be758e8')}, text="DESCRIPTION\nThe AWS Solutions Architect (SA) team is a dynamic, collaborative group that partners with customers to design and implement cutting-edge cloud infrastructures and applica

In [19]:
response.response

'The preferred qualifications include the ability to adapt to new technologies quickly, understanding of IT fundamentals such as security and networking, knowledge of core AWS services, possession of AWS Cloud Practitioner or Associate Solutions Architect Certificate, and proficiency in languages like Malay-Bahasa, Indonesia-Bahasa, Thai, Vietnamese, Japanese, or Korean.'

In [22]:
index = VectorStoreIndex.from_documents(web_document)

query_engine = index.as_query_engine()

response = query_engine.query("What are the preferred qualifications")
response

Response(response="Demonstrated ability to adapt to new technologies and learn quickly, even if you don't have prior experience, understanding of IT fundamentals including security, networking, storage, databases, and operating systems, knowledge of core AWS services, AWS Cloud Practitioner or/and Associate Solutions Architect Certificate, proficiency in specific languages including the ability to speak, write, and understand effectively.", source_nodes=[NodeWithScore(node=TextNode(id_='ab99a3ab-cc15-4a93-997a-0226f746b5db', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='https://www.amazon.jobs/en/jobs/2722768/associate-solutions-architect', node_type=<ObjectType.DOCUMENT: '4'>, metadata={}, hash='0dd60c2d367ddb8f8c2186e429111eea5a79a9073102f060a4346e7a80813efd'), <NodeRelationship.PREVIOUS: '2'>: RelatedNodeInfo(node_id='ea740e57-e8e7-4e1b-af98-784edcf89425', node_type

In [23]:
response.response

"Demonstrated ability to adapt to new technologies and learn quickly, even if you don't have prior experience, understanding of IT fundamentals including security, networking, storage, databases, and operating systems, knowledge of core AWS services, AWS Cloud Practitioner or/and Associate Solutions Architect Certificate, proficiency in specific languages including the ability to speak, write, and understand effectively."