In [2]:
import json
from pinecone import Pinecone, ServerlessSpec
from sentence_transformers import SentenceTransformer
from dotenv import load_dotenv
import os

  from tqdm.autonotebook import tqdm


In [3]:
# Load settings from a .env file into the environment before any key is read.
# presumably this is where PINECONE_KEY (read later via os.getenv) is defined — verify.
load_dotenv()

True

In [4]:
# Load the structured resume from disk.
# The encoding is pinned to UTF-8 so the file is decoded the same way on every
# platform instead of depending on the locale's default encoding.
with open('../data.json', 'r', encoding='utf-8') as file:
    resume_data = json.load(file)

In [5]:
# Display the parsed resume to check that it loaded as expected.
# NOTE(review): the rendered output contains personal contact details (name, email, profile links) — consider clearing it before sharing the notebook.
resume_data

{'summary': 'A dedicated Electronics and Telecommunication professional with a strong background in machine learning, web development, and cloud platforms. Proven ability to lead projects, solve complex problems, and continuously learn new technologies to drive innovation and efficiency.',
 'contact': {'name': 'Aneesh Patne',
  'email': 'aneeshpatne12@gmail.com',
  'linkedin': 'https://www.linkedin.com/in/aneeshpatne',
  'github': 'https://github.com/aneeshpatne',
  'leetcode': 'https://leetcode.com/aneeshpatne'},
 'education': [{'degree': 'M.Tech in Electronics and Telecommunication',
   'institution': 'Veermata Jijabai Technological Institute',
   'location': 'Mumbai, Maharashtra',
   'duration': '2023 - 2025',
   'details': 'Specialized in Machine Learning and Signal Processing. Relevant coursework includes Advanced Algorithms, Neural Networks, and Communication Systems.'},
  {'degree': 'B.Tech in Electronics and Telecommunication',
   'institution': 'Thakur College of Engineering a

In [4]:
# Pretty-printed JSON string of the entire resume.
# NOTE(review): no other cell visible in this notebook reads resume_text — confirm it is still needed.
resume_text = json.dumps(resume_data, indent=2)

In [9]:
# Sentence-embedding model; its encode() is used for both the resume chunks and the search queries.
model = SentenceTransformer('all-MiniLM-L6-v2')

In [15]:
# Accumulator for the resume chunks; the following cells append dicts with
# "id", "text" and "metadata" keys to it.
chunks = []

In [18]:
# The summary is taken straight from the resume and kept as a single chunk.
chunks.append(
    dict(
        id="summary",
        text=resume_data['summary'],
        metadata=dict(section="summary"),
    )
)

In [21]:
# Serialise the whole contact block as pretty-printed JSON and keep it as one chunk.
contact_text = json.dumps(resume_data["contact"], indent=2)
chunks.append(
    dict(
        id="contact",
        text=contact_text,
        metadata=dict(section="contact"),
    )
)

In [23]:
# One chunk per education entry; ids are numbered from 1 (education_1, education_2, ...).
for idx, edu in enumerate(resume_data["education"], start=1):
    edu_text = json.dumps(edu, indent=2)
    chunks.append(
        dict(
            id=f"education_{idx}",
            text=edu_text,
            metadata=dict(section="education"),
        )
    )

In [24]:
# Inspect the chunks collected so far (summary, contact and education entries).
chunks

[{'id': 'summary',
  'text': 'A dedicated Electronics and Telecommunication professional with a strong background in machine learning, web development, and cloud platforms. Proven ability to lead projects, solve complex problems, and continuously learn new technologies to drive innovation and efficiency.',
  'metadata': {'section': 'summary'}},
 {'id': 'contact',
  'text': '{\n  "name": "Aneesh Patne",\n  "email": "aneeshpatne12@gmail.com",\n  "linkedin": "https://www.linkedin.com/in/aneeshpatne",\n  "github": "https://github.com/aneeshpatne",\n  "leetcode": "https://leetcode.com/aneeshpatne"\n}',
  'metadata': {'section': 'contact'}},
 {'id': 'education_1',
  'text': '{\n  "degree": "M.Tech in Electronics and Telecommunication",\n  "institution": "Veermata Jijabai Technological Institute",\n  "location": "Mumbai, Maharashtra",\n  "duration": "2023 - 2025",\n  "details": "Specialized in Machine Learning and Signal Processing. Relevant coursework includes Advanced Algorithms, Neural Net

In [25]:
# The complete skills section is serialised as pretty-printed JSON into a single chunk.
skills_text = json.dumps(resume_data["skills"], indent=2)
chunks.append(
    dict(
        id="skills",
        text=skills_text,
        metadata=dict(section="skills"),
    )
)

In [26]:
# Inspect the chunk list again now that the skills chunk has been appended.
chunks

[{'id': 'summary',
  'text': 'A dedicated Electronics and Telecommunication professional with a strong background in machine learning, web development, and cloud platforms. Proven ability to lead projects, solve complex problems, and continuously learn new technologies to drive innovation and efficiency.',
  'metadata': {'section': 'summary'}},
 {'id': 'contact',
  'text': '{\n  "name": "Aneesh Patne",\n  "email": "aneeshpatne12@gmail.com",\n  "linkedin": "https://www.linkedin.com/in/aneeshpatne",\n  "github": "https://github.com/aneeshpatne",\n  "leetcode": "https://leetcode.com/aneeshpatne"\n}',
  'metadata': {'section': 'contact'}},
 {'id': 'education_1',
  'text': '{\n  "degree": "M.Tech in Electronics and Telecommunication",\n  "institution": "Veermata Jijabai Technological Institute",\n  "location": "Mumbai, Maharashtra",\n  "duration": "2023 - 2025",\n  "details": "Specialized in Machine Learning and Signal Processing. Relevant coursework includes Advanced Algorithms, Neural Net

In [28]:
# One chunk per project; ids are numbered from 1 (project_1, project_2, ...).
for idx, project in enumerate(resume_data["projects"], start=1):
    project_text = json.dumps(project, indent=2)
    chunks.append(
        dict(
            id=f"project_{idx}",
            text=project_text,
            metadata=dict(section="project"),
        )
    )

In [29]:
# Inspect the chunk list again now that the project chunks have been appended.
chunks

[{'id': 'summary',
  'text': 'A dedicated Electronics and Telecommunication professional with a strong background in machine learning, web development, and cloud platforms. Proven ability to lead projects, solve complex problems, and continuously learn new technologies to drive innovation and efficiency.',
  'metadata': {'section': 'summary'}},
 {'id': 'contact',
  'text': '{\n  "name": "Aneesh Patne",\n  "email": "aneeshpatne12@gmail.com",\n  "linkedin": "https://www.linkedin.com/in/aneeshpatne",\n  "github": "https://github.com/aneeshpatne",\n  "leetcode": "https://leetcode.com/aneeshpatne"\n}',
  'metadata': {'section': 'contact'}},
 {'id': 'education_1',
  'text': '{\n  "degree": "M.Tech in Electronics and Telecommunication",\n  "institution": "Veermata Jijabai Technological Institute",\n  "location": "Mumbai, Maharashtra",\n  "duration": "2023 - 2025",\n  "details": "Specialized in Machine Learning and Signal Processing. Relevant coursework includes Advanced Algorithms, Neural Net

In [30]:
# One chunk per certification; ids are numbered from 1 (cert_1, cert_2, ...).
for idx, cert in enumerate(resume_data["certifications"], start=1):
    cert_text = json.dumps(cert, indent=2)
    chunks.append(
        dict(
            id=f"cert_{idx}",
            text=cert_text,
            metadata=dict(section="certification"),
        )
    )


In [31]:
# Holds the (id, embedding, metadata) tuples that are later passed to index.upsert().
vectors_to_upsert = []

In [32]:
# Embed every chunk and collect (id, embedding, metadata) tuples for Pinecone.
# The list is rebuilt from scratch here so that re-running this cell replaces
# the previous vectors instead of appending a second copy of each one.
vectors_to_upsert = []
for chunk in chunks:
    text = chunk["text"]
    chunk_id = chunk["id"]
    metadata = chunk["metadata"]

    # Embed the chunk; .tolist() turns the encoder output into a plain Python list
    embedding = model.encode(text).tolist()

    # Prepare the vector for upsert
    vectors_to_upsert.append((chunk_id, embedding, metadata))

In [36]:
# Ask the model for its output size instead of measuring the `embedding`
# variable left over from the embedding loop: that variable does not exist
# when no chunk was embedded or when the loop cell has not run in this kernel.
embedding_dimension = model.get_sentence_embedding_dimension()


In [10]:
# Pinecone client; the API key is read from the PINECONE_KEY environment variable.
pc = Pinecone(api_key=os.getenv("PINECONE_KEY"))

In [12]:
# Name of the Pinecone index that holds the resume vectors.
index_name = "resume-index-full"

In [38]:
# Similarity metric the index is created with.
metric = "cosine"
# Create the index only when no index with this name exists yet, so that
# re-running the cell does not attempt a second create.
if index_name not in pc.list_indexes().names():
    # Create the index
    pc.create_index(
        name=index_name,
        # Vector length the index is created for; set from the embedding model's output size.
        dimension=embedding_dimension,
        metric=metric,
        spec=ServerlessSpec(
            cloud="aws",  # Choose your cloud provider
            region="us-east-1"  # Choose your region
        )
    )
# Printed on both paths: index just created, or index already present.
print(f"Index '{index_name}' is ready.")

Index 'resume-index-full' is ready.


In [13]:
# Handle to the named index; used for the upsert and query calls in the following cells.
index = pc.Index(index_name)

In [41]:
# Send all prepared vectors to the index in a single upsert call.
index.upsert(vectors=vectors_to_upsert)
# Report the number of vectors actually sent; len(chunks) can drift away from
# it when the chunk-building cells are re-run without re-embedding.
print(f"Upserted {len(vectors_to_upsert)} chunks into the Pinecone index: '{index_name}'")

Upserted 10 chunks into the Pinecone index: 'resume-index-full'


In [17]:
def query_index(query_text: str, top_k: int = 3):
    """Embed a query, search the Pinecone index and print the best matches.

    Args:
        query_text: Natural-language question to search for.
        top_k: Maximum number of matches requested from the index.

    Returns:
        None. The score, chunk id and section of every returned match are
        printed to standard output.
    """
    # Encode the query with the same `model` object that embedded the chunks.
    q_embedding = model.encode(query_text).tolist()
    results = index.query(
        vector=q_embedding,
        top_k=top_k,
        include_metadata=True
    )
    print(f"\nQuery: {query_text}")
    print(f"Top {top_k} Chunks:\n" + "-"*50)
    for match in results.matches:
        # Fall back to an empty dict so a match without metadata does not
        # crash on .get().
        match_metadata = match.metadata or {}
        print(f"Score: {match.score}")
        # Several chunks share a section (e.g. every project), so the id is
        # needed to tell the matches apart.
        print(f"ID: {match.id}")
        print("Section:", match_metadata.get("section", "N/A"))
        print("-" * 50)

In [16]:
# Example query against the resume index.
# NOTE(review): the recorded output of this cell contains "Value:" lines that the current query_index does not print, so it was produced by an earlier version of the function — re-run to refresh it.
user_query = "Does Aneesh Know React.JS ?"
query_index(user_query, top_k=3)


Query: Does Aneesh Know React.JS ?
Top 3 Chunks:
--------------------------------------------------
Score: 0.275810212
Value: None
Section: contact
--------------------------------------------------
Score: 0.244253948
Value: None
Section: skills
--------------------------------------------------
Score: 0.186942801
Value: None
Section: project
--------------------------------------------------
