In [42]:
import os
from pinecone import Pinecone, ServerlessSpec
from dotenv import load_dotenv

load_dotenv()

oapi_key=os.getenv("OPENAI_API_KEY")
api_key=os.getenv("PINECONE_API_KEY")
pc = Pinecone(api_key=api_key)

In [43]:
job_description = "Job Title: TypeScript Developer. Overview: We are looking for a TypeScript Developer to join our team. You will work on building and maintaining web applications, collaborating with other developers to deliver high-quality software. This is a great opportunity to work in a dynamic environment and grow your skills. Responsibilities: Develop and maintain applications using TypeScript. Build and maintain APIs. Troubleshoot and fix software bugs. Collaborate with team members to deliver projects. Participate in code reviews and testing. Qualifications: Experience with TypeScript and JavaScript. Familiarity with modern front-end frameworks like React, Angular, or Vue. Understanding of RESTful APIs. Experience with version control (Git). Good problem-solving skills. Nice-to-Have: Experience with Node.js or backend development. Familiarity with cloud services (AWS, Azure). Experience with testing tools like Jest. How to Apply: Submit your application through our website or via [Application Link]. Only selected candidates will be contacted."

In [44]:
index_name = "applicants-resume"

if not pc.has_index(index_name):
    print("creating new index")

    openai_embedding_small_3_dimentions = 1536

    pc.create_index(
        name=index_name,
        dimension=openai_embedding_small_3_dimentions,
        metric="cosine",
        spec=ServerlessSpec(
            cloud="aws", 
            region="us-east-1"
        ) 
    ) 

In [45]:
import time

while not pc.describe_index(index_name).status["ready"]:
    time.sleep(1)

index = pc.Index(index_name)


In [46]:
from langchain_community.document_loaders import PyPDFLoader


namespace = "resume1"
data_applicant = (
    {
        "name": "sarah",
        "file_path": "./dummy-cv/sarah-highly_accurate.pdf",
        "gender": "female",
    },
    {
        "name": "michael",
        "file_path": "./dummy-cv/michael-highly_accurate.pdf",
        "gender": "male",
    },
    {
        "name": "emily",
        "file_path": "./dummy-cv/emily-somewhat-relevant.pdf",
        "gender": "female",
    },
    {
        "name": "david",
        "file_path": "./dummy-cv/david-less-relevant.pdf",
        "gender": "male",
    },
    {
        "name": "anna",
        "file_path": "./dummy-cv/anna-irelevant.pdf",
        "gender": "female"
    },
    {
        "name": "fathur",
        "file_path": "./dummy-cv/sample-cv.pdf",
        "gender": "male"
    }
)





In [47]:
from langchain_pinecone import PineconeVectorStore, PineconeEmbeddings
from langchain_openai import OpenAIEmbeddings

embedding = OpenAIEmbeddings(
    model="text-embedding-3-small",
    api_key=oapi_key,
)

In [50]:
from langchain_core.documents import Document

for x in data_applicant:
    loader = PyPDFLoader(x["file_path"], extract_images=True)
    pages = loader.load()

    doc: list[Document]= []

    for y in pages:
        doc.append(
            Document(y.page_content, metadata={ "gender": x["gender"], "name": x["name"]})
        )

    docsearch = PineconeVectorStore.from_documents(
        embedding=embedding,
        index_name=index_name,
        documents=doc,
        namespace=namespace
    )

    print(docsearch)

[Document(metadata={'gender': 'female', 'name': 'sarah'}, page_content='1. Sarah Johnson (Senior TypeScript Developer)\nSummary: Sarah has over 6 years of experience as a full-stack developer, specializing in TypeScript. She has led teams in building enterprise-\nlevel applications, focusing on clean, maintainable code and scalable architectures. She has worked closely with DevOps teams for continuous \nintegration and deployment, ensuring smooth workflows for complex projects.\n \nExperience:\n \nLead TypeScript Developer at XYZ Corp (4 years): Led a team of 10 developers in building a cloud-based application for financial data \nmanagement using TypeScript, React, and Node.js. Developed scalable microservices and integrated third-party APIs for real-time data \nupdates. Worked with Agile methodologies to ensure regular sprint reviews and code quality.\nFull-Stack Developer at ABC Tech (2 years): Built and maintained a highly interactive e-commerce platform using TypeScript and Angula

In [53]:

vectors = embedding.embed_query(job_description)

qr = index.query(
    namespace=namespace, 
    top_k=5,
    # filter={
    #     "gender": {"$eq": "male"},
    # },
    vector=vectors,
    include_values=False,
    include_metadata=True
)

qr

{'matches': [{'id': 'f6bc7eb9-fe24-41b2-a4e0-cf1fc92a0ab8',
              'metadata': {'gender': 'male',
                           'name': 'michael',
                           'text': 'Michael Lee (Mid-Level TypeScript '
                                   'Engineer)\n'
                                   'Summary: Michael is a mid-level engineer '
                                   'with 5 years of experience focusing on '
                                   'TypeScript and JavaScript development. He '
                                   'has contributed \n'
                                   'to multiple large-scale applications and '
                                   'is proficient in both front-end and '
                                   'back-end technologies, with experience in '
                                   'integrating various APIs \n'
                                   'and deploying services to cloud '
                                   'environments.\n'
               