In [5]:
from azure.core.credentials import AzureKeyCredential
from azure.ai.documentintelligence import DocumentIntelligenceClient
import os
from dotenv import load_dotenv

load_dotenv()
endpoint = os.getenv("DOCUMENT_INTELLIGENCE_ENDPOINT")
key = os.getenv("DOCUMENT_INTELLIGENCE_KEY")
client = DocumentIntelligenceClient(endpoint=endpoint, credential=AzureKeyCredential(key))
model_id = "prebuilt-read"
pdf_path = r"D:\Python\ResuMetrics\data\resumes\10030015.pdf"

with open(pdf_path, "rb") as f:
    poller = client.begin_analyze_document(model_id, f)
result = poller.result()

resume_text = ""
for page in result.pages:
    for line in page.lines:
        resume_text += line.content + "\n"
print(resume_text)

ENGINEERING LAB TECHNICIAN
Career Focus
My main objective in seeking employment with Triumph Actuation Systems Inc. is to work in a professional atmosphere where I can utilize my
skills and continue to gain experience in the aerospace industry to advance in my career.
Professional Experience
Engineering Lab Technician Oct 2016 to Current
Company Name 11/4 City, State
· Responsible for testing various seat structures to meet specific certification requirements. Â
. Maintain and calibrate test instruments to ensure testing capabilities are maintained.
. Ensure data is captured and recorded correctly for certification test reports.
· Duties also dynamic test set-up and static suite testing.
Engineering Lab Technician, Sr. Specialist Apr 2012 to Oct 2016
Company Name 11/4 City , State
. Utilized skills learned from Lab View Course 1 training to construct and maintain Lab View VI programs.
· Responsible for fabricating and maintaining hydraulic/electrical test equipment to complete developm

In [2]:
from openai import AzureOpenAI
import os
from dotenv import load_dotenv

load_dotenv()
client = AzureOpenAI(
    azure_endpoint=os.getenv("OPENAI_ENDPOINT"),
    api_key=os.getenv("OPENAI_KEY"),
    api_version="2024-02-01"
)

text = "Software Engineer with 5 years of experience in Python and Azure."
response = client.embeddings.create(input=text, model="text-embedding-ada-002")
embedding = response.data[0].embedding
print(len(embedding), embedding[:10])  # Should be 1536-dimensional

1536 [-0.010151837021112442, -0.01857059635221958, 0.010838402435183525, -0.022716650739312172, 0.0009323625708930194, 0.018064003437757492, -0.025582896545529366, -0.012264858931303024, -0.011791595257818699, -0.007732193451374769]


In [None]:
import os
import glob
import pandas as pd
from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient
from azure.search.documents.models import VectorQuery
from azure.ai.documentintelligence import DocumentIntelligenceClient
from openai import AzureOpenAI
from dotenv import load_dotenv

load_dotenv()
openai_client = AzureOpenAI(
    azure_endpoint=os.getenv("OPENAI_ENDPOINT"),
    api_key=os.getenv("OPENAI_KEY"),
    api_version="2024-02-01"
)
doc_intel_client = DocumentIntelligenceClient(
    endpoint=os.getenv("DOCUMENT_INTELLIGENCE_ENDPOINT"),
    credential=AzureKeyCredential(os.getenv("DOCUMENT_INTELLIGENCE_KEY"))
)
service_name = os.getenv("SEARCH_SERVICE_NAME")
index_name = "resume-index"
api_key = os.getenv("SEARCH_KEY")
endpoint = f"https://{service_name}.search.windows.net"
search_client = SearchClient(endpoint=endpoint, index_name=index_name, credential=AzureKeyCredential(api_key))

def extract_text_from_pdf(pdf_path):
    with open(pdf_path, "rb") as f:
        poller = doc_intel_client.begin_analyze_document("prebuilt-read", f)
        result = poller.result()
    text = ""
    for page in result.pages:
        for line in page.lines:
            text += line.content + "\n"
    return text

resume_dir = "data/resumes/"
documents = []
for idx, pdf_path in enumerate(glob.glob(os.path.join(resume_dir, "*.pdf"))):
    resume_text = extract_text_from_pdf(pdf_path)
    resume_embedding = openai_client.embeddings.create(
        input=resume_text, model="text-embedding-ada-002"
    ).data[0].embedding
    document = {
        "id": f"resume_{idx+1}",
        "content": resume_text,
        "embedding": resume_embedding
    }
    documents.append(document)
if documents:
    search_client.upload_documents(documents)
    print(f"Uploaded {len(documents)} resumes to resume-index.")
else:
    print("No resumes found in data/resumes/.")

job_desc_csv = "data/job_descriptions/job_descriptions.csv"
job_df = pd.read_csv(job_desc_csv)
print("Available job descriptions:")
for idx, row in job_df.iterrows():
    print(f"Index {idx}: {row['description'][:50]}...")

job_index = 26  # Example: Select first job description
# job_index = 1  # For second job description
# job_index = 2  # For third job description
job_description = job_df.iloc[job_index]["description"]
print(f"Selected job description: {job_description}")

job_embedding = openai_client.embeddings.create(
    input=job_description, model="text-embedding-ada-002"
).data[0].embedding
vector_query = VectorQuery(
    vector=job_embedding,
    k_nearest_neighbors=10,
    fields="embedding"
)
results = search_client.search(
    search_text=None,
    vector_queries=[vector_query],
    select=["id", "content"],
    top=10
)

best_match = None
highest_score = -1
print("\nSearch Results:")
for result in results:
    score = result["@search.score"]
    print(f"Resume ID: {result['id']}, Content: {result['content'][:100]}..., Score: {score}")
    if score > highest_score:
        highest_score = score
        best_match = result

if best_match:
    print(f"\nBest Match: Resume ID: {best_match['id']}, Content: {best_match['content'][:100]}..., Score: {highest_score}")
else:
    print("No matches found.")

TypeError: Session.request() got an unexpected keyword argument 'vector'