# Installing LlamaParse

In [1]:
%pip install llama-parse

Collecting llama-parse
  Downloading llama_parse-0.6.2-py3-none-any.whl.metadata (6.9 kB)
Collecting llama-cloud-services>=0.6.2 (from llama-parse)
  Downloading llama_cloud_services-0.6.3-py3-none-any.whl.metadata (2.9 kB)
Collecting llama-cloud<0.2.0,>=0.1.11 (from llama-cloud-services>=0.6.2->llama-parse)
  Downloading llama_cloud-0.1.13-py3-none-any.whl.metadata (800 bytes)
Collecting llama-index-core>=0.11.0 (from llama-cloud-services>=0.6.2->llama-parse)
  Downloading llama_index_core-0.12.22-py3-none-any.whl.metadata (2.5 kB)
Collecting python-dotenv<2.0.0,>=1.0.1 (from llama-cloud-services>=0.6.2->llama-parse)
  Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Collecting dataclasses-json (from llama-index-core>=0.11.0->llama-cloud-services>=0.6.2->llama-parse)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting dirtyjson<2.0.0,>=1.0.8 (from llama-index-core>=0.11.0->llama-cloud-services>=0.6.2->llama-parse)
  Downloading dirtyjson-

## Initializing LlamaParse

In [None]:
import asyncio
import nest_asyncio
from llama_parse import LlamaParse
import os
from getpass import getpass

# Patch asyncio so nested event loops are allowed inside the notebook kernel.
nest_asyncio.apply()

# Initialize LlamaParse.
# The API key is never stored in the notebook: it is read from the
# LLAMA_CLOUD_API_KEY environment variable, and only if that is unset or empty
# is the user prompted for it (getpass keeps the value out of the cell output).
llama_api_key = os.environ.get("LLAMA_CLOUD_API_KEY") or getpass("LlamaCloud API key: ")
llama_parse = LlamaParse(api_key=llama_api_key)

# Folder that holds the resume PDFs to be parsed.
data_folder = "data"

### Testing a single document using LlamaParse

In [4]:
pdf_files = [f for f in os.listdir(data_folder) if f.endswith(".pdf")]
if pdf_files:
    first_pdf = os.path.join(data_folder, pdf_files[0])
    print(f"Processing: {first_pdf}")

    # Extract text asynchronously using LlamaParse
    text = await llama_parse.aload_data(first_pdf)
else:
    print("No PDFs found in the data folder.")

print("Extracted Text:\n", text[0].text_resource.text)
resume = text[0].text_resource.text

Processing: data/10641230.pdf
Started parsing the file under job_id 8d5c6807-35e8-466d-a037-c254d35e5d0f
Extracted Text:
 IT MANAGEMENT
Career Overview
Detail-oriented professional with extensive Information Technology experience in hardware and software troubleshooting/management. With
additional experience in networking, task automation, server technologies, digital graphic design, client + server side programming technologies and
popular industry software packages including Microsoft Office and Adobe Creative Suite.
Qualifications
 Education and certificatesTroubleshooting q CompTIA A+ certified (2012) q Advanced troubleshooting q CompTIA Net+ certification class q
 Virus and spyware removal qMCSA certification class q Computer diagnostics and repair q Web Development I + II class q Gaming console
 repair and q Digital media manipulation class modification q Java programming class q Mobile device repair q Hardware support and
 Programming and Web troubleshooting q HTML - HTML5 q Opt

Create a folder to save parsed resumes

In [5]:
# Directory that receives one .txt file per parsed resume.
output_folder = "parsed"

# Create it up front; exist_ok=True makes the cell safe to re-run.
os.makedirs(name=output_folder, exist_ok=True)

Parse and extract text from multiple PDFs

In [6]:
for pdf_file in pdf_files:
    pdf_path = os.path.join(data_folder, pdf_file)
    print(f"Processing: {pdf_path}")

    # Extract text asynchronously using LlamaParse
    extracted_text = await llama_parse.aload_data(pdf_path)

    # Define output text file path
    text_file_path = os.path.join(output_folder, f"{os.path.splitext(pdf_file)[0]}.txt")

    # Write extracted text to file
    with open(text_file_path, "w", encoding="utf-8") as text_file:
        text_file.write(extracted_text[0].text_resource.text)

    print(f"Saved extracted text to: {text_file_path}")


Processing: data/10641230.pdf
Started parsing the file under job_id 6016539b-f909-474d-af59-ae42f41f954e
Saved extracted text to: parsed/10641230.txt
Processing: data/11584809.pdf
Started parsing the file under job_id 1d6dc686-5a41-4daf-8c51-d996f3e75417
Saved extracted text to: parsed/11584809.txt
Processing: data/10839851.pdf
Started parsing the file under job_id 40d5a3c1-c1c6-4029-9187-0f59f303656a
Saved extracted text to: parsed/10839851.txt
Processing: data/11580408.pdf
Started parsing the file under job_id f79317eb-52a4-4add-9334-82251b16380d
Saved extracted text to: parsed/11580408.txt
Processing: data/10840430.pdf
Started parsing the file under job_id 0691c837-74c5-4ac8-a08a-d1c6199f3f91
Saved extracted text to: parsed/10840430.txt
Processing: data/12045067.pdf
Started parsing the file under job_id fc1cf6a8-8b6b-4f23-b21c-1a8459a488f9
.

CancelledError: 

In [7]:
# Job description the candidate is evaluated against. The text is interpolated
# verbatim into the system prompt, so any edit here changes what the model sees.
job_role = """
INFORMATION TECHNOLOGY TECHNICIAN I

Summary:
Versatile Systems Administrator possessing superior troubleshooting skills for networking issues, end user problems, and network security.
Experienced in server management, systems analysis, and offering in-depth understanding of IT infrastructure areas.

Highlights:
- Active Directory, Office 365, Azure
- PowerShell, VBScript, Microsoft Exchange
- VMWare, Disaster Recovery, Storage Management
"""

In [8]:
import json

# Scoring rubric sent to the model alongside each resume. Every category lists
# its maximum score and the per-criterion points that add up to it; the four
# category maxima add up to total_points.
_technical_skills = {
    "max_points": 20,
    "criteria": {
        "matches_required_skills": 10,
        "matches_preferred_skills": 5,
        "demonstrates_relevant_projects": 5,
    },
}

_experience = {
    "max_points": 15,
    "criteria": {
        "years_of_experience_meets_or_exceeds_requirement": 10,
        "relevant_industry_experience": 5,
    },
}

_certifications_education = {
    "max_points": 5,
    "criteria": {
        "relevant_certifications": 3,
        "degree_in_related_field": 2,
    },
}

_soft_skills_achievements = {
    "max_points": 10,
    "criteria": {
        "communication_teamwork_problem_solving": 5,
        "leadership_or_notable_contributions": 5,
    },
}

# Key order is kept as-is because the rubric is serialised with json.dumps.
rubric_json = {
    "total_points": 50,
    "categories": {
        "technical_skills": _technical_skills,
        "experience": _experience,
        "certifications_education": _certifications_education,
        "soft_skills_achievements": _soft_skills_achievements,
    },
}

In [9]:
# System prompt: task statement, then the job description, then the required
# output format, each separated by a single newline.
system_prompt = "\n".join(
    [
        "Your job is to rank a candidate for the job role.",
        job_role,
        "Return as a JSON object.",
    ]
)
print(system_prompt)

Your job is to rank a candidate for the job role.

INFORMATION TECHNOLOGY TECHNICIAN I

Summary:
Versatile Systems Administrator possessing superior troubleshooting skills for networking issues, end user problems, and network security.
Experienced in server management, systems analysis, and offering in-depth understanding of IT infrastructure areas.

Highlights:
- Active Directory, Office 365, Azure
- PowerShell, VBScript, Microsoft Exchange
- VMWare, Disaster Recovery, Storage Management

Return as a JSON object.


In [10]:
# Serialise the rubric once, then assemble the user turn piece by piece:
# the resume text first, the rubric second, the formatting instruction last.
rubric_text = json.dumps(rubric_json, indent=2)
user_message = (
    "\nresume:\n"
    f"{resume}\n"
    "rubric:\n"
    f"{rubric_text}\n"
    "Please ensure output is exactly matching json structure because the output will be directly used in code\n"
)
print(user_message)


resume:
IT MANAGEMENT
Career Overview
Detail-oriented professional with extensive Information Technology experience in hardware and software troubleshooting/management. With
additional experience in networking, task automation, server technologies, digital graphic design, client + server side programming technologies and
popular industry software packages including Microsoft Office and Adobe Creative Suite.
Qualifications
 Education and certificatesTroubleshooting q CompTIA A+ certified (2012) q Advanced troubleshooting q CompTIA Net+ certification class q
 Virus and spyware removal qMCSA certification class q Computer diagnostics and repair q Web Development I + II class q Gaming console
 repair and q Digital media manipulation class modification q Java programming class q Mobile device repair q Hardware support and
 Programming and Web troubleshooting q HTML - HTML5 q Optimizing and performance tuning q XML q Audio and video technologies q
 CSS - CSS3 q Medical technology installati

In [11]:
# huggingface_hub provides the InferenceClient used below to call the chat model.
%pip install huggingface_hub



In [None]:
import os
from getpass import getpass

from huggingface_hub import InferenceClient

# The token is never stored in the notebook: it is read from the HF_TOKEN
# environment variable, and only if that is unset or empty is the user prompted
# for it (getpass keeps the value out of the cell output).
hf_token = os.environ.get("HF_TOKEN") or getpass("Hugging Face API token: ")
client = InferenceClient(api_key=hf_token)

In [13]:
# Conversation sent to the model: the ranking instructions as the system turn,
# the resume plus rubric as the user turn.
system_turn = {"role": "system", "content": system_prompt}
user_turn = {"role": "user", "content": user_message}
messages = [system_turn, user_turn]

# Sampling settings, kept together so they are easy to spot and tune.
generation_params = {
    "temperature": 0.5,
    "max_tokens": 2048,
    "top_p": 0.7,
}

completion = client.chat.completions.create(
    model="Qwen/Qwen2.5-72B-Instruct",
    messages=messages,
    **generation_params,
)

# Show the text of the first returned choice.
first_reply = completion.choices[0].message
print(first_reply.content)


```json
{
  "total_points": 42,
  "categories": {
    "technical_skills": {
      "max_points": 20,
      "criteria": {
        "matches_required_skills": 10,
        "matches_preferred_skills": 5,
        "demonstrates_relevant_projects": 5
      },
      "score": 18
    },
    "experience": {
      "max_points": 15,
      "criteria": {
        "years_of_experience_meets_or_exceeds_requirement": 10,
        "relevant_industry_experience": 5
      },
      "score": 12
    },
    "certifications_education": {
      "max_points": 5,
      "criteria": {
        "relevant_certifications": 3,
        "degree_in_related_field": 2
      },
      "score": 4
    },
    "soft_skills_achievements": {
      "max_points": 10,
      "criteria": {
        "communication_teamwork_problem_solving": 5,
        "leadership_or_notable_contributions": 5
      },
      "score": 8
    }
  }
}
```


In [14]:
# Keep the text of the first choice for the clean-up step.
first_choice = completion.choices[0]
response_text = first_choice.message.content

In [15]:
import re

# Matches an underscore preceded by a literal backslash; each match is
# replaced by a bare underscore.
escaped_underscore = re.compile(r'\\_')
cleaned_text = escaped_underscore.sub('_', response_text)

print(cleaned_text)

```json
{
  "total_points": 42,
  "categories": {
    "technical_skills": {
      "max_points": 20,
      "criteria": {
        "matches_required_skills": 10,
        "matches_preferred_skills": 5,
        "demonstrates_relevant_projects": 5
      },
      "score": 18
    },
    "experience": {
      "max_points": 15,
      "criteria": {
        "years_of_experience_meets_or_exceeds_requirement": 10,
        "relevant_industry_experience": 5
      },
      "score": 12
    },
    "certifications_education": {
      "max_points": 5,
      "criteria": {
        "relevant_certifications": 3,
        "degree_in_related_field": 2
      },
      "score": 4
    },
    "soft_skills_achievements": {
      "max_points": 10,
      "criteria": {
        "communication_teamwork_problem_solving": 5,
        "leadership_or_notable_contributions": 5
      },
      "score": 8
    }
  }
}
```


In [16]:
def _extract_json_payload(reply):
    """Pull the JSON document out of an LLM reply.

    Args:
        reply: Raw text returned by the model (may be None or empty).

    Returns:
        The JSON text, guaranteed to be accepted by ``json.loads``, or ``None``
        when the reply contains no parseable JSON.
    """
    if not reply:
        return None
    # Replace backslash-escaped underscores with plain underscores.
    unescaped = re.sub(r'\\_', '_', reply)
    # Prefer the body of a fenced code block (with or without the "json" tag);
    # if the model answered with bare JSON, fall back to the whole reply.
    fenced = re.search(r"```(?:json)?[ \t]*\n(.*?)\n```", unescaped, re.DOTALL)
    candidate = fenced.group(1) if fenced else unescaped.strip()
    # Validate before anything is written to disk: the saved files are meant
    # to be loaded directly by code.
    try:
        json.loads(candidate)
    except json.JSONDecodeError:
        return None
    return candidate


os.makedirs("scores", exist_ok=True)

# The rubric is the same for every resume, so serialise it once.
rubric_text = json.dumps(rubric_json, indent=2)
# Resumes whose reply could not be turned into valid JSON.
failed_files = []

for filename in os.listdir("parsed/"):
    if not filename.endswith(".txt"):
        continue
    file_path = os.path.join("parsed/", filename)

    # Read resume content
    with open(file_path, "r", encoding="utf-8") as f:
        resume = f.read()

    # Construct user message. It is built from unindented pieces so that the
    # loop's indentation does not leak into the prompt as leading whitespace.
    user_message = (
        "\nresume:\n"
        f"{resume}\n"
        "rubric:\n"
        f"{rubric_text}\n"
        "Please ensure output is exactly matching JSON structure because the output will be directly used in code\n"
    )

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_message}
    ]

    # Get LLM completion
    completion = client.chat.completions.create(
        model="Qwen/Qwen2.5-72B-Instruct",
        messages=messages,
        temperature=0.5,
        max_tokens=2048,
        top_p=0.7
    )

    # Extract response and clean it
    response_text = completion.choices[0].message.content
    cleaned_text = _extract_json_payload(response_text)
    if cleaned_text is None:
        # One malformed reply must not abort the whole batch; record it and move on.
        print(f"Skipped {filename}: model reply did not contain valid JSON")
        failed_files.append(filename)
        continue
    print(cleaned_text)

    # Save cleaned JSON output. splitext swaps only the extension, whereas
    # str.replace would rewrite every ".txt" occurring in the name.
    output_path = os.path.join("scores/", os.path.splitext(filename)[0] + ".json")
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(cleaned_text)

    print(f"Processed {filename} → Saved to {output_path}")

if failed_files:
    print(f"{len(failed_files)} file(s) produced no valid JSON and need a re-run: {failed_files}")

{
  "total_points": 44,
  "categories": {
    "technical_skills": {
      "points": 18,
      "details": {
        "matches_required_skills": 10,
        "matches_preferred_skills": 5,
        "demonstrates_relevant_projects": 3
      }
    },
    "experience": {
      "points": 15,
      "details": {
        "years_of_experience_meets_or_exceeds_requirement": 10,
        "relevant_industry_experience": 5
      }
    },
    "certifications_education": {
      "points": 4,
      "details": {
        "relevant_certifications": 3,
        "degree_in_related_field": 1
      }
    },
    "soft_skills_achievements": {
      "points": 7,
      "details": {
        "communication_teamwork_problem_solving": 5,
        "leadership_or_notable_contributions": 2
      }
    }
  }
}
Processed 11584809.txt → Saved to scores/11584809.json
{
  "total_points": 46,
  "categories": {
    "technical_skills": {
      "points": 18,
      "details": {
        "matches_required_skills": 10,
        "matches_pr