In [16]:
!pip install langchain-google-genai langchain-together pypdf docx2txt

Collecting docx2txt
  Downloading docx2txt-0.9-py3-none-any.whl.metadata (529 bytes)
Downloading docx2txt-0.9-py3-none-any.whl (4.0 kB)
Installing collected packages: docx2txt
Successfully installed docx2txt-0.9




## Imports

In [17]:
import json
import os
from dotenv import load_dotenv
from langchain_together import ChatTogether
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_community.document_loaders import PyPDFLoader, TextLoader, Docx2txtLoader
from langchain.prompts import PromptTemplate

In [3]:
load_dotenv()

True

In [4]:
model_google = ChatGoogleGenerativeAI(model='gemini-1.5-flash',api_key=os.getenv('GOOGLE_API_KEY'))

In [5]:
# model_google.invoke('Summarize the bias-variance tradeoff.').content

In [6]:
PROMPT_TEMPLATE = """
You are an expert resume parser. Your task is to extract structured information from the resume text below.

Return the output as a **single valid JSON object** with the exact following schema:

{{
  "Name": "string",
  "Email": "string",
  "Phone": "string",
  "LinkedIn": "string",
  "Skills": ["string"],
  "Education": ["string"],
  "Experience": ["string"],
  "Projects": ["string"],
  "Certifications": ["string"],
  "Languages": ["string"]
}}

Rules:
- If a field cannot be found, set its value to "No idea".
- Do not add explanations, notes, or extra text — output JSON only.
- For lists (Skills, Education, Experience, Projects, Certifications, Languages), return an array of short strings.
- Keep the JSON compact and properly formatted.

Resume text:
{text}
"""


In [7]:
# prompt = PromptTemplate(
#     template=PROMPT_TEMPLATE,
#     input_variables=["text"])
# formatted_prompt = prompt.format(text="Jane Doe, ML Engineer skilled in TensorFlow, PyTorch...")


# Instantiation using from_template (recommended, automatically detect variables)
# build prompt
prompt = PromptTemplate.from_template(PROMPT_TEMPLATE)

In [8]:
formatted_prompt = prompt.format(text="John Doe, Software Engineer with skills in Python, SQL...")

In [9]:
print(formatted_prompt)


You are an expert resume parser. Your task is to extract structured information from the resume text below.

Return the output as a **single valid JSON object** with the exact following schema:

{
  "Name": "string",
  "Email": "string",
  "Phone": "string",
  "LinkedIn": "string",
  "Skills": ["string"],
  "Education": ["string"],
  "Experience": ["string"],
  "Projects": ["string"],
  "Certifications": ["string"],
  "Languages": ["string"]
}

Rules:
- If a field cannot be found, set its value to "No idea".
- Do not add explanations, notes, or extra text — output JSON only.
- For lists (Skills, Education, Experience, Projects, Certifications, Languages), return an array of short strings.
- Keep the JSON compact and properly formatted.

Resume text:
John Doe, Software Engineer with skills in Python, SQL...



In [18]:
def load_resume(file_path):
    if file_path.lower().endswith(".pdf"):
        loader = PyPDFLoader(file_path)
    elif file_path.lower().endswith(".docx"):
        loader = Docx2txtLoader(file_path)
    elif file_path.lower().endswith(".txt"):
        loader = TextLoader(file_path)
    else:
        return None
    return loader.load()

In [32]:
extracted_text = load_resume("info resume.pdf")

In [33]:
extracted_text

[Document(metadata={'producer': 'Microsoft® Word 2016', 'creator': 'Microsoft® Word 2016', 'creationdate': '2024-04-30T04:48:28-07:00', 'author': 'ismail - [2010]', 'moddate': '2024-04-30T04:48:28-07:00', 'source': 'info resume.pdf', 'total_pages': 2, 'page': 0, 'page_label': '1'}, page_content='Name: John Doe \nAddress: 123 Main Street, Cityville, State, Zip Code \nPhone: (123) 456-7890 \nEmail: johndoe@email.com \n \nObjective: \nDedicated and results-oriented software engineer with a passion for developing innovative solutions. \nSeeking a challenging position in a dynamic company where I can utilize my technical skills to contribute \nto project success and personal growth. \n \nSummary of Qualifications: \n- Bachelor of Science in Computer Science from XYZ University \n- Proficient in multiple programming languages including Python, Java, and C++ \n- Extensive experience with web development frameworks such as Django and React \n- Strong understanding of software development metho

In [35]:
# [str(doc) for doc in extracted_text]

["page_content='Name: John Doe \nAddress: 123 Main Street, Cityville, State, Zip Code \nPhone: (123) 456-7890 \nEmail: johndoe@email.com \n \nObjective: \nDedicated and results-oriented software engineer with a passion for developing innovative solutions. \nSeeking a challenging position in a dynamic company where I can utilize my technical skills to contribute \nto project success and personal growth. \n \nSummary of Qualifications: \n- Bachelor of Science in Computer Science from XYZ University \n- Proficient in multiple programming languages including Python, Java, and C++ \n- Extensive experience with web development frameworks such as Django and React \n- Strong understanding of software development methodologies, including Agile and Scrum \n- Excellent problem-solving and analytical skills \n- Effective communication and teamwork abilities \n \nEducation: \n- Bachelor of Science in Computer Science \n  XYZ University, Cityville, State \n  GPA: 3.9/4.0 \n \nTechnical Skills: \n- P

In [37]:
extracted_text = "\n\n".join([str(doc) for doc in extracted_text])

In [40]:
extracted_text

"page_content='Name: John Doe \nAddress: 123 Main Street, Cityville, State, Zip Code \nPhone: (123) 456-7890 \nEmail: johndoe@email.com \n \nObjective: \nDedicated and results-oriented software engineer with a passion for developing innovative solutions. \nSeeking a challenging position in a dynamic company where I can utilize my technical skills to contribute \nto project success and personal growth. \n \nSummary of Qualifications: \n- Bachelor of Science in Computer Science from XYZ University \n- Proficient in multiple programming languages including Python, Java, and C++ \n- Extensive experience with web development frameworks such as Django and React \n- Strong understanding of software development methodologies, including Agile and Scrum \n- Excellent problem-solving and analytical skills \n- Effective communication and teamwork abilities \n \nEducation: \n- Bachelor of Science in Computer Science \n  XYZ University, Cityville, State \n  GPA: 3.9/4.0 \n \nTechnical Skills: \n- Pr

In [None]:
model_google.i