In [1]:
!pip install langchain-google-genai langchain-together pypdf docx2txt





## Imports

In [2]:
import json
import os
from dotenv import load_dotenv
from langchain_together import ChatTogether
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_community.document_loaders import PyPDFLoader, TextLoader, Docx2txtLoader
from langchain.prompts import PromptTemplate

In [3]:
load_dotenv()

True

In [4]:
model = ChatGoogleGenerativeAI(model='gemini-1.5-flash',api_key=os.getenv('GOOGLE_API_KEY'))

In [5]:
# model.invoke('Summarize the bias-variance tradeoff.').content

In [6]:
PROMPT_TEMPLATE = """
You are an expert resume parser. Your task is to extract structured information from the resume text below.

Return the output as a **single valid JSON object** with the exact following schema:

{{
  "Name": "string",
  "Email": "string",
  "Phone": "string",
  "LinkedIn": "string",
  "Skills": ["string"],
  "Education": ["string"],
  "Experience": ["string"],
  "Projects": ["string"],
  "Certifications": ["string"],
  "Languages": ["string"]
}}

Rules:
- If a field cannot be found, set its value to "No idea".
- Do not add explanations, notes, or extra text — output JSON only.
- For lists (Skills, Education, Experience, Projects, Certifications, Languages), return an array of short strings.
- Keep the JSON compact and properly formatted.

Resume text:
{text}
"""


In [7]:
# prompt = PromptTemplate(
#     template=PROMPT_TEMPLATE,
#     input_variables=["text"])
# formatted_prompt = prompt.format(text="Jane Doe, ML Engineer skilled in TensorFlow, PyTorch...")


# Instantiation using from_template (recommended, automatically detect variables)
# build prompt
prompt = PromptTemplate.from_template(PROMPT_TEMPLATE)

In [8]:
# formatted_prompt = prompt.format(text="John Doe, Software Engineer with skills in Python, SQL...")

In [9]:
# print(formatted_prompt)

In [10]:
def load_resume(file_path):
    if file_path.lower().endswith(".pdf"):
        loader = PyPDFLoader(file_path)
    elif file_path.lower().endswith(".docx"):
        loader = Docx2txtLoader(file_path)
    elif file_path.lower().endswith(".txt"):
        loader = TextLoader(file_path)
    else:
        return None
    return loader.load()

In [18]:
extracted_text = load_resume("Color block resume.docx")

In [19]:
extracted_text

[Document(metadata={'source': 'Color block resume.docx'}, page_content='IAN\n\nHANSSON\n\n\n\n\n\nGraphic Designer\n\n\n\n\n\n\n\nUI/UX Engineer\n\n\n\n\n\n\n\nDeveloper\n\nPROFILE\n\n\n\n\n\nCONTACT\n\nEnthusiastic and creative graphic designer with a passion for translating ideas into visually compelling designs. With experience in both print and digital mediums, I thrive on bringing concepts to life through innovative and impactful designs.\n\n\n\n\n\n\n\n816-555-0146\n\n\n\n\n\n\n\n\n\nian_hansson\n\n\n\n\n\n\n\n\n\nhansson@example.com\n\n\n\n\n\n\n\n\n\nwww.example.com\n\nEXPERIENCE\n\n\n\n\n\nSKILLS\n\nAdatum Corporation\n\n20xx-present\n\nDeveloped and evolved brand identities, crafted compelling collateral, oversaw end-to-end project lifecycles, consistently met tight deadlines, contributed to award-winning projects, and mentored junior designers.\n\n\n\n\n\nDesign software\n\nTypography\n\nUI/UX design\n\nPrint design\n\nProject management\n\nCreative problem solving\n\nCommun

In [20]:
# [str(doc) for doc in extracted_text]

In [21]:
extracted_text = "\n\n".join([str(doc) for doc in extracted_text])

In [22]:
extracted_text

"page_content='IAN\n\nHANSSON\n\n\n\n\n\nGraphic Designer\n\n\n\n\n\n\n\nUI/UX Engineer\n\n\n\n\n\n\n\nDeveloper\n\nPROFILE\n\n\n\n\n\nCONTACT\n\nEnthusiastic and creative graphic designer with a passion for translating ideas into visually compelling designs. With experience in both print and digital mediums, I thrive on bringing concepts to life through innovative and impactful designs.\n\n\n\n\n\n\n\n816-555-0146\n\n\n\n\n\n\n\n\n\nian_hansson\n\n\n\n\n\n\n\n\n\nhansson@example.com\n\n\n\n\n\n\n\n\n\nwww.example.com\n\nEXPERIENCE\n\n\n\n\n\nSKILLS\n\nAdatum Corporation\n\n20xx-present\n\nDeveloped and evolved brand identities, crafted compelling collateral, oversaw end-to-end project lifecycles, consistently met tight deadlines, contributed to award-winning projects, and mentored junior designers.\n\n\n\n\n\nDesign software\n\nTypography\n\nUI/UX design\n\nPrint design\n\nProject management\n\nCreative problem solving\n\nCommunication skills\n\nProseware, Inc.\n\n20xx-20xx\n\nActivel

In [23]:
formated_text = prompt.format(text = extracted_text)
response = model.invoke(formated_text).content

In [24]:
try:
    structured_output = json.loads(response)
except json.JSONDecodeError:
    print("Model returned invalid JSON. Raw output:")
    print(response)
    structured_output = None

print(structured_output)

Model returned invalid JSON. Raw output:
```json
{
  "Name": "IAN HANSSON",
  "Email": "hansson@example.com",
  "Phone": "816-555-0146",
  "LinkedIn": "No idea",
  "Skills": ["Design software", "Typography", "UI/UX design", "Print design", "Project management", "Creative problem solving", "Communication skills"],
  "Education": ["Master of Fine Arts, Graphic Design, Graphic Design Institute", "Bachelor of Arts, Graphic Design, Jasper University"],
  "Experience": ["Adatum Corporation, 20xx-present", "Proseware, Inc., 20xx-20xx", "Relecloud, 20xx-20xx"],
  "Projects": ["No idea"],
  "Certifications": ["No idea"],
  "Languages": ["No idea"]
}
```
None
