In [4]:
# Environment setup: expose the parent directory to the import system, then
# let the project's `config` module set the environment variables.
import os
import sys

# Prepending (index 0) gives the parent directory priority over every other
# entry already on sys.path when `config` is resolved below.
# NOTE(review): presumably `config.py` lives in that parent directory - confirm.
sys.path.insert(0, os.path.abspath(os.pardir))

from config import set_environment

set_environment()

In [15]:
from typing import Optional
import pydantic

# Shared pydantic settings. Each model picks them up through a nested
# ``Config`` class that subclasses this one, so the options are declared once.
class Config:
    arbitrary_types_allowed = True
    frozen = True


# NOTE: the models below are documented with ``#`` comments rather than
# docstrings on purpose. pydantic copies a model's docstring into its JSON
# schema, and that schema is what gets embedded into the LLM prompt; adding
# docstrings would therefore silently change the prompt.
#
# The models are plain ``pydantic.BaseModel`` subclasses:
#   * ``BaseModel`` was previously referenced without being imported
#     (NameError on a fresh kernel); it is now reached via ``pydantic.``.
#   * ``@pydantic.dataclasses.dataclass`` must not be applied to a class that
#     is already a pydantic model, so the decorator is gone and the settings
#     are attached with the nested ``Config`` class instead.
#   * Every ``Optional[...]`` field defaults to ``None`` so that a key missing
#     from the extracted JSON does not fail validation.


# Fields shared by every dated resume entry (studies, jobs).
class Experience(pydantic.BaseModel):
    # Inherit the shared settings; subclasses of Experience reuse them too.
    class Config(Config):
        pass

    start_date: Optional[str] = None
    end_date: Optional[str] = None
    description: Optional[str] = None


# An education entry: the Experience fields plus degree details.
class Study(Experience):
    degree: Optional[str] = None
    university: Optional[str] = None
    country: Optional[str] = None
    grade: Optional[str] = None


# A job entry: the Experience fields plus mandatory company and job title.
class WorkExperience(Experience):
    company: str
    job_title: str


# Top-level extraction target: one person's resume.
class Resume(pydantic.BaseModel):
    # Resume does not derive from Experience, so it wires the settings itself.
    class Config(Config):
        pass

    first_name: str
    last_name: str
    linkedin_url: Optional[str] = None
    email_address: Optional[str] = None
    nationality: Optional[str] = None
    skill: Optional[str] = None
    study: Optional[Study] = None
    work_experience: Optional[WorkExperience] = None
    hobby: Optional[str] = None


In [None]:
import os

from langchain_community.document_loaders.pdf import PyPDFLoader

# Resolve the sample resume relative to the current user's home directory so
# the path is not tied to one machine's user name.
pdf_file_path = os.path.expanduser("~/Downloads/openresume-resume.pdf")

# Load the PDF and split it into document chunks; `docs` is what the
# extraction chain consumes later in the notebook.
pdf_loader = PyPDFLoader(file_path=pdf_file_path)
docs = pdf_loader.load_and_split()

In [22]:
from langchain.output_parsers import PydanticOutputParser
from langchain.output_parsers.json import SimpleJsonOutputParser
from langchain_openai import ChatOpenAI
from langchain.prompts import PromptTemplate

# The pydantic parser is used here to produce the format instructions (built
# from the Resume model) that are injected into the prompt.
parser = PydanticOutputParser(pydantic_object=Resume)
prompt = PromptTemplate(
    template="Extract information from the provided document.\n{format_instructions}\n{document}\n",
    input_variables=["document"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)
llm = ChatOpenAI(model_name="gpt-4-turbo-preview")
# The chain ends in SimpleJsonOutputParser, so the result is the parsed JSON
# (a plain dict). Swapping in `parser` should return a validated Resume
# instance instead.
chain = prompt | llm | SimpleJsonOutputParser()  # or parser

# Pass the model the actual page text. Interpolating `docs` directly would
# insert the repr of a list of Document objects into the prompt (metadata,
# escaped newlines and all) rather than the resume content itself.
document_text = "\n\n".join(doc.page_content for doc in docs)

chain.invoke({"document": document_text})

{'first_name': 'John',
 'last_name': 'Doe',
 'linkedin_url': 'linkedin.com/in/john-doe',
 'email_address': 'hello@openresume.com',
 'nationality': None,
 'skill': 'HTML, TypeScript, CSS, React, Python, C++, Tech: React Hooks, GraphQL, Node.js, SQL, Postgres, NoSql, Redis, REST API, Git, Soft: Teamwork, Creative Problem Solving, Communication, Learning Mindset, Agile',
 'study': {'start_date': 'Sep 2019',
  'end_date': 'May 2023',
  'description': 'Won 1st place in 2022 Education Hackathon, 2nd place in 2023 Health Tech Competition\nTeaching Assistant for Programming for the Web (2022 - 2023)\nCoursework: Object-Oriented Programming (A+), Programming for the Web (A+), Cloud Computing (A), Introduction to Machine Learning (A-), Algorithms Analysis (A-)',
  'degree': 'Bachelor of Science in Computer Science',
  'university': 'XYZ University',
  'country': None,
  'grade': '3.8 GPA'},
 'work_experience': {'start_date': 'May 2023',
  'end_date': 'Present',
  'description': 'Lead a cross-fun