# Setup

## Setting up Output Format using Pydantic

In [1]:
from pydantic import BaseModel, Field
from typing import List, Optional
from datetime import date


class Experience(BaseModel):
    # Schema for one work-experience entry extracted from a resume.
    #
    # Documented with comments rather than a class docstring on purpose:
    # pydantic copies a model's docstring into the generated JSON schema as
    # "description", which would alter the format instructions sent to the LLM.
    #
    # Fields a resume may omit are Optional and default to None explicitly, so
    # they remain non-required regardless of the installed pydantic version.
    title: Optional[str] = Field(default=None, description="Job title")
    organization: Optional[str] = Field(
        default=None, description="Organization name")
    start_date: Optional[date] = Field(default=None, description="Start date")
    end_date: Optional[date] = Field(
        default=None, description="End date (Null if current job)")
    # The descriptions below are part of the prompt; their parentheses are
    # balanced so the model is not shown malformed instructions.
    achievements: List[str] = Field(
        description="List of achievements (Leave empty if none provided)")
    responsibilities: List[str] = Field(
        description="List of responsibilities (Leave empty if none provided)")


class Education(BaseModel):
    # Schema for one education entry extracted from a resume.
    #
    # Optional fields carry an explicit None default so they stay non-required
    # on any pydantic version instead of relying on the implicit default.
    credential_name: Optional[str] = Field(
        default=None, description="Credential name")
    institution_name: Optional[str] = Field(
        default=None, description="Institution name")
    start_date: Optional[date] = Field(default=None, description="Start date")
    end_date: Optional[date] = Field(default=None, description="End date")
    # Still required: the original schema demands a description string.
    description: str = Field(description="Education description")


class Experiences(BaseModel):
    # Section wrapper: the full work history as a required list of
    # Experience entries (the Ellipsis marks the field as required).
    experiences: List[Experience] = Field(
        ...,
        description="List of experiences",
    )


class EducationHistory(BaseModel):
    # Section wrapper: every Education entry found in the resume, as a
    # required list (the Ellipsis marks the field as required).
    education_history: List[Education] = Field(
        ...,
        description="List of education history",
    )


class Skills(BaseModel):
    # Section wrapper: a required flat list of skill names; the description
    # tells the LLM which kinds of skills to collect.
    skills: List[str] = Field(
        ...,
        description="List of skills (Programming Languages, Frameworks)",
    )


class ProfileLinks(BaseModel):
    # Profile URLs found in the resume header.
    #
    # Both links are Optional with a None default: a resume may list only one
    # of them (the sample resume shows a LinkedIn link but no GitHub link), and
    # a required `str` would force either a validation failure or an invented
    # value. This matches how Experience/Education model possibly-absent data.
    github_url: Optional[str] = Field(
        default=None, description="Github Profile URL")
    linkedin_url: Optional[str] = Field(
        default=None, description="Linkedin Profile URL")


class ParsedResume(Experiences, EducationHistory, ProfileLinks):
    # Combined schema: inherits the fields of the three section models listed
    # as bases and adds none of its own.
    # NOTE(review): Skills is not among the bases even though the extraction
    # loop also processes it — confirm whether `skills` should be part of the
    # combined model.
    ...


## Large Language Model (LLM)

In [2]:
from langchain.llms import OpenAI

# Completion-style LLM used for the first extraction pass over the resume.
# Temperature 0.0 requests the least random sampling.
# NOTE(review): confirm that "text-davinci-003" is still served by the API
# before re-running this notebook.
model_name, temperature = "text-davinci-003", 0.0
model = OpenAI(temperature=temperature, model_name=model_name)

# Trying it out

## Taking resume input

In [3]:
# Read the sample resume (Markdown) into memory as one string.
# The encoding is pinned so the text decodes identically on every platform;
# without it open() falls back to the locale's preferred encoding.
with open('test_data/fictional-resume.md', encoding='utf-8') as input_file:
    resume_content = input_file.read()

In [4]:
# Show the raw resume text. print() renders the line breaks, whereas a bare
# expression would display the string's escaped repr.
print(resume_content)

**Jonathon Bracken**

1234 Silicon Valley Rd., San Jose, CA 95126
(408) 123-4567 | jonathon.bracken@example.com
LinkedIn: linkedin.com/in/jonathon-bracken

**Objective**
A versatile Software Engineer with over 10 years of diverse experience in front-end, back-end, and Android firmware engineering roles. Eager to apply problem-solving abilities and technical expertise in a challenging new role.

---

**Skills**

- Programming languages: Java, JavaScript, Python, C++, C#, Kotlin, Swift, SQL
- Web Technologies: HTML, CSS, React, AngularJS, Node.js, Express.js, REST APIs
- Databases: MySQL, PostgreSQL, MongoDB, Redis
- Android Firmware: Android Open Source Project (AOSP), Linux Kernel, Custom ROM Development
- Tools: Git, Docker, Jenkins, Jira, Agile/Scrum methodologies
- Soft Skills: Communication, Leadership, Problem Solving, Teamwork, Adaptability

---

**Work Experience**

**Senior Software Engineer | HoloWare Inc.**
*San Jose, CA | February 2021 - Present*

- Lead the development team

## Iterate over Queries

In [5]:
from langchain.chat_models import ChatOpenAI

# Chat model handed to OutputFixingParser to repair output that fails to parse.
# Temperature is pinned to 0.0 (same as the completion model) instead of being
# left at the library default, so repeated runs repair output the same way.
chat_llm = ChatOpenAI(model='gpt-3.5-turbo-16k', temperature=0.0)

In [6]:
from langchain.output_parsers import PydanticOutputParser
from langchain.output_parsers import OutputFixingParser
from langchain.prompts import PromptTemplate
from tqdm import tqdm
import logging

# One extraction pass per section model; the fields produced by every pass are
# merged into the flat `resume` dict.
RESUME_PROMPT_TEMPLATE = (
    "I am providing a JSON Schema followed by a resume in plain text. "
    "Format the plain text into the JSON format.\n"
    "{format_instructions}\n"
    "{resume}\n"
)

resume = {}
models = (Experiences, EducationHistory, ProfileLinks, Skills)
progress_bar = tqdm(models)

for pydantic_model in progress_bar:
    section_name = pydantic_model.__name__
    progress_bar.set_description(f"Processing {section_name}")

    # The parser supplies the JSON-schema instructions embedded in the prompt
    # and later validates the model's answer against `pydantic_model`.
    resume_output_parser = PydanticOutputParser(pydantic_object=pydantic_model)
    format_instructions = resume_output_parser.get_format_instructions()
    prompt = PromptTemplate(
        template=RESUME_PROMPT_TEMPLATE,
        input_variables=["resume"],
        partial_variables={"format_instructions": format_instructions},
    )

    # First pass: the completion model turns the resume text into JSON.
    _input = prompt.format_prompt(resume=resume_content)
    output = model(_input.to_string())

    # Second pass: the fixing parser wraps the pydantic parser and is given
    # chat_llm so it can repair output that does not parse.
    progress_bar.set_description(f"Fixing Output {section_name}")
    output_fixing_parser = OutputFixingParser.from_llm(
        parser=resume_output_parser,
        llm=chat_llm,
    )
    parsed_section = output_fixing_parser.parse(output)
    resume.update(parsed_section.dict())

Fixing Output Skills: 100%|███████████████████████| 4/4 [00:49<00:00, 12.48s/it]


In [7]:
import json

# Pretty-print the merged result. `default=str` stringifies values json cannot
# encode natively (the date fields of the parsed models).
json_formatted_str = json.dumps(
    resume,
    default=str,
    indent=2,
)
print(json_formatted_str)

{
  "experiences": [
    {
      "title": "Senior Software Engineer",
      "organization": "HoloWare Inc.",
      "start_date": "2020-02-01",
      "end_date": null,
      "achievements": [
        "Lead the development team of HoloLens, a holographic glasses project, developing firmware based on Android Open Source Project (AOSP).",
        "Enhanced system performance by optimizing Linux Kernel and effectively reduced boot-up time by 30%.",
        "Assisted in the development of several in-house tools to facilitate rapid firmware testing and deployment."
      ],
      "responsibilities": []
    },
    {
      "title": "Full Stack Developer",
      "organization": "Nebula Dynamics",
      "start_date": "2017-06-01",
      "end_date": "2020-02-01",
      "achievements": [
        "Developed RESTful APIs using Node.js and Express, which served over 1M requests per day with an average response time of 200ms.",
        "Created a scalable web application using React and Redux that impr