In [1]:
from langchain.chat_models import ChatOpenAI
from langchain.schema import StrOutputParser
from langchain.document_loaders import UnstructuredPDFLoader
from langchain.schema.runnable import RunnablePassthrough
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma

In [2]:
from dotenv import load_dotenv
import os

load_dotenv()

# API_KEY = os.getenv("OPENAI_API_KEY")

True

In [3]:
loader = UnstructuredPDFLoader("../static/resume.pdf")

data = loader.load()

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [5]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
splits = text_splitter.split_documents(data)

In [6]:
embedding = OpenAIEmbeddings(model='text-embedding-ada-002')

In [7]:
vectorstore = Chroma.from_documents(documents=splits, embedding=embedding)
retriever = vectorstore.as_retriever()

In [8]:
job_description = """
Below are the skills needed for this project. Students with the following relevant skills and interest, regardless of major, are encouraged to apply! This is a team based multidisciplinary project. Students on the team are not expected to have experience in all areas, but should be willing to learn and will be asked to perform a breadth of tasks throughout the two semester project.
Advanced Data Science and Modeling Techniques
Specific Skills: Applied project experience with Large Language Models and other applied AI techniques OR advanced coursework

Likely Majors: DATA, STATS, MATH, CS


Data Science
Specific Skills: General skills in Data Science, good software development skills, and a willingness to quickly develop new technical skills as required for the project

EECS 281(or equivalent) is required

Likely Majors: DATA, CS


General Coding
Specific Skills: General Programming skills, good software engineering practice and design, and a willingness to quickly develop new technical skills as required for the project 

EECS 281 (or equivalent) is required

Likely Majors: CS, DATA, BBA/CS



Additional Desired Skills/Knowledge/Experience
Successful team-based project experience.  Excellent interpersonal skills
Project Management utilizing Agile/Scrum
Experience in business process analysis
Interest in and general knowledge of Commercial Banking
Practical experience implementing predictive analytics in a complex data environment
Ability and desire to independently learn new technology skills as necessary for the project
Experience implementing large language models, neural networks and self-supervised / semi-supervised learning models
"""

In [21]:
from langchain.prompts import ChatPromptTemplate
import PyPDF2

In [22]:
class ResumeParser:
    def __init__(self, path: str):
        print ("parsing resume...")
        with open(path, 'rb') as file:
            reader = PyPDF2.PdfReader(file)
            text = ""
            for page in reader.pages:
                text += page.extract_text()
            
            llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

            skills_prompt = ChatPromptTemplate.from_template("""
            Given the below resume, extract the candidate's skills, if any, verbatim. Return them in the following format (delimited by triple backticks):
            \"\"\"
            Skills:
            - skill 1 (verbatim from resume)
            - skill 2 (verbatim from resume)
            ...
            \"\"\"                                                                                    
            Resume: {resume}
            Answer:                                           
            """)

            experiences_prompt = ChatPromptTemplate.from_template("""
            Given the below resume, extract the candidate's experiences, if any, verbatim. Return them in the following format (delimited by triple backticks):
            \"\"\"
            Experiences:
            - experience 1 (date range)
                - bullet points (verbatim from resume)                                                      
            - experience 2 (date range)
                - bullet points (verbatim from resume)
            ...
            \"\"\"                                                                                    
            Resume: {resume}
            Answer:                                           
            """)

            education_prompt = ChatPromptTemplate.from_template("""
            Given the below resume, extract the candidate's education history, if any, verbatim. Return them in the following format (delimited by triple backticks):
            \"\"\"
            Education:
            - education 1 (verbatim from resume)
            - education 2 (verbatim from resume)
            ...
            \"\"\"                                                                                    
            Resume: {resume}
            Answer:                                           
            """)

            interests_prompt = ChatPromptTemplate.from_template("""
            Given the below resume, extract the candidate's interests, if any, verbatim. Return them in the following format (delimited by triple backticks):
            \"\"\"
            Interests:
            - interest 1 (verbatim from resume)
            - interest 2 (verbatim from resume)
            ...
            \"\"\"                                                                                    
            Resume: {resume}
            Answer:                                           
            """)

            prompts = [skills_prompt, experiences_prompt, education_prompt, interests_prompt]
            
            self.resume = "" 
            for prompt in prompts:
                rag_chain = (
                    {"resume": RunnablePassthrough()}
                    | prompt
                    | llm
                    | StrOutputParser()
                )
                try: 
                    self.resume += rag_chain.invoke(text)
                except:
                    pass



class JobDescriptionParser:
    def __init__(self, description: str):
        print ("parsing job description...")
        llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
        
        essentials_prompt = ChatPromptTemplate.from_template("""
        Given the below job description and the candidates relevant skills and experiences, please list anything listed as, essential, required, or otherwise needed essentially for a candidate to apply. If any, please be verbatim when listing these requirements, otherwise return nothing.
        Return them in the following format (delimited by triple backticks):
        \"\"\"
        Essential Skills:
        - skill 1 (verbatim from resume)
        - skill 2 (verbatim from resume)
        ...
        \"\"\"                                                                                    
        Job Description: {job_description}. 
        Answer:                                           
        """)

        rag_chain = (
            {"job_description": RunnablePassthrough()}
            | essentials_prompt
            | llm
            | StrOutputParser()
        )

        essentials = rag_chain.invoke(description)

        desirables_prompt = ChatPromptTemplate.from_template("""
        Given the below job description and the candidates relevant skills and experiences, please list anything listed as beneficial or preffered, or otherwise non-essential but beneficial to have. The if the requirement is already satisfied, don't include it at all.
        Return them in the following format (delimited by triple backticks):
        \"\"\"
        Desirable Skills:
        - skill 1 (verbatim from resume)
        - skill 2 (verbatim from resume)
        ...
        \"\"\"                                              
        Job Description: {job_description}
        Satisfied: {context}
        Preferred Requirements:                                           
        """)

        rag_chain = (
            {"job_description": RunnablePassthrough(), "context": RunnablePassthrough()}
            | desirables_prompt
            | llm
            | StrOutputParser()
        )

        desirables = rag_chain.invoke(job_description=description, context=essentials)

        self.requirements = essentials + "\n" + desirables

        

In [23]:
resume = ResumeParser("../static/resume.pdf")
job_description = JobDescriptionParser(job_description)

parsing resume...
parsing job description...


TypeError: Expected a Runnable, callable or dict.Instead got an unsupported type: <class 'str'>