In [None]:
import os
import pandas as pd
import openai
from util import extract_text_from_resume
import json
import re
import textwrap
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.output_parsers import ResponseSchema
from langchain.output_parsers import StructuredOutputParser
from dotenv import load_dotenv, find_dotenv

# Load environment variables from the .env file located by find_dotenv();
# the return value is discarded by binding it to `_`.
_=load_dotenv(find_dotenv())

# Set OpenAI API key. Indexing os.environ directly means a missing
# OPENAI_API_KEY raises KeyError in this cell rather than later.
openai.api_key = os.environ['OPENAI_API_KEY']

# Chat model used by the extraction cells below, constructed with temperature 0.0.
chat = ChatOpenAI(temperature=0.0)



In [None]:
# Response schemas for the three contact fields (name, email, phone number).
#
# Each description is assembled from adjacent string literals instead of
# backslash-newline continuations inside a single literal: a continuation keeps
# the next source line's leading spaces, so the indentation used to end up in
# the text sent to the model. The schema names are unchanged because they are
# the keys of the parsed output.
name_schema = ResponseSchema(
    name="name",
    description=(
        "Extracts name in JSON format. "
        "The keys must be 'name'. "
        "Each corresponding value should be represented as a Python string. "
        "If the name cannot be found, the key should still be included in the JSON object, "
        "but its corresponding value should be null."
    ),
)

email_schema = ResponseSchema(
    name="email",
    description=(
        "Extracts email in JSON format. "
        "The keys must be 'email'. "
        "Each corresponding value should be represented as a Python string. "
        "If the email cannot be found, the key should still be included in the JSON object, "
        "but its corresponding value should be null."
    ),
)


# The original text read "'phone number'.Each" (no space after the period).
phone_number_schema = ResponseSchema(
    name="phone number",
    description=(
        "Extracts phone number in JSON format. "
        "The keys must be 'phone number'. "
        "Each corresponding value should be represented as a Python string. "
        "If the phone number cannot be found, the key should still be included in the JSON object, "
        "but its corresponding value should be null."
    ),
)


# Response schema for the education history: one JSON object per educational
# experience with the keys 'institution', 'degree_type', 'major' and
# 'graduation_date'.
#
# Built from adjacent string literals so the source indentation that followed
# each backslash-newline continuation no longer appears inside the description.
education_schema = ResponseSchema(
    name="education",
    description=(
        "Extract information about the individual's educational background in JSON format. "
        "Each educational experience should be represented as a separate JSON object. "
        "For each education instance, the keys must be 'institution', 'degree_type', 'major', "
        "and 'graduation_date'. "
        "Each corresponding value should be represented as a Python string. "
        "If any information cannot be found for a given key, ensure the key is still included "
        "in the JSON object, but assign its corresponding value as null."
    ),
)


# Response schema for the work history: one JSON object per distinct job role
# with the keys 'job_title', 'employer' and 'employment_duration'.
#
# The original literal joined its lines with backslash-newline, which removes
# the line break entirely, so consecutive steps were fused ("...section.2. For
# every..."). The steps are now separated by a single space. Spaces rather than
# newlines are used so the whole description stays on one line.
# NOTE(review): this assumes the parser renders each description as a one-line
# trailing comment in its format instructions - confirm against the installed
# langchain version.
work_experience_schema = ResponseSchema(
    name="work experiences",
    description=(
        "Follow steps below to extract work experiences: "
        "1. Begin by extracting details about each distinct job role from the work experience section. "
        "2. For every distinct job role, even if it is within the same company, create a separate JSON object. "
        "3. Each JSON object must include the following keys: 'job_title', 'employer', and 'employment_duration'. "
        "4. For the keys 'job_title', 'employer', and 'employment_duration', represent the corresponding values as Python strings. "
        "For instance, the 'job_title' for a specific role might look like: 'Software Engineer'. "
        "5. If there is any key for which you cannot find the corresponding information, ensure that this key is still included in the JSON object. "
        "If no details are found for the keys, assign their value as an empty Python string, for instance 'job_title': ''. "
        "6. Repeat these steps for each distinct job role identified in the work experience section."
    ),
)


# Response schema for projects: one JSON object per distinct project with the
# keys 'project_name', 'employer', 'job_title' and 'technical_skills'.
#
# The original literal joined its lines with backslash-newline, which drops the
# line break, so steps and sentences were fused ("details:1. Start",
# "commas.For instance", "object.However"). Each piece now ends with an
# explicit space. Spaces rather than newlines are used so the description stays
# on one line.
# NOTE(review): this assumes the parser renders each description as a one-line
# trailing comment in its format instructions - confirm against the installed
# langchain version.
project_schema = ResponseSchema(
    name="projects",
    description=(
        "Follow the steps below to extract project details: "
        "1. Start by identifying and extracting details for each distinct project, the employer, the job title, and the technical skills utilized in each project. "
        "2. For each distinct project, create a separate JSON object. "
        "3. The JSON object for each project must include the following keys: 'project_name', 'employer', 'job_title', and 'technical_skills'. "
        "4. Represent the corresponding values for each key as Python lists or strings. "
        "Each individual item within the 'technical_skills' list should be separated by commas. "
        "For instance, a list of technical skills for a specific project might appear as: ['JavaScript', 'React', 'Firebase']. "
        "5. If you cannot find the information corresponding to any of the keys, ensure that this key is still included in the JSON object. "
        "However, in such cases, assign its value as an empty Python list or empty string. "
        "For example, if no technical skills are associated with a particular project, you should include: 'technical_skills': [] in the JSON object. "
        "Similarly, if no employer or job title is found related to the project, you should include: 'employer': '', 'job_title': '' in the JSON object. "
        "6. Repeat these steps for each distinct project identified."
    ),
)

# First request: contact details, education and work history.
response_schemas1 = [
    name_schema,
    email_schema,
    phone_number_schema,
    education_schema,
    work_experience_schema,
]

# Second request: projects only.
response_schemas2 = [project_schema]

# One structured-output parser per request ...
output_parser1, output_parser2 = (
    StructuredOutputParser.from_response_schemas(schemas)
    for schemas in (response_schemas1, response_schemas2)
)

# ... and the matching format instructions that get substituted into the prompts.
format_instructions1, format_instructions2 = (
    parser.get_format_instructions()
    for parser in (output_parser1, output_parser2)
)



In [None]:
# Prompt template for the first extraction pass (name, email, phone number,
# education, work experiences). `{text}` receives the resume text and
# `{format_instructions1}` the parser's format instructions when the template
# is formatted.
#
# Only the leading `"""\` continuation is kept (it suppresses an empty first
# line). The in-text backslash-newline continuations were removed because they
# delete the line break: the numbered work-experience steps were being sent as
# one fused line ("...section.2. For every..."), unlike the project template
# where each step sits on its own line. They also pulled source indentation
# into the prompt. No curly braces other than the two placeholders may appear
# in this text.
output_template1 = """\
For the following text, extract the following information:

Extracts name in JSON format. The keys must be 'name'. Each corresponding value should be represented as a Python string. If the name cannot be found, the key should still be included in the JSON object, but its corresponding value should be null.

Extracts email in JSON format. The keys must be 'email'. Each corresponding value should be represented as a Python string. If the email cannot be found, the key should still be included in the JSON object, but its corresponding value should be null.

Extracts phone number in JSON format. The keys must be 'phone number'. Each corresponding value should be represented as a Python string. If the phone number cannot be found, the key should still be included in the JSON object, but its corresponding value should be null.

Extract information about the individual's educational background in JSON format. Each educational experience should be represented as a separate JSON object. For each education instance, the keys must be 'institution', 'degree_type', 'major', and 'graduation_date'. Each corresponding value should be represented as a Python string. If any information cannot be found for a given key, ensure the key is still included in the JSON object, but assign its corresponding value as null.

Follow steps below to extract work experiences:
1. Begin by extracting details about each distinct job role from the work experience section.
2. For every distinct job role, even if it is within the same company, create a separate JSON object.
3. Each JSON object must include the following keys: 'job_title', 'employer', and 'employment_duration'.
4. For the keys 'job_title', 'employer', and 'employment_duration', represent the corresponding values as Python strings. For instance, the 'job_title' for a specific role might look like: 'Software Engineer'.
5. If there is any key for which you cannot find the corresponding information, ensure that this key is still included in the JSON object. If no details are found for the keys, assign their value as an empty Python string, for instance 'job_title': ''.
6. Repeat these steps for each distinct job role identified in the work experience section.


text: {text}

{format_instructions1} """


# Prompt template for the second extraction pass (projects). `{text}` receives
# the resume text and `{format_instructions2}` the parser's format instructions
# when the template is formatted.
#
# The remaining in-text backslash-newline continuations were removed: they
# delete the line break without adding a space, which fused sentences inside
# steps 4 and 5 ("commas.For instance", "object.However", "object.Similarly").
# Each step is now a single line with normal sentence spacing. No curly braces
# other than the two placeholders may appear in this text.
output_template2 = """\
For the following text, extract the following information:

Follow the steps below to extract project details:
1. Start by identifying and extracting details for each distinct project, the employer, the job title, and the technical skills utilized in each project.
2. For each distinct project, create a separate JSON object.
3. The JSON object for each project must include the following keys: 'project_name', 'employer', 'job_title', and 'technical_skills'.
4. Represent the corresponding values for each key as Python lists or strings. Each individual item within the 'technical_skills' list should be separated by commas. For instance, a list of technical skills for a specific project might appear as: ['JavaScript', 'React', 'Firebase'].
5. If you cannot find the information corresponding to any of the keys, ensure that this key is still included in the JSON object. However, in such cases, assign its value as an empty Python list or empty string. For example, if no technical skills are associated with a particular project, you should include: 'technical_skills': [] in the JSON object. Similarly, if no employer or job title is found related to the project, you should include: 'employer': '', 'job_title': '' in the JSON object.
6. Repeat these steps for each distinct project identified.

text: {text}

{format_instructions2} """



In [None]:
import os
import json

# Structured extraction: for every .txt resume, run the two prompts, parse both
# replies into dictionaries, merge them and write the result as JSON.
resumes_dir = "/Users/yunjaewon/JohnResume/resume-rater/resume_data/"
output_dir = "/Users/yunjaewon/JohnResume/resume-rater/extracted_data/"

# Create the destination up front so a missing directory does not turn every
# resume into a "No such file or directory" error after the API calls were made.
os.makedirs(output_dir, exist_ok=True)

# os.listdir returns entries in arbitrary order; sort so every run visits the
# resumes in the same order.
resumes_files = sorted(os.listdir(resumes_dir))

# The prompt templates do not depend on the resume, so build them once.
prompt1 = ChatPromptTemplate.from_template(template=output_template1)
prompt2 = ChatPromptTemplate.from_template(template=output_template2)

for filename in resumes_files:
    file_path = os.path.join(resumes_dir, filename)

    # Only regular .txt files are processed; directories and other entries are skipped.
    if not (os.path.isfile(file_path) and filename.endswith(".txt")):
        continue

    try:
        # Explicit encoding instead of the platform default; the handle is
        # released before the (slow) API calls start.
        with open(file_path, 'r', encoding='utf-8') as file:
            resume = file.read()

        # Pass 1: contact details, education and work experiences.
        messages1 = prompt1.format_messages(text=resume, format_instructions1=format_instructions1)
        response1 = chat(messages1, temperature=0.0)
        output_dict1 = output_parser1.parse(response1.content)

        # Pass 2: projects.
        messages2 = prompt2.format_messages(text=resume, format_instructions2=format_instructions2)
        response2 = chat(messages2, temperature=0.0)
        output_dict2 = output_parser2.parse(response2.content)

        # Combine the outputs into one dictionary (on a key clash the second pass wins).
        output_dict = output_dict1 | output_dict2

        # Same base name and the same '.txt' extension as the source resume,
        # only the directory differs; the content is JSON.
        output_file_path = os.path.join(output_dir, os.path.splitext(filename)[0] + '.txt')

        with open(output_file_path, 'w', encoding='utf-8') as output_file:
            json.dump(output_dict, output_file)

    except Exception as e:
        # Best effort: report the failing resume and continue with the next one.
        print(f"Error processing file {filename}: {e}")

# Each resume that was processed without error now has a same-named .txt file
# (containing JSON) in the output directory.


In [None]:
# Names of the files written to the extraction output directory. '.DS_Store'
# is dropped here because the resume listing this is compared against drops it
# too; without the filter a stray '.DS_Store' in the output directory makes the
# comparison False even when every resume was extracted.
extracted_files = [f for f in os.listdir(output_dir) if f != '.DS_Store']

In [None]:
# Re-list the resume directory, leaving out the '.DS_Store' entry.
resumes_files = [
    entry
    for entry in os.listdir(resumes_dir)
    if entry != '.DS_Store'
]

# Cell result: True when the resume directory and the extraction output hold
# exactly the same file names.
sorted(resumes_files) == sorted(extracted_files)

In [None]:
import os
import openai
from util import extract_text_from_resume
import json
import re
import textwrap
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.output_parsers import ResponseSchema
from langchain.output_parsers import StructuredOutputParser
from dotenv import load_dotenv, find_dotenv

# Load environment variables from .env file
# (same setup as the first cell of this notebook, repeated here).
_=load_dotenv(find_dotenv())

# Set OpenAI API key. Indexing os.environ directly means a missing
# OPENAI_API_KEY raises KeyError in this cell rather than later.
openai.api_key = os.environ['OPENAI_API_KEY']

# Rebinds `chat` to a new chat model, again constructed with temperature 0.0.
chat = ChatOpenAI(temperature=0.0)

# Free-form prompt asking for per-position work-experience details (job title,
# employer, project, skills used, employment tenure). `{text}` is the only
# placeholder and receives the resume text.
# NOTE: this rebinds `output_template1`, which an earlier cell defines as the
# structured-extraction template with a `{format_instructions1}` placeholder.
# After this cell runs, re-running the earlier extraction loop would pick up
# this template instead.
output_template1="""\
Given the text provided, please perform the following actions:

Closely examine the text for information related to work experience. For each employment instance, strive to extract the following details:

    - 'Job Title': The role or position held by the individual.
    - 'Employer': The organization or company where the individual was employed.
    - 'Project': Specific tasks, responsibilities, or initiatives that the individual was a part of or led during their employment. 
    - 'Skills Used': List any particular abilities, knowledge, or competencies that the individual employed during their projects or job tasks.
    - 'Employment Tenure Assessment': Please calculate the length of employment tenure, derived from the given start and end dates. \
        Your result should specify the duration in terms of years and months. For example, '3 years and 4 months'.

If the individual held multiple positions within the same company, ensure that details for each position are separately presented.

Text for Analysis: {text}
"""


# Free-form prompt asking for technical skills, both those stated explicitly
# and those inferred from the work-experience descriptions. `{text}` is the
# only placeholder and receives the resume text.
# NOTE: this rebinds `output_template2`, which an earlier cell defines as the
# project-extraction template with a `{format_instructions2}` placeholder.
output_template2="""\
Given the provided text, please carry out the following tasks:

1. Scan the entirety of the text, including but not limited to the 'Skills' section, to identify and extract any explicitly mentioned technical skills. This may include specific software proficiencies, technical abilities, certifications, or other specialised competencies.

2. Additionally, examine the sections related to work experiences. For each employment instance, identify and extract any technical skills that may be implied or directly stated in the description of job roles, responsibilities, or projects. 

3. Present these skills in a consolidated and organised manner, clearly differentiating between skills explicitly stated and those inferred from work experiences.

Text for Analysis: {text} """

import os
import json

# Free-form extraction trial: send the work-experience and skills prompts for a
# small sample of resumes and store each reply in its own file.
resumes_dir = "/Users/yunjaewon/JohnResume/resume-rater/resume_data/"
skill_output_dir = "/Users/yunjaewon/JohnResume/resume-rater/skill_data/"
work_output_dir="/Users/yunjaewon/JohnResume/resume-rater/work_data/"

# Create both destinations up front so a missing directory does not waste the API calls.
os.makedirs(skill_output_dir, exist_ok=True)
os.makedirs(work_output_dir, exist_ok=True)

# Trial run on two resumes. The listing is filtered to regular .txt files and
# sorted BEFORE slicing: os.listdir order is arbitrary, so slicing the raw
# listing could select '.DS_Store', a directory or a non-.txt file and leave
# fewer than two (possibly zero) resumes to process.
resumes_files = sorted(
    name
    for name in os.listdir(resumes_dir)
    if name.endswith(".txt") and os.path.isfile(os.path.join(resumes_dir, name))
)[:2]

# The prompt templates do not depend on the resume, so build them once.
prompt1 = ChatPromptTemplate.from_template(template=output_template1)
prompt2 = ChatPromptTemplate.from_template(template=output_template2)

for filename in resumes_files:
    file_path = os.path.join(resumes_dir, filename)

    try:
        # Explicit encoding instead of the platform default; the handle is
        # released before the (slow) API calls start.
        with open(file_path, 'r', encoding='utf-8') as file:
            resume = file.read()

        # Work-experience prompt.
        messages1 = prompt1.format_messages(text=resume)
        response1 = chat(messages1, temperature=0.0)

        # Skills prompt.
        messages2 = prompt2.format_messages(text=resume)
        response2 = chat(messages2, temperature=0.0)

        # Output names keep the resume's base name and add a suffix per prompt.
        base_name = os.path.splitext(filename)[0]
        #work
        output_file_path1 = os.path.join(work_output_dir, base_name + '_work.txt')
        #skill
        output_file_path2 = os.path.join(skill_output_dir, base_name + '_skills.txt')

        # json.dump on a str writes a JSON string literal (quoted, newlines
        # escaped), not the raw text. Kept as-is in case existing readers load
        # these files with json.load - TODO confirm whether plain text is wanted.
        with open(output_file_path1, 'w', encoding='utf-8') as output_file:
            json.dump(response1.content, output_file)
        with open(output_file_path2, 'w', encoding='utf-8') as output_file:
            json.dump(response2.content, output_file)
    except Exception as e:
        # Best effort: report the failing resume and continue with the next one.
        print(f"Error processing file {filename}: {e}")

# Each resume processed without error now has a '<name>_work.txt' file in the
# work directory and a '<name>_skills.txt' file in the skill directory.


In [None]:
# Show the raw reply to the work-experience prompt. `response1` is whatever the
# most recent loop iteration left behind, i.e. the last resume that reached the
# first API call. If the preceding loop processed no file, the name is either
# undefined or still bound by an earlier cell.
print(response1.content)

In [None]:
# Show the raw reply to the skills prompt. `response2` is whatever the most
# recent loop iteration left behind. If the preceding loop processed no file,
# the name is either undefined or still bound by an earlier cell.
print(response2.content)