In [1]:
from langchain.output_parsers import StructuredOutputParser, ResponseSchema
from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
import pandas as pd
import json

In [2]:
openai_api_key = 'INSERT YOUR API KEY HERE'

In [3]:
# Temp = 0 so that we get clean information without a lot of creativity
chat_model = ChatOpenAI(temperature=0, openai_api_key=openai_api_key, max_tokens=1000)

In [4]:
# How you would like your response structured. This is basically a fancy prompt template
response_schemas = [
    ResponseSchema(name="opportunity_skills",
    description="These are the hard and soft skills you extract from the job_description"),
]

# How you would like to parse your output
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)

In [5]:
# See the prompt template you created for formatting
format_instructions = output_parser.get_format_instructions()
print (output_parser.get_format_instructions())

The output should be a markdown code snippet formatted in the following schema, including the leading and trailing "```json" and "```":

```json
{
	"opportunity_skills": string  // These are the hard and soft skills you extract from the job_description
}
```


In [6]:
template = """
You will be given a job description for the role of {opportunity_title}.
Your job is to extract soft and hard skills from the job description.

{format_instructions}

Wrap your final output with closed and open brackets (a list of json objects)

input_opportunity_description INPUT:
{opportunity_description}

YOUR RESPONSE:
"""

prompt = ChatPromptTemplate(
    messages=[
        HumanMessagePromptTemplate.from_template(template)  
    ],
    input_variables=["opportunity_title", "opportunity_description"],
    partial_variables={"format_instructions": format_instructions}
)

### Load in the data

In [7]:
# Load the data
applications = pd.read_pickle("../../Data/split_4_enriched.pkl")

In [8]:
opportunity_id_sample = ['+5os8/vB/EGBEahyGxRMjA==',
'+0cRKl9xvkGTQq4D8IwnAQ==',
'+3lu6kPHC06xU79BvlwHew==',
'zu77vbq9g0ed54tWutsAig==',
'zsFZ88cMtUKMWf2kIKdncQ==',
'Pw9q3TmWDE6up1oUsv/OTA==',
'pJtXynDm6kqbyczn0dXkDg==',
'VPpkoLNRtUCOxpYUssltxg==',
'7uXMjQV9r061ap/Lc91I1Q==',
'Im/cVgG09Uesn1fjkbt5TQ==',]

In [9]:
# locate applications where the opportunity_id is in the sample
applications_sample = applications[applications.opportunity_id.isin(opportunity_id_sample)]
applications_sample

Unnamed: 0,opportunity_id,application_id,opportunity_brief_description,opportunity_description,opportunity_title,application_pass_first_step,application_step_category,application_job_titles,application_job_responsibilities,application_education,...,application_skills,jaccard_similarity,intersected_skills,intersected_skills_count,application_reported_skills_list_length,opportunity_required_skills_list_length,brief_description_skills_list_length,application_job_responsibilities_extracted_skills_list_length,application_skills_list_length,intersected_skills_list_length
580,Pw9q3TmWDE6up1oUsv/OTA==,jiJ1JAa/AkWkg853IcV99A==,orkin purpose help protect world live work pla...,orkin purpose help protect world live work pla...,termite technician,False,0,detention officer order builder administrative...,managed daily administrative operation departm...,High School Diploma++Mandatory courses++New Ha...,...,"[operations, increase sales, legal documents, ...",0.009709,[operation],1,21,61,5,22,43,1
688,Pw9q3TmWDE6up1oUsv/OTA==,DMMShh6ksEeTDMTqbqS5Nw==,orkin purpose help protect world live work pla...,orkin purpose help protect world live work pla...,termite technician,False,0,construction superintendent part manager,osha 10 30 hour certification 15 year general ...,High school diploma,...,"[concrete, construction management, drainage, ...",0.000000,[],0,14,61,5,8,22,0
1047,Pw9q3TmWDE6up1oUsv/OTA==,UUO4CGKds0SR5OdxnONVag==,orkin purpose help protect world live work pla...,orkin purpose help protect world live work pla...,termite technician,True,0,laborer,currently floor maintenance,12/diplona,...,"[, floor]",0.000000,[],0,1,61,5,1,2,0
1480,7uXMjQV9r061ap/Lc91I1Q==,i2GfHI8PiE2iSfMDmjNh3Q==,orkin purpose help protect world live work pla...,orkin purpose help protect world live work pla...,commercial sale representative hunter needed,True,0,president bdm sale manager area sale manager t...,design develop implement national sale marketi...,Bachelor of Arts,...,"[sales, sales manager, marketing, territory, t...",0.029940,"[industry, sale, access, plan, source]",5,45,83,10,44,89,5
1901,Pw9q3TmWDE6up1oUsv/OTA==,wuHbCd1jAkSVhHG44qUcBg==,orkin purpose help protect world live work pla...,orkin purpose help protect world live work pla...,termite technician,False,0,threat reduction specialist,completed 4 year contract united state marine ...,High School,...,"[, united state]",0.000000,[],0,1,61,5,1,2,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22014,+3lu6kPHC06xU79BvlwHew==,o9zE84ts7EeTJ16L/Et5AQ==,orkin purpose help protect world live work pla...,orkin purpose help protect world live work pla...,customer relation specialist,False,0,quality control technician laboratory technici...,perform release product product hold analysis ...,Bachelor's Degree in science,...,"[laboratory, testing, documentation, healthcar...",0.020408,"[receive, quality assurance, received, level]",4,48,64,8,88,136,4
22015,+3lu6kPHC06xU79BvlwHew==,VinkkPxps0ulKfwNA9qOeQ==,orkin purpose help protect world live work pla...,orkin purpose help protect world live work pla...,customer relation specialist,False,0,manager,accounting scheduling insurance referral insur...,Diploma,...,"[computer, acknowledging others, scheduling, i...",0.013158,[customer service],1,9,64,8,4,13,1
22016,+3lu6kPHC06xU79BvlwHew==,PN7h+ikMHEKaJ/lKkTOYjg==,orkin purpose help protect world live work pla...,orkin purpose help protect world live work pla...,customer relation specialist,False,0,fraud detection coordinator assistant manager ...,manage outbound customer call investigate dete...,Bachelors++Associates,...,"[microsoft office, interpersonal skills, multi...",0.030928,"[customer service, data entry, account]",3,10,64,8,26,36,3
22017,+3lu6kPHC06xU79BvlwHew==,58O63nAFEkSn6PEFm63C4g==,orkin purpose help protect world live work pla...,orkin purpose help protect world live work pla...,customer relation specialist,False,0,compliance service analyst,perform data validation various client project...,High School,...,[extensive experience in the customer support ...,0.000000,[],0,1,64,8,7,8,0


In [10]:
aggregated_apps = pd.DataFrame(applications_sample.groupby("opportunity_id").agg(
    {"opportunity_description":"first", "opportunity_title":"first",  "application_job_responsibilities":list,})).reset_index()
aggregated_apps

Unnamed: 0,opportunity_id,opportunity_description,opportunity_title,application_job_responsibilities
0,+0cRKl9xvkGTQq4D8IwnAQ==,first year earnings opportunity 40 000 50 000 ...,service technician train,[removal stump removal tree chip brush clear d...
1,+3lu6kPHC06xU79BvlwHew==,orkin purpose help protect world live work pla...,customer relation specialist,[support client engagement team cet customer e...
2,+5os8/vB/EGBEahyGxRMjA==,orkin purpose help protect world live work pla...,outside sale professional,[national geographic snorkeler water sport equ...
3,7uXMjQV9r061ap/Lc91I1Q==,orkin purpose help protect world live work pla...,commercial sale representative hunter needed,[design develop implement national sale market...
4,Im/cVgG09Uesn1fjkbt5TQ==,orkin purpose help protect world live work pla...,outside sale professional,[analyst business coaching consulting www righ...
5,Pw9q3TmWDE6up1oUsv/OTA==,orkin purpose help protect world live work pla...,termite technician,[managed daily administrative operation depart...
6,VPpkoLNRtUCOxpYUssltxg==,19 00 per hour orkin purpose help protect worl...,service technician termite train,[moved within company course several year orig...
7,pJtXynDm6kqbyczn0dXkDg==,orkin purpose help protect world live work pla...,pest control technician,"[mix ink ship company use make drink can, resi..."
8,zsFZ88cMtUKMWf2kIKdncQ==,15 00 per hour customer service specialist ful...,customer service specialist,[assist dealer support question incentive ship...
9,zu77vbq9g0ed54tWutsAig==,homepro subsidiary rollins inc nyse rol headqu...,service technician train,[supervised crew maintain lawn prune bush spra...


In [11]:
opportunity_titles = [aggregated_apps.iloc[n]["opportunity_title"] for n in range(len(aggregated_apps))]
opportunity_descriptions = [aggregated_apps.iloc[n]["opportunity_description"] for n in range(len(aggregated_apps))]

print(opportunity_titles)
print(opportunity_descriptions)


['service technician train', 'customer relation specialist', 'outside sale professional', 'commercial sale representative hunter needed', 'outside sale professional', 'termite technician', 'service technician termite train', 'pest control technician', 'customer service specialist', 'service technician train']
['first year earnings opportunity 40 000 50 000 orkin purpose help protect world live work play orkin pest management industry leader offer extensive training service technician deliver valuable service customer every day interested chance expand knowledge grow career well financial opportunity check position turn much successful candidate complete award winning company paid training learn skill required responsible daily operation pest control service route follow route schedule within assigned territory ensuring timely safe arrival customer operate company vehicle safely ensure cleanliness vehicle equipment communicate customer ensure satisfaction discus additional service meet 

In [12]:
outputs = []

for n in range(0, len(aggregated_apps)):
    opportunity_title = opportunity_titles[n]
    opportunity_description = opportunity_descriptions[n]
    _input = prompt.format_prompt(opportunity_title=opportunity_title, opportunity_description=opportunity_description)

    #print (f"There are {len(_input.messages)} message(s)")
    #print (f"Type: {type(_input.messages[0])}")
    #print ("---------------------------")
    #print (_input.messages[0].content)
    output = chat_model(_input.to_messages())
    outputs.append(output)
aggregated_apps['gpt_generated_skill_list'] = outputs

In [13]:
aggregated_apps['gpt_generated_skill_list_content'] = aggregated_apps['gpt_generated_skill_list'].apply(lambda x: x.content)
#Remove the ```json\n[\n\t{\n\t\t"opportunity_skills": """ and """\n\t}\n]\n``` from the output
aggregated_apps['gpt_generated_skill_list_content'] = aggregated_apps['gpt_generated_skill_list_content'].apply(lambda x: x.replace('```json\n[\n\t{\n\t\t"opportunity_skills":', ''))
# I only want what is in double quotes from the gpt_generated_skill_list_content column
aggregated_apps['gpt_generated_skill_list_content'] = aggregated_apps['gpt_generated_skill_list_content'].apply(lambda x: x.split('"')[1::2])
aggregated_apps['gpt_generated_skill_list_content']

0    [extensive training, service delivery, custome...
1    [customer service, strong work ethic, teamwork...
2    [outside sales, customer service, problem solv...
3    [hunter mentality, sales experience, prospecti...
4    [outside sales, customer service, problem-solv...
5    [pest control, customer service, problem solvi...
6    [termite technician, pest control, customer se...
7    [pest control, customer service, problem solvi...
8    [customer service, communication, teamwork, wo...
9    [service technician, training, customer servic...
Name: gpt_generated_skill_list_content, dtype: object

In [14]:
aggregated_apps

Unnamed: 0,opportunity_id,opportunity_description,opportunity_title,application_job_responsibilities,gpt_generated_skill_list,gpt_generated_skill_list_content
0,+0cRKl9xvkGTQq4D8IwnAQ==,first year earnings opportunity 40 000 50 000 ...,service technician train,[removal stump removal tree chip brush clear d...,"content='```json\n[\n\t{\n\t\t""opportunity_ski...","[extensive training, service delivery, custome..."
1,+3lu6kPHC06xU79BvlwHew==,orkin purpose help protect world live work pla...,customer relation specialist,[support client engagement team cet customer e...,"content='```json\n[\n\t{\n\t\t""opportunity_ski...","[customer service, strong work ethic, teamwork..."
2,+5os8/vB/EGBEahyGxRMjA==,orkin purpose help protect world live work pla...,outside sale professional,[national geographic snorkeler water sport equ...,"content='```json\n[\n\t{\n\t\t""opportunity_ski...","[outside sales, customer service, problem solv..."
3,7uXMjQV9r061ap/Lc91I1Q==,orkin purpose help protect world live work pla...,commercial sale representative hunter needed,[design develop implement national sale market...,"content='```json\n[\n\t{\n\t\t""opportunity_ski...","[hunter mentality, sales experience, prospecti..."
4,Im/cVgG09Uesn1fjkbt5TQ==,orkin purpose help protect world live work pla...,outside sale professional,[analyst business coaching consulting www righ...,"content='```json\n[\n\t{\n\t\t""opportunity_ski...","[outside sales, customer service, problem-solv..."
5,Pw9q3TmWDE6up1oUsv/OTA==,orkin purpose help protect world live work pla...,termite technician,[managed daily administrative operation depart...,"content='```json\n[\n\t{\n\t\t""opportunity_ski...","[pest control, customer service, problem solvi..."
6,VPpkoLNRtUCOxpYUssltxg==,19 00 per hour orkin purpose help protect worl...,service technician termite train,[moved within company course several year orig...,"content='```json\n[\n\t{\n\t\t""opportunity_ski...","[termite technician, pest control, customer se..."
7,pJtXynDm6kqbyczn0dXkDg==,orkin purpose help protect world live work pla...,pest control technician,"[mix ink ship company use make drink can, resi...","content='```json\n[\n\t{\n\t\t""opportunity_ski...","[pest control, customer service, problem solvi..."
8,zsFZ88cMtUKMWf2kIKdncQ==,15 00 per hour customer service specialist ful...,customer service specialist,[assist dealer support question incentive ship...,"content='```json\n[\n\t{\n\t\t""opportunity_ski...","[customer service, communication, teamwork, wo..."
9,zu77vbq9g0ed54tWutsAig==,homepro subsidiary rollins inc nyse rol headqu...,service technician train,[supervised crew maintain lawn prune bush spra...,"content='```json\n[\n\t{\n\t\t""opportunity_ski...","[service technician, training, customer servic..."


### Do the same with the applications.

In [15]:
# How you would like your response structured. This is basically a fancy prompt template
response_schemas = [
    ResponseSchema(name="applicant_skills",
    description="These are the hard and soft skills you extract from the applicants' background"),
]

# How you would like to parse your output
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)

In [16]:
# See the prompt template you created for formatting
format_instructions = output_parser.get_format_instructions()
print (output_parser.get_format_instructions())

The output should be a markdown code snippet formatted in the following schema, including the leading and trailing "```json" and "```":

```json
{
	"applicant_skills": string  // These are the hard and soft skills you extract from the applicants' background
}
```


In [17]:
template = """
You will be given a job applicants' background.
Your job is to extract soft and hard skills from the job description.

{format_instructions}

Wrap your final output with closed and open brackets (a list of json objects)

input_opportunity_description INPUT:
{applicant_background}

YOUR RESPONSE:
"""

prompt = ChatPromptTemplate(
    messages=[
        HumanMessagePromptTemplate.from_template(template)  
    ],
    input_variables=["applicant_background"],
    partial_variables={"format_instructions": format_instructions}
)

In [19]:
list_of_applicants_experiences = applications_sample.application_job_responsibilities.to_list()

In [20]:
outputs = []

for experiences in list_of_applicants_experiences:
    if experiences == None:
        experiences = "None"
    _input = prompt.format_prompt(applicant_background=experiences)

    #print (f"There are {len(_input.messages)} message(s)")
    #print (f"Type: {type(_input.messages[0])}")
    #print ("---------------------------")
    #print (_input.messages[0].content)
    output = chat_model(_input.to_messages())
    outputs.append(output)
applications_sample['application_skills_gpt_extraction'] = outputs

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  applications_sample['application_skills_gpt_extraction'] = outputs


In [21]:
applications_sample.head(2)

Unnamed: 0,opportunity_id,application_id,opportunity_brief_description,opportunity_description,opportunity_title,application_pass_first_step,application_step_category,application_job_titles,application_job_responsibilities,application_education,...,jaccard_similarity,intersected_skills,intersected_skills_count,application_reported_skills_list_length,opportunity_required_skills_list_length,brief_description_skills_list_length,application_job_responsibilities_extracted_skills_list_length,application_skills_list_length,intersected_skills_list_length,application_skills_gpt_extraction
580,Pw9q3TmWDE6up1oUsv/OTA==,jiJ1JAa/AkWkg853IcV99A==,orkin purpose help protect world live work pla...,orkin purpose help protect world live work pla...,termite technician,False,0,detention officer order builder administrative...,managed daily administrative operation departm...,High School Diploma++Mandatory courses++New Ha...,...,0.009709,[operation],1,21,61,5,22,43,1,"content='```json\n[\n\t{\n\t\t""applicant_skill..."
688,Pw9q3TmWDE6up1oUsv/OTA==,DMMShh6ksEeTDMTqbqS5Nw==,orkin purpose help protect world live work pla...,orkin purpose help protect world live work pla...,termite technician,False,0,construction superintendent part manager,osha 10 30 hour certification 15 year general ...,High school diploma,...,0.0,[],0,14,61,5,8,22,0,"content='```json\n[\n\t{\n\t\t""applicant_skill..."


In [22]:
applications_sample['application_skills_gpt_extraction'].iloc[0].content

'```json\n[\n\t{\n\t\t"applicant_skills": "administrative operations, phone call handling, data entry, inmate management, safety and security enforcement, inventory management, warehouse operations, forklift driving, quality control, training, document processing, transcribing, client communication"\n\t}\n]\n```'

In [23]:
applications_sample['application_skills_gpt_extraction_content'] = applications_sample['application_skills_gpt_extraction'].apply(lambda x: x.content)
#applications_sample['application_skills_gpt_extraction_content'] = applications_sample['application_skills_gpt_extraction_content'].apply(lambda x: x.replace('```json\n[\n\t{\n\t\t"opportunity_skills":', ''))
applications_sample['application_skills_gpt_extraction_content'] = applications_sample['application_skills_gpt_extraction_content'].apply(lambda x: x.split('"')[1::2])
applications_sample['application_skills_gpt_extraction_content']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  applications_sample['application_skills_gpt_extraction_content'] = applications_sample['application_skills_gpt_extraction'].apply(lambda x: x.content)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  applications_sample['application_skills_gpt_extraction_content'] = applications_sample['application_skills_gpt_extraction_content'].apply(lambda x: x.split('"')[1::2])


580      [applicant_skills, administrative operations, ...
688      [applicant_skills, osha 10 30 hour certificati...
1047                 [applicant_skills, floor maintenance]
1480     [applicant_skills, design, develop, implement,...
1901     [applicant_skills, completed 4 year contract, ...
                               ...                        
22014    [applicant_skills, release product, product ho...
22015    [applicant_skills, accounting, scheduling, ins...
22016    [applicant_skills, customer service, data entr...
22017    [applicant_skills, data validation, client pro...
22018    [applicant_skills, general administrative cler...
Name: application_skills_gpt_extraction_content, Length: 73, dtype: object

In [38]:
applications_sample = applications_sample.merge(aggregated_apps[['opportunity_id', 'gpt_generated_skill_list_content']], on='opportunity_id', how='left')
applications_sample

Unnamed: 0,opportunity_id,application_id,opportunity_brief_description,opportunity_description_x,opportunity_title_x,application_pass_first_step,application_step_category,application_job_titles,application_job_responsibilities_x,application_education,...,application_skills_list_length,intersected_skills_list_length,application_skills_gpt_extraction,application_skills_gpt_extraction_content,opportunity_description_y,opportunity_title_y,application_job_responsibilities_y,gpt_generated_skill_list,gpt_generated_skill_list_content_x,gpt_generated_skill_list_content_y
0,Pw9q3TmWDE6up1oUsv/OTA==,jiJ1JAa/AkWkg853IcV99A==,orkin purpose help protect world live work pla...,orkin purpose help protect world live work pla...,termite technician,False,0,detention officer order builder administrative...,managed daily administrative operation departm...,High School Diploma++Mandatory courses++New Ha...,...,43,1,"content='```json\n[\n\t{\n\t\t""applicant_skill...","[applicant_skills, administrative operations, ...",orkin purpose help protect world live work pla...,termite technician,[managed daily administrative operation depart...,"content='```json\n[\n\t{\n\t\t""opportunity_ski...","[pest control, customer service, problem solvi...","[pest control, customer service, problem solvi..."
1,Pw9q3TmWDE6up1oUsv/OTA==,DMMShh6ksEeTDMTqbqS5Nw==,orkin purpose help protect world live work pla...,orkin purpose help protect world live work pla...,termite technician,False,0,construction superintendent part manager,osha 10 30 hour certification 15 year general ...,High school diploma,...,22,0,"content='```json\n[\n\t{\n\t\t""applicant_skill...","[applicant_skills, osha 10 30 hour certificati...",orkin purpose help protect world live work pla...,termite technician,[managed daily administrative operation depart...,"content='```json\n[\n\t{\n\t\t""opportunity_ski...","[pest control, customer service, problem solvi...","[pest control, customer service, problem solvi..."
2,Pw9q3TmWDE6up1oUsv/OTA==,UUO4CGKds0SR5OdxnONVag==,orkin purpose help protect world live work pla...,orkin purpose help protect world live work pla...,termite technician,True,0,laborer,currently floor maintenance,12/diplona,...,2,0,"content='```json\n[\n\t{\n\t\t""applicant_skill...","[applicant_skills, floor maintenance]",orkin purpose help protect world live work pla...,termite technician,[managed daily administrative operation depart...,"content='```json\n[\n\t{\n\t\t""opportunity_ski...","[pest control, customer service, problem solvi...","[pest control, customer service, problem solvi..."
3,7uXMjQV9r061ap/Lc91I1Q==,i2GfHI8PiE2iSfMDmjNh3Q==,orkin purpose help protect world live work pla...,orkin purpose help protect world live work pla...,commercial sale representative hunter needed,True,0,president bdm sale manager area sale manager t...,design develop implement national sale marketi...,Bachelor of Arts,...,89,5,"content='```json\n[\n\t{\n\t\t""applicant_skill...","[applicant_skills, design, develop, implement,...",orkin purpose help protect world live work pla...,commercial sale representative hunter needed,[design develop implement national sale market...,"content='```json\n[\n\t{\n\t\t""opportunity_ski...","[hunter mentality, sales experience, prospecti...","[hunter mentality, sales experience, prospecti..."
4,Pw9q3TmWDE6up1oUsv/OTA==,wuHbCd1jAkSVhHG44qUcBg==,orkin purpose help protect world live work pla...,orkin purpose help protect world live work pla...,termite technician,False,0,threat reduction specialist,completed 4 year contract united state marine ...,High School,...,2,0,"content='```json\n[\n\t{\n\t\t""applicant_skill...","[applicant_skills, completed 4 year contract, ...",orkin purpose help protect world live work pla...,termite technician,[managed daily administrative operation depart...,"content='```json\n[\n\t{\n\t\t""opportunity_ski...","[pest control, customer service, problem solvi...","[pest control, customer service, problem solvi..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
68,+3lu6kPHC06xU79BvlwHew==,o9zE84ts7EeTJ16L/Et5AQ==,orkin purpose help protect world live work pla...,orkin purpose help protect world live work pla...,customer relation specialist,False,0,quality control technician laboratory technici...,perform release product product hold analysis ...,Bachelor's Degree in science,...,136,4,"content='```json\n[\n\t{\n\t\t""applicant_skill...","[applicant_skills, release product, product ho...",orkin purpose help protect world live work pla...,customer relation specialist,[support client engagement team cet customer e...,"content='```json\n[\n\t{\n\t\t""opportunity_ski...","[customer service, strong work ethic, teamwork...","[customer service, strong work ethic, teamwork..."
69,+3lu6kPHC06xU79BvlwHew==,VinkkPxps0ulKfwNA9qOeQ==,orkin purpose help protect world live work pla...,orkin purpose help protect world live work pla...,customer relation specialist,False,0,manager,accounting scheduling insurance referral insur...,Diploma,...,13,1,"content='```json\n[\n\t{\n\t\t""applicant_skill...","[applicant_skills, accounting, scheduling, ins...",orkin purpose help protect world live work pla...,customer relation specialist,[support client engagement team cet customer e...,"content='```json\n[\n\t{\n\t\t""opportunity_ski...","[customer service, strong work ethic, teamwork...","[customer service, strong work ethic, teamwork..."
70,+3lu6kPHC06xU79BvlwHew==,PN7h+ikMHEKaJ/lKkTOYjg==,orkin purpose help protect world live work pla...,orkin purpose help protect world live work pla...,customer relation specialist,False,0,fraud detection coordinator assistant manager ...,manage outbound customer call investigate dete...,Bachelors++Associates,...,36,3,"content='```json\n[\n\t{\n\t\t""applicant_skill...","[applicant_skills, customer service, data entr...",orkin purpose help protect world live work pla...,customer relation specialist,[support client engagement team cet customer e...,"content='```json\n[\n\t{\n\t\t""opportunity_ski...","[customer service, strong work ethic, teamwork...","[customer service, strong work ethic, teamwork..."
71,+3lu6kPHC06xU79BvlwHew==,58O63nAFEkSn6PEFm63C4g==,orkin purpose help protect world live work pla...,orkin purpose help protect world live work pla...,customer relation specialist,False,0,compliance service analyst,perform data validation various client project...,High School,...,8,0,"content='```json\n[\n\t{\n\t\t""applicant_skill...","[applicant_skills, data validation, client pro...",orkin purpose help protect world live work pla...,customer relation specialist,[support client engagement team cet customer e...,"content='```json\n[\n\t{\n\t\t""opportunity_ski...","[customer service, strong work ethic, teamwork...","[customer service, strong work ethic, teamwork..."


In [33]:
skill_lists = applications_sample[['gpt_generated_skill_list_content', 'application_skills_gpt_extraction_content']]
skill_lists.columns = ['opportunity_skills', 'applicant_skills']


#make the lists of skills into sets
skill_lists['opportunity_skills'] = skill_lists['opportunity_skills'].apply(lambda x: set(x))
skill_lists['applicant_skills'] = skill_lists['applicant_skills'].apply(lambda x: [i for i in x if i != 'applicant_skills'])
skill_lists['applicant_skills'] = skill_lists['applicant_skills'].apply(lambda x: set(x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  skill_lists['opportunity_skills'] = skill_lists['opportunity_skills'].apply(lambda x: set(x))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  skill_lists['applicant_skills'] = skill_lists['applicant_skills'].apply(lambda x: [i for i in x if i != 'applicant_skills'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-ve

In [42]:
applications_sample.columns

Index(['opportunity_id', 'application_id', 'opportunity_brief_description',
       'opportunity_description_x', 'opportunity_title_x',
       'application_pass_first_step', 'application_step_category',
       'application_job_titles', 'application_job_responsibilities_x',
       'application_education', 'application_reported_skills',
       'similarity_score', 'similarity_rank', 'opportunity_required_skills',
       'brief_description_skills',
       'application_job_responsibilities_extracted_skills',
       'application_skills', 'jaccard_similarity', 'intersected_skills',
       'intersected_skills_count', 'application_reported_skills_list_length',
       'opportunity_required_skills_list_length',
       'brief_description_skills_list_length',
       'application_job_responsibilities_extracted_skills_list_length',
       'application_skills_list_length', 'intersected_skills_list_length',
       'application_skills_gpt_extraction',
       'application_skills_gpt_extraction_content',
 

In [46]:
gpt_generated_skills = applications_sample[['opportunity_id', 'opportunity_title_x', 'opportunity_description_x', 'application_id', 'application_job_responsibilities_x', 'gpt_generated_skill_list_content_x', 'application_skills_gpt_extraction_content', 'opportunity_required_skills', 'application_job_responsibilities_extracted_skills',]]
gpt_generated_skills.columns = ['opportunity_id', 'opportunity_title', 'opportunity_description', 'application_id', 'application_job_responsibilities', 'opportunity_skills_gpt', 'applicant_skills_gpt', 'opportunity_skills_skillNER', 'applicant_skills_skillNER']
gpt_generated_skills.sample(4)

Unnamed: 0,opportunity_id,opportunity_title,opportunity_description,application_id,application_job_responsibilities,opportunity_skills_gpt,applicant_skills_gpt,opportunity_skills_skillNER,applicant_skills_skillNER
28,Pw9q3TmWDE6up1oUsv/OTA==,termite technician,orkin purpose help protect world live work pla...,nDEHCuGVvkueDM8cv744ag==,responsible prepping food item busy night week...,"[pest control, customer service, problem solvi...","[applicant_skills, prepping food, stocking kit...","[pest control, service provider, good driving ...","[social skill, food, food, communicated, prepa..."
57,zu77vbq9g0ed54tWutsAig==,service technician train,homepro subsidiary rollins inc nyse rol headqu...,hxgj2IzWOUOtWWfMHhd7Ig==,secures premise personnel patrolling property ...,"[service technician, training, customer servic...","[applicant_skills, security, premises, personn...","[pest control, good driving record, career man...",[patrolling]
42,Pw9q3TmWDE6up1oUsv/OTA==,termite technician,orkin purpose help protect world live work pla...,1Kd9yok5L0GCmCWFefPM9w==,name triston roseman currently serving united ...,"[pest control, customer service, problem solvi...","[applicant_skills, hardworking]","[pest control, service provider, good driving ...",[united state]
38,Pw9q3TmWDE6up1oUsv/OTA==,termite technician,orkin purpose help protect world live work pla...,uNuvh//qHkm+jYrkzSsXuQ==,november 2017 present electrical aptitude test...,"[pest control, customer service, problem solvi...","[applicant_skills, electrical aptitude, troubl...","[pest control, service provider, good driving ...","[customer service, corrosion control, personal..."


In [45]:
gpt_generated_skills.to_pickle("../../Data/gpt_generated_skills.pkl")

In [35]:
skill_lists['jaccard_similarity'] = skill_lists.apply(lambda x: len(x['opportunity_skills'].intersection(x['applicant_skills'])) / len(x['opportunity_skills'].union(x['applicant_skills'])), axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  skill_lists['jaccard_similarity'] = skill_lists.apply(lambda x: len(x['opportunity_skills'].intersection(x['applicant_skills'])) / len(x['opportunity_skills'].union(x['applicant_skills'])), axis=1)


In [37]:
skill_lists['jaccard_similarity'].describe()

count    73.0
mean      0.0
std       0.0
min       0.0
25%       0.0
50%       0.0
75%       0.0
max       0.0
Name: jaccard_similarity, dtype: float64

In [47]:
skill_lists['intersection'] = skill_lists.apply(lambda x: x['opportunity_skills'].intersection(x['applicant_skills']), axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  skill_lists['intersection'] = skill_lists.apply(lambda x: x['opportunity_skills'].intersection(x['applicant_skills']), axis=1)


In [48]:
skill_lists['intersection'].describe()

count     73
unique     1
top       {}
freq      73
Name: intersection, dtype: object

### When GPT Generated the skills given the applications and the job descriptions, it did not generate any skills that were in the job description in any case.