## Getting started with spaCy

In [2]:
# Sample job posting whose description, location type and address all mark it as remote.
positive_job_posting = {
    "description": "This is a remote position",
    "title": "software engineer",
    "jobLocationType": "TELECOMMUTE",
    "jobLocation": {
        "@type": "Place",
        "address": {
            "@type": "PostalAddress",
            "addressLocality": "Remote",
            "addressCountry": "US",
        },
    },
}
type(positive_job_posting)

dict

In [3]:
# Request the job with id "abc" from the job service in "standard" format.
# The recorded response below is a 400 Bad Request: the service rejects ids
# that are not 36 characters long.
!curl -X 'GET' \
  'https://jobs-job-service.mwwnextapppreprod-us.monster-next.com/jobs-job-service/v1/jobs/abc?format=standard' \
  -H 'accept: */*'

{"timestamp":1701795935724,"status":400,"error":"Bad Request","message":"[QueryController.getJob()]  Invalid JobId. JobId must be 36 characters. code=400,error={class=io.monster.jobs.jobservice.exception.JpMalformedIdException message=Invalid job id=abc}","path":"/jobs-job-service/v1/jobs/abc"}

### Load a job

In [4]:
import requests
import json
from string import Template

# Fetch one job posting from the job service and extract its description.
# On success this cell defines `response_dict`, `job_posting` and
# `job_description`; later cells read `job_description`.
REQUEST_TIMEOUT_SECONDS = 10

template = Template('https://jobs-job-service.mwwnextapppreprod-us.monster-next.com/jobs-job-service/v1/jobs/${JOBID}?format=standard')
jobid = "f95215de-e176-4f4b-84c3-2b7eca180aee"
# Set jobid = "abc" instead to see the service's 400 "Invalid JobId" response.
url = template.substitute(JOBID=jobid)
headers = {"Accept": "application/json"}

# Always pass a timeout: without one, requests can wait indefinitely on an
# unresponsive host and the cell never finishes.
r = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT_SECONDS)
print(f"Status code: {r.status_code}")

if r.status_code == 200:
    # Convert the response body to a dictionary and pull out the description.
    response_dict = r.json()
    # Pretty-printed copy of the payload, kept for ad-hoc inspection.
    response_string = json.dumps(response_dict, indent=4)
    job_posting = response_dict['jobPosting']
    job_description = job_posting['description']
    print(job_description)
else:
    # Error path: `job_description` is NOT assigned here, so the cells below
    # will fail (or reuse a stale value from an earlier run) until this
    # request succeeds.
    try:
        response_dict = r.json()
    except ValueError:
        # The error body is not JSON (e.g. an HTML page from a proxy);
        # show it verbatim instead of raising while reporting the failure.
        print(r.text)
    else:
        response_string = json.dumps(response_dict, indent=4)
        print(response_string)


Status code: 200
<p><strong>We've recently updated our offered benefits and wages to stay competitive with the rising hiring demands. We're looking for the BEST to grow our family here at Palmen and our wages and benefits reflect that. </strong></p><ul><li><strong>Paid Time Off and flexible scheduling to ensure a healthy work/life balance</strong></li><li><strong>Creative and competitive pay plan options </strong></li><li><strong>Health, dental, and vision insurance</strong></li><li><strong>401K program with company match </strong></li></ul><p>Reporting to our Service & Parts Director, our <strong>Service Porter (Full Time or Part Time)</strong> will maintain new and used vehicle inventory appearance and performance by cleaning interior and exterior of vehicles, replenishing all vehicle fluids as needed, and replacing batteries when necessary. If you're someone who thrives on attention to detail, customer service, and a rewarding work environment, this could be the perfect job for you!

### Configure patterns

In [5]:
import spacy
from spacy.matcher import Matcher
from bs4 import BeautifulSoup
import json

# Language pipeline and token matcher used on job descriptions.
description_nlp = spacy.load("en_core_web_sm")
description_matcher = Matcher(description_nlp.vocab)

# Token patterns for phrases that signal a remote job. Each entry is one
# pattern (a list of per-token attribute dicts); the comment above it shows
# an example phrase it is meant to match.
description_patterns = [
    # position is fully remote
    [{"LOWER": "position"}, {"LEMMA": "be"}, {"LOWER": "fully"}, {"LOWER": "remote"}],
    # Employee choice to work remote
    [{"LOWER": "employee"}, {"LOWER": "choice"}, {"LOWER": "to"}, {"LOWER": "work"}, {"LOWER": "remote"}],
    # work remote
    [{"LOWER": "work"}, {"LOWER": "remote"}],
    # remote opportunity
    [{"LOWER": "remote"}, {"LOWER": "opportunity"}],
    # position is 100% remote || position 100% remote
    [
        {"LOWER": "position"},
        {"LEMMA": "be", "OP": "?"},
        {"SHAPE": "ddd"},
        {"ORTH": "%"},
        {"LOWER": "remote"},
    ],
    # position is remote
    [{"LOWER": "position"}, {"LEMMA": "be"}, {"LOWER": "remote"}],
    # fully remote
    [{"LOWER": "fully"}, {"LOWER": "remote"}],
    # This is a remote position
    [{"POS": "PRON"}, {"LEMMA": "be"}, {"POS": "DET"}, {"LOWER": "remote"}, {"LOWER": "position"}],
    # Work Location: Remote
    [{"LOWER": "work"}, {"LOWER": "location"}, {"IS_PUNCT": True, "OP": "*"}, {"LOWER": "remote"}],
    # job Location: Remote
    [{"LOWER": "job"}, {"LOWER": "location"}, {"IS_PUNCT": True, "OP": "*"}, {"LOWER": "remote"}],
    # This is a 100% remote position
    [
        {"POS": "PRON"},
        {"LEMMA": "be"},
        {"POS": "DET"},
        {"SHAPE": "ddd"},
        {"ORTH": "%"},
        {"LOWER": "remote"},
        {"LOWER": "position"},
    ],
    # opportunities to work remotely
    [{"LOWER": "opportunities"}, {"LEMMA": "to"}, {"LOWER": "work"}, {"LOWER": "remotely"}],
]

# Register every pattern under the single match key.
description_matcher.add("REMOTEDESCRIPTION", description_patterns)

type(description_matcher)

spacy.matcher.matcher.Matcher

### Run the matcher against the loaded job

In [6]:
# The job description is an HTML fragment (<p>, <strong>, <li>, ...). Strip
# the markup before tokenizing so tags cannot glue onto, or sit between, the
# words of a phrase the patterns look for (e.g. "Work Location:</strong> Remote").
# strip=True with a single-space separator avoids runs of whitespace, which
# spaCy would otherwise emit as extra tokens between words.
job_description_text = BeautifulSoup(job_description, "html.parser").get_text(
    separator=" ", strip=True
)

doc = description_nlp(job_description_text)
matches = description_matcher(doc)

# Collect the text of every matched span; each match is (match_id, start, end)
# with start/end as token indices into `doc`.
descriptionmatches = [doc[start:end].text for _match_id, start, end in matches]
print(descriptionmatches)

[]


In [9]:
# Wrap the fetched description in the project's JobPosting model and show the
# resulting object's type.
# NOTE(review): only `description` is passed; whether JobPosting accepts or
# requires other fields is not visible from this notebook — confirm against
# app/model/job_posting.
from app.model.job_posting import JobPosting
jobposting = JobPosting(description=job_description)
type(jobposting)

app.model.job_posting.JobPosting