# Project 4: Candidate Resume Recommendation System
# Import packages

In [1]:
import pandas as pd
import numpy as np
import re
import nltk
from nltk.tokenize import RegexpTokenizer, word_tokenize 
from nltk.stem import WordNetLemmatizer
import spacy
import statistics 
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
import en_core_web_sm
nltk.download('wordnet')
nlp = en_core_web_sm.load()

[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/hellojenny/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [3]:
#pd.set_option('display.max_colwidth', -1)

# Load the data into dataframes

In [4]:
jobs = pd.read_pickle('./jobs.pkl')
resumes = pd.read_pickle('./resumes.pkl')

## Create functions for cleaning

In [5]:
def ultimate_cleaning(text):
    #remove named entities
    #remove words from pos_tag_list
    #remove stop words
    #remove non-alpha characters
    #return only lemma
    doc = nlp(text)
    pos_tag_list = ['ADJ', 'ADV', 'NOUN', 'PART', 'VERB'] 
    stop_words_list = ['nbsp', 'candidate', 'now', 'work', 'professional', 'ability', 'hire', 'join', 
                       'interested', 'approximately', 'start', 'end', 'p.m.', '-PRON-']
    cleaned_text = []

    for token in doc:
        if token.pos_ in pos_tag_list\
        and token.is_stop == False\
        and token.is_alpha == True\
        and token.lemma_ not in stop_words_list:
            cleaned_text.append(token.lemma_)

    return ' '.join(cleaned_text)

In [6]:
def title_cleaning(text):
    doc = nlp(text)
    pos_tag_list = ['ADJ', 'ADV', 'NOUN', 'PART', 'VERB', 'PROPN'] 
    stop_words_list = ['nbsp', 'candidate', 'now', 'work', 'professional', 'ability', 'hire', 'join', 
                       'interested', 'approximately', 'start', 'end', 'p.m.', '-PRON-']
    cleaned_text = []

    for token in doc:
        if token.pos_ in pos_tag_list\
        and token.is_stop == False\
        and token.is_alpha == True\
        and token.lemma_ not in stop_words_list:
            cleaned_text.append(token.lemma_)

    return ' '.join(cleaned_text)

## Clean text for `jobs`

In [7]:
jobs.head(10)

Unnamed: 0,job_id,slug,job_title,company,industry,job_description,employment_type,education,org_cleaned_text
0,111,palo-alto-ca-tacolicious-server,Server,Tacolicious,Food and Beverages,Tacolicious' first Palo Alto store just opened...,Part-Time,,server tacolici palo alto part time tacolici f...
1,113,san-francisco-ca-claude-lane-kitchen-staff-chef,Kitchen Staff/Chef,Claude Lane,Food and Beverages,\r\n\r\nNew French Brasserie in S.F. Financia...,Part-Time,,kitchen staff chef claud lane san francisco pa...
2,117,san-francisco-ca-machka-restaurants-corp-barte...,Bartender,Machka Restaurants Corp.,Food and Beverages,We are a popular Mediterranean wine bar and re...,Part-Time,,bartend machka restaur corp. san francisco par...
3,121,brisbane-ca-teriyaki-house-server,Server,Teriyaki House,Food and Beverages,● Serve food/drinks to customers in a profess...,Part-Time,,server teriyaki hous brisban part time serv fo...
4,127,los-angeles-ca-rosa-mexicano-sunset-kitchen-st...,Kitchen Staff/Chef,Rosa Mexicano - Sunset,Food and Beverages,"Located at the heart of Hollywood, we are one ...",Part-Time,,kitchen staff chef rosa mexicano sunset lo ang...
5,129,los-altos-ca-mind-of-beauty-day-spa-receptionist,Receptionist,Mind of Beauty Day Spa,Retail,We are a group of professional massage therapi...,Part-Time,,receptionist mind beauti day spa lo alto part ...
6,131,los-angeles-ca-roy-s-woodland-hills-server,Server,Roy's Woodland Hills,Food and Beverages,● Serve food/drinks to customers in a profess...,Part-Time,,server roy woodland hill lo angel part time se...
7,133,berkeley-ca-koja-kitchen-driver,Driver,KoJa Kitchen,Food and Beverages,KoJa Kitchen:\r\nPART-TIME LOCAL HELP NEEDED W...,Part-Time,,driver koja kitchen berkeley part time koja ki...
8,134273,mendham-nj-king-s-food-markets-assistant-store...,Assistant Store Manager,King's Food Markets,,\r\nAssistant Store Manager\r\n\r\nNow Hiring ...,Part-Time,Not Specified,assist store manag king food market mendham pa...
9,134274,onalaska-wi-aldi-store-associate-retail-sales-...,Store Associate - Retail Sales (Customer Service),ALDI,,Hiring Event Details\r\nStore Associate\r\n\r\...,Full-Time/Part-Time,High School Diploma,store associ retail sale custom servic aldi on...


In [8]:
jobs['job_description'] = jobs['job_description'].fillna('')

In [9]:
jobs['ultimate_cleaned_text'] = jobs['job_description'].apply(ultimate_cleaning)

In [10]:
jobs['cleaned_job_title'] = jobs['job_title'].apply(title_cleaning)

In [11]:
jobs['text'] = jobs['ultimate_cleaned_text'] + ' ' + jobs['cleaned_job_title']

In [12]:
jobs.to_pickle('./jobs_cleaned.pkl')

In [21]:
jobs

Unnamed: 0,job_id,slug,job_title,company,industry,job_description,employment_type,education,org_cleaned_text,ultimate_cleaned_text,cleaned_job_title,text
0,111,palo-alto-ca-tacolicious-server,Server,Tacolicious,Food and Beverages,Tacolicious' first Palo Alto store just opened...,Part-Time,,server tacolici palo alto part time tacolici f...,tacolicious store open recently love taco love...,server,tacolicious store open recently love taco love...
1,113,san-francisco-ca-claude-lane-kitchen-staff-chef,Kitchen Staff/Chef,Claude Lane,Food and Beverages,\r\n\r\nNew French Brasserie in S.F. Financia...,Part-Time,,kitchen staff chef claud lane san francisco pa...,seek energetic dynamic chef charge grow compan...,kitchen staff chef,seek energetic dynamic chef charge grow compan...
2,117,san-francisco-ca-machka-restaurants-corp-barte...,Bartender,Machka Restaurants Corp.,Food and Beverages,We are a popular Mediterranean wine bar and re...,Part-Time,,bartend machka restaur corp. san francisco par...,popular wine bar restaurant look experienced b...,bartender,popular wine bar restaurant look experienced b...
3,121,brisbane-ca-teriyaki-house-server,Server,Teriyaki House,Food and Beverages,● Serve food/drinks to customers in a profess...,Part-Time,,server teriyaki hous brisban part time serv fo...,serve food drink customer manner cashier need ...,server,serve food drink customer manner cashier need ...
4,127,los-angeles-ca-rosa-mexicano-sunset-kitchen-st...,Kitchen Staff/Chef,Rosa Mexicano - Sunset,Food and Beverages,"Located at the heart of Hollywood, we are one ...",Part-Time,,kitchen staff chef rosa mexicano sunset lo ang...,locate heart popular mexican place currently l...,kitchen staff chef,locate heart popular mexican place currently l...
...,...,...,...,...,...,...,...,...,...,...,...,...
84085,82,san-francisco-ca-national-japanese-american-hi...,Book Keeper,National Japanese American Historical Society,Office Administration,NJAHS stands for National Japanese American Hi...,Part-Time,,book keeper nation japanes american histor soc...,njahs stand national japanese locate town curr...,book keeper,njahs stand national japanese locate town curr...
84086,83,larkspur-ca-emporio-rulli-kitchen-staff-chef,Kitchen Staff/Chef,Emporio Rulli,Food and Beverages,Weekend Brunch Line Cook \r\n● Other shifts ma...,Part-Time,,kitchen staff chef emporio rulli larkspur part...,line other shift available team kitchen order,kitchen staff chef,line other shift available team kitchen order ...
84087,84,san-francisco-ca-onigilly-driver-84,Driver,Onigilly,Food and Beverages,ONIGILLY (Japanese rice ball wraps) seeks outg...,Part-Time,,driver onigilli san francisco part time onigil...,onigilly japanese rice ball wrap seek outgoing...,driver,onigilly japanese rice ball wrap seek outgoing...
84088,88,san-francisco-ca-machka-restaurants-corp-line-...,Line Cook,Machka Restaurants Corp.,Food and Beverages,We are a popular Mediterranean restaurant in F...,Part-Time,,line cook machka restaur corp. san francisco p...,popular restaurant look experienced line cook ...,line cook,popular restaurant look experienced line cook ...


## Clean text for `resumes`

In [13]:
resumes.sort_values(by=['applicant_id']).head(10)

Unnamed: 0,applicant_id,job_title,employer,job_description,salary
2763,2,Volunteer,School for Self-Healing,* Read aloud Meir Schneider's books and record...,
2762,2,Writer for the Uloop Blog,Cecilia Abate,"* Wrote articles for the ""Uloop Blog,"" which i...",
3759,3,Marketing Intern,Honda,,
3758,3,Server,Aloha Beach Resort,,
3757,3,Prep Cook,Moscone Center,,20.0
6277,6,Project Assistant,IOM,,18.0
7490,8,"deli clerk,server, cashier, food prep, order t...",Safeway Grocery Inc,,8.75
368,11,Cashier,Cristina Green,,8.45
809,12,Server,Buffalo Wild Wings,,8.0
810,12,Rec Leader 1,SF Park & Rec,,


In [14]:
new_resumes = resumes[['applicant_id', 'job_title', 'job_description']].dropna(thresh=3).fillna('')

In [15]:
new_resumes['cleaned_job_title'] = new_resumes['job_title'].apply(title_cleaning)

In [16]:
new_resumes['cleaned_job_description'] = new_resumes['job_description'].apply(ultimate_cleaning)

In [17]:
new_resumes = new_resumes.groupby(['applicant_id']).agg(lambda x: ' '.join(set(x)))
new_resumes['text'] = new_resumes['cleaned_job_title'] + ' ' + new_resumes['cleaned_job_description']

In [18]:
new_resumes.reset_index(inplace=True)

In [19]:
new_resumes.to_pickle('./resumes_cleaned.pkl')

In [20]:
new_resumes

Unnamed: 0,applicant_id,job_title,job_description,cleaned_job_title,cleaned_job_description,text
0,2,Writer for the Uloop Blog Volunteer,"* Wrote articles for the ""Uloop Blog,"" which i...",writer uloop blog volunteer,read aloud book record audio visually impair w...,writer uloop blog volunteer read aloud book re...
1,38,Sales Person & Phone Receptionist,Asking customer if they need any assistance an...,sales person phone receptionist,ask customer need assistance check extra size ...,sales person phone receptionist ask customer n...
2,78,Impact team member,"Help maintain merchandise flow, Work on fillin...",impact team member,help maintain merchandise flow fill present st...,impact team member help maintain merchandise f...
3,86,none,none,,,
4,89,Clerk's helper Healthcare Specialist / Combat ...,"Clinical and field medicine, Healthcare educat...",clerk helper healthcare specialist combat medic,clinical field medicine education food medicin...,clerk helper healthcare specialist combat medi...
...,...,...,...,...,...,...
2411,14627,Cook,Perparing meals,cook,perpar meal,cook perpar meal
2412,14630,Product strategist,- Product strategist of Job Search and Job Dis...,product strategist,product strategist analyst site provide traini...,product strategist product strategist analyst ...
2413,14639,Student Senate Intern Girl Boss Consultant Tou...,"Strategy and operations consulting, primarily ...",assistant property manager webmaster consultan...,responsible lead historical informational min...,assistant property manager webmaster consultan...
2414,14642,Director Diversity & Human Resources Adjunct F...,*Chief Human Resources Officer and Director of...,found president author director diversity huma...,connect scholar industry expert manager educa...,found president author director diversity huma...
