In [1]:
import pandas as pd
import nltk
import numpy as np
from nltk.tokenize import word_tokenize
import re
import matplotlib.pyplot as plt
import math
from nltk.corpus import stopwords

# We extract skills based on cosine similarity of words from given list of clusters and each (meaningful) word in vacancy. It's possible to combine it with job extraction method made during holidays

In [2]:
Data = pd.read_csv("JobDataTranslated.csv")
#Data['Job Description'].apply(lambda x: x.lower())

def clean(text):
    
    # removing paragraph numbers
    text = re.sub('[0-9]+.\t','',str(text))
    # removing new line characters
    text = re.sub('\n ','',str(text))
    text = re.sub('\n',' ',str(text))
    # removing apostrophes
    text = re.sub("'s",'',str(text))
    # removing hyphens
    text = re.sub("-",' ',str(text))
    text = re.sub("— ",'',str(text))
    # removing quotation marks
    text = re.sub('\"','',str(text))
    # removing salutations
    text = re.sub("Mr\.",'Mr',str(text))
    text = re.sub("Mrs\.",'Mrs',str(text))
    # removing any reference to outside text
    text = re.sub("[\(\[].*?[\)\]]", "", str(text))
    
    return text

# preprocessing speeches
Data['Cleaned Job Description'] = Data['Job Description'].apply(clean)

from string import punctuation

def remove_punctuation(s):
    global punctuation
    for p in punctuation:
        s = s.replace(p, '')
    return s

Data['Cleaned Job Description'] = Data['Cleaned Job Description'].map(remove_punctuation)

Data['Cleaned Job Description'] = Data['Cleaned Job Description'].apply(lambda x: x.lower())

In [18]:
s = pd.read_csv("JobDataTranslated.csv")


In [4]:
#nltk.download('stopwords')
en_stopwords = stopwords.words('english')
def remove_stopwords(s):
    global en_stopwords
    s = word_tokenize(s)
    s = " ".join([w for w in s if w not in en_stopwords])
    return s

# Create a new column of descriptions with no stopwords
Data['Cleaned Job Description NoStop'] = Data['Cleaned Job Description'].map(remove_stopwords)




In [5]:
x = "agriculture architecture engineering art finance buziness construction excavation education healthcare hospitality \
math installation reapair legal military science administaration sales service transportation medicine driver chemistry\
nurse technician language therapy accounting delivery logistics teaching psychiatry hr plumber mechanic management \
leadership psychology communication carpenter physiotherapy ICT marketing programming computer administration"


In [6]:
len(x.split(' '))

46

In [7]:
import spacy

nlp = spacy.load("en_core_web_lg")

In [8]:
"""
remove copies of list in order
"""
def reduce(l):
    sorted_reduced = []
    for i in l:
        if i not in sorted_reduced:
            sorted_reduced.append(i)

    return sorted_reduced

In [9]:
'''
skills for words that got a highest cosine similarity
'''
def print_top_skills(vac):
    token1 = nlp(x)
    tokens = nlp(vac)
    l=[]
    for token0 in token1:
        for token2 in tokens:
                l.append([token0.similarity(token2), token0.text, token2.text])
    
    clusters = [l[i][1] for i in range(len(l)) if l[i][0]>0.7]
    words_traken_from =  [l[i][2] for i in range(len(l)) if l[i][0]>0.7] 
    print('Top clusters in order: ', reduce(clusters))
    print()
    print('Words in vacancy triggered by clusters: ', reduce(words_traken_from ))
    return

In [10]:
s['Job Description'][0]

"You know better than anyone how to bind other people to you, people for whom you can mean something. A great new assignment for jobseekers and the right match for their sourcing issue for clients. If you are also curious about market developments, would you like to hear more about projects within the industry and are you able to translate this information into opportunities for Brunel, then a role as a Sales Consultant is perfect for you! About this position As a Sales Consultant you always have something to do. Your main goal is to make the best match between clients and candidates, and that involves a lot. Your work does not stop at finding and connecting both parties. You are also responsible for expanding and maintaining your own network of candidates and clients. That means that you are in constant contact with both parties. Keeping an overview and keeping different balls in the air is no problem for you. Your focus area will be on specialists and organizations within the Norther

In [11]:
import warnings # if it works I don't care that says!
warnings.filterwarnings("ignore")

print_top_skills(Data['Cleaned Job Description NoStop'][0])

Top clusters in order:  ['sales', 'driver', 'logistics', 'management', 'communication']

Words in vacancy triggered by clusters:  ['sales', 'driver', 'logistics', 'management', 'communication']


In [12]:
s['Job Description'][200]

"XPO Logistics is a global player in supply chain solutions. XPO has more than 89,000 employees, spread over 1,440 offices in 33 countries. Our services focus on 3 domains: logistics solutions, transport and global forwarding.  For our Transport Compentence Center in Eindhoven we are looking for a: TRANSPORT ENGINEER  YOUR FUNCTION: As a Transport Engineer you fulfill a crucial role within the Transport Compentence Center department. You have an advisory and supporting role and you are constantly monitoring and analyzing data in the Transport department. You convert these analyzes into advice and improvement proposals. To Do: Conducting network studies and analysis of shipment data; Developing transport solutions and proposals to internal and external customers; Processing transport rates from carriers to rate sheets for customers; Keeping the carrier and rate database up to date; Executing projects for the further optimization of the Transport Competence Center; Analyzing and optimizi

In [13]:
print_top_skills(Data['Cleaned Job Description NoStop'][200])

Top clusters in order:  ['engineering', 'service', 'transportation', 'logistics', 'marketing']

Words in vacancy triggered by clusters:  ['engineer', 'services', 'service', 'transport', 'logistics', 'business']


In [14]:
s['Job Description'][500]

'Also view this vacancy at: _www.maratec.nl/vacatures.html _ Ambiance Maratec is a specialist in the field of plastic frames, interior and exterior sun protection, mosquito nets, roller shutters and patio roofs. Our offer consists of a varied range of top brands, tailor-made advice and professional installation. Maratec has a suitable offer for every wallet. We are proud of our showroom, the most beautiful and largest in South Limburg. And we continue to grow. Full-time field service engineer and a Full-time apprentice field technician In this position you are the technical link between the customers and our company. For these vacancies we are looking for an independently operating professional with technical affinity. What are you going to do? As a mechanic at Ambiance Maratec you have a very varied job. Due to the wide variety of products and services, you are busy with different activities every day. Installation of indoor and outdoor sun protection, terrace solutions, insect protec

In [15]:
print_top_skills(Data['Cleaned Job Description NoStop'][500])

Top clusters in order:  ['engineering', 'installation', 'service', 'technician', 'mechanic']

Words in vacancy triggered by clusters:  ['engineer', 'installation', 'service', 'services', 'technician', 'mechanic']


In [16]:
s['Job Description'][900]

'Your challenge as a Service Technician Electrical Engineering | Venlo As a Service Technician you independently perform maintenance and service work on (complex) installations of our customers within the utility. Think of customers in the segments education, healthcare and government. You maintain, repair and modify installations of various types, put them into operation and set them up, within the set time and in accordance with instructions. Locates faults, resolves them and checks installations for defects or imminent defects. You provide written reports on maintenance work and / or malfunctions and you take care of the necessary administration. You participate in breakdown services together with a young and ambitious team. With your service-providing attitude, you and the team ensure maximum internal and external customer satisfaction Your team You will work within Unica Building Services; the one-stop-shop for building-related technology. From design, realization to management, m

In [17]:
print_top_skills(Data['Cleaned Job Description NoStop'][900])

Top clusters in order:  ['engineering', 'education', 'healthcare', 'installation', 'service', 'technician', 'teaching', 'management', 'ICT', 'marketing', 'administration']

Words in vacancy triggered by clusters:  ['engineering', 'education', 'healthcare', 'care', 'health', 'installations', 'installation', 'service', 'services', 'technician', 'management', 'ict', 'business', 'administration']
