In [129]:
import pandas as pd
import numpy as np
import re

df = pd.read_csv('staff.am_data_2020.csv')

In [130]:
def clean_str(s, condense=False):
    if isinstance(s, str):
        s = re.sub('[^0-9a-zA-Z]+', ' ', s)
        if condense:
            s = ''.join(s.split())
        else:
            s = ' '.join(s.split())
        s = s.lower()
        return s
    return clean_str(str(s))

def condense_str(s):
    return clean_str(s, True)

def to_list(csl):
    if isinstance(csl, str):
        words = csl.split(',')
        return [condense_str(word) for word in words]
    elif isinstance(csl, list):
        return csl
    return ''

# Add test job
test_job = {
    'id': 'test_job',
    'title': 'Marketing Specialist',
    'employment_terms': 'Permanent',
    'job_type': 'Full time',
    'deadline': '2020-12-31',
    'category': 'Marketing/Advertising',
    'required_qualifications': """Education background does not matter; we are looking for talent, experience, passion and creativity.
        At least 1 year of professional full-time experience in social media marketing is preferred.
        Experience with running social media pages such as Facebook is a must, experience with Linkedin, Insta and YouTube is a plus.
        Excellent in creative content writing, 
        Experience and knowledge of Adobe Photoshop (knowledge of other tools is a plus), for creating social media postings, based on visual templates provided by the designer.
        Strong written and verbal communication in Armenian and English languages, Russian is desirable.
        Positive attitude, detail and customer oriented with good multitasking and organisational ability.""",
    'responsibilities': """Develop original and exciting SMM campaigns and content on a daily basis (e.g. social media posts, website content, etc).
        Coordinate with marketing and design teams to generate relevant marketing content, 
        Manage staff.am's & HireBee's social media presence on Facebook, Linkedin, Telegram, Instagram and YouTube.
        Prepare successful email marketing campaigns with well-structured content.
        Maintain appropriate tone of voice through social media and other digital channels.
        Suggest and implement other marketing activities to boost awareness and increase website traffic and app installs.
        Complete other tasks related to Marketing as required.""",
    'soft_skills': 'Written communication skills,Positive attitude,Time management,Team player',
    'prof_skills': 'Adobe Photoshop,SMM,Email Marketing,Content marketing',
    'salary': 'NaN',
    'candidate_level': 'Mid level'
}
df = df.append(test_job, ignore_index=True)

# Clean strings
df['title'] = df['title'].apply(condense_str)
df['employment_terms'] = df['employment_terms'].apply(condense_str)
df['job_type'] = df['job_type'].apply(condense_str)
df['category'] = df['category'].apply(condense_str)
df['required_qualifications'] = df['required_qualifications'].apply(clean_str)
df['responsibilities'] = df['responsibilities'].apply(clean_str)
df['candidate_level'] = df['candidate_level'].apply(condense_str)

# Convert comma-separated lists into Python lists
df['soft_skills'] = df['soft_skills'].apply(to_list)
df['prof_skills'] = df['prof_skills'].apply(to_list)

# Convert to date
df['deadline'] = df['deadline'].apply(lambda x: pd.to_datetime(x, format='%Y-%m-%d', errors='ignore'))

In [131]:
def rep(s, t=5):
    return (s + ' ') * t

def create_soup(x):
    classes = rep(x['title']) + rep(x['employment_terms']) + rep(x['job_type']) + rep(x['category']) + rep(x['candidate_level'])
    description = x['required_qualifications'] + ' ' + x['responsibilities'] + ' ' + ' '.join(x['soft_skills']) + ' ' + ' '.join(x['prof_skills'])
    return classes + description

df['soup'] = df.apply(create_soup, axis=1)

In [132]:
from sklearn.feature_extraction.text import CountVectorizer

count = CountVectorizer(stop_words='english')
count_mat = count.fit_transform(df['soup'])
count_mat.shape

(6423, 13630)

In [133]:
from sklearn.metrics.pairwise import cosine_similarity

cos_sim = cosine_similarity(count_mat, count_mat)

df = df.reset_index()
indices = pd.Series(df.index, index=df['id'])

def get_recommendations(id, cosine_sim=cos_sim):
    idx = indices[id]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:11]
    job_indices = [i[0] for i in sim_scores]
    return df.iloc[job_indices]

In [134]:
get_recommendations('test_job', cos_sim)

Unnamed: 0,index,id,title,employment_terms,job_type,deadline,category,required_qualifications,responsibilities,soft_skills,prof_skills,salary,candidate_level,soup
5544,5544,zc2oKc7ovZd7fLH0Rvtw1Lrf8,smmspecialist,permanent,fulltime,2020-08-31,marketingadvertising,an educational background does not matter we a...,manage rsquo s social media presence in tiktok...,"[leadershipskills, writtencommunicationskills,...","[digitalmarketing, smm]",,midlevel,smmspecialist smmspecialist smmspecialist smms...
602,602,UL1OcV6PLMy8tFS3okZk7uNk4,digitalmarketingspecialist,permanent,fulltime,2020-02-19,marketingadvertising,bachelor rsquo s degree in marketing business ...,design plan and implement digital campaigns al...,"[positiveattitude, timemanagement, teamwork, a...","[digitalmarketing, smm, digitalanalytics]",Competitive,midlevel,digitalmarketingspecialist digitalmarketingspe...
6372,6372,bh2bem8GqGJ8BBY3JRRk3Tqe8,smmspecialist,permanent,fulltime,2020-10-03,marketingadvertising,bachelor rsquo s degree in marketing or releva...,implement social media strategy throughout dif...,"[abilitytoworkindependently, positiveattitude,...","[digitalmarketing, smm]","Competitive, depending on experience and skills.",midlevel,smmspecialist smmspecialist smmspecialist smms...
3723,3723,IK2Yqy5WntV1pcT9ammb2Wzr5,smmspecialist,permanent,fulltime,2020-06-21,marketingadvertising,proven working experience in social media mark...,build and execute social media strategy throug...,"[communicationskills, resultoriented, hardwork...","[digitalmarketing, smm, facebookads, marketing...",,midlevel,smmspecialist smmspecialist smmspecialist smms...
4562,4562,qo2tqz6NJyr3VFK8HCtm6Rly3,smmspecialist,permanent,fulltime,2020-07-30,marketingadvertising,proven working experience in social media mark...,build and execute social media strategy throug...,"[communicationskills, resultoriented, hardwork...","[digitalmarketing, smm, facebookads, marketing...",,midlevel,smmspecialist smmspecialist smmspecialist smms...
6189,6189,MQ2KqY8cmmW6WvF0WQLz0rWe2,smmspecialist,permanent,fulltime,2020-09-28,marketingadvertising,proven working experience in social media mark...,build and execute social media strategy throug...,"[communicationskills, resultoriented, hardwork...","[digitalmarketing, smm, facebookads, marketing...",,midlevel,smmspecialist smmspecialist smmspecialist smms...
5678,5678,fX2cqX7eomA8NKF6IfiE6nqo4,smmspecialist,permanent,fulltime,2020-09-09,marketingadvertising,minimum one year of experience in marketing bu...,build plan and implement the overall social me...,,"[digitalmarketing, smm]",,midlevel,smmspecialist smmspecialist smmspecialist smms...
1806,1806,kV1eEB8EsoM1Ccg2ybEj7ick7,smmmanager,permanent,fulltime,2020-03-25,marketingadvertising,nbsp minimum 3 4 years of social media experie...,nbsp in partnership with digital marketing tea...,"[writtencommunicationskills, timemanagement, f...","[adobephotoshop, mspowerpoint, msexcel]",,notdefined,smmmanager smmmanager smmmanager smmmanager sm...
2174,2174,ei1Lss8GEil5cXN1McFl7kqE5,socialmediamarketingspecialist,permanent,fulltime,2020-04-04,marketingadvertising,bsc degree in marketing communication or relat...,optimize and publish content on social media p...,"[abilitytoworkindependently, timemanagement, p...","[adobephotoshop, digitalmarketing, seo, smm, c...",,senior,socialmediamarketingspecialist socialmediamark...
1897,1897,Ml1eOn8LZhf2Nql2wzhe6nTP2,marketingspecialist,permanent,fulltime,2020-03-27,marketingadvertising,university degree in marketing or in related f...,develop a social media strategy and set goals ...,,[smm],,midlevel,marketingspecialist marketingspecialist market...
