# Trying Doc2Vec

# Connect GDRIVE

In [None]:
# Mount Google Drive at /content/drive so the dataset paths used below resolve.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Necessary libraries

In [None]:
# Install uszipcode (imported below as SearchEngine). NOTE(review): the version is not pinned.
!pip install uszipcode --quiet

[K     |████████████████████████████████| 121 kB 47.5 MB/s 
[K     |████████████████████████████████| 76 kB 3.5 MB/s 
[?25h

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import ast 
from scipy import stats
from ast import literal_eval
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.metrics.pairwise import linear_kernel, cosine_similarity

from gensim.models import Doc2Vec
import gensim.models.doc2vec
from collections import OrderedDict
import multiprocessing

from gensim.models.doc2vec import Doc2Vec, TaggedDocument
from gensim.utils import simple_preprocess
from gensim.test.utils import get_tmpfile
from gensim.models.callbacks import CallbackAny2Vec

import pickle

from uszipcode import SearchEngine
from geopy.distance import distance

import gc
import re

import warnings; warnings.simplefilter('ignore')



# Utility functions

In [None]:
def preprocessor(text):
    """Normalise a raw text field into lowercase, space-separated words.

    The two-character sequence backslash + 'r', the substring '&nbsp' and newline
    characters are removed first, then anything that looks like an HTML tag.
    Emoticons such as ':-)' are collected before lowercasing, every run of
    non-word characters is collapsed into a single space, and the emoticons
    (with the '-' nose removed) are appended at the end.

    Args:
        text: the string to clean.

    Returns:
        The cleaned string.
    """
    text = text.replace('\\r', '').replace('&nbsp', '').replace('\n', '')
    # Raw strings keep the regex escapes (\W, \), \() from being read as
    # (invalid) Python string escapes.
    text = re.sub(r'<[^>]*>', '', text)
    emoticons = re.findall(r'(?::|;|=)(?:-)?(?:\)|\(|D|P)', text)
    words = re.sub(r'[\W]+', ' ', text.lower())
    return words + ' '.join(emoticons).replace('-', '')

def user_exist(user):
    """Return True when at least one row of the global `users` frame has this UserID."""
    matching_rows = users.loc[users['UserID'] == user]
    return len(matching_rows) != 0

def has_coordinates(user):
    """Return True when the user exists and the first part of its Coordinates is not 'None'.

    Coordinates is read from the global `users` frame and is expected to be a
    comma-separated string; only the text before the first comma is inspected.
    """
    coords = users.loc[users['UserID'] == user, "Coordinates"]
    if len(coords) == 0:
        return False
    first_part = coords.iloc[0].split(',')[0]
    return first_part != 'None'

def user_profile(user_id):
    """Return a short text summary (degree type and major) for a user.

    Args:
        user_id: value matched against the UserID column of the global `users` frame.

    Returns:
        A multi-line string with the user's DegreeType and Major.

    Raises:
        IndexError: if no row of `users` has this UserID.
    """
    # Only the first matching row is used. The former lookup into `user_history`
    # was never read, so it has been dropped.
    row = users.loc[users['UserID'] == user_id, ['DegreeType', 'Major']].iloc[0]
    return """
    Degree Type: {}
    Major: {}
    """.format(row['DegreeType'], row['Major'])

def historical_application(user_id):
    """Collect the text of every job the user applied to.

    JobIDs come from the global `apps` frame; each one is looked up in the global
    `jobs` frame. Applications whose JobID is not present in `jobs` are skipped.

    Returns:
        A list with one array per matched application; each array element is
        "Title. Description. Requirements" for a matching job row.
    """
    applied_job_ids = apps.loc[apps.UserID == user_id, 'JobID']
    text_columns = ['Title', 'Description', 'Requirements']
    content = []
    for job_id in applied_job_ids:
        posting = jobs.loc[jobs.JobID == job_id, text_columns]
        if len(posting) == 0:
            continue
        combined = posting.Title.values + ". " + posting.Description.values + ". " + posting.Requirements.values
        content.append(combined)
    return content

class EpochLogger(CallbackAny2Vec):
    """Training callback that prints a line at the start of every epoch."""

    def __init__(self):
        # Label of the next epoch to be announced; starts at 0.
        self.epoch = 0

    def on_epoch_begin(self, model):
        """Print the current epoch label, then advance the counter by one."""
        current = self.epoch
        print("Epoch #{} start".format(current))
        self.epoch = current + 1

In [None]:
# Stop early when gensim reports FAST_VERSION of -1 or lower
# (presumably meaning the optimised training routines are unavailable — confirm).
assert gensim.models.doc2vec.FAST_VERSION > -1, "SLOW VERSION"
# Distance cutoff used by the recommenders below: jobs whose distance in miles
# is >= this value are skipped.
MAX_DISTANCE = 15

# Reading datasets

In [None]:
# Base directory on the mounted Drive; data files are read from, and the model
# artefacts written to, paths built from this prefix.
folder = '/content/drive/MyDrive/job-recommendation-system/'

In [None]:
apps = pd.read_csv(folder+'data/apps.tsv', delimiter='\t', encoding='utf-8')
# Show the number of application rows (a bare `len` only echoes the builtin function).
len(apps)

<function len(obj, /)>

In [None]:
# Tab-separated work history of each user (one row per past job title).
user_history = pd.read_csv(
    folder + 'data/user_history.tsv',
    sep='\t',
    encoding='utf-8',
)
user_history.head()

Unnamed: 0,UserID,WindowID,Split,Sequence,JobTitle
0,47,1,Train,1,National Space Communication Programs-Special ...
1,47,1,Train,2,Detention Officer
2,47,1,Train,3,"Passenger Screener, TSA"
3,72,1,Train,1,"Lecturer, Department of Anthropology"
4,72,1,Train,2,Student Assistant


In [None]:
# Rows with an unexpected number of fields are skipped with a warning.
# `error_bad_lines` was deprecated in pandas 1.3 and removed in 2.0 in favour of
# `on_bad_lines`; fall back to the legacy keyword on older pandas versions.
try:
    jobs = pd.read_csv(folder+'data/jobs.tsv', delimiter='\t', encoding='utf-8', on_bad_lines='warn')
except TypeError:
    jobs = pd.read_csv(folder+'data/jobs.tsv', delimiter='\t', encoding='utf-8', error_bad_lines=False)
jobs.head()

b'Skipping line 122433: expected 11 fields, saw 12\n'
b'Skipping line 602576: expected 11 fields, saw 12\n'
b'Skipping line 990950: expected 11 fields, saw 12\n'


Unnamed: 0,JobID,WindowID,Title,Description,Requirements,City,State,Country,Zip5,StartDate,EndDate
0,1,1,Security Engineer/Technical Lead,<p>Security Clearance Required:&nbsp; Top Secr...,<p>SKILL SET</p>\r<p>&nbsp;</p>\r<p>Network Se...,Washington,DC,US,20531.0,2012-03-07 13:17:01.643,2012-04-06 23:59:59
1,4,1,SAP Business Analyst / WM,<strong>NO Corp. to Corp resumes&nbsp;are bein...,<p><b>WHAT YOU NEED: </b></p>\r<p>Four year co...,Charlotte,NC,US,28217.0,2012-03-21 02:03:44.137,2012-04-20 23:59:59
2,7,1,P/T HUMAN RESOURCES ASSISTANT,<b> <b> P/T HUMAN RESOURCES ASSISTANT</b> <...,Please refer to the Job Description to view th...,Winter Park,FL,US,32792.0,2012-03-02 16:36:55.447,2012-04-01 23:59:59
3,8,1,Route Delivery Drivers,CITY BEVERAGES Come to work for the best in th...,Please refer to the Job Description to view th...,Orlando,FL,US,,2012-03-03 09:01:10.077,2012-04-02 23:59:59
4,9,1,Housekeeping,I make sure every part of their day is magica...,Please refer to the Job Description to view th...,Orlando,FL,US,,2012-03-03 09:01:11.88,2012-04-02 23:59:59


In [None]:
# Tab-separated user table (location, education and experience columns).
users = pd.read_csv(
    folder + 'data/users.tsv',
    sep='\t',
    encoding='utf-8',
)
users.head()

Unnamed: 0,UserID,WindowID,Split,City,State,Country,ZipCode,DegreeType,Major,GraduationDate,WorkHistoryCount,TotalYearsExperience,CurrentlyEmployed,ManagedOthers,ManagedHowMany
0,47,1,Train,Paramount,CA,US,90723,High School,,1999-06-01 00:00:00,3,10.0,Yes,No,0
1,72,1,Train,La Mesa,CA,US,91941,Master's,Anthropology,2011-01-01 00:00:00,10,8.0,Yes,No,0
2,80,1,Train,Williamstown,NJ,US,8094,High School,Not Applicable,1985-06-01 00:00:00,5,11.0,Yes,Yes,5
3,98,1,Train,Astoria,NY,US,11105,Master's,Journalism,2007-05-01 00:00:00,3,3.0,Yes,No,0
4,123,1,Train,Baton Rouge,LA,US,70808,Bachelor's,Agricultural Business,2011-05-01 00:00:00,1,9.0,Yes,No,0


In [None]:
# Tab-separated list of test users (UserID and WindowID).
test_users = pd.read_csv(
    folder + 'data/test_users.tsv',
    sep='\t',
    encoding='utf-8',
)
test_users.head()

Unnamed: 0,UserID,WindowID
0,767,1
1,769,1
2,861,1
3,1006,1
4,1192,1


In [None]:
# Row counts before any subsetting or sampling.
job_count, user_count = len(jobs), len(users)
print("Original number of jobs: ", job_count)
print("Original number of users: ", user_count)

Original number of jobs:  1091923
Original number of users:  389708


# Pipeline - Preprocessing


### Subsetting & Sampling users of NY state with 40% sampling

In [None]:
# Keep NY users only, then draw a reproducible 40% sample without replacement.
users = (
    users.loc[users.State == 'NY']
    .sample(frac=0.4, replace=False, random_state=1)
)
len(users)

8236

### Subsetting jobs with zipcode of NY state for testing distance feature

In [None]:
# Keep NY jobs only. Take an explicit copy so the column assignments in later
# cells operate on an independent frame rather than on a slice of the full table.
jobs = jobs.loc[jobs.State == 'NY'].copy()

In [None]:
# Missing zipcodes become 0, the column is cast to int, and the 0 rows are dropped.
jobs['Zip5'] = jobs['Zip5'].fillna(0).astype(int)
has_zipcode = jobs['Zip5'] != 0
jobs = jobs.loc[has_zipcode]
len(jobs)

33679

### Subsetting & Sampling jobs of NY state with 40% sampling

In [None]:
# Reproducible 40% sample of the NY jobs, drawn without replacement.
sampling_options = dict(frac=0.4, replace=False, random_state=1)
jobs = jobs.sample(**sampling_options)
len(jobs)

13472

### Clearing Memory

In [None]:
# Force a garbage-collection pass after the frames were subset and re-bound above.
gc.collect()

15

### Creating jobs coordinates and Communities

In [None]:
# Zipcode lookup engine used by `coordinates` below.
search = SearchEngine()

Download /root/.uszipcode/simple_db.sqlite from https://github.com/MacHu-GWU/uszipcode-project/releases/download/1.0.1.db/simple_db.sqlite ...
  1.00 MB downloaded ...
  2.00 MB downloaded ...
  3.00 MB downloaded ...
  4.00 MB downloaded ...
  5.00 MB downloaded ...
  6.00 MB downloaded ...
  7.00 MB downloaded ...
  8.00 MB downloaded ...
  9.00 MB downloaded ...
  10.00 MB downloaded ...
  11.00 MB downloaded ...
  Complete!


In [None]:
def coordinates(zipcode):
    """Look up a zipcode with the global `search` engine.

    Returns:
        A `(community, "lat,lng")` tuple, or None when the lookup result does not
        expose the expected attributes (an AttributeError is raised while reading them).
    """
    record = search.by_zipcode(zipcode)
    try:
        community = record.post_office_city
        lat_lng = "{},{}".format(record.lat, record.lng)
    except AttributeError:
        return None
    return community, lat_lng

In [None]:
# search.by_zipcode("2e").lat
%%timeit
jobs["Community"] = ""
jobs["Coordinates"] = ""
for zipcode in jobs.Zip5.unique():
    try:
      community, coordinate = coordinates(zipcode)
      jobs.loc[jobs.Zip5 == zipcode, "Coordinates"] =  str(coordinate)
      jobs.loc[jobs.Zip5 == zipcode, "Community"] =  community
    except TypeError:
      print("cannot unpack non-iterable NoneType object")

cannot unpack non-iterable NoneType object
cannot unpack non-iterable NoneType object
cannot unpack non-iterable NoneType object
cannot unpack non-iterable NoneType object
cannot unpack non-iterable NoneType object
cannot unpack non-iterable NoneType object
cannot unpack non-iterable NoneType object
cannot unpack non-iterable NoneType object
cannot unpack non-iterable NoneType object
cannot unpack non-iterable NoneType object
cannot unpack non-iterable NoneType object
cannot unpack non-iterable NoneType object
cannot unpack non-iterable NoneType object
cannot unpack non-iterable NoneType object
cannot unpack non-iterable NoneType object
cannot unpack non-iterable NoneType object
cannot unpack non-iterable NoneType object
cannot unpack non-iterable NoneType object
cannot unpack non-iterable NoneType object
cannot unpack non-iterable NoneType object
cannot unpack non-iterable NoneType object
cannot unpack non-iterable NoneType object
cannot unpack non-iterable NoneType object
cannot unpa

### Creating user coordinates

In [None]:
def creating_coordinates(df, column):
    """Add Community and Coordinates columns to `df` from a zipcode column.

    Every distinct value of `df[column]` is resolved once with `coordinates`;
    rows whose zipcode cannot be resolved keep the defaults
    (Community "" and Coordinates "None, None").

    Args:
        df: frame to annotate; it is modified in place.
        column: name of the column holding the zipcodes.

    Returns:
        The same `df`, after all zipcodes have been processed.
    """
    df["Community"] = ""
    df["Coordinates"] = "None, None"
    for zipcode in df[column].unique():
        located = coordinates(zipcode)
        if located is None:
            # Unknown zipcode: leave the defaults for these rows.
            continue
        community, coordinate = located
        same_zipcode = df[column] == zipcode
        df.loc[same_zipcode, "Coordinates"] = str(coordinate)
        df.loc[same_zipcode, "Community"] = community
    # Return only after the whole loop; returning inside it stopped after the
    # first resolvable zipcode and left every other row at the defaults.
    return df
# Annotate the sampled users with Community / Coordinates derived from their ZipCode.
users = creating_coordinates(users, 'ZipCode')

### Cleaning Description and Requirements

In [None]:
# Cast to str first so missing values are cleaned like any other text.
description_text = jobs['Description'].astype(str)
jobs['Description'] = description_text.map(preprocessor)

In [None]:
# Cast to str first so missing values are cleaned like any other text.
requirements_text = jobs['Requirements'].astype(str)
jobs['Requirements'] = requirements_text.map(preprocessor)

### Making jobs profiles

In [None]:
# Job profile text = "Title. Requirements. Description"
title_text, requirements_text, description_text = (
    jobs[column].astype(str) for column in ('Title', 'Requirements', 'Description')
)
jobs['profile'] = title_text + '. ' + requirements_text + '. ' + description_text

### Making users profiles

In [None]:
%%timeit

users['profile'] =  (users['Major'].astype(str) +  " " ) + (users['DegreeType'].astype(str) + " ")

#Cleaning Garbage Words
users['profile'] = users['profile'].str.replace('None.', ' ')
users['profile'] = users['profile'].str.replace('Not Applicable', ' ')
users['profile'] = users['profile'].str.replace('nan', ' ') 

30.7 ms ± 18.1 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


# EDA 2

### Jobs by Communities/County

In [None]:
# 20 communities with the most job postings.
(
    jobs.groupby(['Community'])
    .size()
    .reset_index(name='Count')
    .sort_values('Count', ascending=False)
    .head(20)
)

Unnamed: 0,Community,Count
325,"New York, NY",4189
398,"Rochester, NY",639
57,"Buffalo, NY",494
55,"Brooklyn, NY",426
53,"Bronx, NY",337
292,"Melville, NY",273
2,"Albany, NY",228
504,"White Plains, NY",191
453,"Syracuse, NY",172
198,"Hauppauge, NY",168


### Jobs by zipcode

In [None]:
# 20 (community, zipcode, coordinates) groups with the most job postings.
(
    jobs.groupby(['Community', 'Zip5', 'Coordinates'])
    .size()
    .reset_index(name='Count')
    .sort_values('Count', ascending=False)
    .head(20)
)

Unnamed: 0,Community,Zip5,Coordinates,Count
452,"New York, NY",10001,"40.75,-74.0",1267
466,"New York, NY",10017,"40.75,-73.97",418
465,"New York, NY",10016,"40.75,-73.98",318
416,"Melville, NY",11747,"40.78,-73.41",273
485,"New York, NY",10036,"40.76,-73.99",257
467,"New York, NY",10018,"40.76,-73.99",225
471,"New York, NY",10022,"40.76,-73.97",179
313,"Hauppauge, NY",11788,"40.82,-73.21",168
456,"New York, NY",10005,"40.71,-74.01",166
502,"New York, NY",10167,"40.75,-73.97",155


### Users by Community

In [None]:
# 20 (city, zipcode, state) groups with the most users.
(
    users.groupby(['City', 'ZipCode', 'State'])
    .size()
    .reset_index(name='Count')
    .sort_values('Count', ascending=False)
    .head(20)
)

Unnamed: 0,City,ZipCode,State,Count
163,Brooklyn,11236,NY,66
152,Brooklyn,11226,NY,64
454,Jamaica,11434,NY,62
112,Bronx,10466,NY,57
161,Brooklyn,11234,NY,57
404,Hempstead,11550,NY,56
113,Bronx,10467,NY,55
137,Brooklyn,11212,NY,55
108,Bronx,10462,NY,54
104,Bronx,10458,NY,54


### Training set

In [None]:
# Preview the jobs frame, now including the Community, Coordinates and profile columns.
jobs.head()

Unnamed: 0,JobID,WindowID,Title,Description,Requirements,City,State,Country,Zip5,StartDate,EndDate,Community,Coordinates,profile
322649,268634,2,On-Call Office Services Associate (Scanning/Ma...,pitney bowes management services pbms provides...,the ideal candidate must be able to demonstrat...,Armonk,NY,US,10504,2012-04-16 14:03:48.903,2012-05-15 23:59:59,"Armonk, NY","41.13,-73.71",On-Call Office Services Associate (Scanning/Ma...
1073169,936381,7,"Coordinator, Corporate Marketing",gannett corporate marketing coordinator corpo...,,New York,NY,US,10001,2012-05-24 21:05:10.253,2012-06-23 23:59:00,"New York, NY","40.75,-74.0","Coordinator, Corporate Marketing. . gannett ..."
91453,358280,1,Director of Concessions at Barclays Center/Bro...,as a levy restaurants director of concessions ...,n a,Brooklyn,NY,US,11217,2012-03-29 04:02:17.373,2012-04-28 23:59:59,"Brooklyn, NY","40.68,-73.98",Director of Concessions at Barclays Center/Bro...
527720,701181,3,Marketing/Executive Assistant,the von group marketing executive assistant jo...,job requirements high aptitude for microsoft e...,Manhattan,NY,US,10016,2012-04-12 17:35:49.613,2012-05-11 23:59:59,"New York, NY","40.75,-73.98",Marketing/Executive Assistant. job requirement...
316226,224741,2,Restaurant Manager - Grand Lux Cafe - Long Isl...,ndescriptionthe restaurant manager is respons...,,Garden City,NY,US,11530,2012-04-22 08:16:32.973,2012-05-21 23:59:59,"Garden City, NY","40.72,-73.64",Restaurant Manager - Grand Lux Cafe - Long Isl...


In [None]:
# Preview the users frame, now including the Community, Coordinates and profile columns.
users.head()

Unnamed: 0,UserID,WindowID,Split,City,State,Country,ZipCode,DegreeType,Major,GraduationDate,WorkHistoryCount,TotalYearsExperience,CurrentlyEmployed,ManagedOthers,ManagedHowMany,Community,Coordinates,profile
200098,248008,4,Train,New York,NY,US,10003,,Electrical Engineering,,2,15.0,Yes,No,0,"New York, NY","40.73,-73.99",Electrical Engineering
286529,1184529,5,Train,New York,NY,US,10035,High School,Not Applicable,2005-01-01 00:00:00,3,7.0,,No,0,,"None, None",High School
57797,1122774,1,Train,New Hartford,NY,US,13413,,Business Administration,1987-01-01 00:00:00,2,23.0,Yes,Yes,8,,"None, None",Business Administration
60141,1165147,1,Train,Staten Island,NY,US,10314,Bachelor's,BA/Criminal Justice,1980-06-01 00:00:00,7,35.0,Yes,Yes,200,,"None, None",BA/Criminal Justice Bachelor's
371308,875025,7,Train,Brooklyn,NY,US,11219,,Business Management,2009-01-01 00:00:00,8,6.0,Yes,Yes,8,,"None, None",Business Management


# Machine Learning Pipeline: NLP Model

In [None]:
%%time
def similarities_nlp_model( model_name = folder+"jobs_doc2vec_model",
                 mapping_name = folder+"jobID_mapping.p", max_epochs = 100,
                 alpha = 0.025):
    """Train a Doc2Vec model on the job profiles and pickle it with its tag mapping.

    Each row of the global `jobs` frame becomes one TaggedDocument tagged with
    its position `i`; `jobID_mapping[i]` holds the JobID of that row.

    Args:
        model_name: path the trained model is pickled to.
        mapping_name: path the tag -> JobID dictionary is pickled to.
        max_epochs: number of outer training iterations; every iteration runs
            `model.epochs` passes over the corpus.
        alpha: initial learning rate.

    Returns:
        `(model, jobID_mapping)`.
    """
    document = list()
    jobID_mapping = dict()

    # Walk the two columns together instead of building a row Series with
    # jobs.iloc[i] for every document.
    for i, (profile_text, job_id) in enumerate(zip(jobs['profile'], jobs['JobID'])):
        document.append(TaggedDocument(simple_preprocess(profile_text), [i]))
        jobID_mapping[i] = job_id

    epoch_logger = EpochLogger()
    # `vector_size` replaces the deprecated `size` keyword (removed in gensim 4).
    model = Doc2Vec(vector_size = 20, alpha=alpha,
                    min_alpha=0.00025, min_count=2,
                    callbacks=[epoch_logger], dm =1, workers=8, window=2)

    model.build_vocab(document)
    print(model.corpus_count)
    # NOTE(review): calling train() repeatedly while adjusting alpha by hand is
    # kept as-is to preserve the training schedule; gensim's documentation
    # recommends a single train() call with epochs set. With a step of 0.0002,
    # alpha would go negative for max_epochs > 125 at the default alpha.
    for epoch in range(max_epochs):
        print('iteration {0}'.format(epoch))
        model.train(document,
                    total_examples=model.corpus_count,
                    epochs=model.epochs)  # `epochs` replaces the deprecated `iter`
        # decrease the learning rate
        model.alpha -= 0.0002
        # fix the learning rate, no decay
        model.min_alpha = model.alpha

    # Persist both artefacts; the context managers make sure the files are closed.
    with open(model_name, "wb") as model_file:
        pickle.dump(model, model_file)
    with open(mapping_name, "wb") as mapping_file:
        pickle.dump(jobID_mapping, mapping_file)

    return model, jobID_mapping

# Train with the default paths and hyper-parameters; also writes both pickle files.
model, jobID_mapping = similarities_nlp_model()

13472
iteration 0
Epoch #0 start
Epoch #1 start
Epoch #2 start
Epoch #3 start
Epoch #4 start




iteration 1
Epoch #5 start
Epoch #6 start
Epoch #7 start
Epoch #8 start
Epoch #9 start
iteration 2
Epoch #10 start
Epoch #11 start
Epoch #12 start
Epoch #13 start
Epoch #14 start
iteration 3
Epoch #15 start
Epoch #16 start
Epoch #17 start
Epoch #18 start
Epoch #19 start
iteration 4
Epoch #20 start
Epoch #21 start
Epoch #22 start
Epoch #23 start
Epoch #24 start
iteration 5
Epoch #25 start
Epoch #26 start
Epoch #27 start
Epoch #28 start
Epoch #29 start
iteration 6
Epoch #30 start
Epoch #31 start
Epoch #32 start
Epoch #33 start
Epoch #34 start
iteration 7
Epoch #35 start
Epoch #36 start
Epoch #37 start
Epoch #38 start
Epoch #39 start
iteration 8
Epoch #40 start
Epoch #41 start
Epoch #42 start
Epoch #43 start
Epoch #44 start
iteration 9
Epoch #45 start
Epoch #46 start
Epoch #47 start
Epoch #48 start
Epoch #49 start
iteration 10
Epoch #50 start
Epoch #51 start
Epoch #52 start
Epoch #53 start
Epoch #54 start
iteration 11
Epoch #55 start
Epoch #56 start
Epoch #57 start
Epoch #58 start
Epoch #

# Recommender Pipeline


### Recommender by popularity

In [None]:
# NOTE(review): neither name appears to be read anywhere in the visible notebook
# (the functions below take their own `top` parameter) — confirm before removing.
unique = 0.3 #update name
top = 20

def ranking_by_popularity(top = 100):
    """Map the most frequent job titles in users' history to matching JobIDs.

    Titles are taken from the global `user_history` frame in decreasing order of
    frequency. A title is kept only when more than one distinct JobID in the
    global `jobs` frame carries exactly that Title.

    Args:
        top: maximum number of titles to return.

    Returns:
        dict of title -> list of JobIDs, with at most `top` entries. Fewer are
        returned when not enough titles qualify (previously this ran past the
        end of the title list and raised IndexError).
    """
    popular_titles = (
        user_history.groupby(['JobTitle'])
        .size()
        .reset_index(name='Count')
        .sort_values('Count', ascending=False)['JobTitle']
    )

    ranking = dict()
    for job_title in popular_titles:
        if len(ranking) >= top:
            break
        jobs_list = jobs.loc[jobs['Title'] == job_title, 'JobID'].unique().tolist()
        if len(jobs_list) > 1:
            ranking[job_title] = jobs_list
    return ranking

# Popularity ranking read as a global by recommender_popular_jobs.
ranking_popular = ranking_by_popularity(top = 50)

In [None]:
def recommender_popular_jobs(user_id,  top = 10):
    """Recommend popular job titles, preferring postings close to the user.

    For a known user with coordinates, each title in the global `ranking_popular`
    is mapped to its closest posting, provided that posting is less than
    MAX_DISTANCE miles away. If that yields fewer than `top` titles (or the user
    is unknown / has no coordinates), the remaining slots are filled with the
    first JobID of the highest-ranked titles not already present.

    Args:
        user_id: UserID to recommend for.
        top: maximum number of titles to return.

    Returns:
        dict of title -> `(job_id, distance_in_miles)` for nearby matches, or
        title -> `job_id` for popularity-only fallbacks.
    """
    recommended_popular_jobs = dict()

    if user_exist(user_id) and has_coordinates(user_id):
        # Scalar "lat,lng" string of the user (not the one-element Series).
        c1 = users.loc[users['UserID'] == user_id, 'Coordinates'].iloc[0]

        for title, jobs_list in ranking_popular.items():
            if len(recommended_popular_jobs) >= top:
                break

            distances = dict()
            for job in jobs_list:
                c2 = jobs.loc[jobs['JobID'] == job, 'Coordinates']
                # Skip postings that are missing or carry no usable coordinates.
                if len(c2) == 0 or str(c2.iloc[0]).split(',')[0].strip() in ('', 'None'):
                    continue
                distances[job] = round(distance(c1, c2.iloc[0]).miles, 2)

            if not distances:
                # No posting of this title has coordinates.
                continue

            closest_job, closest_distance = min(distances.items(), key=lambda kv: kv[1])
            if closest_distance >= MAX_DISTANCE:
                continue
            recommended_popular_jobs[title] = (closest_job, closest_distance)

    # Top up by popularity without overwriting nearby matches and without
    # exceeding `top` entries.
    for title, jobs_list in ranking_popular.items():
        if len(recommended_popular_jobs) >= top:
            break
        if title not in recommended_popular_jobs:
            recommended_popular_jobs[title] = jobs_list[0]

    return recommended_popular_jobs

### Content Based Recommender


In [None]:
#If user has coordinates
#if user has info then similarties if not popularity ranker
# NOTE: pickle.load can execute arbitrary code; only load files this notebook wrote itself.
# Context managers make sure both file handles are closed.
with open(folder+"jobID_mapping.p", "rb") as mapping_file:
    jobID_mapping = pickle.load(mapping_file)
with open(folder+"jobs_doc2vec_model", "rb") as model_file:
    model  = pickle.load(model_file)

def content_distance_based_recommender(user_id, jobID_mapping = jobID_mapping, model =  model, top = 10):
    """Recommend the most similar jobs located within MAX_DISTANCE miles of a user.

    The user's profile text is extended with the text of their past
    applications, embedded with `model.infer_vector`, and compared with all job
    vectors. Jobs are then visited from most to least similar; a job is kept
    when it exists in the global `jobs` frame, has usable coordinates and is
    less than MAX_DISTANCE miles from the user.

    Args:
        user_id: UserID to recommend for; must exist in `users` and have coordinates.
        jobID_mapping: dict mapping document tags of `model` to JobIDs.
        model: trained Doc2Vec model.
        top: maximum number of recommendations.

    Returns:
        DataFrame with columns JobID, Title, Distance, Description and
        Requirements, ordered by decreasing similarity. It has fewer than `top`
        rows when not enough jobs pass the filters.

    Raises:
        IndexError: if no row of `users` has this UserID.
    """
    columns = ['JobID', 'Title', 'Distance', 'Description', 'Requirements']

    user_row = users.loc[users['UserID'] == user_id].iloc[0]
    profile_text = user_row['profile']
    for application in historical_application(user_id):
        profile_text += ". " + (str(application[0]) + " ")
    profile_tokens = simple_preprocess(profile_text)
    # Scalar "lat,lng" string of the user (not the one-element Series).
    user_coordinates = user_row['Coordinates']

    # infer_vector is stochastic, so results can differ between calls.
    inferred_vector = model.infer_vector(profile_tokens)
    sims = model.docvecs.most_similar([inferred_vector], topn=len(model.docvecs))

    recommendations = []
    for doc_tag, _similarity in sims:
        if len(recommendations) >= top:
            break

        job_rows = jobs.loc[jobs['JobID'] == jobID_mapping[doc_tag]]
        if len(job_rows) == 0:
            continue
        job_row = job_rows.iloc[0]

        job_coordinates = str(job_row['Coordinates'])
        # Skip postings without usable coordinates.
        if job_coordinates.split(',')[0].strip() in ('', 'None'):
            continue

        job_distance = round(distance(user_coordinates, job_coordinates).miles, 2)
        if job_distance >= MAX_DISTANCE:
            continue

        # Record the job together with its own distance, so the table only
        # contains postings that passed the distance filter.
        recommendations.append({
            'JobID': job_row['JobID'],
            'Title': job_row['Title'],
            'Distance': job_distance,
            'Description': job_row['Description'],
            'Requirements': job_row['Requirements'],
        })

    return pd.DataFrame(recommendations, columns=columns)

def hybrid_recommender(user_id, top=10):
    """Choose between the content-based and the popularity recommender.

    The content-based recommender is used when the user has coordinates and at
    least one historical application; otherwise the popularity recommender is
    used. The chosen branch is printed before its result is returned.
    """
    cold_start = (not has_coordinates(user_id)) or len(historical_application(user_id)) == 0
    if cold_start:
        print("Cold Star")
        return recommender_popular_jobs(user_id,  top = top)

    print("Content")
    return content_distance_based_recommender(user_id, top = top)

### Recommendation by doc2vec

In [None]:
# Demo: print one user's profile summary and show the hybrid recommender's output.
user_id = 206656

print("USER PROFILE: ", user_profile(user_id))
# print("HISTORICAL APPLICATION CONTENT:\n\n", historical_application(user_id))
hybrid_recommender(user_id, top = 10)

USER PROFILE:  
    Degree Type: Bachelor's
    Major: Human Resource Management
    
Cold Star


{'Customer Service Representative': 1015707,
 'Cashier': 668223,
 'Administrative Assistant': 617073,
 'Sales Associate': 407797,
 'Assistant Manager': 317522,
 'Office Manager': 203695,
 'Manager': 1097462,
 'Receptionist': 518270,
 'Customer Service': 119569,
 'Store Manager': 1075099}