In [1]:
from nltk.corpus import stopwords
from sklearn.metrics.pairwise import linear_kernel
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import LatentDirichletAllocation
import pandas as pd
import numpy as np
import random
import re, nltk, spacy, gensim
import pyLDAvis
import pyLDAvis.sklearn
import matplotlib.pyplot as plt
%matplotlib inline

pd.set_option('display.max_columns', 50)

In [2]:
df = pd.read_csv('JD_Source_B_Accountant_Week_09 - Copy.csv', encoding="latin-1")
df.set_index('name', inplace = True)
tf = TfidfVectorizer(analyzer='word', ngram_range=(1, 3), min_df=0, stop_words='english')
tfidf_matrix = tf.fit_transform(df['desc'])
cosine_similarities = linear_kernel(tfidf_matrix, tfidf_matrix)

In [3]:
indices = pd.Series(df.index)
def recommendations(name, cosine_similarities = cosine_similarities):
    
    recommended_jds = []
    
    # gettin the index of the JDs that matches the name
    idx = indices[indices == name].index[0]

    # creating a Series with the similarity scores in descending order
    score_series = pd.Series(cosine_similarities[idx]).sort_values(ascending = False)

    # getting the indexes of the 5 most similar JDs except itself
    top_10_indexes = list(score_series.iloc[1:6].index)
    
    # populating the list with the names of the top 5 matching JDs
    for i in top_10_indexes:
        recommended_jds.append(list(df.index)[i])
        
    return recommended_jds

In [4]:
recommendations('Accounts Executive, Fullsets (Real Estate / Property Management)')

['Accounts Executive | Full Set | Facilities Management industry',
 'Accounts Executive',
 'Senior Accounts Executive',
 'Senior Accounts Executive',
 'Senior Accounts Executive']

In [5]:
df.loc['Accounts Executive, Fullsets (Real Estate / Property Management)'].desc

"Our client is renowned within Real Estate industry located within Central Business District area. They are looking for an Accounts Executive to join their team.\nÂ\xa0\n- Good working location\n- Fullsets of Accounts\n- Prior supervisory experience added advantage\n- Real estate industry background\nÂ\xa0\nJob Description:\nMaintain full set of accounts and ensure that the monthly reporting timelines are duly met\nMonthly management reports\nSupervise Accounts Payable and Accounts Receivable Officers\nGST returns & Income Tax Computation and Property Tax\nCash Management and Forecast\nStatutory Accounts\nBudgeting and Forecasting\nAssist with Internal Audit as and when necessary\nAssist with other ad-hoc tasks where necessary\nJob Requirements:\nCandidate must possess at least a Diploma, Advanced/Higher/Graduate Diploma, Bachelor's Degree, Post Graduate Diploma, Professional Degree, Economics, Finance/Accountancy/Banking, Business Studies/Administration/Management, Mathematics, Commer

In [6]:
df.loc["Accounts Executive - Payment"].desc

'Our client, a well-known Japanese company is looking for a suitable candidate for the position of Accounts Executive - Payment.\nÂ\xa0\nResponsibilities:\nResponsible for GL and payments related matters\nVerification of payment data with assigned countries\nReview and approve Supplier master data with assigned countries\nAssist on month end closing processes\nLiaise with internal or external auditors and follow up on auditorsâ\x80\x99 queries\nRequirements:\nMinimum Diploma in Accountancy or equivalent\nGoodÂ\xa0experience in Shared Service environment or Payment Hub environment\nExcellent communication skill and good problem-solving mindset\nÂ\xa0\nÂ\xa0\nFor interested parties, kindly send in your resume in MS Word format to eejing.loh@persolsg.com\nWe regret that only shortlisted candidates will be notified.\nÂ\xa0\nCapita Pte Ltd | EA License No.: 08C2893 | RCB No. 200701282M\nLoh Ee Jing | Registration No.: R1877262'

In [7]:
df.loc["3 Months Accounts Assistant (Up$2200 / 5 Days / Redhil / Urgent)"].desc

'Benefits Summary:\nSalary: $2200 + BenefitsÂ\xa0\nWorking Location: Redhill\nGood working environment\nResponsibilities:\nPerform accounts payable duties such as coding, staff reimbursements and data entries\nPrepare and process payments for suppliers and customer refunds\nAssist in other accounting duties\nRequirements:\nApplicants who possess relevant qualifications, employment background and suitable skill sets are most welcome to apply\nApplicants who do not possess the above will be considered on individual merits\nApplicants not shortlisted for this role may be matched with other suitable opportunities\nQualified or interested candidates, please submit your updated resume in MS format by using the \nApply Now\n Button.\nAlternatively, you may also email your resume to us at \nmci2119@mci.com.sg.\nÂ\xa0\nOur dedicated and approachable MCI consultants will get back to you soon for a confidential discussion.\nÂ\xa0\nPlease include the following information in your resume:\nEducatio

In [8]:
df.reset_index(inplace=True)

In [9]:
df = pd.concat([pd.Series(str(row['name']), str(row['desc']).split('. '))              
                    for _, row in df.iterrows()]).reset_index()
df.columns = ['sentence', 'name']
df['sentence'] = df['sentence'].map(lambda x: re.sub(r'\W+', ' ', x))
print('We have ', len(df), 'sentences in total')

We have  952 sentences in total


In [10]:
df.loc[df['name'] == 'Accounts Executive, Fullsets (Real Estate / Property Management)']

Unnamed: 0,sentence,name
0,Our client is renowned within Real Estate indu...,"Accounts Executive, Fullsets (Real Estate / Pr..."
1,They are looking for an Accounts Executive to ...,"Accounts Executive, Fullsets (Real Estate / Pr..."
2,No R1218682 Gwen Goh Kangping,"Accounts Executive, Fullsets (Real Estate / Pr..."


In [11]:
a = 0
for i in range(a,a+8):
    print(df.sentence[i])
    print()

Our client is renowned within Real Estate industry located within Central Business District area

They are looking for an Accounts Executive to join their team Â Good working location Fullsets of Accounts Prior supervisory experience added advantage Real estate industry background Â Job Description Maintain full set of accounts and ensure that the monthly reporting timelines are duly met Monthly management reports Supervise Accounts Payable and Accounts Receivable Officers GST returns Income Tax Computation and Property Tax Cash Management and Forecast Statutory Accounts Budgeting and Forecasting Assist with Internal Audit as and when necessary Assist with other ad hoc tasks where necessary Job Requirements Candidate must possess at least a Diploma Advanced Higher Graduate Diploma Bachelor s Degree Post Graduate Diploma Professional Degree Economics Finance Accountancy Banking Business Studies Administration Management Mathematics Commerce or equivalent At least 3 year s of working exp

In [12]:
vectorizer = CountVectorizer(analyzer='word',       
                             min_df=3,                        # minimum reqd occurences of a word 
                             stop_words='english',             # remove stop words
                             lowercase=True,                   # convert all words to lowercase
                             token_pattern='[a-zA-Z0-9]{3,}',  # num chars > 3
                             max_features=3000,             # max number of uniq words
                            )

data_vectorized = vectorizer.fit_transform(df['sentence'])

In [13]:
lda_model = LatentDirichletAllocation(n_components=40, # Number of topics
                                      learning_method='online',
                                      random_state=0,       
                                      n_jobs = -1  # Use all available CPUs
                                     )
lda_output = lda_model.fit_transform(data_vectorized)

print(lda_model)

LatentDirichletAllocation(learning_method='online', n_components=40, n_jobs=-1,
                          random_state=0)


In [14]:
pyLDAvis.enable_notebook()
pyLDAvis.sklearn.prepare(lda_model, data_vectorized, vectorizer, mds='tsne')

  default_term_info = default_term_info.sort_values(


In [15]:
# Show top 20 keywords for each topic
def show_topics(vectorizer=vectorizer, lda_model=lda_model, n_words=20):
    keywords = np.array(vectorizer.get_feature_names())
    topic_keywords = []
    for topic_weights in lda_model.components_:
        top_keyword_locs = (-topic_weights).argsort()[:n_words]
        topic_keywords.append(keywords.take(top_keyword_locs))
    return topic_keywords

topic_keywords = show_topics(vectorizer=vectorizer, lda_model=lda_model, n_words=20)        

# Topic - Keywords Dataframe
df_topic_keywords = pd.DataFrame(topic_keywords)
df_topic_keywords.columns = ['Word '+str(i) for i in range(df_topic_keywords.shape[1])]
df_topic_keywords.index = ['Topic '+str(i) for i in range(df_topic_keywords.shape[0])]
df_topic_keywords



Unnamed: 0,Word 0,Word 1,Word 2,Word 3,Word 4,Word 5,Word 6,Word 7,Word 8,Word 9,Word 10,Word 11,Word 12,Word 13,Word 14,Word 15,Word 16,Word 17,Word 18,Word 19
Topic 0,documents,accounts,appropriate,payments,invoices,work,candidate,ledgers,job,role,review,journals,monthly,positive,strong,reports,attitude,able,personality,accounting
Topic 1,experience,internal,reports,support,working,requirements,assist,accounts,key,reporting,prepare,accounting,tax,parties,perform,external,hoc,preparation,excel,management
Topic 2,accounting,accounts,prepare,experience,monthly,job,requirements,work,assist,end,minimum,tax,responsibilities,diploma,preparation,reconciliation,years,closing,working,perform
Topic 3,issuance,transfer,suppliers,telegraphic,local,inventory,costing,overseas,activities,outgoing,inclusive,input,superior,matching,day,cheques,accpac,orders,statement,reconcile
Topic 4,project,research,asset,promptly,funding,budget,agencies,vant,weekly,hospitality,limited,essential,periodic,open,hotel,objective,grants,deal,requests,3439
Topic 5,financial,preparing,matters,actual,data,whatsapp,budgets,forecast,controls,expenses,process,reporting,claims,cpa,project,funding,jte,adhoc,operations,finance
Topic 6,cost,centers,assign,expenses,appropriate,accounts,prepare,reports,requirements,basic,accounting,accurately,consent,generate,transport,reconciliation,months,bank,capita,email
Topic 7,proficient,prepare,candidates,resume,minimum,software,accounts,monthly,email,duties,excellent,interpersonal,schedule,responsibilities,work,relevant,assigned,cash,team,meticulous
Topic 8,industry,supply,optimize,located,develop,inventory,area,deadline,organised,bills,execute,mail,payable,tel,excellent,level,central,achieve,plan,supporting
Topic 9,claims,staff,payment,petty,gst,player,cover,duties,literacy,singaporeans,ensure,monthly,customers,computer,reconciliations,accurately,years,processing,attitude,responsibilities


In [16]:
# Create Document - Topic Matrix
lda_output = lda_model.transform(data_vectorized)

# column names
topicnames = ["Topic" + str(i) for i in range(40)]

# index names
docnames = ["Doc" + str(i) for i in range(len(lda_output))]

# Make the pandas dataframe
df_document_topic = pd.DataFrame(np.round(lda_output, 2), columns=topicnames, index=docnames)

# Get dominant topic for each document
dominant_topic = np.argmax(df_document_topic.values, axis=1)
df_document_topic['dominant_topic'] = dominant_topic

In [17]:
df_document_topic.head(10)

Unnamed: 0,Topic0,Topic1,Topic2,Topic3,Topic4,Topic5,Topic6,Topic7,Topic8,Topic9,Topic10,Topic11,Topic12,Topic13,Topic14,Topic15,Topic16,Topic17,Topic18,Topic19,Topic20,Topic21,Topic22,Topic23,Topic24,Topic25,Topic26,Topic27,Topic28,Topic29,Topic30,Topic31,Topic32,Topic33,Topic34,Topic35,Topic36,Topic37,Topic38,Topic39,dominant_topic
Doc0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.44,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.13,0.0,0.0,0.0,0.33,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8
Doc1,0.0,0.0,0.24,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.67,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.08,20
Doc2,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0
Doc3,0.0,0.0,0.47,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.17,0.0,0.0,0.0,0.05,0.0,0.1,0.0,0.09,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,2
Doc4,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.76,0.01,0.01,0.01,0.01,0.01,0.01,33
Doc5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.99,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,23
Doc6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.96,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,25
Doc7,0.0,0.0,0.42,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.13,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.29,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.14,2
Doc8,0.0,0.0,0.22,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.16,0.0,0.0,0.0,0.33,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,27
Doc9,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.76,0.01,0.01,0.01,0.01,0.01,0.01,0.01,32


In [18]:
df_document_topic.reset_index(inplace=True)
df_sent_topic= pd.merge(df, df_document_topic, left_index=True, right_index=True)
df_sent_topic.drop('index', axis=1, inplace=True)

In [19]:
df_sent_topic.head(10)

Unnamed: 0,sentence,name,Topic0,Topic1,Topic2,Topic3,Topic4,Topic5,Topic6,Topic7,Topic8,Topic9,Topic10,Topic11,Topic12,Topic13,Topic14,Topic15,Topic16,Topic17,Topic18,Topic19,Topic20,Topic21,Topic22,Topic23,Topic24,Topic25,Topic26,Topic27,Topic28,Topic29,Topic30,Topic31,Topic32,Topic33,Topic34,Topic35,Topic36,Topic37,Topic38,Topic39,dominant_topic
0,Our client is renowned within Real Estate indu...,"Accounts Executive, Fullsets (Real Estate / Pr...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.44,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.13,0.0,0.0,0.0,0.33,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8
1,They are looking for an Accounts Executive to ...,"Accounts Executive, Fullsets (Real Estate / Pr...",0.0,0.0,0.24,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.67,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.08,20
2,No R1218682 Gwen Goh Kangping,"Accounts Executive, Fullsets (Real Estate / Pr...",0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0
3,Our client a well known Japanese company is lo...,Accounts Executive - Payment,0.0,0.0,0.47,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.17,0.0,0.0,0.0,0.05,0.0,0.1,0.0,0.09,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,2
4,200701282M Loh Ee Jing Registration No R1877262,Accounts Executive - Payment,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.76,0.01,0.01,0.01,0.01,0.01,0.01,33
5,Benefits Summary Salary 2200 BenefitsÂ Working...,3 Months Accounts Assistant (Up$2200 / 5 Days ...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.99,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,23
6,Â To find out about other career opportunitie...,3 Months Accounts Assistant (Up$2200 / 5 Days ...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.96,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,25
7,â 2000 2500 Basic AWS Variable Bonus â Japan M...,Admin Assistant cum Accounts ( 5D/ Partial / M...,0.0,0.0,0.42,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.13,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.29,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.14,2
8,BIG FOUR AUDIT FIRM lÂ RAFFLES PLACE Â Duratio...,[BIG FOUR] Accounts Assistant l Raffles Place ...,0.0,0.0,0.22,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.16,0.0,0.0,0.0,0.33,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,27
9,Â EA License No 08C2893 EA Reg,[BIG FOUR] Accounts Assistant l Raffles Place ...,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.76,0.01,0.01,0.01,0.01,0.01,0.01,0.01,32


In [20]:
df_sent_topic.loc[df_sent_topic['name'] == 'Accounts Executive, Fullsets (Real Estate / Property Management)'][['sentence', 'dominant_topic']]

Unnamed: 0,sentence,dominant_topic
0,Our client is renowned within Real Estate indu...,8
1,They are looking for an Accounts Executive to ...,20
2,No R1218682 Gwen Goh Kangping,0


In [21]:
df_sent_topic.loc[df_sent_topic['dominant_topic'] == 4][['sentence', 'dominant_topic']].sample()

Unnamed: 0,sentence,dominant_topic
872,Weekly Forecast on AR collection 4,4


In [22]:
df_sent_topic.loc[df_sent_topic['dominant_topic'] == 12][['sentence', 'dominant_topic']].sample()

Unnamed: 0,sentence,dominant_topic
680,As a JMP Accounts Executive role you will be r...,12


In [23]:
print('There are', len(df_sent_topic.loc[df_sent_topic['dominant_topic'] == 4]), 'sentences that belong to topic 4 and we will remove')
print('There are', len(df_sent_topic.loc[df_sent_topic['dominant_topic'] == 12]), 'sentences that belong to topic 12 and we will remove')

There are 1 sentences that belong to topic 4 and we will remove
There are 11 sentences that belong to topic 12 and we will remove


In [24]:
df_topic_distribution = df_document_topic['dominant_topic'].value_counts().reset_index(name="Num Sentences")
df_topic_distribution.columns = ['Topic #', 'Num Sentences']
df_topic_distribution

Unnamed: 0,Topic #,Num Sentences
0,2,224
1,39,188
2,27,107
3,25,73
4,32,53
5,19,49
6,34,33
7,13,30
8,23,27
9,16,26


In [25]:
df_sent_topic_clean = df_sent_topic.drop(df_sent_topic[(df_sent_topic.dominant_topic == 4) | (df_sent_topic.dominant_topic == 12)].index)

In [26]:
df_description = df_sent_topic_clean[['sentence','name']]
df_description = df_description.groupby('name')['sentence'].agg(lambda col: ' '.join(col)).reset_index()

In [27]:
df_description.head()

Unnamed: 0,name,sentence
0,"*NEW* Accounts Executive ($3,000 to $3,500) IM...",Our Clients Overseas MNCS and Companies are ur...
1,3 Months Accounts Assistant (Up$2200 / 5 Days ...,Benefits Summary Salary 2200 BenefitsÂ Working...
2,6 Months Contract Accounts Executive,We are Hiring Under Recruit Express s headcoun...
3,6 months Accounts Assistant(AP) - up to $3000 ...,Accounts and trade payable Reconciles processe...
4,6 months Accounts Officer - up to $4000,Management reporting and analysis Support in t...


In [28]:
df_description['sentence'][45]

'BenefitsÂ Summary Salary up to 2 200 Monday to Friday 9 00am to 6 30pm Location Taiseng Training will be provided Â Responsibilities Accounting programme â Issue Tax Invoices AdministrationÂ duties Handle customers enquire Train to handle Accounting documents Reply to email other duties as assigned Applicants who possess relevant experience for the above responsibilities are most welcome to apply If you do not possess the above experience your application will still be considered on individual merits and you may be contacted for other opportunities Please submit your updated resume in MS format by using the APPLY NOW BUTTON Alternatively you may also email your resume to us at mci3627 mci com sg Our dedicated and approachable MCI consultants will get back to you soon for a confidential discussion Please include the following information in your resume â Education background â Work experiences in point forms â Reason s for leaving â Current and expected salary â Date of availability No

In [29]:
df_description.set_index('name', inplace = True)
tf = TfidfVectorizer(analyzer='word', ngram_range=(1, 3), min_df=0, stop_words='english')
tfidf_matrix = tf.fit_transform(df_description['sentence'])
cosine_similarities = linear_kernel(tfidf_matrix, tfidf_matrix)

In [30]:
indices = pd.Series(df_description.index)
def recommendations(name, cosine_similarities = cosine_similarities):
    
    recommended_jds = []
    
    # gettin the index of the JDs that matches the name
    idx = indices[indices == name].index[0]

    # creating a Series with the similarity scores in descending order
    score_series = pd.Series(cosine_similarities[idx]).sort_values(ascending = False)

    # getting the indexes of the 5 most similar JDs except itself
    top_10_indexes = list(score_series.iloc[1:6].index)
    
    # populating the list with the names of the top 5 matching JDs
    for i in top_10_indexes:
        recommended_jds.append(list(df_description.index)[i])
        
    return recommended_jds

In [31]:
recommendations('Contract Accounts Executive/Accountant (Healthcare/Lifescience) - HHW')

['Contract Accounts Assistant (Immediate/ Office Hours) - HCK',
 'Accounts Assistant',
 'Full Sets Accounts Executive (Up to $3500) REF:BYQ',
 'Accounts Executive',
 'Finance Assistant (Full Sets Accounts)']

In [38]:
print(indices.head())

0    *NEW* Accounts Executive ($3,000 to $3,500) IM...
1    3 Months Accounts Assistant (Up$2200 / 5 Days ...
2                 6 Months Contract Accounts Executive
3    6 months Accounts Assistant(AP) - up to $3000 ...
4              6 months Accounts Officer - up to $4000
Name: name, dtype: object
