In [78]:
import pandas as pd
from sklearn.feature_extraction import text
from sklearn.metrics.pairwise import cosine_similarity

In [79]:
data = pd.read_csv('./data/jobs.csv')
data.drop("Unnamed: 0", axis=1, inplace=True)



In [80]:
data.head(1)

Unnamed: 0,Job Salary,Job Experience Required,Key Skills,Role Category,Functional Area,Industry,Job Title
0,Not Disclosed by Recruiter,5 - 10 yrs,Media Planning| Digital Media,Advertising,"Marketing , Advertising , MR , PR , Media Plan...","Advertising, PR, MR, Event Management",Media Planning Executive/Manager


In [89]:
data.dropna(subset=['Job Title', 'Key Skills'])


Unnamed: 0,Job Salary,Job Experience Required,Key Skills,Role Category,Functional Area,Industry,Job Title
0,Not Disclosed by Recruiter,5 - 10 yrs,Media Planning| Digital Media,Advertising,"Marketing , Advertising , MR , PR , Media Plan...","Advertising, PR, MR, Event Management",Media Planning Executive/Manager
1,Not Disclosed by Recruiter,2 - 5 yrs,pre sales| closing| software knowledge| clien...,Retail Sales,"Sales , Retail , Business Development","IT-Software, Software Services",Sales Executive/Officer
2,Not Disclosed by Recruiter,0 - 1 yrs,Computer science| Fabrication| Quality check|...,R&D,"Engineering Design , R&D","Recruitment, Staffing",R&D Executive
3,"2,00,000 - 4,00,000 PA.",0 - 5 yrs,Technical Support,Admin/Maintenance/Security/Datawarehousing,"IT Software - Application Programming , Mainte...","IT-Software, Software Services",Technical Support Engineer
4,Not Disclosed by Recruiter,2 - 5 yrs,manual testing| test engineering| test cases|...,Programming & Design,IT Software - QA & Testing,"IT-Software, Software Services",Testing Engineer
...,...,...,...,...,...,...,...
27005,Not Disclosed by Recruiter,9 - 12 yrs,Graphics| C++| USB| Project management| SOC| ...,Programming & Design,"IT Software - Embedded , EDA , VLSI , ASIC , C...","Semiconductors, Electronics",Software Developer
27006,Not Disclosed by Recruiter,1 - 5 yrs,Service delivery| LMS| CRE| Integration servi...,Operations,"ITES , BPO , KPO , LPO , Customer Service , Op...","Education, Teaching, Training",Operations Manager
27007,Not Disclosed by Recruiter,4 - 6 yrs,Counselor| Mentor| Trainer| Advisor| Teaching,University Level,"Teaching , Education , Training , Counselling","Education, Teaching, Training",Lecturer/Professor
27008,Not Disclosed by Recruiter,5 - 10 yrs,Security Analysis| Software Development Life ...,Programming & Design,"IT Software - Application Programming , Mainte...","IT-Software, Software Services",Software Developer


In [90]:
features = [str(skill) for skill in data["Key Skills"]]

stopwords = list(text.ENGLISH_STOP_WORDS.union({"|"}))  # Convert frozenset to list



In [91]:
tfidf = text.TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(features)
similarity = cosine_similarity(tfidf_matrix)

In [92]:
indices = pd.Series(data.index, index=data['Job Title']).drop_duplicates()
print(indices)

Job Title
Media Planning Executive/Manager                  0
Sales Executive/Officer                           1
R&D Executive                                     2
Technical Support Engineer                        3
Testing Engineer                                  4
                                              ...  
Software Developer                            27005
Operations Manager                            27006
Lecturer/Professor                            27007
Software Developer                            27008
Search Engine Optimisation /SEO Specialist    27009
Length: 27010, dtype: int64


In [121]:
def recommend_jobs(title, similarity=similarity):
    if title not in indices:
        return pd.DataFrame(columns=['Job Title', 'Job Experience Required', 'Key Skills'])
    
    index = indices[title].any()
   
    similarity_scores = list(enumerate(similarity[0]))
    recommendations = sorted(similarity_scores, key=lambda x: x[1], reverse=True)[1:11]
    job_indices = [i[0] for i in similarity_scores]
    return data[['Job Title', 'Job Experience Required', 'Key Skills']].iloc[job_indices]
    
    

In [120]:
recommend_jobs("Software Developer")

[(20730, np.float64(0.7874873682183559)), (23610, np.float64(0.7722954458610047)), (10597, np.float64(0.7281546543765661)), (1901, np.float64(0.6995438891411889)), (24408, np.float64(0.677882321258524)), (10390, np.float64(0.6513473233406274)), (15475, np.float64(0.636386702818991)), (13986, np.float64(0.6296977712630268)), (20880, np.float64(0.6216624854282699)), (18305, np.float64(0.6190137880622806))]


Unnamed: 0,Job Title,Job Experience Required,Key Skills
0,Media Planning Executive/Manager,5 - 10 yrs,Media Planning| Digital Media
1,Sales Executive/Officer,2 - 5 yrs,pre sales| closing| software knowledge| clien...
2,R&D Executive,0 - 1 yrs,Computer science| Fabrication| Quality check|...
3,Technical Support Engineer,0 - 5 yrs,Technical Support
4,Testing Engineer,2 - 5 yrs,manual testing| test engineering| test cases|...
...,...,...,...
27005,Software Developer,9 - 12 yrs,Graphics| C++| USB| Project management| SOC| ...
27006,Operations Manager,1 - 5 yrs,Service delivery| LMS| CRE| Integration servi...
27007,Lecturer/Professor,4 - 6 yrs,Counselor| Mentor| Trainer| Advisor| Teaching
27008,Software Developer,5 - 10 yrs,Security Analysis| Software Development Life ...
