In [1]:
import pandas as pd
import numpy as np

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [9]:
job_df = pd.read_csv('/content/drive/MyDrive/Colab/AI_project/job_sample/jobs_dataset.csv')
job_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 35 entries, 0 to 34
Data columns (total 8 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   company_name  35 non-null     object
 1   job_title     35 non-null     object
 2   Unnamed: 2    23 non-null     object
 3   job_des       34 non-null     object
 4   job_location  35 non-null     object
 5   exp_req       17 non-null     object
 6   edu_req       12 non-null     object
 7   cert_req      3 non-null      object
dtypes: object(8)
memory usage: 2.3+ KB


In [10]:
job_df.head()

Unnamed: 0,company_name,job_title,Unnamed: 2,job_des,job_location,exp_req,edu_req,cert_req
0,Smart Food Safe Solutions Inc,Software Testing Internship,-,Selected intern's day-to-day responsibilities ...,Bengaluru,0,students or freshers can apply who: are availa...,-
1,Mobiveil,Embedded Software Testing,-,Good in different type of testing and proficie...,Bangalore,4+ Years,Btech/ MCA,should own a module/component end 2 end
2,Open Systems International,Senior Engineer - Software Testing,-,Specific on-the-job training will be provided....,Bengaluru,Strong hands-on 2+ yearsâ€™ experience on Test...,"Masterâ€™s degree in Computer Science, Softwar...",Experience with Quality Assurance within an Ag...
3,HyperLink InfoSystem,Business Development Manager,-,Contacting potential clients to establish rapp...,Ahemdabad,2+ years,"Any graduation/post graduation, MBA/PGDM in Ma...",
4,Infosys,Oracle Functional Consultant,-,Able to configure financial modules independen...,Mangalore,5 to 8 years of exp,,


In [12]:
job_df = job_df[job_df['job_des'].notna()]
job_df.shape

(34, 8)

In [13]:
job_df['job_des']

0     Selected intern's day-to-day responsibilities ...
1     Good in different type of testing and proficie...
2     Specific on-the-job training will be provided....
3     Contacting potential clients to establish rapp...
4     Able to configure financial modules independen...
5     As a data analyst, design and carry out survey...
6     IT/systems associate analyst participates in t...
7     As a business analyst, use data modelling tech...
8     Understanding business objectives and developi...
9     Responsibilities include: Identify valuable da...
10    Require Strong Development & Test Resources fo...
11    An Associate Developer works with and supports...
12    The data scientist job description involves fe...
13    As a data analyst, develop records management ...
14    NLP, Machine Learning, with Java as operating ...
15    Perform following duties: oversee the implemen...
16    The hire is expected to have good knowledge of...
17    Perform following duties: creating documen

In [15]:
# CREATING A BAG OF WORDS

from sklearn.feature_extraction.text import TfidfVectorizer

# Convert a collection of raw documents to a matrix of TF-IDF features.
# tf-idf means term-frequency times inverse document-frequency
# tf-idf for a term t of a document d in a document set is tf-idf(t, d) = tf(t, d) * idf(t)
# idf is computed as idf(t) = log [ n / df(t) ] + 1 

tdif = TfidfVectorizer(stop_words='english')
# print(type(tdif))
tdif_matrix = tdif.fit_transform(job_df['job_des'])
tdif_matrix.shape

(34, 629)

In [16]:
print(type(tdif_matrix))

<class 'scipy.sparse._csr.csr_matrix'>


In [18]:
from sklearn.metrics.pairwise import linear_kernel

# considers not only the similarity between vectors under the same dimension,
# but also across dimensions. When used in machine learning algorithms, 
# this allows to account for feature interaction.

cosine_sim = linear_kernel(tdif_matrix, tdif_matrix)

#  tf-idf functionality in sklearn.feature_extraction.text can produce normalized vectors, in which case cosine_similarity is equivalent to linear_kernel, only slower

indices = pd.Series(job_df.index, index=job_df['job_title']).drop_duplicates()
indices

job_title
Software Testing Internship                            0
Embedded Software Testing                              1
Senior Engineer - Software Testing                     2
Business Development Manager                           3
Oracle Functional Consultant                           4
Jr. Data Analyst                                       5
IT/System Associate Analyst                            6
Senior Business Analyst                                7
Machine Learning Engineer                              8
Data Scientist                                         9
Software Testing Engineer                             10
Associate Developer                                   11
Data Scientist                                        12
Data Analyst                                          13
NLP / Machine Learning                                14
Business Analyst                                      15
Java GCP Data Engineer                                16
Jr. Business Analyst 

In [26]:
def get_recommendations(title,cosine_sim=cosine_sim):
  idx = indices[title]
  # print(idx)
  sim_scores = list(enumerate(cosine_sim[idx]))
  # print(sim_scores)
  sim_scores = sorted(sim_scores, key=lambda X:X[1], reverse=True)
  # we don't how this 'key=lambda X:X[1]' works
  # print(sim_scores)
  sim_scores = sim_scores[1:5]

  tech_indices = [i[0] for i in sim_scores]
  return job_df.iloc[tech_indices]

In [27]:
sim_jobs = get_recommendations('Software Testing Internship')
sim_jobs

Unnamed: 0,company_name,job_title,Unnamed: 2,job_des,job_location,exp_req,edu_req,cert_req
28,Hexaware Technologies,Software Testing Engineer,-,Day-to-day responsibilities include: 1. Review...,Navi Mumbai,,,
2,Open Systems International,Senior Engineer - Software Testing,-,Specific on-the-job training will be provided....,Bengaluru,Strong hands-on 2+ yearsâ€™ experience on Test...,"Masterâ€™s degree in Computer Science, Softwar...",Experience with Quality Assurance within an Ag...
32,Cvent,Software Testing Engineer,-,"To ensure success as a software test engineer,...",West Delhi,Experience working with popular operating syst...,"Bachelor’s degree in computer science, softwar...",
10,Wipro,Software Testing Engineer,-,Require Strong Development & Test Resources fo...,Mumbai,,,
