# Preliminary work

In [32]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.neighbors import NearestNeighbors
import re
from sklearn.model_selection import GridSearchCV


In [4]:
df = pd.read_csv("Indeed_WebScraping\indeed_jobs_modified.csv")

# Text preprocessing
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf_vectorizer.fit_transform(df['Job Description'])

# Function to recommend jobs based on input text
def recommend_jobs(input_text, df, tfidf_matrix, tfidf_vectorizer, top_n=3):
    input_vector = tfidf_vectorizer.transform([input_text])
    cosine_similarities = cosine_similarity(input_vector, tfidf_matrix).flatten()
    related_jobs_indices = cosine_similarities.argsort()[:-top_n-1:-1]
    recommended_jobs = df.iloc[related_jobs_indices]
    return recommended_jobs

In [10]:
# Example usage
input_text = "data science, machine learning, data analysis, ai"
recommend_jobs(input_text, df, tfidf_matrix, tfidf_vectorizer)

Unnamed: 0,Job Title,Company,Salary,Job Type,Location,Job Description,Posted Date,Today Date,Listing Date
262,Software and Applications Manager\n,JOYFULAI TECHNOLOGY PTE. LTD.,"$10,000 - $13,000 a month","Permanent, Full-time",Singapore,"Joyful AI, an innovative AI technology startup...",PostedToday,2024-03-21,2024-03-21 00:00:00
124,Summer Internship 2024,Munich Re,,Internship,Singapore,Summer Internship 2024 - Data Analyst & BI Int...,PostedToday,2024-03-21,2024-03-21 00:00:00
23,Summer Internship 2024,Munich Re,,Internship,Singapore,Summer Internship 2024 - Data Analyst & BI Int...,PostedToday,2024-03-21,2024-03-21 00:00:00


In [11]:
input_text = "data science"
recommend_jobs(input_text, df, tfidf_matrix, tfidf_vectorizer)

Unnamed: 0,Job Title,Company,Salary,Job Type,Location,Job Description,Posted Date,Today Date,Listing Date
119,DATA ANALYTICS (Health)\n,RESOURCE HUNT PTE. LTD.,"$9,000 - $10,000 a month",Full-time,Singapore,Health Analytics Manager\nJoin our team and he...,PostedJust posted,2024-03-21,2024-03-21 00:00:00
192,Solution Architect,SoftwareOne,,Full-time,Singapore 189720,Job Function: Software & Cloud Services The ro...,PostedPosted 2 days ago,2024-03-21,2024-03-19 00:00:00
286,Software Development Senior Specialist\n,NTT Data Services Pte Ltd,,"Permanent, Full-time",Singapore,S\nPosted by\nShubhansh Jaiswal\nTalent Acquis...,PostedToday,2024-03-21,2024-03-21 00:00:00


In [12]:
input_text = "data analysis"
recommend_jobs(input_text, df, tfidf_matrix, tfidf_vectorizer)

Unnamed: 0,Job Title,Company,Salary,Job Type,Location,Job Description,Posted Date,Today Date,Listing Date
167,Data Analyst\n,COGNOSPHERE PTE. LTD.,"$7,000 - $14,000 a month","Permanent, Full-time",Singapore,"Who Are We?\nAt HoYoverse, we are committed to...",PostedPosted 2 days ago,2024-03-21,2024-03-19 00:00:00
170,Data Analyst\n,NodeFlair,"$7,000 - $14,000 a month",,Singapore,"Job Summary\n\nSalary\nS$7,000 - S$14,000 / Mo...",PostedPosted 2 days ago,2024-03-21,2024-03-19 00:00:00
140,Customer Excellence Data Analytics & Reporting...,Cargill,,,Singapore,Job Purpose and Impact\n\nThe role of Commerci...,PostedPosted 1 day ago,2024-03-21,2024-03-20 00:00:00


In [13]:
input_text = "machine learning"
recommend_jobs(input_text, df, tfidf_matrix, tfidf_vectorizer)

Unnamed: 0,Job Title,Company,Salary,Job Type,Location,Job Description,Posted Date,Today Date,Listing Date
208,Workforce Operations Analyst\n,Apple,,,Singapore,Summary\n\nPosted: 15 Mar 2024\n\nRole Number:...,PostedPosted 2 days ago,2024-03-21,2024-03-19 00:00:00
204,Quantitative Trader/Analyst Commodities\n,KS Talent Solutions,,"Permanent, Full-time",Singapore,Join our client in an exciting role based in S...,PostedPosted 2 days ago,2024-03-21,2024-03-19 00:00:00
222,Software Engineer\n,ETUAN MECHATRONIC PTE LTD,"$3,500 - $4,500 a month",Full-time,Singapore,Design and develops software for PLC/PC/Vision...,PostedJust posted,2024-03-21,2024-03-21 00:00:00


# Rec system
- cold start problem - use content based approach
  - standard nlp process, 
  - user profiles info to start rec
  - e.g. k-NN model
- avoid rule based / search engine like recc system
- lack of labels, unsupervised method
  - PCA
- copy paste resume in to get rec jobs: https://towardsdatascience.com/building-a-job-recommender-for-non-technical-business-roles-via-nlp-and-machine-learning-626c4039931e
- azure cloud deployment, 10months old
  - https://github.com/abbas99-hub/Job-Recommendation-System 
  - https://medium.com/@abbasbehrain95/creating-an-ai-powered-job-recommendation-system-50ce1cd12d36
- microsoft resource: https://azure.microsoft.com/en-us/blog/building-recommender-systems-with-azure-machine-learning-service/

# TF IDF, KNN

In [21]:
# Load the data
data = pd.read_csv('consolidated.csv')

In [22]:
# Data Cleaning
def clean_text(text):
    if isinstance(text, str):
        # Remove special characters and digits
        text = re.sub(r'[^a-zA-Z\s]', '', text)
        # Convert to lowercase
        text = text.lower()
        return text
    else:
        return ""

data['clean_description'] = data['description'].apply(clean_text)

In [23]:
# TF-IDF Representation
vectorizer = TfidfVectorizer(stop_words='english', max_df=0.8, min_df=0.2, ngram_range=(1, 2))
tfidf_matrix = vectorizer.fit_transform(data['clean_description'])

In [33]:
# Hyperparameter Tuning
parameters = {
    'n_neighbors': [5, 10, 15],
    'leaf_size': [20, 30, 40],
    'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute']
}

nbrs = NearestNeighbors()
grid_search = GridSearchCV(nbrs, parameters, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)
grid_search.fit(tfidf_matrix)

print("Best Parameters:", grid_search.best_params_)

Best Parameters: {'algorithm': 'auto', 'leaf_size': 20, 'n_neighbors': 5}


 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan]


In [37]:
# Get the best parameters from grid search
best_params = grid_search.best_params_

# Create a new instance of NearestNeighbors with the best parameters
nbrs = NearestNeighbors(n_neighbors=best_params['n_neighbors'],
                             leaf_size=best_params['leaf_size'],
                             algorithm=best_params['algorithm'])

# Fit the new NearestNeighbors model with the TF-IDF matrix
nbrs .fit(tfidf_matrix)

In [38]:
# Function to recommend top jobs using the new NearestNeighbors model
def recommend_jobs_with_best_params(resume_text):
    queryTFIDF = vectorizer.transform([resume_text])
    distances, indices = nbrs.kneighbors(queryTFIDF)
    matches = []
    for i, j in enumerate(indices):
        for idx in j:
            dist = round(distances[i][0], 2)
            temp = {'Job Title': data.iloc[idx]['job title'], 
                    'Company': data.iloc[idx]['company'],
                    'Salary Range': data.iloc[idx]['salary range'],
                    'Match Confidence': dist,
                    'Description': data.iloc[idx]['description']}
            matches.append(temp)
    recommended_jobs = pd.DataFrame(matches).sort_values('Match Confidence', ascending=False)
    return recommended_jobs


In [42]:
# Example usage
sample_resumes = [
    "Experienced software engineer proficient in Python and JavaScript.",
    "Experienced data scientist with expertise in machine learning and big data analytics.",
    "Junior software developer proficient in Java and SQL, eager to learn and contribute to innovative projects.",
    "Recent graduate with a degree in computer science, skilled in Python programming and web development.",
    "Experienced project manager with strong leadership and communication skills, adept at managing cross-functional teams."
]

for idx, resume_text in enumerate(sample_resumes, 1):
    print(f"Sample Resume {idx}:")
    print(f"Resume Text: {resume_text}")
    recommended_jobs = recommend_jobs_with_best_params(resume_text)
    print("Recommended Jobs:")
    print(recommended_jobs)
    print("\n")

Sample Resume 1:
Resume Text: Experienced software engineer proficient in Python and JavaScript.
Recommended Jobs:
                                           Job Title  \
0                                Python FS Developer   
1         Software Engineer (Python), Up to SGD$180k   
2         Software Engineer (Python), Up to SGD$180k   
3  Senior Software Engineer (C#, Python )/ Senior...   
4                 Software Engineer - Python and K8s   

                            Company              Salary Range  \
0                  Snaphunt Pte Ltd    $1,000 - 1,300 monthly   
1                       Hunter Bond                       NaN   
2                       Hunter Bond                       NaN   
3  STAR CAREER CONSULTING PTE. LTD.  $8,000 - $9,500 a month    
4                         Canonical                       NaN   

   Match Confidence                                        Description  
0              0.62  Leadership RoleGreat work environmentAttractiv...  
1          

In [43]:
# Longer sample resume texts
longer_sample_resumes = [
    "Highly skilled software engineer with over 8 years of experience in developing robust and scalable software solutions. Proficient in a wide range of programming languages including Python, Java, C++, and JavaScript. Experienced in full-stack web development with expertise in both front-end and back-end technologies. Strong problem-solving abilities and a passion for learning new technologies.",
    "Data scientist with a background in mathematics and statistics, possessing advanced analytical skills and expertise in machine learning algorithms. Experienced in data preprocessing, feature engineering, and model evaluation. Proficient in programming languages such as Python and R, with extensive experience in data manipulation libraries such as Pandas and NumPy. Strong communication skills and ability to present complex technical concepts to non-technical stakeholders.",
    "Seasoned full-stack developer with a focus on creating innovative and user-friendly web applications. Proficient in modern web technologies including HTML5, CSS3, JavaScript, and TypeScript. Experienced in using frameworks such as React, Angular, and Vue.js for front-end development, and Node.js and Express.js for back-end development. Skilled in database management systems including MySQL, MongoDB, and PostgreSQL. Strong understanding of Agile methodologies and best practices in software development.",
    "Recent graduate with a degree in computer science, eager to kickstart a career in software engineering. Possesses a strong foundation in computer science concepts including data structures, algorithms, and object-oriented programming. Proficient in programming languages such as Java, Python, and C#. Experience with web development technologies including HTML, CSS, and JavaScript. Strong problem-solving skills and ability to work collaboratively in team environments.",
    "Accomplished project manager with over 10 years of experience leading successful projects from initiation to completion. Skilled in project planning, resource allocation, and risk management. Experienced in working with cross-functional teams and stakeholders to achieve project objectives. Strong leadership abilities and effective communication skills. PMP certified with a proven track record of delivering projects on time and within budget."
]

# Display recommended jobs for longer sample resume texts
for idx, resume_text in enumerate(longer_sample_resumes, 1):
    print(f"Sample Resume {idx}:")
    print(f"Resume Text: {resume_text}\n")
    recommended_jobs = recommend_jobs_with_best_params(resume_text)
    print("Recommended Jobs:")
    print(recommended_jobs)
    print("\n")


Sample Resume 1:
Resume Text: Highly skilled software engineer with over 8 years of experience in developing robust and scalable software solutions. Proficient in a wide range of programming languages including Python, Java, C++, and JavaScript. Experienced in full-stack web development with expertise in both front-end and back-end technologies. Strong problem-solving abilities and a passion for learning new technologies.

Recommended Jobs:
            Job Title                     Company              Salary Range  \
0  Software Engineer                     Randstad                       NaN   
1  Software Engineer                     Randstad                       NaN   
2   Software Engineer               Nicoll Curtin                       NaN   
3         Developer\n                 PERSOLKELLY    Up to $11,000 a month    
4   DevOps Engineer\n  D4L DATA4LIFE ASIA LIMITED  $5,000 - $7,500 a month    

   Match Confidence                                        Description  
0      