# Import libraries

In [2]:
import json
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import ast
import random


In [1]:
# Mount Google Drive into the Colab runtime's filesystem at /content/drive/.
# NOTE(review): none of the cells visible in this notebook read from or write
# to the mounted drive — confirm whether this mount is still needed.
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


# Data preparation

In [6]:
# Candidate industries, experience levels and job functions from which the
# synthetic job postings below are randomly sampled.

industries = ['Accountancy', 'Air Transport', 'Aerospace', 'Biopharmaceuticals Manufacturing', 'Built Environment', 'Design', 'Early Childhood', 'Electronics', 'Energy And Chemicals', 'Energy And Power', 'Engineering Services', 'Environmental Services', 'Financial Services', 'Food Manufacturing', 'Food Services', 'Healthcare', 'Hotel & Accommodation Services', 'Human Resource', 'Infocomm Technology', 'Intellectual Property', 'Landscape', 'Logistics', 'Marine And Offshore', 'Media', 'Precision Engineering', 'Public Transport', 'Retail', 'Sea Transport', 'Security', 'Social Service', 'Tourism', 'Training And Adult Education', 'Wholesale Trade', 'Workplace Safety And Health']
experiences = ['Entry', 'Junior to Mid', 'Senior', 'Manager (Minimal Experience)', 'Senior Manager', 'Director & Above']
job_functions = ['Administrative', 'Architecture, Design', 'Business Development', 'Customer Service, Support', 'Community Service/NVPC', 'Consulting/IT', 'Consulting/Strategy', 'Education, Teaching, Childcare', 'Food and Beverage', 'Finance/Accounting', 'Finance/Commercial Banking', 'Finance/Corporate Finance', 'Finance/Insurance', 'Finance/Investment Banking', 'Finance/Investments/Asset Mgt', 'Finance/Public Finance', 'Finance/Real Estate', 'Finance/Sales and Trading', 'Human Resources', 'Hospitality, Hotel, Tourism', 'Information Technology', 'Legal, Risk and Compliance', 'Management/General', 'Management/Leadership Development', 'Marketing/Advertising', 'Marketing/Brand Management', 'Marketing/Market Research', 'Marketing/Public Relations', 'Marketing/Sales', 'Manufacturing, Factory', 'Medical, Nursing, Pharmacy', 'Operations/Engineering', 'Operations/Logistics/Supply Chain', 'Operations/Production Management', 'PR, Communications', 'Quality Assurance', 'Translation', 'Writing, Editing']

# Number of synthetic job postings to generate.
N_SAMPLE_JOBS = 100
# Fixed seed so that Restart & Run All regenerates exactly the same postings.
RANDOM_SEED = 42

# A dedicated generator keeps the seeding local to this cell instead of
# resetting the state of the global `random` module.
rng = random.Random(RANDOM_SEED)

# Each posting combines one independently drawn industry, experience level
# and job function (drawn in that order for every posting).
data = [
    {
        'industry': rng.choice(industries),
        'experience': rng.choice(experiences),
        'job_function': rng.choice(job_functions),
    }
    for _ in range(N_SAMPLE_JOBS)
]

# Serialise the postings to a JSON string, which the next cell parses back.
json_data = json.dumps(data)
print(json_data)


[{"industry": "Workplace Safety And Health", "experience": "Junior to Mid", "job_function": "Information Technology"}, {"industry": "Engineering Services", "experience": "Manager (Minimal Experience)", "job_function": "Architecture, Design"}, {"industry": "Intellectual Property", "experience": "Senior", "job_function": "Finance/Public Finance"}, {"industry": "Early Childhood", "experience": "Director & Above", "job_function": "Education, Teaching, Childcare"}, {"industry": "Public Transport", "experience": "Entry", "job_function": "Management/Leadership Development"}, {"industry": "Public Transport", "experience": "Senior", "job_function": "Finance/Public Finance"}, {"industry": "Infocomm Technology", "experience": "Director & Above", "job_function": "PR, Communications"}, {"industry": "Landscape", "experience": "Junior to Mid", "job_function": "Marketing/Brand Management"}, {"industry": "Sea Transport", "experience": "Manager (Minimal Experience)", "job_function": "Quality Assurance"}

In [4]:
# Parse the JSON text back into a list of job dicts.
# json.loads is the exact inverse of the json.dumps call that produced
# json_data. ast.literal_eval parses Python literals instead, so it would
# reject JSON-only tokens such as true/false/null.
jobs = json.loads(json_data)

# Recommendation algorithm

In [7]:
# Fields the user's preferences must provide; the same keys are read from
# every job record when the corpus is built below.
REQUIRED_FIELDS = ('job_function', 'industry', 'experience')

# Get user input (a JSON object typed at the prompt).
user_input = json.loads(input("Enter job preferences in JSON format: "))

# Fail early with a readable message instead of a bare KeyError/TypeError
# further down. Malformed JSON already surfaces as a ValueError subclass
# (json.JSONDecodeError), so every kind of bad input raises ValueError.
if not isinstance(user_input, dict):
    raise ValueError("Job preferences must be a JSON object.")
missing_fields = [field for field in REQUIRED_FIELDS if field not in user_input]
if missing_fields:
    raise ValueError(
        "Job preferences are missing required field(s): " + ", ".join(missing_fields)
    )

# Create a corpus of job descriptions: one space-joined text per job.
corpus = [f"{job['job_function']} {job['industry']} {job['experience']}" for job in jobs]
# Add the user input to the corpus as its last entry so that it is
# vectorized with the same vocabulary as the jobs.
user_description = f"{user_input['job_function']} {user_input['industry']} {user_input['experience']}"
corpus.append(user_description)

# Vectorize the corpus into token-count vectors.
vectorizer = CountVectorizer()
vectorized_corpus = vectorizer.fit_transform(corpus)

# Calculate cosine similarity between the user input (last row) and every
# job (all rows before it). The result has a single row of scores.
similarity_scores = cosine_similarity(vectorized_corpus[-1], vectorized_corpus[:-1])

# Minimum similarity score a job needs in order to be recommended.
threshold = 0.1

# Rank job indices from most to least similar, then keep only those whose
# score reaches the threshold.
filtered_indices = similarity_scores.argsort()[0][::-1]
filtered_indices = [i for i in filtered_indices if similarity_scores[0][i] >= threshold]
recommended_jobs = [jobs[i] for i in filtered_indices]

# Print recommended jobs
if recommended_jobs:
    print("Recommended Jobs:")
    for job in recommended_jobs:
        print(f"{job['job_function']} in {job['industry']} at {job['experience']} level")
else:
    print("No jobs found for the given preferences.")


Enter job preferences in JSON format: {"industry": "Workplace Safety And Health", "experience": "Junior to Mid", "job_function": "Information Technology"}
Recommended Jobs:
Translation in Aerospace at Junior to Mid level
Finance/Accounting in Healthcare at Junior to Mid level
Quality Assurance in Aerospace at Junior to Mid level
Finance/Commercial Banking in Landscape at Junior to Mid level
Hospitality, Hotel, Tourism in Logistics at Junior to Mid level
Management/Leadership Development in Landscape at Junior to Mid level
Finance/Accounting in Early Childhood at Junior to Mid level
Finance/Insurance in Air Transport at Junior to Mid level
Human Resources in Environmental Services at Junior to Mid level
Management/Leadership Development in Early Childhood at Junior to Mid level
Operations/Logistics/Supply Chain in Aerospace at Junior to Mid level
Education, Teaching, Childcare in Biopharmaceuticals Manufacturing at Junior to Mid level
Operations/Engineering in Precision Engineering at J