In [10]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [11]:
# Load the Glints Singapore job postings from the CSV that sits next to the
# notebook; every later cell reads from this DataFrame.
jobs_csv_path = "glints_singapore_jobs.csv"
job_data = pd.read_csv(jobs_csv_path)

In [12]:
# List the column names of the loaded postings so the column used for
# matching ("Skills") and the labelling columns can be confirmed by eye.
print(job_data.columns)

Index(['Job Title', 'Company Name', 'Industry', 'City', 'Country',
       'Employment Type', 'Work Arrangement', 'Min Experience (Years)',
       'Max Experience (Years)', 'Salary Min (SGD)', 'Salary Max (SGD)',
       'Skills'],
      dtype='object')


In [13]:
# FDM curriculum topics as one comma-separated document. It is vectorized
# together with the job skills so both share a single TF-IDF vocabulary.
# Adjacent string literals are concatenated into one string; the value is
# kept on several lines only for readability.
fdm_curriculum = (
    "Software Development, Software Testing, DevOps, Cloud Computing, "
    "Site Reliability Engineering, Business Intelligence, Business Analytics, "
    "Project Support, Data Engineering, Data Science, Machine Learning, "
    "Data Governance, Technical Analysis, Amazon Web Services, "
    "Cyber Security, Risk and Compliance"
)

In [14]:
# Collect the skills text of every posting that has one, as a plain list.
# Postings with a missing "Skills" value are left out, so a position in this
# list only equals the posting's row in job_data when nothing was dropped.
skills_column = job_data["Skills"]
skills = skills_column[skills_column.notna()].tolist()

In [15]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Build the corpus: the skills text of each posting, followed by the FDM
# curriculum as the final document.
all_texts = [*skills, fdm_curriculum]

# Fit a TF-IDF vectorizer with default settings on the whole corpus and encode
# it in the same step: one row per document, with the curriculum as the last row.
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(all_texts)

In [16]:
from sklearn.metrics.pairwise import cosine_similarity

# The curriculum is the final row of the TF-IDF matrix; every row before it
# is a job posting. Slicing with [-1:] keeps the curriculum two-dimensional,
# so the result has a single row with one score per posting.
curriculum_vector = tfidf_matrix[-1:]
posting_vectors = tfidf_matrix[:-1]
similarity_scores = cosine_similarity(curriculum_vector, posting_vectors)

In [17]:
import numpy as np

# Get sorted indices (highest similarity first). These are positions within
# the similarity scores, i.e. within the postings that had a "Skills" value.
ranked_indices = np.argsort(similarity_scores[0])[::-1]

# Postings with a missing "Skills" value were dropped before vectorizing, so a
# score position is not necessarily the posting's row in job_data. Recover the
# original (0-based) row position of every scored posting so that the printed
# job number always refers to the row in the loaded CSV.
source_rows = np.flatnonzero(job_data["Skills"].notna().to_numpy())

# Print ranked jobs with similarity scores
print("\nJob Rankings Based on Similarity to FDM Curriculum:")
for i, idx in enumerate(ranked_indices):
    print(f"{i+1}. Job {source_rows[idx]+1} - Similarity Score: {similarity_scores[0][idx]:.4f}")


Job Rankings Based on Similarity to FDM Curriculum:
1. Job 15 - Similarity Score: 0.2720
2. Job 1 - Similarity Score: 0.1770
3. Job 21 - Similarity Score: 0.1415
4. Job 4 - Similarity Score: 0.1393
5. Job 2 - Similarity Score: 0.1242
6. Job 14 - Similarity Score: 0.1235
7. Job 16 - Similarity Score: 0.1151
8. Job 3 - Similarity Score: 0.1121
9. Job 6 - Similarity Score: 0.1034
10. Job 12 - Similarity Score: 0.1011
11. Job 29 - Similarity Score: 0.0959
12. Job 9 - Similarity Score: 0.0870
13. Job 17 - Similarity Score: 0.0840
14. Job 13 - Similarity Score: 0.0803
15. Job 26 - Similarity Score: 0.0771
16. Job 27 - Similarity Score: 0.0702
17. Job 18 - Similarity Score: 0.0672
18. Job 7 - Similarity Score: 0.0605
19. Job 28 - Similarity Score: 0.0592
20. Job 11 - Similarity Score: 0.0383
21. Job 23 - Similarity Score: 0.0295
22. Job 20 - Similarity Score: 0.0219
23. Job 10 - Similarity Score: 0.0000
24. Job 19 - Similarity Score: 0.0000
25. Job 8 - Similarity Score: 0.0000
26. Job 22 - S

In [19]:
# The similarity scores (and therefore ranked_indices) are positions within
# the postings that have a "Skills" value, because rows with a missing value
# were dropped before vectorizing. Select from that same filtered set of rows;
# indexing the unfiltered job_data by position would attach scores to the
# wrong titles and companies as soon as one "Skills" value is missing.
jobs_with_skills = job_data.dropna(subset=["Skills"])
assert len(jobs_with_skills) == similarity_scores.shape[1], (
    "number of postings with skills does not match the number of similarity scores"
)

# Create a DataFrame with ranked results (best match first)
ranked_rows = jobs_with_skills.iloc[ranked_indices]
ranked_jobs = pd.DataFrame({
    "Job Title": ranked_rows["Job Title"].values,
    "Company Name": ranked_rows["Company Name"].values,
    "Similarity Score": similarity_scores[0][ranked_indices]
})

# Save to CSV
ranked_jobs.to_csv("ranked_jobs.csv", index=False)

# Show the top-ranked jobs
print(ranked_jobs.head(10))

                                           Job Title  \
0   Analyst Programmer / Systems Analyst (DSD) [NIE]   
1                             Software Test Engineer   
2                             AI Workflow Specialist   
3                                 Software Developer   
4              Full Stack Software Engineer (Remote)   
5  Technical/ Assistant director (Professional En...   
6                Backend Developer (Includes Equity)   
7             Software QA Engineer (Includes Equity)   
8                          Intern Software Developer   
9                      Civil and Structural Engineer   

                      Company Name  Similarity Score  
0  National Institute Of Education          0.271970  
1              GivEnergy Pte. Ltd.          0.176973  
2                 The Swim Starter          0.141508  
3                            Cinch          0.139346  
4                    Hypotenuse AI          0.124241  
5  Encasa Asia Employment Services          0.123500 