In [3]:
import sys
import os

# Make the repository root importable so the `src.*` packages resolve from
# this notebook. The root is the parent of the current working directory,
# so the notebook must be launched from its own folder.
PROJECT_ROOT = os.path.abspath(os.pardir)

# Only prepend once, so re-running the cell does not pile up duplicates.
if sys.path.count(PROJECT_ROOT) == 0:
    sys.path.insert(0, PROJECT_ROOT)

print("Project root added:", PROJECT_ROOT)

Project root added: c:\xampp\htdocs\skill-value-intelligence


In [4]:
import pandas as pd

from src.ingestion.job_market import load_job_data
from src.processing.extract_skills import extract_skills
from src.signals.demand_signal import compute_demand_signal

In [6]:
# Read the raw job postings through the project's loader; the path is
# relative to the notebook's working directory.
df = load_job_data("../data/raw/jobs.csv")
# Preview the first rows to confirm the load worked.
df.head()


Unnamed: 0,title,company,location,description,salary_min,salary_max,date_posted,source
0,Junior Data Analyst,ABC Corp,Manila,We are looking for a Junior Data Analyst with ...,30000,45000,2024-01-15,mock_dataset
1,Data Scientist,XYZ Tech,Remote,"The role requires Python, machine learning, st...",80000,120000,2024-01-20,mock_dataset
2,Backend Developer,StartupHub,Remote,Seeking a backend developer with strong Python...,70000,100000,2024-01-18,mock_dataset
3,BI Analyst,Enterprise Inc,Quezon City,"Responsibilities include SQL reporting, dashbo...",40000,60000,2024-01-10,mock_dataset
4,Machine Learning Engineer,AI Labs,Remote,"Looking for an ML engineer skilled in Python, ...",100000,150000,2024-01-22,mock_dataset


In [7]:
# Quick structural check: (rows, columns) plus the list of column names.
df.shape, df.columns.tolist()

((5, 8),
 ['title',
  'company',
  'location',
  'description',
  'salary_min',
  'salary_max',
  'date_posted',
  'source'])

In [8]:
# Run the project's skill extractor on every description. Note that this
# adds a `skills` column to `df` in place, so `df` now has one more column
# than the shape check above reported.
df["skills"] = df["description"].apply(extract_skills)
# Show titles next to their extracted skills for a visual spot check.
df[["title", "skills"]].head()

Unnamed: 0,title,skills
0,Junior Data Analyst,"[python, sql, excel, pandas]"
1,Data Scientist,"[python, aws, docker, machine learning]"
2,Backend Developer,"[python, sql, aws]"
3,BI Analyst,"[sql, excel]"
4,Machine Learning Engineer,"[python, docker, machine learning]"


In [9]:
# Build the demand table from the postings frame (the displayed result has
# one row per skill with a `job_count` column).
demand = compute_demand_signal(df)
# head(10) is enough to show every skill in this small sample.
demand.head(10)

Unnamed: 0,skills,job_count
5,python,4
6,sql,3
0,aws,2
2,excel,2
1,docker,2
3,machine learning,2
4,pandas,1


### Demand Signal – Interpretation

This table shows how many job postings mention each skill (`job_count`).

This signal reflects employer *posting behavior*, not confirmed hires.

Key limitations:
- Boilerplate job descriptions may inflate counts
- Frequency ≠ economic value
- Baseline skills (here Python and SQL) dominate the top of the ranking

This motivates adding compensation and momentum signals next.


In [10]:
from src.signals.compensation_signal import compute_compensation_signal


In [11]:
# Build the compensation table from the same postings frame (the displayed
# result has one row per skill with an `avg_salary` column).
compensation = compute_compensation_signal(df)
compensation

Unnamed: 0,skills,avg_salary
1,docker,112500.0
3,machine learning,112500.0
0,aws,92500.0
5,python,86875.0
6,sql,57500.0
2,excel,43750.0
4,pandas,37500.0


### Compensation Signal – Interpretation

This table estimates the average salary midpoint associated with each skill
based on job postings that explicitly mention that skill.

Observations:
- Some skills correlate with higher average salaries
- Results depend on job mix and sample size
- Skills concentrated in senior roles may look more valuable than the skill alone would justify

Limitations:
- Small sample size
- Correlation ≠ causation
- Salary ranges may be noisy or inflated


In [12]:
from src.models.skill_value_index import compute_skill_value_index

In [13]:
# Combine the demand and compensation tables into a single index per skill.
svi = compute_skill_value_index(demand, compensation)
# Show the score alongside the two inputs it is reported with.
svi[["skills", "skill_value_index", "job_count", "avg_salary"]]

Unnamed: 0,skills,skill_value_index,job_count,avg_salary
0,python,0.829167,4,86875.0
5,machine learning,0.666667,2,112500.0
4,docker,0.666667,2,112500.0
2,aws,0.533333,2,92500.0
1,sql,0.466667,3,57500.0
3,excel,0.208333,2,43750.0
6,pandas,0.0,1,37500.0


### Skill Value Index – Interpretation

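The Skill Value Index combines normalized demand and compensation signals
to estimate the relative economic attractiveness of technical skills.
The scores in the table above are consistent with an equal-weighted mean of
the min-max-normalized `job_count` and `avg_salary`; for example, for Python:
$\tfrac{1}{2}\left(\tfrac{4-1}{4-1} + \tfrac{86875-37500}{112500-37500}\right) \approx 0.829$.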

Interpretation:
- High SVI skills are both widely demanded and well-compensated
- Low SVI skills may be common but low-paying, or niche skills that appear in few postings
- Rankings are relative, not absolute guarantees

Limitations:
- Small sample size
- Equal weighting is an assumption
- Does not account for seniority or industry context
