# Work Values Enrichment — Colab Runner

This notebook runs the **root-level** script `enrich_work_values.py` to merge O*NET **Work Values** into your *Company Job Titles - Mapped.xlsx* file.

**Works two ways:**
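1. **Opened from your GitHub repo in Colab** → leave `REPO_URL` empty. Colab loads only the notebook in this case, so `enrich_work_values.py` and the Excel files must already be in the working directory (upload them if they are not).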
2. **Run standalone** → set `REPO_URL` to your repo (it will `git clone` and run from there).

**What you need**
- `enrich_work_values.py` at the **repo root** (not inside a `src/` folder).
- Input: `Company Job Titles - Mapped.xlsx` (or a similar name).
- O*NET workbook: `Work Values.xlsx` (or a GitHub raw URL).

**Output**
- `Company Job Titles - Mapped.with_work_values.xlsx`

---

In [None]:
# ===== Config =====
# Repository to clone. Leave empty if the notebook was opened from the repo,
# otherwise use e.g. "https://github.com/<your-username>/<repo>.git".
REPO_URL = ""

# Workbook names -- edit if yours differ.
INPUT_EXCEL = "Company Job Titles - Mapped.xlsx"
# Either a local file name or a GitHub RAW URL.
ONET_VALUES_URL = "Work Values.xlsx"
OUTPUT_EXCEL = "Company Job Titles - Mapped.with_work_values.xlsx"

# Echo every setting so a wrong value is noticed before anything runs.
for _label, _value in (
    ("REPO_URL", REPO_URL),
    ("INPUT_EXCEL", INPUT_EXCEL),
    ("ONET_VALUES_URL", ONET_VALUES_URL),
    ("OUTPUT_EXCEL", OUTPUT_EXCEL),
):
    print(_label + ":", _value)

In [None]:
# ===== Clone the repo (optional) & set working dir =====
# When REPO_URL is set: clone the repository unless a checkout already exists,
# then make it the working directory. When it is empty: stay where we are.
# The cell is safe to re-run; a second run does not clone the repo into itself.
import os, subprocess, sys
from pathlib import Path

if REPO_URL:
    # The checkout directory is the last URL segment. Only a *trailing* ".git"
    # is removed, so a name such as "user.github.io" is left intact.
    repo_name = REPO_URL.rstrip('/').split('/')[-1]
    if repo_name.endswith('.git'):
        repo_name = repo_name[:-len('.git')]
    if not repo_name:
        raise ValueError(f"Cannot derive a repository name from REPO_URL={REPO_URL!r}")

    if Path.cwd().name == repo_name and Path('.git').exists():
        # A previous run already moved into the checkout.
        print("Already inside", repo_name)
    else:
        if not Path(repo_name).exists():
            # Argument list (no shell), so the URL is passed to git verbatim.
            clone = subprocess.run(
                ["git", "clone", REPO_URL, repo_name],
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                universal_newlines=True,
            )
            print(clone.stdout, end="")
            # Stop here on failure instead of continuing in the wrong directory.
            clone.check_returncode()
        os.chdir(repo_name)
    print("cwd ->", os.getcwd())
else:
    print("Using current working directory:", os.getcwd())

print("Directory listing:", os.listdir())

In [None]:
# ===== Install dependencies =====
import os
if os.path.exists('requirements.txt'):
    !pip install -r requirements.txt
else:
    !pip install pandas openpyxl

In [None]:
# ===== OPTIONAL: Preview your input columns (helps confirm SOC column) =====
# Best-effort: any failure is reported and the notebook carries on.
import os  # imported here so the cell does not depend on an earlier cell

PREVIEW_ROWS = 5  # number of data rows read and displayed

try:
    import pandas as pd
    if os.path.exists(INPUT_EXCEL):
        # nrows limits parsing to the rows that are shown; the column list is
        # the same as for a full read.
        preview_df = pd.read_excel(INPUT_EXCEL, nrows=PREVIEW_ROWS)
        print("Found:", INPUT_EXCEL)
        print("Columns:", list(preview_df.columns))
        display(preview_df.head(PREVIEW_ROWS))
    else:
        print(f"{INPUT_EXCEL!r} not found in", os.getcwd())
except Exception as e:
    print("Preview skipped or failed:", e)

In [None]:
# ===== OPTIONAL: Upload files (Colab only) =====
# The import and the upload share one try block on purpose: a missing
# google.colab module and a failed upload are both reported the same way
# instead of stopping the notebook.
try:
    from google.colab import files as gfiles

    print("If your Excel files are not present, upload them here:")
    uploaded = gfiles.upload()
    uploaded_names = [name for name in uploaded.keys()]
    print("Uploaded:", uploaded_names)
except Exception as upload_error:
    print("Not running in Colab or upload skipped:", upload_error)

In [None]:
# ===== Run the enrichment script =====
# Runs enrich_work_values.py from the working directory with the file names
# from the Config cell, streams its output into this cell and reports its
# exit code in `rc`.
import os, shlex, subprocess, sys

script = "enrich_work_values.py"
if not os.path.exists(script):
    raise FileNotFoundError("enrich_work_values.py not found in current directory.")

# Argument list without a shell: file names containing spaces, quotes or "$"
# reach the script unchanged. sys.executable is the interpreter running this
# kernel, which is not necessarily the "python" found on PATH.
cmd = [
    sys.executable, script,
    "--input_excel", INPUT_EXCEL,
    "--onet_values_url", ONET_VALUES_URL,
    "--output_excel", OUTPUT_EXCEL,
]
print("Running:", " ".join(shlex.quote(part) for part in cmd))

# Pipe the child's stdout and stderr (merged, so ordering is kept) and echo
# each line as it arrives, so the script's messages appear in the notebook.
with subprocess.Popen(
    cmd,
    stdout=subprocess.PIPE,
    stderr=subprocess.STDOUT,
    universal_newlines=True,
    errors="replace",
) as proc:
    for line in proc.stdout:
        print(line, end="")

# Real process exit code (0 = success), not a platform-specific wait status.
rc = proc.returncode
print("Return code:", rc)
if rc != 0:
    print("Enrichment script failed; see its output above.")

In [None]:
# ===== Download the output (Colab) or show path =====
# Offers the result as a browser download when google.colab is usable;
# otherwise prints where the file is.
from pathlib import Path

out = Path(OUTPUT_EXCEL)
if not out.exists():
    print("Output not found. Check logs above for errors.")
else:
    try:
        from google.colab import files as gfiles
        gfiles.download(str(out))
    except Exception as e:
        # Import or download did not work: fall back to printing the path.
        print("Output ready at:", out.resolve())