In [1]:
# Use the %pip magic (not !pip) so the packages are installed into the
# environment of the running kernel.
%pip install -q sentence-transformers joblib

Load seniority model（LLM）

In [2]:
# Mount Google Drive at /content/drive so files stored there can be read from this runtime.
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
# Create the target directory first: on a fresh runtime `models/` does not exist
# and `cp -r <src> models/` fails when the destination directory is missing.
!mkdir -p models
# Copy the saved seniority model directory from Drive into the local working directory.
!cp -r /content/drive/MyDrive/final_seniority_distilbert models/
# NOTE(review): models/final_department_embed_mlp is loaded further down but is not
# copied in this cell — confirm where it comes from and add the matching cp.

In [4]:
# Sanity check: list the contents of the copied seniority model directory.
!ls models/final_seniority_distilbert

id2label.json  label2id.json  model  tokenizer


In [5]:
# load seniority model
import os, json, torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Local directory that holds the seniority classifier artifacts.
SEN_DIR = "models/final_seniority_distilbert"

# Tokenizer and model are stored in separate sub-folders; local_files_only=True
# restricts loading to files already present on disk.
sen_tokenizer = AutoTokenizer.from_pretrained(os.path.join(SEN_DIR, "tokenizer"), local_files_only=True)
sen_model = AutoModelForSequenceClassification.from_pretrained(os.path.join(SEN_DIR, "model"), local_files_only=True)

# JSON object keys are always strings, so cast them back to integer class ids.
with open(os.path.join(SEN_DIR, "id2label.json")) as label_file:
    sen_id2label = {int(class_id): label for class_id, label in json.load(label_file).items()}

# Switch the model to evaluation mode; as the cell's last expression this also
# displays the model architecture.
sen_model.eval()



DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(119547, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): DistilBertSdpaAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)

Load department model（SentenceTransformer + MLP）

In [6]:
# load department model
from sentence_transformers import SentenceTransformer
import joblib, json
import numpy as np

# Local directory that holds the department classifier artifacts.
MODEL_DIR = "models/final_department_embed_mlp"

# Sentence encoder whose embeddings are fed to the classifier below.
embedder = SentenceTransformer(MODEL_DIR + "/sentence_model")

# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code —
# only load mlp.pkl from a trusted source.
clf = joblib.load(MODEL_DIR + "/mlp.pkl")

# List of class names; predictions are mapped to names by list position.
with open(MODEL_DIR + "/classes.json") as classes_file:
    classes = json.load(classes_file)

print(" Loaded Department model.")
print("Num classes:", len(classes))
print("First 5 classes:", classes[:5])


 Loaded Department model.
Num classes: 11
First 5 classes: ['Administrative', 'Business Development', 'Consulting', 'Customer Support', 'Human Resources']


In [7]:
# Embed every class name once up front. The vectors are normalized so that a dot
# product with another normalized embedding gives their cosine similarity.
class_emb = embedder.encode(classes, normalize_embeddings=True)

def cosine_topk(text, k=5):
    """Rank the class names by cosine similarity to ``text``.

    Args:
        text: Input string to embed.
        k: Maximum number of (class, similarity) pairs to return.

    Returns:
        List of ``(class_name, similarity)`` tuples, most similar first.
    """
    query_vec = embedder.encode([text], normalize_embeddings=True)[0]
    # Both sides are normalized, so the matrix-vector product yields cosine similarities.
    scores = class_emb @ query_vec
    ranked = []
    for class_idx in np.argsort(-scores)[:k]:
        ranked.append((classes[class_idx], float(scores[class_idx])))
    return ranked


Explain

In [8]:
# cosine explain for department
def predict_department_with_explain(text, k=5):
    """Predict the department for ``text`` and attach a cosine-similarity explanation.

    Args:
        text: Position / job-title string to classify.
        k: Number of class similarities to include in the explanation.

    Returns:
        Tuple ``(pred_label, pred_conf, topk)`` where ``pred_label`` is the class
        name with the highest classifier probability, ``pred_conf`` is that
        probability as a float, and ``topk`` is the result of ``cosine_topk``.
    """
    # NOTE(review): the classifier input is encoded without normalize_embeddings,
    # unlike cosine_topk — presumably matching how clf was trained; confirm.
    class_probs = clf.predict_proba(embedder.encode([text]))[0]
    best = int(np.argmax(class_probs))

    # NOTE(review): assumes the order of `classes` matches the classifier's
    # probability columns — verify against the training code.
    label = classes[best]
    confidence = float(class_probs[best])

    return label, confidence, cosine_topk(text, k=k)

# probability explain for seniority
def predict_seniority_with_probs(text, topk=3):
    """Classify seniority for ``text`` and return the top-k class probabilities.

    Args:
        text: Position / job-title string to classify.
        topk: Number of highest-probability classes to include in the table.
            Coerced to an int of at least 1, so float values (e.g. from a UI
            slider) and 0 are handled.

    Returns:
        Tuple ``(pred_label, pred_conf, df_probs)``:
        - pred_label (str): label with the highest probability.
        - pred_conf (float): probability of that label.
        - df_probs (DataFrame): columns "Seniority" and "Probability" with the
          top-k classes, highest probability first.
    """
    # Imported locally so the function does not depend on the later UI cell
    # (the notebook's only top-level pandas import) having been executed.
    import pandas as pd

    # The value is used as a slice bound below; a float would raise TypeError and
    # 0 would leave no row to read the prediction from.
    topk = max(1, int(topk))

    inputs = sen_tokenizer(text, return_tensors="pt", truncation=True)

    with torch.no_grad():
        logits = sen_model(**inputs).logits  # [1, num_labels]
        probs = torch.softmax(logits, dim=-1).cpu().numpy()[0]

    # top-k probability, in descending order
    idx = np.argsort(-probs)[:topk]
    rows = [(sen_id2label[int(i)], float(probs[i])) for i in idx]
    df = pd.DataFrame(rows, columns=["Seniority", "Probability"])

    # The first row is the arg-max class.
    pred_label, pred_conf = rows[0]
    return pred_label, pred_conf, df


UI Design

In [9]:
# Use the %pip magic (not !pip) so gradio is installed into the environment of
# the running kernel.
%pip install -q gradio
import gradio as gr
import pandas as pd

def ui_predict(text, dept_k, sen_k):
    """Gradio callback: predict department and seniority for one position text.

    Args:
        text: Free-text job title / position entered by the user.
        dept_k: Number of class similarities to show for the department explanation.
        sen_k: Number of class probabilities to show for the seniority explanation.

    Returns:
        Tuple ``(dept_summary, dept_df, sen_summary, sen_df)`` in the order of the
        four output components. For empty or whitespace-only input both summaries
        carry a prompt message and both tables are ``None``.
    """
    if not text or not text.strip():
        prompt = "Please enter a job title / position text."
        return prompt, None, prompt, None

    # Slider values end up as slice bounds downstream; a float such as 5.0 would
    # raise TypeError there, so normalize to int at the UI boundary.
    dept_k = int(dept_k)
    sen_k = int(sen_k)

    # Predictions below this confidence get a " low confidence" suffix.
    low_conf_threshold = 0.40

    # A) Department (MLP + cosine explain)
    dept_label, dept_conf, dept_topk = predict_department_with_explain(text, k=dept_k)
    dept_warn = " low confidence" if dept_conf < low_conf_threshold else ""
    dept_summary = f"Predicted Department: {dept_label} (conf={dept_conf:.3f}){dept_warn}"
    dept_df = pd.DataFrame(dept_topk, columns=["Domain (prototype)", "Cosine similarity"])

    # B) Seniority (DistilBERT + prob distribution explain)
    sen_label, sen_conf, sen_df = predict_seniority_with_probs(text, topk=sen_k)
    sen_warn = " low confidence" if sen_conf < low_conf_threshold else ""
    sen_summary = f"Predicted Seniority: {sen_label} (conf={sen_conf:.3f}){sen_warn}"

    return dept_summary, dept_df, sen_summary, sen_df

# Input widgets, in the positional order of ui_predict(text, dept_k, sen_k).
demo_inputs = [
    gr.Textbox(lines=2, label="LinkedIn position text"),
    gr.Slider(3, 10, value=5, step=1, label="Department explainability: Top-K cosine similarities"),
    gr.Slider(2, 5, value=3, step=1, label="Seniority explainability: Top-K probabilities"),
]

# Output widgets, in the order of the tuple returned by ui_predict:
# department summary + table, then seniority summary + table.
demo_outputs = [
    gr.Textbox(label="Department prediction"),
    gr.Dataframe(label="Explainability (Department): Top-K cosine similarities", interactive=False),
    gr.Textbox(label="Seniority prediction"),
    gr.Dataframe(label="Explainability (Seniority): Top-K probability distribution", interactive=False),
]

demo = gr.Interface(
    fn=ui_predict,
    inputs=demo_inputs,
    outputs=demo_outputs,
    title="LinkedIn Classifier (Department + Seniority)",
    description="Prototype demo for SnapAddy capstone with embedding-based explainability (dept) and probability-based explainability (seniority).",
)

# Start the web UI for the interface defined above.
demo.launch()


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://1a80b083ae57162dab.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


