In [1]:
import os

import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, roc_auc_score

In [3]:
# Location of the resume dataset. The default is the original author's local
# path; set the RESUME_CSV_PATH environment variable to run the notebook on
# another machine without editing this cell.
DATA_PATH = os.environ.get(
    "RESUME_CSV_PATH",
    '/Users/mr.tian/Desktop/AI_Resume_Screening.csv',
)
df = pd.read_csv(DATA_PATH)

In [11]:
# Preview the first five rows of the raw frame.
df.head(5)

Unnamed: 0,Resume_ID,Name,Skills,Experience (Years),Education,Certifications,Job Role,Recruiter Decision,Salary Expectation ($),Projects Count,AI Score (0-100)
0,1,Ashley Ali,"TensorFlow, NLP, Pytorch",10,B.Sc,,AI Researcher,Hire,104895,8,100
1,2,Wesley Roman,"Deep Learning, Machine Learning, Python, SQL",10,MBA,Google ML,Data Scientist,Hire,113002,1,100
2,3,Corey Sanchez,"Ethical Hacking, Cybersecurity, Linux",1,MBA,Deep Learning Specialization,Cybersecurity Analyst,Hire,71766,7,70
3,4,Elizabeth Carney,"Python, Pytorch, TensorFlow",7,B.Tech,AWS Certified,AI Researcher,Hire,46848,0,95
4,5,Julie Hill,"SQL, React, Java",4,PhD,,Software Engineer,Hire,87441,9,100


In [5]:
# Restrict the analysis to a single job role; .copy() detaches the subset
# from df so later column assignments do not touch the raw frame.
role = "Data Scientist"
is_target_role = df["Job Role"].eq(role)
df_role = df.loc[is_target_role].copy()

In [7]:
# Binary target: 1 where the recruiter decision is "Hire", 0 otherwise.
y = df_role["Recruiter Decision"].eq("Hire").astype(int)


In [13]:
# Text feature: skills followed by certifications, separated by one space.
# Missing values are replaced by empty strings before concatenation.
skills_text = df_role["Skills"].fillna("")
certifications_text = df_role["Certifications"].fillna("")
X_text = skills_text + " " + certifications_text

In [17]:
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# repeatable between runs.
split_parts = train_test_split(X_text, y, test_size=0.3, random_state=42)
X_train_text, X_test_text, y_train, y_test = split_parts

# Row labels of the held-out set, used later to pull the matching rows
# out of df_role.
test_index = X_test_text.index

In [19]:
# TF-IDF over unigrams and bigrams, English stop words removed,
# vocabulary capped at 5000 terms.
tfidf = TfidfVectorizer(
    ngram_range=(1, 2),
    stop_words="english",
    max_features=5000,
)

# Fit the vocabulary on the training text only, then reuse it for the test text.
X_train_tfidf = tfidf.fit_transform(X_train_text)
X_test_tfidf = tfidf.transform(X_test_text)

In [21]:
# Baseline classifier: logistic regression with default settings apart
# from a higher iteration cap.
nlp_model = LogisticRegression(max_iter=1000).fit(X_train_tfidf, y_train)

# Hard labels and positive-class probabilities on the held-out set.
y_prob_nlp = nlp_model.predict_proba(X_test_tfidf)[:, 1]
y_pred_nlp = nlp_model.predict(X_test_tfidf)

baseline_accuracy = accuracy_score(y_test, y_pred_nlp)
baseline_auc = roc_auc_score(y_test, y_prob_nlp)
print("Accuracy:", baseline_accuracy)
print("AUC:", baseline_auc)

Accuracy: 0.8311688311688312
AUC: 0.5709134615384615


In [29]:
# Evaluation frame: the held-out rows of df_role plus the model's predictions.
df_eval_nlp = df_role.loc[test_index].copy()
df_eval_nlp["y_pred"] = y_pred_nlp

# Derive the experience band here. When the notebook is run top to bottom,
# df_role does not yet carry an "Experience Level" column at this point, so
# the groupby("Experience Level") calls that follow would raise KeyError.
# Cut-offs mirror exp_bucket: < 3 -> Junior, < 7 -> Mid, everything else
# (including missing values) -> Senior.
years_of_experience = df_eval_nlp["Experience (Years)"]
df_eval_nlp["Experience Level"] = np.select(
    [years_of_experience < 3, years_of_experience < 7],
    ["Junior", "Mid"],
    default="Senior",
)

In [27]:
def exp_bucket(x):
    """Map years of experience to a seniority band.

    Returns "Junior" for values below 3, "Mid" for values below 7 and
    "Senior" for anything else (a value for which both comparisons are
    false, such as NaN, also lands in "Senior").
    """
    if x < 3:
        return "Junior"
    if x < 7:
        return "Mid"
    return "Senior"

# Attach the seniority band to every row of the role-specific frame.
df_role['Experience Level'] = df_role['Experience (Years)'].map(exp_bucket)

In [31]:
# Selection rate = share of candidates predicted 1 within each experience band,
# listed from highest to lowest.
predictions_by_level = df_eval_nlp.groupby("Experience Level")["y_pred"]
selection_rate_nlp = predictions_by_level.mean().sort_values(ascending=False)

selection_rate_nlp

Experience Level
Junior    1.0
Mid       1.0
Senior    1.0
Name: y_pred, dtype: float64

In [33]:
# Compare every band's selection rate against the "Mid" band.
baseline_group = "Mid"
baseline_rate = selection_rate_nlp.loc[baseline_group]

# Difference from and ratio to the baseline selection rate.
dp_diff_nlp = selection_rate_nlp.sub(baseline_rate)
disparate_impact_nlp = selection_rate_nlp.div(baseline_rate)

audit_columns = {
    "Selection Rate": selection_rate_nlp,
    "DP Difference": dp_diff_nlp,
    "Disparate Impact": disparate_impact_nlp,
}
audit_nlp = pd.DataFrame(audit_columns).round(3)

audit_nlp

Unnamed: 0_level_0,Selection Rate,DP Difference,Disparate Impact
Experience Level,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Junior,1.0,0.0,1.0
Mid,1.0,0.0,1.0
Senior,1.0,0.0,1.0


In [37]:
# Second model: stronger regularisation (C=0.5) and class weights inversely
# proportional to class frequency. Replaces the baseline bound to nlp_model.
balanced_lr_params = dict(
    C=0.5,
    class_weight="balanced",
    solver="liblinear",
    max_iter=1000,
)
nlp_model = LogisticRegression(**balanced_lr_params)

nlp_model.fit(X_train_tfidf, y_train)

0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,0.5
,fit_intercept,True
,intercept_scaling,1
,class_weight,'balanced'
,random_state,
,solver,'liblinear'
,max_iter,1000


In [39]:
# Score the held-out set with the refitted model and cut at a trial threshold.
threshold = 0.6
y_prob_nlp = nlp_model.predict_proba(X_test_tfidf)[:, 1]
meets_threshold = y_prob_nlp >= threshold
y_pred_nlp = meets_threshold.astype(int)

print("Accuracy:", accuracy_score(y_test, y_pred_nlp))
print("AUC:", roc_auc_score(y_test, y_prob_nlp))

Accuracy: 0.2727272727272727
AUC: 0.563701923076923


In [41]:
from sklearn.metrics import confusion_matrix, classification_report

# Error breakdown for the thresholded predictions: raw counts first,
# then per-class precision / recall / F1.
test_confusion = confusion_matrix(y_test, y_pred_nlp)
test_report = classification_report(y_test, y_pred_nlp)
print(test_confusion)
print(test_report)

[[11  2]
 [54 10]]
              precision    recall  f1-score   support

           0       0.17      0.85      0.28        13
           1       0.83      0.16      0.26        64

    accuracy                           0.27        77
   macro avg       0.50      0.50      0.27        77
weighted avg       0.72      0.27      0.27        77



In [43]:
# Accuracy on the held-out set at a handful of candidate cut-offs.
candidate_thresholds = [0.4, 0.5, 0.6, 0.7]
for t in candidate_thresholds:
    y_tmp = (y_prob_nlp >= t).astype(int)
    sweep_accuracy = accuracy_score(y_test, y_tmp)
    print(f"\nThreshold = {t}")
    print("Accuracy:", sweep_accuracy)


Threshold = 0.4
Accuracy: 0.7532467532467533

Threshold = 0.5
Accuracy: 0.5974025974025974

Threshold = 0.6
Accuracy: 0.2727272727272727

Threshold = 0.7
Accuracy: 0.18181818181818182


In [45]:
# Store the positive-class probability next to each held-out candidate.
df_eval_nlp = df_eval_nlp.assign(hire_probability=y_prob_nlp)

In [47]:
def decision_label(p, threshold=0.6, margin=0.1):
    """Translate a hire probability into a recruiter-facing recommendation.

    Parameters
    ----------
    p : float
        Predicted probability of the positive ("Hire") class.
    threshold : float, default 0.6
        Minimum probability for "Recommend Interview".
    margin : float, default 0.1
        Width of the "Borderline" band directly below ``threshold``.

    Returns
    -------
    str
        "Recommend Interview" when ``p >= threshold``, "Borderline" when
        ``p`` is within ``margin`` below the threshold, otherwise "Unlikely".
    """
    # Round the lower edge of the band: binary floating point makes e.g.
    # 0.4 - 0.1 evaluate to 0.30000000000000004, which would push a
    # probability of exactly 0.3 into "Unlikely".
    borderline_floor = round(threshold - margin, 10)
    if p >= threshold:
        return "Recommend Interview"
    if p >= borderline_floor:
        return "Borderline"
    return "Unlikely"
    
# Label every held-out candidate using decision_label's default threshold.
df_eval_nlp["decision_label"] = df_eval_nlp["hire_probability"].map(decision_label)

In [49]:
# Mean predicted hire probability per experience band...
group_avg = df_eval_nlp.groupby("Experience Level")["hire_probability"].mean()

# ...broadcast back onto each candidate row via its band.
candidate_level = df_eval_nlp["Experience Level"]
df_eval_nlp["group_avg_probability"] = candidate_level.map(group_avg)

In [51]:
# Operating threshold for the final predictions. 0.4 gave the highest accuracy
# of the four cut-offs tried in the sweep above (measured on the test split).
THRESHOLD = 0.4

passes_threshold = y_prob_nlp >= THRESHOLD
y_pred_nlp = passes_threshold.astype(int)

In [53]:
# Refresh the evaluation frame with the current probabilities and the
# predictions made at THRESHOLD.
df_eval_nlp = df_eval_nlp.assign(
    hire_probability=y_prob_nlp,
    y_pred=y_pred_nlp,
)

In [55]:
def decision_label(p, threshold=0.4, margin=0.1):
    """Map a predicted hire probability to one of three recommendation labels.

    Parameters
    ----------
    p : float
        Predicted probability of the positive ("Hire") class.
    threshold : float, default 0.4
        Probabilities at or above this value yield "Recommend Interview".
    margin : float, default 0.1
        Probabilities less than ``margin`` below ``threshold`` (edge
        included) yield "Borderline".

    Returns
    -------
    str
        "Recommend Interview", "Borderline" or "Unlikely".
    """
    # 0.4 - 0.1 is 0.30000000000000004 in binary floating point, so without
    # rounding a probability of exactly 0.3 would miss the "Borderline" band
    # at the default threshold.
    lower_edge = round(threshold - margin, 10)
    if p >= threshold:
        label = "Recommend Interview"
    elif p >= lower_edge:
        label = "Borderline"
    else:
        label = "Unlikely"
    return label

# Pass THRESHOLD explicitly so the labels always use the same cut-off as
# y_pred, instead of relying on decision_label's literal default.
df_eval_nlp["decision_label"] = df_eval_nlp["hire_probability"].apply(
    decision_label, threshold=THRESHOLD
)

In [57]:
def exp_bucket(x):
    """Return the seniority band for a number of years of experience.

    Bands are checked in ascending order: below 3 is "Junior", below 7 is
    "Mid"; any value matching neither upper bound is "Senior".
    """
    upper_bounds = ((3, "Junior"), (7, "Mid"))
    for limit, band in upper_bounds:
        if x < limit:
            return band
    return "Senior"

# Band each held-out candidate by years of experience.
df_eval_nlp["Experience Level"] = df_eval_nlp["Experience (Years)"].map(exp_bucket)

In [59]:
# Share of candidates predicted 1 in each experience band, using the
# predictions currently stored in y_pred.
y_pred_by_level = df_eval_nlp.groupby("Experience Level")["y_pred"]
selection_rate_nlp = y_pred_by_level.mean()

selection_rate_nlp

Experience Level
Junior    0.947368
Mid       0.866667
Senior    0.892857
Name: y_pred, dtype: float64

In [61]:
# Fairness audit relative to the "Mid" band.
baseline_group = "Mid"
baseline_rate = selection_rate_nlp.loc[baseline_group]

# Gap to, and ratio against, the baseline band's selection rate.
dp_difference = selection_rate_nlp.sub(baseline_rate)
disparate_impact = selection_rate_nlp.div(baseline_rate)

audit_table = pd.DataFrame({
    "Selection Rate": selection_rate_nlp,
    "DP Difference": dp_difference,
    "Disparate Impact": disparate_impact,
})
audit_nlp = audit_table.round(3)

audit_nlp

Unnamed: 0_level_0,Selection Rate,DP Difference,Disparate Impact
Experience Level,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Junior,0.947,0.081,1.093
Mid,0.867,0.0,1.0
Senior,0.893,0.026,1.03


In [63]:
# Average predicted hire probability within each experience band.
group_avg_prob = df_eval_nlp.groupby("Experience Level")["hire_probability"].mean()

# Give every candidate the average of their own band for comparison.
band_per_candidate = df_eval_nlp["Experience Level"]
df_eval_nlp["group_avg_probability"] = band_per_candidate.map(group_avg_prob)

In [65]:
def fairness_context(row):
    """Describe a candidate's probability relative to their group's average.

    ``row`` must support lookup of "hire_probability" and
    "group_avg_probability". Returns "Above group average" when the
    candidate's probability is greater than or equal to the group value,
    otherwise "Below group average".
    """
    at_or_above = row["hire_probability"] >= row["group_avg_probability"]
    return "Above group average" if at_or_above else "Below group average"

# Evaluate fairness_context once per candidate row.
df_eval_nlp["fairness_context"] = df_eval_nlp.apply(fairness_context, axis="columns")

In [67]:
# Columns shown to the reviewer, in display order.
report_columns = [
    "Name",
    "Experience (Years)",
    "Experience Level",
    "Skills",
    "Certifications",
    "hire_probability",
    "decision_label",
    "group_avg_probability",
    "fairness_context",
]

# Rank candidates from most to least likely hire.
final_output = df_eval_nlp[report_columns].sort_values(
    "hire_probability", ascending=False
)

final_output.head(10)

Unnamed: 0,Name,Experience (Years),Experience Level,Skills,Certifications,hire_probability,decision_label,group_avg_probability,fairness_context
284,Tara Williams,7,Senior,"Python, Deep Learning",,0.727581,Recommend Interview,0.523789,Above group average
379,Mr. Keith Smith,2,Junior,"SQL, Python, Deep Learning, Machine Learning",Google ML,0.675093,Recommend Interview,0.524144,Above group average
378,Zachary Lynch,8,Senior,"SQL, Python",Deep Learning Specialization,0.653444,Recommend Interview,0.523789,Above group average
173,Phillip Shaw,9,Senior,"SQL, Python",Deep Learning Specialization,0.653444,Recommend Interview,0.523789,Above group average
421,Stephen Mason,2,Junior,"Python, Deep Learning",AWS Certified,0.628502,Recommend Interview,0.524144,Above group average
270,Amy Poole,0,Junior,"Machine Learning, SQL, Python",Google ML,0.628134,Recommend Interview,0.524144,Above group average
959,Evan Rodriguez,3,Mid,"Machine Learning, SQL, Python",Google ML,0.628134,Recommend Interview,0.489525,Above group average
554,Frank Hale,1,Junior,"Machine Learning, Python, Deep Learning, SQL",Google ML,0.61947,Recommend Interview,0.524144,Above group average
304,Tina Howard,6,Mid,"SQL, Python, Machine Learning",AWS Certified,0.61177,Recommend Interview,0.489525,Above group average
429,Karen Wright,10,Senior,"SQL, Python, Machine Learning",AWS Certified,0.61177,Recommend Interview,0.523789,Above group average


In [69]:
# Export both result tables. The audit table keeps its index (the experience
# band); the ranked candidate list is written without its row index.
audit_nlp.to_csv("fairness_audit_results.csv")
final_output.to_csv("nlp_resume_screening_results.csv", index=False)

In [71]:
import joblib

# Persist the fitted classifier and the fitted vectorizer as separate files;
# the vectorizer dump stays last so its return value is the cell output.
joblib.dump(value=nlp_model, filename="nlp_resume_model.pkl")
joblib.dump(value=tfidf, filename="tfidf_vectorizer.pkl")

['tfidf_vectorizer.pkl']

In [73]:
import json

# Record the settings alongside the saved model. The threshold is taken from
# THRESHOLD (the value used to produce y_pred) rather than repeated as a
# literal, so the saved config cannot drift from the predictions.
config = {
    "threshold": THRESHOLD,
    "model": "LogisticRegression + TFIDF",
    "task": "Resume Screening (NLP)"
}

# Explicit encoding keeps the file identical across platforms.
with open("model_config.json", "w", encoding="utf-8") as f:
    json.dump(config, f, indent=2)