In [3]:
import pandas as pd

# Read the file
df = pd.read_csv("/content/drive/MyDrive/Interview Analysis Sample  - Sheet1.csv")  # for CSV

In [2]:
# interview_outcome_prediction/main.py
# === IMPORTS === #
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import xgboost as xgb
import lightgbm as lgb

# === LOAD AND CLEAN DATA === #
# Read the raw interview data
df.columns = df.columns.str.strip().str.lower().str.replace(" ", "_")  # Clean column names

# Convert time fields to datetime objects for processing
df["start_time"] = pd.to_datetime(df["start_time"])
df["end_time"] = pd.to_datetime(df["end_time"])

# === FEATURE ENGINEERING === #
# Aggregate session-level metadata like number of questions, unique topics, and panel count
summary = df.groupby("session").agg({
    "start_time": "first",
    "end_time": "first",
    "question": "count",
    "topic": pd.Series.nunique,
    "panel": pd.Series.nunique
}).reset_index()

# Rename aggregated columns
summary.columns = ["session", "start_time", "end_time", "num_questions", "unique_topics", "panel_count"]

# Compute interview duration in minutes
summary["duration_mins"] = (summary["end_time"] - summary["start_time"]).dt.total_seconds() / 60

# Calculate average question length per session
df["question_length"] = df["question"].apply(lambda x: len(str(x).split()))
avg_len = df.groupby("session")["question_length"].mean().reset_index(name="avg_question_length")
summary = summary.merge(avg_len, on="session")

# === SIMULATE LABELS FOR PHASE A === #
# Simulate verdicts randomly for binary classification
np.random.seed(42)
verdicts = ["Yes", "WeakYes", "No"]
summary["verdict"] = np.random.choice(verdicts, size=len(summary))
summary["target"] = summary["verdict"].apply(lambda x: 0 if x == "No" else 1)

# === PHASE A - BINARY CLASSIFICATION === #
# Prepare features and target
y = summary["target"]
X = summary[["num_questions", "unique_topics", "panel_count", "duration_mins", "avg_question_length"]]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Train XGBoost for binary classification
model_xgb = xgb.XGBClassifier(use_label_encoder=False, eval_metric='logloss')
model_xgb.fit(X_train, y_train)
print("[XGBoost Phase A]")
print(classification_report(y_test, model_xgb.predict(X_test)))

# Train LightGBM for binary classification
model_lgb = lgb.LGBMClassifier()
model_lgb.fit(X_train, y_train)
print("[LightGBM Phase A]")
print(classification_report(y_test, model_lgb.predict(X_test)))

# === PHASE B - MULTICLASS STAGE OUTCOME === #
# Simulate outcome stage for multiclass prediction (e.g. R1 reject, R2, R3, Accept)
summary["stage_outcome"] = np.random.choice([0, 1, 2, 3], size=len(summary), p=[0.3, 0.2, 0.2, 0.3])
y_multi = summary["stage_outcome"]
X_train_m, X_test_m, y_train_m, y_test_m = train_test_split(X, y_multi, test_size=0.3, random_state=42)

# XGBoost multiclass classification
model_xgb_m = xgb.XGBClassifier(objective='multi:softprob', num_class=4, use_label_encoder=False, eval_metric='mlogloss')
model_xgb_m.fit(X_train_m, y_train_m)
print("[XGBoost Phase B]")
print(classification_report(y_test_m, model_xgb_m.predict(X_test_m)))

# LightGBM multiclass classification
model_lgb_m = lgb.LGBMClassifier(objective='multiclass', num_class=4)
model_lgb_m.fit(X_train_m, y_train_m)
print("[LightGBM Phase B]")
print(classification_report(y_test_m, model_lgb_m.predict(X_test_m)))

# === PHASE C SIMULATION: PANELIST STRICTNESS + INTERVIEW ADJUSTMENT === #
# Simulate strictness for each panelist (based on prior behavior if available)
panel_strictness = df.groupby("panel").size().reset_index(name="interview_count")
panel_strictness["strictness"] = np.clip(np.random.normal(0.6, 0.2, len(panel_strictness)), 0, 1)

# Determine main panelist per session (for merging strictness score)
def safe_mode(series):
    mode_vals = series.mode()
    return mode_vals[0] if not mode_vals.empty else np.nan

top_panels = df.groupby("session")["panel"].agg(safe_mode).reset_index(name="main_panel")

# === PHASE C EXTENSION: SIMULATE CANDIDATE BEHAVIOR === #
# Simulate interviewee behavior metrics: confidence, speed, correctness
np.random.seed(99)
summary["confidence_score"] = np.clip(np.random.normal(loc=0.7, scale=0.15, size=len(summary)), 0, 1)
summary["answer_speed"] = np.clip(np.random.normal(loc=30, scale=10, size=len(summary)), 5, 90)
summary["correctness_prob"] = np.clip(np.random.normal(loc=0.65, scale=0.2, size=len(summary)), 0, 1)

# Merge strictness score with session-level summary
summary = summary.merge(top_panels.merge(panel_strictness, left_on="main_panel", right_on="panel", how="left")[["session", "strictness"]], on="session", how="left")

# === FINAL PHASE C MODEL TRAINING WITH PANELIST + CANDIDATE FEATURES === #
X_phase_c = summary[[
    "num_questions", "unique_topics", "panel_count", "duration_mins",
    "avg_question_length", "strictness", "confidence_score", "answer_speed", "correctness_prob"
]]
y_phase_c = summary["stage_outcome"]
X_train_c, X_test_c, y_train_c, y_test_c = train_test_split(X_phase_c, y_phase_c, test_size=0.3, random_state=42)

# XGBoost Phase C model
model_xgb_c = xgb.XGBClassifier(objective='multi:softprob', num_class=4, use_label_encoder=False, eval_metric='mlogloss')
model_xgb_c.fit(X_train_c, y_train_c)
print("[XGBoost Phase C - Full Features]")
print(classification_report(y_test_c, model_xgb_c.predict(X_test_c)))

# LightGBM Phase C model
model_lgb_c = lgb.LGBMClassifier(objective='multiclass', num_class=4)
model_lgb_c.fit(X_train_c, y_train_c)
print("[LightGBM Phase C - Full Features]")
print(classification_report(y_test_c, model_lgb_c.predict(X_test_c)))

Parameters: { "use_label_encoder" } are not used.



[XGBoost Phase A]
              precision    recall  f1-score   support

           0       0.33      0.20      0.25       259
           1       0.69      0.82      0.75       577

    accuracy                           0.63       836
   macro avg       0.51      0.51      0.50       836
weighted avg       0.58      0.63      0.60       836

[LightGBM] [Info] Number of positive: 1318, number of negative: 630
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000169 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 314
[LightGBM] [Info] Number of data points in the train set: 1948, number of used features: 5
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.676591 -> initscore=0.738151
[LightGBM] [Info] Start training from score 0.738151
[LightGBM Phase A]
              precision    recall  f1-score   support

           0       0.36      0.15      0.21       259
           1       0.70      0.88      0.

Parameters: { "use_label_encoder" } are not used.



[XGBoost Phase B]
              precision    recall  f1-score   support

           0       0.34      0.38      0.36       263
           1       0.19      0.18      0.19       154
           2       0.18      0.14      0.15       182
           3       0.30      0.32      0.31       237

    accuracy                           0.28       836
   macro avg       0.25      0.26      0.25       836
weighted avg       0.27      0.28      0.27       836

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000169 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 314
[LightGBM] [Info] Number of data points in the train set: 1948, number of used features: 5
[LightGBM] [Info] Start training from score -1.206371
[LightGBM] [Info] Start training from score -1.600749
[LightGBM] [Info] Start training from score -1.575622
[LightGBM] [Info] Start training from score -1.230678
[LightGBM Phase B]
              precision   

Parameters: { "use_label_encoder" } are not used.



[XGBoost Phase C - Full Features]
              precision    recall  f1-score   support

           0       0.32      0.38      0.35       263
           1       0.22      0.21      0.22       154
           2       0.23      0.14      0.18       182
           3       0.32      0.35      0.33       237

    accuracy                           0.29       836
   macro avg       0.27      0.27      0.27       836
weighted avg       0.28      0.29      0.28       836

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000123 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1211
[LightGBM] [Info] Number of data points in the train set: 1948, number of used features: 9
[LightGBM] [Info] Start training from score -1.206371
[LightGBM] [Info] Start training from score -1.600749
[LightGBM] [Info] Start training from score -1.575622
[LightGBM] [Info] 