In [5]:
import json
import torch
from transformers import pipeline, AutoTokenizer, AutoModel, AutoModelForSeq2SeqLM
from sentence_transformers import SentenceTransformer, util

# Run on the GPU when PyTorch reports one, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# -------------------------
# Sentence-embedding model (used to rank job docs against a question)
# -------------------------
embedder = SentenceTransformer('all-MiniLM-L6-v2', device=device)

# -------------------------
# Small evaluator model (FLAN-T5) and its tokenizer
# -------------------------
evaluator_tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-small")
evaluator_model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-small")
# Move the weights onto the same device the tokenized inputs are sent to
evaluator_model = evaluator_model.to(device)

# -------------------------
# Example Job Docs (RAG)
# -------------------------
# Short free-text descriptions of the skills the job calls for.
# The main loop passes this list to retrieve_context(), which ranks the
# entries against each interview question to build the evaluator's context.
job_docs = [
    "Python scripting, automation, data analysis using Pandas, NumPy, Matplotlib.",
    "Experience with machine learning frameworks like scikit-learn, TensorFlow, Keras.",
    "Object-oriented programming, version control, API interaction."
]

# -------------------------
# Questions & Answers
# -------------------------
# Interview questions, in the order they are evaluated.
# Paired by position with candidate_answers (the main loop zips the two lists).
questions = [
    "Can you tell me about your experience with Python?",
    "Describe your experience with machine learning.",
    "How do you approach debugging complex software issues?"
]

# Candidate answers; candidate_answers[i] answers questions[i].
# NOTE: inside the triple-quoted strings, paragraphs after the first are
# indented four spaces, and that whitespace is part of the answer text
# that gets sent to the evaluator and written to the results.
candidate_answers = [
    """My experience with Python is quite extensive, spanning several years across various domains. I've primarily leveraged Python for its versatility in data analysis, machine learning, and automation.

    In data analysis, I'm proficient with libraries like NumPy, Pandas, and Matplotlib. I've used them to clean, transform, analyze, and visualize complex datasets, extracting meaningful insights to inform decision-making.

    For machine learning, I've worked extensively with scikit-learn, TensorFlow, and Keras. This includes building and deploying models for tasks such as classification, regression, clustering, and natural language processing. I'm comfortable with the entire ML pipeline, from data preprocessing and feature engineering to model training, evaluation, and hyperparameter tuning.

    Beyond data science, I have strong experience in scripting and automation using Python. I've developed scripts for file manipulation, web scraping, API interactions, and automating repetitive tasks, significantly improving efficiency. I'm also familiar with object-oriented programming principles in Python and have experience with version control systems like Git.""",
    
    """I’ve worked on multiple ML projects. One involved customer churn prediction using Random Forest and XGBoost. I handled data cleaning, feature engineering, and model tuning.

    I’ve also built NLP pipelines for sentiment analysis using TF-IDF + Logistic Regression and LSTM. I prefer TensorFlow and scikit-learn for most projects.""",

    """I first reproduce the issue, review logs, and isolate the failing component. Then I use tools like `pdb`, print statements, and logging. If it's async or multi-threaded, I use `threading` and `concurrent.futures` to track flow. I write unit tests to prevent regression."""
]

# -------------------------
# RAG Function
# -------------------------
def retrieve_context(query, docs, top_k=2):
    """Return the documents most similar to ``query``, joined into one string.

    Both the query and every document are embedded with the module-level
    ``embedder``; documents are ranked by cosine similarity to the query and
    the best ``top_k`` are concatenated with single spaces, best match first.

    Args:
        query: The text to find context for (e.g. an interview question).
        docs: Sequence of candidate document strings.
        top_k: Maximum number of documents to return. If it exceeds
            ``len(docs)``, all documents are returned.

    Returns:
        The selected documents joined by ``' '``, or ``''`` when ``docs`` is
        empty.
    """
    # Nothing to rank: skip the model call instead of failing on an empty batch.
    if not docs:
        return ''

    query_embedding = embedder.encode(query, convert_to_tensor=True)
    doc_embeddings = embedder.encode(docs, convert_to_tensor=True)
    # Result has one row per query; take the single row for our one query.
    similarities = util.pytorch_cos_sim(query_embedding, doc_embeddings)[0]

    # torch.topk raises if k is larger than the number of scores, so clamp it.
    k = min(top_k, len(docs))
    top_results = torch.topk(similarities, k=k)

    # Convert tensor indices to plain ints before indexing the Python sequence.
    retrieved = [docs[i] for i in top_results.indices.tolist()]
    return ' '.join(retrieved)

# -------------------------
# Evaluation Function
# -------------------------
def evaluate_with_flan(question, answer, context):
    """Ask the FLAN-T5 evaluator to comment on and score a candidate's answer.

    Builds an interviewer-style prompt from the job context, the question and
    the answer, runs it through the module-level evaluator model, and returns
    the generated text with special tokens removed.
    """
    prompt = (
        "You are a technical interviewer.\n"
        "You are given a job context, an interview question, and a candidate's answer.\n"
        "\n"
        f"Job Context: {context}\n"
        f"Question: {question}\n"
        f"Answer: {answer}\n"
        "\n"
        "Evaluate the quality of the answer. Give a brief comment and score from 1 (poor) to 10 (excellent)."
    )

    # Tokenize (truncating over-long prompts) and move tensors to the model's device
    encoded = evaluator_tokenizer(
        prompt,
        return_tensors="pt",
        truncation=True,
        padding=True,
    )
    encoded = encoded.to(device)

    # Generate at most 100 new tokens, then decode the first (only) sequence
    generated_ids = evaluator_model.generate(**encoded, max_new_tokens=100)
    return evaluator_tokenizer.decode(generated_ids[0], skip_special_tokens=True)

# -------------------------
# Main Evaluation Loop
# -------------------------
# One record per (question, answer) pair, in question order
results = []

for q, a in zip(questions, candidate_answers):
    # Pick the job docs most relevant to this question, then score the answer
    ctx = retrieve_context(q, job_docs)
    eval_result = evaluate_with_flan(q, a, ctx)

    record = dict(question=q, answer=a, context=ctx, evaluation=eval_result)
    results.append(record)

# -------------------------
# Save to JSON
# -------------------------
# Write every evaluation record to disk as pretty-printed JSON
output_path = "interview_results.json"
with open(output_path, mode="w") as out_file:
    json.dump(results, out_file, indent=2)

print("✅ Interview evaluation complete. Results saved to 'interview_results.json'")


✅ Interview evaluation complete. Results saved to 'interview_results.json'


In [6]:
# -------------------------
# Print All Results to Terminal
# -------------------------
# Horizontal rule printed before each record
separator = "=" * 60

for idx, res in enumerate(results, start=1):
    print(separator)
    print(f"🔢 Question {idx}: {res['question']}\n")

    # Strip surrounding whitespace so the answer starts right under its heading
    answer_text = res['answer'].strip()
    print(f"📝 Candidate Answer:\n{answer_text}\n")

    print(f"📄 Retrieved Job Context:\n{res['context']}\n")

    evaluation_text = res['evaluation'].strip()
    print(f"🧠 Evaluation:\n{evaluation_text}\n")


🔢 Question 1: Can you tell me about your experience with Python?

📝 Candidate Answer:
My experience with Python is quite extensive, spanning several years across various domains. I've primarily leveraged Python for its versatility in data analysis, machine learning, and automation.

    In data analysis, I'm proficient with libraries like NumPy, Pandas, and Matplotlib. I've used them to clean, transform, analyze, and visualize complex datasets, extracting meaningful insights to inform decision-making.

    For machine learning, I've worked extensively with scikit-learn, TensorFlow, and Keras. This includes building and deploying models for tasks such as classification, regression, clustering, and natural language processing. I'm comfortable with the entire ML pipeline, from data preprocessing and feature engineering to model training, evaluation, and hyperparameter tuning.

    Beyond data science, I have strong experience in scripting and automation using Python. I've developed script