In [None]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch
from sentence_transformers import SentenceTransformer, util
import re

device = 'cuda' if torch.cuda.is_available() else 'cpu'


In [None]:
embedder = SentenceTransformer('sentence-transformers/all-mpnet-base-v2', device=device)


In [None]:
model_name = "MBZUAI/LaMini-Flan-T5-783M"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)

In [None]:
# -------------------------
# Context Retriever
# -------------------------
def retrieve_context(query, docs, top_k=2):
    query_embedding = embedder.encode(query, convert_to_tensor=True)
    doc_embeddings = embedder.encode(docs, convert_to_tensor=True)
    similarities = util.pytorch_cos_sim(query_embedding, doc_embeddings)[0]
    top_results = torch.topk(similarities, k=top_k)
    retrieved = [docs[i] for i in top_results.indices]
    return ' '.join(retrieved)

In [23]:
def evaluate_answer_with_comment_and_score(question, answer, context):
    prompt = f"""You are a technical interviewer evaluating a candidate's response.
You are given the job context, the interview question, and the candidate's answer.

Job Context:
{context}

Question:
{question}

Answer:
{answer}

Write a short evaluation comment (1–2 sentences) about the quality of the candidate's answer and then give a score from 1 to 10.

Format:
Comment: <your comment>
Score: <1–10>
"""

    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, padding=True).to(device)
    output = model.generate(**inputs, max_new_tokens=150)
    decoded = tokenizer.decode(output[0], skip_special_tokens=True)
    
    print (f"Decoded output: {decoded}")
    
    # # 🌟 Regex-based extraction
    # score_match = re.search(r'(\b\d{1,2}\b)\s*(?:out of 10|/10)?', decoded)
    # score = int(score_match.group(1)) if score_match else None
    
    # # 2. Remove the score sentence to get the comment
    # comment_part = re.split(r'I would rate.*|I’d rate.*|Score:.*', decoded, flags=re.IGNORECASE)
    # comment = comment_part[0].strip() if comment_part else "N/A"
    
    # print (f"Extracted comment: {comment}")
    # print (f"Extracted score: {score}")

     # Try format: Comment: ... Score: ...
    comment_match = re.search(r"Comment:\s*(.*?)\s*Score:", decoded, re.DOTALL)
    score_match = re.search(r"Score:\s*(\d{1,2})", decoded)

    if comment_match and score_match:
        comment = comment_match.group(1).strip()
        score = int(score_match.group(1))
        return comment, score

    # Try fallback: free-text sentence like "I would rate the candidate's answer a 9 out of 10."
    score_match = re.search(r"\b(?:rate.*?|\bscore.*?)(\d{1,2})\s*(?:out of 10|/10)?", decoded, re.IGNORECASE)
    score = int(score_match.group(1)) if score_match else None

    # Extract first sentence as comment, excluding rating phrases
    rating_phrases = ["I would rate", "I'd rate", "Score:", "I give", "My rating is"]
    for phrase in rating_phrases:
        decoded = decoded.replace(phrase, "")

    # Take first 1–2 sentences as comment
    sentences = re.split(r'[.!?]\s+', decoded.strip())
    comment = ". ".join(sentences[:2]).strip()

    print (f"Extracted comment: {comment}")
    print (f"Extracted score: {score}")

    return comment, score


In [26]:
# -------------------------
# Job Docs (RAG)
# -------------------------
job_docs = [
    "Python scripting, automation, data analysis using Pandas, NumPy, Matplotlib.",
    "Experience with machine learning frameworks like scikit-learn, TensorFlow, Keras.",
    "Object-oriented programming, version control, API interaction."
]

# -------------------------
# Questions & Answers
# -------------------------
questions = [
    "Can you tell me about your experience with Python?",
    "Describe your experience with machine learning.",
    "How do you approach debugging complex software issues?"
]

#  """I first reproduce the issue, review logs, and isolate the failing component. 
#     Then I use tools like `pdb`, print statements, and logging. 
#     If it's async or multi-threaded, I use `threading` and `concurrent.futures` to track flow. 
#     I write unit tests to prevent regression."""

candidate_answers = [
    """My experience with Python is quite extensive, spanning several years across various domains. 
    I've used Python for scripting, automation, and data analysis, leveraging libraries like Pandas, NumPy, and Matplotlib. 
    I also have experience with web frameworks like Flask and Django for building RESTful APIs. 
    My work often involves writing clean, maintainable code and adhering to best practices in software development. 
    Additionally, I am familiar with version control systems like Git and have contributed to open-source projects.""",

    """I’ve worked on multiple ML projects. One involved customer churn prediction using Random Forest and XGBoost. 
    I handled data cleaning, feature engineering, and model tuning. 
    I’ve also built NLP pipelines for sentiment analysis using TF-IDF + Logistic Regression and LSTM. 
    I prefer TensorFlow and scikit-learn for most projects.""",

   """My knowledge on this subject is very limited."""
]

In [27]:
# -------------------------
# Main Evaluation Loop
# -------------------------
results = []

for q, a in zip(questions, candidate_answers):
    ctx = retrieve_context(q, job_docs)
    comment, score = evaluate_answer_with_comment_and_score(q, a, ctx)
    results.append({
        "question": q,
        "answer": a,
        "context": ctx,
        "evaluation_comment": comment,
        "score": score
    })

Decoded output: I would rate the candidate's answer a 9 out of 10. The candidate's experience with Python is extensive and demonstrates their proficiency in various programming languages. They have a strong understanding of Python scripting, automation, and data analysis, and have experience with machine learning frameworks like scikit-learn, TensorFlow, and Keras. The candidate's work often involves writing clean, maintainable code and adhering to best practices in software development. Additionally, they are familiar with version control systems like Git and have contributed to open-source projects.
Extracted comment: the candidate's answer a 9 out of 10. The candidate's experience with Python is extensive and demonstrates their proficiency in various programming languages
Extracted score: 9
Decoded output: I would rate the candidate's answer as a 9 out of 10. The candidate's experience with machine learning is impressive and they have a strong understanding of the tools and techniqu

In [25]:
# -------------------------
# Print Results
# -------------------------
for idx, res in enumerate(results, 1):
    print("=" * 60)
    print(f"🔢 Question {idx}: {res['question']}\n")
    print(f"📝 Candidate Answer:\n{res['answer'].strip()}\n")
    print(f"📄 Retrieved Context:\n{res['context']}\n")
    print(f"🧠 Evaluation Comment:\n{res['evaluation_comment']}\n")
    print(f"📊 Score: {res['score']}\n")

🔢 Question 1: Can you tell me about your experience with Python?

📝 Candidate Answer:
My experience with Python is quite extensive, spanning several years across various domains. 
    I've used Python for scripting, automation, and data analysis, leveraging libraries like Pandas, NumPy, and Matplotlib. 
    I also have experience with web frameworks like Flask and Django for building RESTful APIs. 
    My work often involves writing clean, maintainable code and adhering to best practices in software development. 
    Additionally, I am familiar with version control systems like Git and have contributed to open-source projects.

📄 Retrieved Context:
Python scripting, automation, data analysis using Pandas, NumPy, Matplotlib. Experience with machine learning frameworks like scikit-learn, TensorFlow, Keras.

🧠 Evaluation Comment:
the candidate's answer a 9 out of 10. The candidate's experience with Python is extensive and demonstrates their proficiency in various programming languages

📊 