## An rag workflow for evaluating the performance of Performed interview.


In [112]:

import json
import os


def load_transcripts(file_path):
    """
    Load transcripts from a JSON file.

    Args:
        file_path (str): The path to the JSON file containing the transcripts.
    
    Returns:
        dict: A dictionary containing the transcripts.
    
    Raises:
        FileNotFoundError: If the file doesn't exist.
        json.JSONDecodeError: If the file contains invalid JSON.
    """
    try:
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"File not found: {file_path}")
        
        with open(file_path, 'r', encoding='utf-8') as f:
            transcripts = json.load(f)
            return transcripts
    
    except json.JSONDecodeError as e:
        print(f"Error parsing JSON: {e}")
        raise
    except Exception as e:
        print(f"Error loading transcripts: {e}")
        raise

print(load_transcripts(r'../../interviews/627dc248.json'))


{'tech_stack': 'html, css ,js', 'position': 'frontend developer', 'question_count': 7, 'difficulty': 'beginner', 'conversation_history': [{'role': 'interviewer', 'content': "Hello! I'm your AI interviewer for today's frontend developer interview.\n\nI see your tech stack includes: html, css ,js\n\nLet's start with something fundamental. Can you explain what html is and describe one project where you've used it effectively?"}, {'role': 'candidate', 'content': 'HTML (HyperText Markup Language) is the standard language used to structure content on the web, defining elements like headings, paragraphs, links, and images.It forms the backbone of every webpage, often paired with CSS for styling and JavaScript for interactivity.I used HTML effectively in a personal portfolio project, where I built a responsive multi-page site to showcase my development skills.It included semantic HTML tags for better accessibility and SEO.The project integrated images, forms, and navigation menus, giving a pro

In [113]:
import os
from dotenv import load_dotenv

load_dotenv()

os.environ['GOOGLE_API_KEY'] = os.getenv("GOOGLE_API_KEY")


from typing_extensions import TypedDict
from langgraph.graph import StateGraph, END,START



from schema import (
    TechnicalSkillAssessment,
    ProblemSolvingInstance,
    EvaluationWorkFlowState,
    ConfidenceLevel,
    ProficiencyLevel
)


In [114]:
def llm2_problem_solving_skill_evaluator(state:EvaluationWorkFlowState) -> EvaluationWorkFlowState:
    """Problem solving skill Evaluator"""

    print('llm2 got called')

    try:
        system_prompt = """
         You are a senior software developer specialized in evaluating problem solving abilities and implementation abilities.

        your task is to evaluate candidate's approach to a problem, ability to analyze it and quality of implemented solution.
        
        Focus on:
        1. Problem solving mehod and attempted solution
        2. Effectiveness of solution
        3. Ability to analising the problem and logical reasoning
        4. Quality of approach taken by the candidate
        5. Debugging potential and troubleshooting skills.
        6.Creativity in finding solutions

        Return your response in JSON structure following the format given below:
        {
            "problem_solving_instances":[{
                "problem_statement": "E-commerce real-time inventory challenge",
                "solution": "Implemented a live websocket based live inventory tracking system integrated with redis for fast cache updates",
                "approach_quality":9,
                "solution_effectiveness": 8,
                "reasoning_clarity":9
            }],
            "analytical_thinking_score": 7,
            "problem_solving_score:": 6,
            "debugging_potential_score": 7,
            "problem_solving_approach": "Systematic approach with consideration of real-world constraints",
            "comments_on_clarity_of_communication": "Communicates ideas clearly with well-structured explanations, though could occasionally benefit from more concise delivery."
        }
   
        Evaluate specific instances where the candidate solved problems or explained their approach."""

        human_prompt=f"""Analyze this interview conversation for problem-solving and implementation abilities:

Interview Data: {state["interview_data"]}

Return only valid JSON following the specified structure.
        
    """
        messages = [
            SystemMessage(content=system_prompt),
            HumanMessage(content=human_prompt)
        ]

        response = llm.invoke(messages)

        print("Raw LLM Response:")
        print(response.content)

        parser = JsonOutputParser()
        result = parser.parse(response.content)

        state['problem_solving_instances'] = result.get("problem_solving_instances", [])
        state['analytical_thinking_score'] = result.get("analytical_thinking_score", 0)
        state['problem_solving_score'] = result.get("problem_solving_score", 0)
        state['debugging_potential_score'] = result.get("debugging_potential_score", 0)
        state['problem_solving_approach'] = result.get("problem_solving_approach", "")
        state["comments_on_clarity_of_communication"]= result.get("comments_on_clarity_of_communication","")
        state['current_step'] = "llm2_completed"
        print("LLM2 completed successfully!")

    except Exception as e:
        print(f"Error: {e}")
        raise

    return state

In [115]:
def llm1_technical_evaluator(state: EvaluationWorkFlowState) -> EvaluationWorkFlowState:
    """LLM 1: Technical Skills Evaluator - Updates technical fields in state"""
    print('LLM1 got called') 

    # print(f'State = {state['interview_data']}')
    try:
        system_prompt = """You are a Senior Technical Interviewer specializing in evaluating technical skills and knowledge 
        depth.

Your task is to analyze the interview conversation and assess the candidate's technical competencies.

Focus on:
1. Depth of technical understanding
2. Specific skills demonstrated (HTML, CSS, JavaScript, frameworks, etc.)
3. Quality of technical explanations
4. Knowledge gaps and areas needing improvement

Return your analysis in JSON format with the following structure:
{
    "position_evaluated_for":"Frontend developer",
    "technical_skills": [
        {
            "skill_name": "JavaScript",
            "proficiency_level": "intermediate",
            "evidence": ["Explained closures correctly", "Mentioned ES6 features"],
            "confidence": "high",
            "comments": "Good understanding shown"
        }
    ],
    "technical_consistency_score": 7,
    "technical_depth_score": 6,
    "technical_knowledge_gaps": ["Advanced React patterns", "Testing frameworks"],
    "technical_strengths": ["Strong JavaScript fundamentals", "Good understanding of async programming"]
}

IMPORTANT: 
- Use "evidence" not "evidance"
- Use "confidence" not "confidence_level" 
- Always include "comments" field for each skill
- proficiency_level must be one of: "beginner", "intermediate", "advanced", "expert"
- confidence must be one of: "low", "medium", "high", "very_high"

Be thorough but fair in your assessment."""

        human_prompt = f"""Analyze this interview conversation for technical skills:

Interview Data: {state['interview_data']}

Return only valid JSON following the specified structure."""

        messages = [
            SystemMessage(content=system_prompt),
            HumanMessage(content=human_prompt)
        ]
        print('calling llm1')
        response = llm.invoke(messages)
        print("Raw LLM Response:")
        print(response.content)
        
        # Parse JSON response
        parser = JsonOutputParser()
        result = parser.parse(response.content)
        
        # Update state with technical evaluation results using bracket notation
        state["position_evaluated_for"] = result.get("position_evaluated_for","Frontend Developer")
        state['technical_skills'] = result.get("technical_skills", [])
        state['technical_consistency_score'] = result.get("technical_consistency_score", 0)
        state['technical_depth_score'] = result.get("technical_depth_score", 0)
        state['technical_knowledge_gaps'] = result.get("technical_knowledge_gaps", [])
        state['technical_strengths'] = result.get("technical_strengths", [])
        state['current_step'] = "llm1_completed"
        
        print("LLM1 completed successfully!")
        
    except KeyError as e:
        error_msg = f"LLM1 Key error - missing field: {str(e)}"
        state['errors'].append(error_msg)
        print(f"Error: {error_msg}")
        
    except json.JSONDecodeError as e:
        error_msg = f"LLM1 JSON parsing error: {str(e)}"
        state['errors'].append(error_msg)
        print(f"Error: {error_msg}")
        
    except Exception as e:
        error_msg = f"LLM1 Technical Evaluator error: {str(e)}"
        state['errors'].append(error_msg)
        print(f"Error: {error_msg}")
        
    return state




In [126]:
def aggregator(state:EvaluationWorkFlowState)->EvaluationWorkFlowState:
    """Aggregator: Synthesizes all evaluations into final comprehensive assessment"""   

    print('aggregator called')
    try:

        required_fields=[
            len(state["technical_skills"])>0,
            state["technical_consistency_score"]>0
        ]

        if not all(required_fields):
            state['errors'].append("Aggregator: Not all LLM evaluations completed successfully")
            return state


        evaluation_summary = {
            "technical_evaluation": {
                "position": state["position_evaluated_for"],
                "skills": state["technical_skills"],
                "consistency_score": state["technical_consistency_score"],
                "depth_score": state["technical_depth_score"],
                "gaps": state["technical_knowledge_gaps"],
                "strengths": state["technical_strengths"]
            },
            "problem_solving_evaluation": {
                "instances": state["problem_solving_instances"],  # Remove .dict() call
                "analytical_score": state["analytical_thinking_score"],
                "debugging_score": state["debugging_potential_score"],
                "approach": state["problem_solving_approach"],
                "overall_score": state["problem_solving_score"],
                "comments_on_clarity_of_communication": state["comments_on_clarity_of_communication"]
            },
            "original_interview": state["interview_data"],
            "position": state["position_evaluated_for"]
        }


        system_prompt="""
        You are a Senior Hiring Manager with expertise in technical recruitment and candidate assessment.

    Your task is to synthesize evaluations from three specialist areas into a comprehensive final assessment.

    Create a final evaluation that:
    - Calculates weighted overall score (0-10 scale)
    - Identifies key strengths and critical weaknesses  
    - Provides clear comments on communication clarity
    - Gives actionable recommendations




    "overall_score": 7.2,
    "key_strengths": ["Strong JavaScript fundamentals", "Good problem-solving approach"],
    "critical_weaknesses": ["Limited React experience", "No testing knowledge"],
    "evaluation_timestamp": "2024-01-15T10:30:00",
    "candidate_id": "CAND_001"
}

Be thorough, fair, and constructive. Focus only on the fields that will be used.

        """


        human_prompt = f"""Synthesize this comprehensive evaluation data into a final assessment:

        Evaluation Data: {json.dumps(evaluation_summary, indent=2)}

        Position: {state["position_evaluated_for"]}

        Calculate the overall score using these weights:
        - Technical Skills (40%): Based on technical_consistency_score and technical_depth_score
        - Problem Solving (35%): Based on analytical_thinking_score and problem_solving_score  
        - Communication (25%): Based on comments_on_clarity_of_communication
        Return only valid JSON following the specified structure."""


        messages = [
            SystemMessage(content=system_prompt),
            HumanMessage(content=human_prompt)
        ]


        response = llm.invoke(messages)
        
        # Parse JSON response
        parser = JsonOutputParser()
        result = parser.parse(response.content)
        
        print(result)

        # Update state with only fields that exist in the schema
        state["overall_score"] = result.get("overall_score", 0.0)
        state["key_strengths"] = result.get("key_strengths", [])
        state["critical_weaknesses"] = result.get("critical_weaknesses", [])
        
        # Set metadata
        from datetime import datetime
        state["evaluation_timestamp"] = datetime.now().isoformat()
        state["candidate_id"] = state["interview_data"].get("candidate_id", "unknown")
        
        state["current_step"] = "completed"
        
    except Exception as e:
        state["errors"].append(f"Aggregator error: {str(e)}")
        
    return state



In [127]:
if __name__ == "__main__":
    # Load interview data
    interview_data2 = load_transcripts(r'../../interviews/1f4db5f1.json')
    print("Loaded interview data:")
    print(interview_data2)
    
    # Create an initial state with required fields
    initial_evaluation_state = {
        "interview_data": interview_data2,
        "current_step": "start",
        "errors": [],  # Initialize empty errors list
        "technical_skills": [],
        "technical_consistency_score": 0,
        "technical_depth_score": 0,
        "technical_knowledge_gaps": [],
        "technical_strengths": []
    }
    
    # Call the technical evaluator function
    print('calling llm with updated state')
    state_after_technical_evaluation_update = llm1_technical_evaluator(initial_evaluation_state)
    state_after_problem_solving_skill_evaluation_update = llm2_problem_solving_skill_evaluator(state_after_technical_evaluation_update)
    state_after_full_evaluation = aggregator(state_after_problem_solving_skill_evaluation_update)

    print("State after technical evaluation update:")


print("Technical Skills:")
for skill in state_after_technical_evaluation_update['technical_skills']:  # Changed from updated_state.technical_skills
    print(f"  - {skill['skill_name']}: {skill['proficiency_level']}")  # Changed to bracket notation
    print(f"    Confidence: {skill['confidence']}")
    print(f"    Evidence: {skill['evidence']}")
    print(f"    Comments: {skill['comments']}")
    print()

print("position_evaluated_for",state_after_technical_evaluation_update["position_evaluated_for"])
print("Technical Consistency Score:", state_after_technical_evaluation_update['technical_consistency_score'])  # Changed to bracket notation
print("Technical Depth Score:", state_after_technical_evaluation_update['technical_depth_score'])  # Changed to bracket notation
print("Knowledge Gaps:", state_after_technical_evaluation_update['technical_knowledge_gaps'])  # Changed to bracket notation
print("Technical Strengths:", state_after_technical_evaluation_update['technical_strengths'])  # Changed to bracket notation
print("Errors:", state_after_technical_evaluation_update['errors'])  # Changed to bracket notation

print('-'*60)
print("State after problem_solving evaluation update:")

for skill in state_after_problem_solving_skill_evaluation_update['problem_solving_instances']:
    print(f"  - {skill['problem_statement']}: {skill['approach_quality']}")  # Changed to bracket notation
    print(f"    Solution: {skill['solution']}")
    print(f"    Confidence: {skill['solution_effectiveness']}")
    print(f"    Evidence: {skill['reasoning_clarity']}")
    print()


print(" problem_solving_score : ",state_after_problem_solving_skill_evaluation_update["problem_solving_score"])
print(" analytical thinking score : ",state_after_problem_solving_skill_evaluation_update['analytical_thinking_score'])
print("debugging_potential_score:",state_after_problem_solving_skill_evaluation_update['debugging_potential_score']  )
print("problem_solving_approach:",state_after_problem_solving_skill_evaluation_update['problem_solving_approach']  )
print("comments_on_clarity_of_communication",state_after_problem_solving_skill_evaluation_update["comments_on_clarity_of_communication"])
print("Errors:", state_after_problem_solving_skill_evaluation_update['errors'])  # Changed to bracket notation


print("overall_score : ",state_after_full_evaluation["overall_score"])
print("key_strengths : ",state_after_full_evaluation['key_strengths'])
print("critical_weaknesses:",state_after_full_evaluation['critical_weaknesses']  )
print("evaluation_timestamp:",state_after_full_evaluation['evaluation_timestamp']  )
print("candidate_id:", state_after_full_evaluation['candidate_id'])  # Changed to bracket notation
print("Errors:", state_after_full_evaluation['errors'])  # Changed to bracket notation


# print("overall_score:", state_after_full_evaluation.overall_score)
# print("key_strengths:", state_after_full_evaluation.key_strengths)
# print("critical_weaknesses:", state_after_full_evaluation.critical_weaknesses)



Loaded interview data:
{'tech_stack': 'langchain,langgraph,ai', 'position': 'ai engineering', 'question_count': 5, 'difficulty': 'beginner', 'conversation_history': [{'role': 'interviewer', 'content': "Hello! I'm your AI interviewer for today's ai engineering interview.\n\nI see your tech stack includes: langchain,langgraph,ai\n\nLet's start with something fundamental. Can you explain what langchain is and describe one project where you've used it effectively?"}, {'role': 'candidate', 'content': 'LangChain is a framework for building applications powered by large language models (LLMs), designed to make it easier to connect models with data sources, APIs, and workflows.It provides tools for prompt management, memory, chaining multiple model calls, and integrating with retrieval systems.I used LangChain effectively to build a retrieval-augmented chatbot that answered company policy questions for employees.It integrated with a vector database to fetch relevant documents before generating

AttributeError: 'dict' object has no attribute 'overall_score'