## An rag workflow for evaluating the performance of Performed interview.


In [71]:

import json
import os


def load_transcripts(file_path):
    """
    Load transcripts from a JSON file.

    Args:
        file_path (str): The path to the JSON file containing the transcripts.
    
    Returns:
        dict: A dictionary containing the transcripts.
    
    Raises:
        FileNotFoundError: If the file doesn't exist.
        json.JSONDecodeError: If the file contains invalid JSON.
    """
    try:
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"File not found: {file_path}")
        
        with open(file_path, 'r', encoding='utf-8') as f:
            transcripts = json.load(f)
            return transcripts
    
    except json.JSONDecodeError as e:
        print(f"Error parsing JSON: {e}")
        raise
    except Exception as e:
        print(f"Error loading transcripts: {e}")
        raise

print(load_transcripts(r'../../interviews/627dc248.json'))


{'tech_stack': 'html, css ,js', 'position': 'frontend developer', 'question_count': 5, 'difficulty': 'beginner', 'conversation_history': [{'role': 'interviewer', 'content': "Hello! I'm your AI interviewer for today's frontend developer interview.\n\nI see your tech stack includes: html, css ,js\n\nLet's start with something fundamental. Can you explain what html is and describe one project where you've used it effectively?"}, {'role': 'candidate', 'content': 'HTML (HyperText Markup Language) is the standard language used to structure content on the web, defining elements like headings, paragraphs, links, and images.It forms the backbone of every webpage, often paired with CSS for styling and JavaScript for interactivity.I used HTML effectively in a personal portfolio project, where I built a responsive multi-page site to showcase my development skills.It included semantic HTML tags for better accessibility and SEO.The project integrated images, forms, and navigation menus, giving a pro

In [48]:
import os
from dotenv import load_dotenv

load_dotenv()

os.environ['GOOGLE_API_KEY'] = os.getenv("GOOGLE_API_KEY")


from typing_extensions import TypedDict
from langgraph.graph import StateGraph, END,START



from schema import (
    TechnicalSkillAssessment,
    ProblemSolvingInstance,
    EvaluationWorkFlowState,
    ConfidenceLevel,
    ProficiencyLevel
)


In [77]:
def llm1_technical_evaluator(state: EvaluationWorkFlowState) -> EvaluationWorkFlowState:
    """LLM 1: Technical Skills Evaluator - Updates technical fields in state"""
    print('LLM1 got called') 

    print(f'State = {state}')
    try:
        system_prompt = """You are a Senior Technical Interviewer specializing in evaluating technical skills and knowledge depth.

Your task is to analyze the interview conversation and assess the candidate's technical competencies.

Focus on:
1. Depth of technical understanding
2. Specific skills demonstrated (HTML, CSS, JavaScript, frameworks, etc.)
3. Quality of technical explanations
4. Knowledge gaps and areas needing improvement

Return your analysis in JSON format with the following structure:
{
    "technical_skills": [
        {
            "skill_name": "JavaScript",
            "proficiency_level": "intermediate",
            "evidence": ["Explained closures correctly", "Mentioned ES6 features"],
            "confidence": "high",
            "comments": "Good understanding shown"
        }
    ],
    "technical_consistency_score": 7,
    "technical_depth_score": 6,
    "technical_knowledge_gaps": ["Advanced React patterns", "Testing frameworks"],
    "technical_strengths": ["Strong JavaScript fundamentals", "Good understanding of async programming"]
}

IMPORTANT: 
- Use "evidence" not "evidance"
- Use "confidence" not "confidence_level" 
- Always include "comments" field for each skill
- proficiency_level must be one of: "beginner", "intermediate", "advanced", "expert"
- confidence must be one of: "low", "medium", "high", "very_high"

Be thorough but fair in your assessment."""

        human_prompt = f"""Analyze this interview conversation for technical skills:

Interview Data: {state.interview_data}

Return only valid JSON following the specified structure."""

        messages = [
            SystemMessage(content=system_prompt),
            HumanMessage(content=human_prompt)
        ]
        print('calling llm1')
        response = llm.invoke(messages)
        print("Raw LLM Response:")
        print(response.content)
        
        # Parse JSON response
        parser = JsonOutputParser()
        result = parser.parse(response.content)
        
        # Update state with technical evaluation results
        state.technical_skills = [TechnicalSkillAssessment(**skill) for skill in result.get("technical_skills", [])]
        state.technical_consistency_score = result.get("technical_consistency_score", 0)
        state.technical_depth_score = result.get("technical_depth_score", 0)
        state.technical_knowledge_gaps = result.get("technical_knowledge_gaps", [])
        state.technical_strengths = result.get("technical_strengths", [])
        
        state.current_step = "llm1_completed"
        
    except Exception as e:
        error_msg = f"LLM1 Technical Evaluator error: {str(e)}"
        state.errors.append(error_msg)
        print(f"Error: {error_msg}")
        
    return state

if __name__ == "__main__":
    # Load interview data
    interview_data2 = load_transcripts(r'../../interviews/1f4db5f1.json')
    print("Loaded interview data:")
    print(interview_data2)
    
    # Create an initial state with required fields
    initial_evaluation_state = {
        interview_data: interview_data2,
        current_step: "start",
        errors: [],  # Initialize empty errors list
        technical_skills: [],
        technical_consistency_score: 0,
        technical_depth_score: 0,
        technical_knowledge_gaps: [],
        technical_strengths: []
    }
    
    # Call the technical evaluator function
    print('calling llm with updated state')
    updated_state = llm1_technical_evaluator(initial_evaluation_state)
    print(updated_state)
    # Print results
    print("\n=== EVALUATION RESULTS ===")
    print("Technical Skills:")
    for skill in updated_state.technical_skills:
        print(f"  - {skill.skill_name}: {skill.proficiency_level}")
        print(f"    Confidence: {skill.confidence}")
        print(f"    Evidence: {skill.evidence}")
        print(f"    Comments: {skill.comments}")
        print()
    
    print("Technical Consistency Score:", updated_state.technical_consistency_score)
    print("Technical Depth Score:", updated_state.technical_depth_score)
    print("Knowledge Gaps:", updated_state.technical_knowledge_gaps)
    print("Technical Strengths:", updated_state.technical_strengths)
    # print("Errors:", updated_state.errors)

Loaded interview data:
{'tech_stack': 'langchain,langgraph,ai', 'position': 'ai engineering', 'question_count': 5, 'difficulty': 'beginner', 'conversation_history': [{'role': 'interviewer', 'content': "Hello! I'm your AI interviewer for today's ai engineering interview.\n\nI see your tech stack includes: langchain,langgraph,ai\n\nLet's start with something fundamental. Can you explain what langchain is and describe one project where you've used it effectively?"}, {'role': 'candidate', 'content': 'LangChain is a framework for building applications powered by large language models (LLMs), designed to make it easier to connect models with data sources, APIs, and workflows.It provides tools for prompt management, memory, chaining multiple model calls, and integrating with retrieval systems.I used LangChain effectively to build a retrieval-augmented chatbot that answered company policy questions for employees.It integrated with a vector database to fetch relevant documents before generating

NameError: name 'current_step' is not defined