In [1]:
import pandas as pd
import numpy as np
import json
import re
import os
from sklearn.metrics import jaccard_score
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from textstat import flesch_reading_ease, flesch_kincaid_grade
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter
from datetime import datetime
import openai
import time
from typing import List, Dict, Any
import warnings
from dotenv import load_dotenv
warnings.filterwarnings('ignore')

In [2]:
# Load environment variables from .env file
load_dotenv()

True

In [3]:
class LLMChatAnalyzer:
    """
    Advanced analyzer for GPT conversations using LLM-based contextual understanding 
    with experimental context awareness.
    
    This class provides comprehensive analysis of user-GPT conversations including:
    - True contextual concept detection using GPT-4o-mini
    - Distinction between authentic user engagement and data-enhanced assistant analysis
    - User engagement pattern analysis with experimental context consideration
    - Conversation flow and coherence evaluation
    - Energy domain-specific concept attribution with explanations
    
    Unlike embedding-based approaches, this analyzer uses language models to actually
    understand the context and meaning of conversations, providing explainable results
    while accounting for experimental data provision.
    """
    
    def __init__(self, api_key: str = None, analysis_model: str = "gpt-4o-mini"):
        """
        Initialize the analyzer with OpenAI's chat completion model for concept analysis.
        
        Args:
            api_key (str): OpenAI API key. If None, will try to get from .env file or environment variable OPENAI_API_KEY
            analysis_model (str): OpenAI chat model to use for analysis. Options:
                                 - 'gpt-4o-mini': Fast, cost-effective, good performance (recommended)
                                 - 'gpt-4o': Slower, more expensive, best performance
                                 - 'gpt-4': High quality but more expensive
                                 - 'gpt-3.5-turbo': Fastest and cheapest, decent performance
        
        The analyzer uses structured prompting with multi-factor confidence scoring to understand conversation content contextually,
        while distinguishing between authentic user engagement and data-enhanced assistant analysis.
        """
        # Load API key from .env file, parameter, or environment variable
        if api_key:
            openai.api_key = api_key
        else:
            # Try to get API key from .env file first, then environment variable
            api_key = os.getenv('OPENAI_API_KEY')
            if not api_key:
                raise ValueError(
                    "OpenAI API key not found. Please either:\n"
                    "1. Create a .env file with OPENAI_API_KEY=your-key-here\n"
                    "2. Set OPENAI_API_KEY environment variable\n"
                    "3. Pass api_key parameter to the constructor"
                )
            openai.api_key = api_key
        
        self.client = openai.OpenAI(api_key=api_key)
        self.analysis_model = analysis_model
        
        print(f"🧠 Initialized with OpenAI chat model: {analysis_model}")
        print(f"🔑 API key loaded successfully from {'parameter' if api_key else '.env file/environment'}")
        print(f"📊 Using experimental context-aware multi-factor confidence scoring framework")
        
        # === EXPERIMENTAL CONTEXT-AWARE MULTI-FACTOR SCORING FRAMEWORK ===
        self.scoring_factors = {
            "explicitness": {
                "weight": 0.30,
                "description": "How directly and clearly the concept is mentioned using appropriate terminology",
                "user_criteria": {
                    1.0: "Domain terminology used correctly in context (energy bills, HVAC, kWh)",
                    0.8: "General energy terms used appropriately in context",
                    0.6: "Related terms that imply concept understanding", 
                    0.4: "Indirect references requiring some inference",
                    0.2: "Vague connection or unclear terminology",
                    0.0: "No mention or connection"
                },
                "assistant_criteria": {
                    1.0: "Precise domain terminology with technical accuracy (kWh, TOU rates, HVAC)",
                    0.8: "Appropriate technical terms with clear explanations",
                    0.6: "General energy terminology used correctly",
                    0.4: "Basic energy terms with some accuracy", 
                    0.2: "Imprecise or unclear terminology",
                    0.0: "No meaningful terminology used"
                }
            },
            "depth": {
                "weight": 0.25,
                "description": "Quality and authenticity of engagement with the concept",
                "user_criteria": {
                    1.0: "Deep contextual engagement with detailed reasoning, constraints, or comprehensive understanding",
                    0.8: "Substantial contextual engagement with clear reasoning or thoughtful analysis",
                    0.6: "Moderate contextual engagement with some reasoning or understanding",
                    0.4: "Basic contextual engagement with minimal reasoning or surface-level understanding", 
                    0.2: "Shallow engagement with little reasoning or very limited understanding",
                    0.0: "No meaningful engagement demonstrated"
                },
                "assistant_criteria": {
                    1.0: "Comprehensive multi-faceted analysis examining multiple dimensions of the concept",
                    0.8: "Thorough analysis exploring several aspects or implications of the concept",
                    0.6: "Moderate analysis covering key aspects with reasonable detail",
                    0.4: "Basic analysis touching on main points with limited development",
                    0.2: "Superficial analysis with minimal exploration or development",
                    0.0: "No meaningful analytical processing demonstrated"
                }
            },
            "consideration": {
                "weight": 0.25,
                "description": "Whether the concept was meaningfully present in the participant's thinking",
                "user_criteria": {
                    1.0: "Concept clearly influences user's decisions, preferences, or thinking",
                    0.8: "Concept is actively considered in relation to user's situation",
                    0.6: "Concept is acknowledged and shows contextual relevance",
                    0.4: "Concept is mentioned with minimal contextual connection",
                    0.2: "Concept is barely acknowledged or referenced",
                    0.0: "Concept is not considered in user's thinking"
                },
                "assistant_criteria": {
                    1.0: "Concept is fully integrated into analysis and recommendations",
                    0.8: "Concept is clearly incorporated into response strategy", 
                    0.6: "Concept is meaningfully addressed in the analysis",
                    0.4: "Concept is mentioned but not well integrated",
                    0.2: "Concept is briefly touched upon",
                    0.0: "Concept is not considered in the response"
                }
            },
            "evidence": {
                "weight": 0.20,
                "description": "Quality and authenticity of supporting evidence provided",
                "user_criteria": {
                    1.0: "Contextual examples, specific constraints, or experiential details",
                    0.8: "Clear situational context or constraints with details",
                    0.6: "General contextual examples or reasonable situational context",
                    0.4: "Some contextual information or basic examples",
                    0.2: "Minimal supporting contextual information",
                    0.0: "No contextual evidence or examples provided"
                },
                "assistant_criteria": {
                    1.0: "Multiple high-quality sources: quantitative data + domain expertise + specific examples",
                    0.8: "Strong primary source with additional supporting information",
                    0.6: "Solid single source with reasonable supporting details",
                    0.4: "Basic source material with minimal additional support",
                    0.2: "Weak or limited source material with little support",
                    0.0: "No credible supporting information provided"
                }
            }
        }

        # === ENERGY-SPECIFIC CONCEPT DEFINITIONS ===
        # These definitions guide the LLM in understanding what to look for
        self.energy_concepts = {
            "energy_consumption": {
                "description": "Discussion of circuit or appliance energy consumption data, or analysis of how much energy circuits or appliances in a household consume.",
                "examples": [
                    "HVAC unit used 5000 kWh per month",
                    "The pool pump was consuming the most energy in July",
                    "I want to analyze my energy usage of appliances",
                    "What is the energy consumption of my refrigerator?"
                    "Which appliances used the most energy?"
                ]
            },
            "cost_awareness": {
                "description": "Discussion of electricity bills, energy costs, utility rates, time-of-use pricing, cost savings, or financial impact of energy decisions.",
                "examples": [
                    "Your electricity bill was $150 last month",
                    "Peak hours are more expensive",
                    "I want to reduce my energy bills",
                    "Please consider time-of-use rates in your calculations",
                    "How much will this save on my electricity bill?"
                ]
            },
            "behavioral_change": {
                "description": "Discussion of adjusting usage patterns, adopting energy-saving behaviors, modifying habits, or actionable steps to reduce energy consumption.",
                "examples": [
                    "I suggest running the dishwasher at night",
                    "Your HVAC running times can be adjusted to off-peak hours",
                    "Let's evalaute how I can change my energy usage patterns to reduce energy bills",
                    "I can't change my energy use patterns for the refrigerator",
                    "I'm willing to adjust my thermostat settings up to 2 degree Fahrenheit"
                ]
            },
            "use_flexibility": {
                "description": "Discussion of how regularly or irregularly appliances are used, flexibility of operation, patterns of appliance operation throughout different time periods.",
                "examples": [
                    "Dishwasher is used irregularly throughout the day",
                    "Let's see if I am using my microwave at regular times"
                ]
            },
            "use_frequency": {
                "description": "Discussion of how often appliances are used, frequency of operation.",
                "examples": [
                    "HVAC is running consistently during peak hours",
                    "I run my dishwasher every day",
                    "The pool pump operates 6 hours daily",
                    "We rarely use the oven",
                ]
            },
            "comfort_association": {
                "description": "Discussion of comfort or convenience issues when appliances are used in different times.",
                "examples": [
                    "I don't want to change my thermostat settings too much",
                    "I should not run the dishwasher at night because it makes noise",
                    "This would impact my daily routine"
                ]
            },
            "technical_knowledge": {
                "description": "Discussion showing understanding of how appliances work, system operations, equipment specifications, or technical aspects of energy use.",
                "examples": [
                    "Heat pumps are more efficient than traditional HVAC systems like furnaces",
                    "Smart thermostats can optimize energy use through scheduling and sensors",
                    "Variable-speed pumps adjust their speed based on demand",
                    "Microwaves provide quick heating but can be inefficient for large meals"
                ]
            }
        }
        
        # === CONVERSATION INTERACTION PATTERNS (Updated for Experimental Context) ===
        self.engagement_patterns = {
            "information_seeking": {
                "description": "Directive speech acts aimed at obtaining information, clarification, or elaboration.",
                "examples": [
                    "Can you explain that in more detail?",
                    "How does that work?",
                    "What are the specific steps to implement this?"
                ],
                "theoretical_basis": "Searle's Directives - utterances intended to get the listener to provide information or explanation"
            },
            "constraint_articulation": {
                "description": "Assertive speech acts that state personal limitations, boundaries, or situational constraints.",
                "examples": [
                    "I'm sensitive to being cold",
                    "That's not feasible for my situation",
                    "I don't want to change my mealtime"
                ],
                "theoretical_basis": "Searle's Assertives - statements that convey information about the speaker's situation or limitations"
            },
            "solution_evaluation": {
                "description": "Assertive speech acts that express judgments, assessments, or evaluations of proposed solutions.",
                "examples": [
                    "The proposed method seems reasonable",
                    "I think the oven idea makes sense",
                    "Electric vehicle charging at night is an excellent idea",
                    "This would be effective for my situation"
                ],
                "theoretical_basis": "Searle's Assertives - statements that assess or evaluate the truth, feasibility, or value of proposals"
            },
            "commitment_expression": {
                "description": "Commissive speech acts that express willingness to try solutions or commit to behavioral changes.",
                "examples": [
                    "I'm willing to adjust my thermostat settings",
                    "I can reschedule when I run my dishwasher",
                    "I'll try using ceiling fans to comfort myself",
                    "I'll implement this change gradually"
                ],
                "theoretical_basis": "Searle's Commissives - utterances that commit the speaker to future courses of action"
            }
        }
    
    def analyze_concepts_with_llm(self, conversation_text: str, participant: str = "both", max_retries: int = 3) -> Dict[str, Any]:
        """
        Analyze conversation for energy concepts using LLM contextual understanding with experimental context awareness.
        
        Args:
            conversation_text (str): The conversation text to analyze
            participant (str): Which participant to analyze ('user', 'assistant', or 'both')
            max_retries (int): Maximum number of retry attempts for API calls
            
        Returns:
            Dict[str, Any]: Analysis results with detected concepts, confidence, evidence, and reasoning
        """
        
        # Create comprehensive prompt for concept detection
        prompt = self._create_analysis_prompt(conversation_text, participant)
        
        for attempt in range(max_retries):
            try:
                print(f"🧠 Analyzing concepts with {self.analysis_model} (experimental context-aware)...")
                
                response = self.client.chat.completions.create(
                    model=self.analysis_model,
                    messages=[
                        {
                            "role": "system", 
                            "content": "You are an expert analyst specializing in home energy efficiency conversations in experimental settings. You understand that users are provided with energy data by researchers, and you distinguish between authentic user engagement and data-enhanced assistant analysis. Analyze conversations for specific energy-related concepts with high accuracy and provide detailed explanations."
                        },
                        {
                            "role": "user", 
                            "content": prompt
                        }
                    ],
                    temperature=0.1,  # Low temperature for consistent analysis
                    response_format={"type": "json_object"}  # Ensure JSON response
                )
                
                # Parse the response
                analysis_result = json.loads(response.choices[0].message.content)
                return self._process_llm_response(analysis_result, participant)
                
            except json.JSONDecodeError as e:
                print(f"⚠️ JSON parsing error (attempt {attempt + 1}): {e}")
                if attempt < max_retries - 1:
                    time.sleep(1)
                else:
                    print("❌ Failed to parse LLM response after multiple attempts")
                    return self._create_empty_response(participant)
                    
            except openai.RateLimitError:
                if attempt < max_retries - 1:
                    wait_time = (2 ** attempt)
                    print(f"⏳ Rate limit hit, waiting {wait_time} seconds...")
                    time.sleep(wait_time)
                else:
                    raise
                    
            except Exception as e:
                if attempt < max_retries - 1:
                    print(f"⚠️ API error (attempt {attempt + 1}): {e}")
                    time.sleep(1)
                else:
                    raise
    
    def _create_analysis_prompt(self, conversation_text: str, participant: str) -> str:
        """Create a structured prompt for LLM concept analysis with experimental context awareness."""
        
        # Create concept descriptions for the prompt
        energy_concepts_desc = "\n".join([
            f"- {name.replace('_', ' ').title()}: {info['description']}"
            for name, info in self.energy_concepts.items()
        ])
        
        engagement_patterns_desc = "\n".join([
            f"- {name.replace('_', ' ').title()}: {info['description']}"
            for name, info in self.engagement_patterns.items()
        ])
        
        participant_instruction = {
            "user": "Focus only on what the user (human participant) says and discusses. Remember that users have been provided with energy data by researchers, so focus on their authentic engagement, personal constraints, preferences, and how they apply or respond to information rather than their data recall ability.",
            "assistant": "Focus only on what ChatGPT/assistant says and discusses. Evaluate how well the assistant analyzes provided experimental data, adds domain expertise, and provides practical guidance beyond just restating data.",
            "both": "Analyze the entire conversation including both participants, but apply different criteria for user (authentic engagement) vs assistant (data analysis quality)."
        }[participant]
        
        # Create scoring framework description with participant-specific criteria
        scoring_framework = ""
        for factor_name, factor_info in self.scoring_factors.items():
            scoring_framework += f"\n{factor_name.upper()} (Weight: {factor_info['weight']}) - {factor_info['description']}:\n"
            
            if participant in ["user", "both"]:
                scoring_framework += "USER CRITERIA:\n"
                for score, description in factor_info['user_criteria'].items():
                    scoring_framework += f"  {score}: {description}\n"
            
            if participant in ["assistant", "both"]:
                scoring_framework += "ASSISTANT CRITERIA:\n"
                for score, description in factor_info['assistant_criteria'].items():
                    scoring_framework += f"  {score}: {description}\n"
            
            scoring_framework += "\n"
        
        prompt = f"""
Please analyze this conversation for the following concepts using an experimental context-aware multi-factor scoring approach. 

IMPORTANT EXPERIMENTAL CONTEXT:
- This is an experimental setting where users have been provided with energy data files by researchers
- Users are not expected to know specific energy data from memory
- Focus on AUTHENTIC USER ENGAGEMENT vs DATA-ENHANCED ASSISTANT ANALYSIS
- Distinguish between personal insights/constraints and data-driven analysis

{participant_instruction}

ENERGY CONCEPTS TO DETECT:
{energy_concepts_desc}

INTERACTION PATTERNS TO DETECT:
{engagement_patterns_desc}

EXPERIMENTAL CONTEXT-AWARE SCORING FRAMEWORK:
{scoring_framework}

CONVERSATION TO ANALYZE:
{conversation_text}

For each concept (both energy concepts and interaction patterns):

1. Determine if the concept was meaningfully discussed (not just mentioned in passing)
2. Apply the appropriate criteria (USER vs ASSISTANT) based on who is being analyzed
3. Score each of the 4 factors (explicitness, depth, consideration, evidence) from 0.0 to 1.0
4. Calculate weighted confidence: (explicitness × 0.30) + (depth × 0.25) + (consideration × 0.25) + (evidence × 0.20)
5. Provide specific evidence from the conversation
6. Give brief reasoning for each factor score, noting whether it's authentic engagement or data analysis

Provide your analysis in this exact JSON format:
{{
    "energy_concepts": {{
        "energy_consumption": {{
            "detected": true/false,
            "factor_scores": {{
                "explicitness": {{
                    "score": 0.0-1.0,
                    "justification": "Brief explanation considering experimental context"
                }},
                "depth": {{
                    "score": 0.0-1.0,
                    "justification": "Brief explanation distinguishing authentic vs data-driven depth"
                }},
                "consideration": {{
                    "score": 0.0-1.0,
                    "justification": "Brief explanation of role in participant's conversation focus"
                }},
                "evidence": {{
                    "score": 0.0-1.0,
                    "justification": "Brief explanation of evidence type and quality"
                }}
            }},
            "weighted_confidence": "calculated weighted score",
            "evidence_quote": "Direct quote from conversation supporting detection",
            "overall_reasoning": "Summary considering experimental context and authentic vs data-driven analysis",
            "analysis_type": "authentic_engagement/data_analysis/mixed"
        }},
        ...other energy concepts...
    }},
    "engagement_patterns": {{
        ...interaction patterns with same structure...
    }},
    "summary": {{
        "total_energy_concepts_detected": number,
        "total_engagement_patterns_detected": number,
        "avg_confidence_energy": 0.0-1.0,
        "avg_confidence_interaction": 0.0-1.0,
        "conversation_focus": "Brief description of main topics",
        "scoring_method": "experimental_context_aware_multi_factor",
        "participant_role": "user/assistant/both"
    }}
}}

Be thorough and precise. Apply experimental context-aware criteria consistently. Only mark concepts as detected if they meet a reasonable threshold (typically weighted_confidence ≥ 0.4). Remember to distinguish between authentic user engagement and data-enhanced assistant analysis.
"""
        
        return prompt
    
    def _process_llm_response(self, analysis_result: Dict[str, Any], participant: str) -> Dict[str, Any]:
        """Process and validate LLM analysis response with experimental context awareness."""
        
        processed_result = {
            "energy_concepts": {},
            "engagement_patterns": {},
            "summary": analysis_result.get("summary", {}),
            "analysis_method": "experimental_context_aware_llm_multi_factor",
            "participant_analyzed": participant
        }
        
        # Process energy concepts
        energy_results = analysis_result.get("energy_concepts", {})
        for concept_name in self.energy_concepts.keys():
            concept_result = energy_results.get(concept_name, {})
            
            # Extract factor scores
            factor_scores = concept_result.get("factor_scores", {})
            
            # Calculate or extract weighted confidence
            weighted_confidence = concept_result.get("weighted_confidence", 0.0)
            if isinstance(weighted_confidence, str):
                # If it's a string, try to extract the number
                try:
                    weighted_confidence = float(re.findall(r'\d+\.?\d*', weighted_confidence)[0])
                except:
                    weighted_confidence = 0.0
            
            # If weighted_confidence is 0 or missing, calculate it from factor scores
            if weighted_confidence == 0.0 and factor_scores:
                explicitness = factor_scores.get("explicitness", {}).get("score", 0.0)
                depth = factor_scores.get("depth", {}).get("score", 0.0)
                consideration = factor_scores.get("consideration", {}).get("score", 0.0)
                evidence = factor_scores.get("evidence", {}).get("score", 0.0)
                
                weighted_confidence = (
                    explicitness * self.scoring_factors["explicitness"]["weight"] +
                    depth * self.scoring_factors["depth"]["weight"] +
                    consideration * self.scoring_factors["consideration"]["weight"] +
                    evidence * self.scoring_factors["evidence"]["weight"]
                )
            
            processed_result["energy_concepts"][f"energy_{concept_name}"] = {
                "detected": concept_result.get("detected", False),
                "confidence": float(weighted_confidence),
                "factor_scores": {
                    "explicitness": {
                        "score": float(factor_scores.get("explicitness", {}).get("score", 0.0)),
                        "justification": factor_scores.get("explicitness", {}).get("justification", "")
                    },
                    "depth": {
                        "score": float(factor_scores.get("depth", {}).get("score", 0.0)),
                        "justification": factor_scores.get("depth", {}).get("justification", "")
                    },
                    "consideration": {
                        "score": float(factor_scores.get("consideration", {}).get("score", 0.0)),
                        "justification": factor_scores.get("consideration", {}).get("justification", "")
                    },
                    "evidence": {
                        "score": float(factor_scores.get("evidence", {}).get("score", 0.0)),
                        "justification": factor_scores.get("evidence", {}).get("justification", "")
                    }
                },
                "evidence_quote": concept_result.get("evidence_quote", ""),
                "overall_reasoning": concept_result.get("overall_reasoning", ""),
                "analysis_type": concept_result.get("analysis_type", "unknown"),
                "scoring_method": "experimental_context_aware_multi_factor",
                "participant_analyzed": participant,
                "original_llm_detection": concept_result.get("original_llm_detection", False),
                "threshold_applied": concept_result.get("threshold_applied", 0.4),
                "threshold_override": concept_result.get("threshold_override", False)
            }
        
        # Process interaction patterns (similar structure)
        interaction_results = analysis_result.get("engagement_patterns", {})
        for pattern_name in self.engagement_patterns.keys():
            pattern_result = interaction_results.get(pattern_name, {})
            
            # Extract factor scores
            factor_scores = pattern_result.get("factor_scores", {})
            
            # Calculate or extract weighted confidence
            weighted_confidence = pattern_result.get("weighted_confidence", 0.0)
            if isinstance(weighted_confidence, str):
                try:
                    weighted_confidence = float(re.findall(r'\d+\.?\d*', weighted_confidence)[0])
                except:
                    weighted_confidence = 0.0
            
            # Calculate if missing
            if weighted_confidence == 0.0 and factor_scores:
                explicitness = factor_scores.get("explicitness", {}).get("score", 0.0)
                depth = factor_scores.get("depth", {}).get("score", 0.0)
                consideration = factor_scores.get("consideration", {}).get("score", 0.0)
                evidence = factor_scores.get("evidence", {}).get("score", 0.0)
                
                weighted_confidence = (
                    explicitness * self.scoring_factors["explicitness"]["weight"] +
                    depth * self.scoring_factors["depth"]["weight"] +
                    consideration * self.scoring_factors["consideration"]["weight"] +
                    evidence * self.scoring_factors["evidence"]["weight"]
                )
            
            processed_result["engagement_patterns"][f"interaction_{pattern_name}"] = {
                "detected": pattern_result.get("detected", False),
                "confidence": float(weighted_confidence),
                "factor_scores": {
                    "explicitness": {
                        "score": float(factor_scores.get("explicitness", {}).get("score", 0.0)),
                        "justification": factor_scores.get("explicitness", {}).get("justification", "")
                    },
                    "depth": {
                        "score": float(factor_scores.get("depth", {}).get("score", 0.0)),
                        "justification": factor_scores.get("depth", {}).get("justification", "")
                    },
                    "consideration": {
                        "score": float(factor_scores.get("consideration", {}).get("score", 0.0)),
                        "justification": factor_scores.get("consideration", {}).get("justification", "")
                    },
                    "evidence": {
                        "score": float(factor_scores.get("evidence", {}).get("score", 0.0)),
                        "justification": factor_scores.get("evidence", {}).get("justification", "")
                    }
                },
                "evidence_quote": pattern_result.get("evidence_quote", ""),
                "overall_reasoning": pattern_result.get("overall_reasoning", ""),
                "analysis_type": pattern_result.get("analysis_type", "unknown"),
                "scoring_method": "experimental_context_aware_multi_factor",
                "participant_analyzed": participant
            }
        
        return processed_result
    
    def _create_empty_response(self, participant: str) -> Dict[str, Any]:
        """Create empty response structure when LLM analysis fails."""
        
        empty_factor_scores = {
            "explicitness": {"score": 0.0, "justification": "Analysis failed"},
            "depth": {"score": 0.0, "justification": "Analysis failed"},
            "consideration": {"score": 0.0, "justification": "Analysis failed"},
            "evidence": {"score": 0.0, "justification": "Analysis failed"}
        }
        
        empty_result = {
            "energy_concepts": {},
            "engagement_patterns": {},
            "summary": {
                "total_energy_concepts_detected": 0,
                "total_engagement_patterns_detected": 0,
                "avg_confidence_energy": 0.0,
                "avg_confidence_interaction": 0.0,
                "conversation_focus": "Analysis failed",
                "scoring_method": "experimental_context_aware_multi_factor",
                "participant_role": participant
            },
            "analysis_method": "experimental_context_aware_llm_failed",
            "participant_analyzed": participant
        }
        
        # Add empty entries for all concepts
        for concept_name in self.energy_concepts.keys():
            empty_result["energy_concepts"][f"energy_{concept_name}"] = {
                "detected": False,
                "confidence": 0.0,
                "factor_scores": empty_factor_scores.copy(),
                "evidence_quote": "",
                "overall_reasoning": "Analysis failed",
                "analysis_type": "unknown",
                "scoring_method": "experimental_context_aware_multi_factor",
                "participant_analyzed": participant
            }
        
        for pattern_name in self.engagement_patterns.keys():
            empty_result["engagement_patterns"][f"interaction_{pattern_name}"] = {
                "detected": False,
                "confidence": 0.0,
                "factor_scores": empty_factor_scores.copy(),
                "evidence_quote": "",
                "overall_reasoning": "Analysis failed",
                "analysis_type": "unknown",
                "scoring_method": "experimental_context_aware_multi_factor",
                "participant_analyzed": participant
            }
        
        return empty_result
    
    def comprehensive_analysis(self, file_path: str, detection_threshold: float = 0.4) -> Dict[str, Any]:
        """
        Perform complete experimental context-aware LLM-based analysis of a single conversation file.
        
        Args:
            file_path (str): Path to conversation JSON file
            detection_threshold (float): Confidence threshold for concept detection (0.0-1.0)
            
        Returns:
            Dict[str, Any]: Complete analysis results with experimental context awareness
        """
        print(f"📖 Loading conversation from: {file_path}")
        
        # Load conversation data
        df = self.load_conversation(file_path)
        
        # Extract subject number from folder path
        import os
        folder_name = os.path.basename(os.path.dirname(file_path))
        if folder_name.isdigit():
            subject_id = folder_name # Use folder number (e.g., "002")
        else:
            # Fallback to original method if folder structure is different
            subject_id = df['subject_id'].iloc[0]

        # Prepare conversation text for analysis
        conversation_text = self._format_conversation_for_analysis(df)
        
        # Analyze user messages separately with experimental context awareness
        user_texts = df[df['role'] == 'user']['text'].tolist()
        user_conversation = "\n".join([f"User: {text}" for text in user_texts])
        
        # Analyze GPT messages separately with experimental context awareness
        gpt_texts = df[df['role'] == 'assistant']['text'].tolist()
        gpt_conversation = "\n".join([f"Assistant: {text}" for text in gpt_texts])
        
        print("🧠 Analyzing user concepts (authentic engagement focus)...")
        user_analysis = self.analyze_concepts_with_llm(user_conversation, "user")
        
        print("🧠 Analyzing GPT concepts (data analysis quality focus)...")
        gpt_analysis = self.analyze_concepts_with_llm(gpt_conversation, "assistant")

        print("🔍 Applying detection threshold to user analysis...")
        user_analysis = self._apply_detection_threshold(user_analysis, detection_threshold)

        print("🔍 Applying detection threshold to GPT analysis...")
        gpt_analysis = self._apply_detection_threshold(gpt_analysis, detection_threshold)
             
        print("📊 Analyzing engagement patterns...")
        user_engagement = self.analyze_user_engagement(df)
        conversation_flow = self.analyze_conversation_flow(df)
        
        # Compile results with experimental context awareness
        results = {
            'subject_id': subject_id,
            'analysis_framework': 'experimental_context_aware_multi_factor',
            'conversation_metadata': {
                'total_turns': len(df),
                'user_turns': len(df[df['role'] == 'user']),
                'gpt_turns': len(df[df['role'] == 'assistant']),
                'total_words': df['word_count'].sum(),
                'experimental_context': 'users_provided_energy_data'
            },
            'user_concepts': {**user_analysis['energy_concepts'], **user_analysis['engagement_patterns']},
            'gpt_concepts': {**gpt_analysis['energy_concepts'], **gpt_analysis['engagement_patterns']},
            'user_engagement': user_engagement,
            'conversation_flow': conversation_flow,
            'concept_attribution': self._analyze_concept_attribution(user_analysis, gpt_analysis),
            'analysis_summary': {
                'user_analysis_summary': user_analysis.get('summary', {}),
                'gpt_analysis_summary': gpt_analysis.get('summary', {}),
                'analysis_method': 'experimental_context_aware_llm',
                'scoring_framework': 'authentic_engagement_vs_data_analysis'
            }
        }
        
        return results
    
    def load_conversation(self, file_path):
        """
        Load and parse conversation data from JSON file into structured DataFrame.
        
        Args:
            file_path (str): Path to JSON file containing conversation data
            
        Returns:
            pd.DataFrame: Structured conversation data
        """
        with open(file_path, "r", encoding="utf-8") as f:
            conversation_data = json.load(f)
        
        records = []
        subject_id = file_path.split('/')[-1].replace('.json', '')
        
        for turn_idx, turn in enumerate(conversation_data):
            for key, value in turn.items():
                if key.lower().startswith("you said"):
                    records.append({
                        'subject_id': subject_id, 
                        'turn': turn_idx + 1,
                        'role': 'user', 
                        'text': value,
                        'word_count': len(value.split()),
                        'char_count': len(value)
                    })
                elif key.lower().startswith("chatgpt said"):
                    records.append({
                        'subject_id': subject_id, 
                        'turn': turn_idx + 1,
                        'role': 'assistant', 
                        'text': value,
                        'word_count': len(value.split()),
                        'char_count': len(value)
                    })
        
        return pd.DataFrame(records)
    
    def analyze_user_engagement(self, df):
        """Analyze user engagement patterns throughout the conversation."""
        
        user_data = df[df['role'] == 'user']
        gpt_data = df[df['role'] == 'assistant']
        
        total_prompts = len(user_data)
        avg_prompt_length = user_data['word_count'].mean()
        prompt_length_std = user_data['word_count'].std()
        total_user_words = user_data['word_count'].sum()
        total_gpt_words = gpt_data['word_count'].sum()
        
        if total_gpt_words > 0:
            prompt_response_ratio = total_user_words / total_gpt_words
        else:
            prompt_response_ratio = 0.0
        
        return {
            'total_prompts': total_prompts,
            'avg_prompt_length': avg_prompt_length,
            'prompt_length_std': prompt_length_std,
            'total_user_words': total_user_words,
            'total_gpt_words': total_gpt_words,
            'prompt_response_ratio': prompt_response_ratio,
        }
    
    def analyze_conversation_flow(self, df):
        """Analyze basic conversation flow without requiring embeddings."""
        
        user_texts = df[df['role'] == 'user']['text'].tolist()
        
        return {
            'conversation_length': len(df),
            'user_prompt_count': len(user_texts),
            'avg_user_prompt_length': df[df['role'] == 'user']['word_count'].mean(),
            'conversation_turns': len(df[df['role'] == 'user']) + len(df[df['role'] == 'assistant'])
        }
    
    def _apply_detection_threshold(self, analysis_result: Dict[str, Any], threshold: float) -> Dict[str, Any]:
        """
        Apply consistent detection threshold to override LLM detection decisions.
        
        Args:
            analysis_result (Dict): Analysis result from LLM
            threshold (float): Confidence threshold for detection
            
        Returns:
            Dict: Analysis result with threshold-based detection decisions
        """
        categories_to_process = ['energy_concepts', 'engagement_patterns']
        
        for category in categories_to_process:
            if category in analysis_result:
                for concept_name, concept_data in analysis_result[category].items():
                    confidence = concept_data.get('confidence', 0.0)
                    original_detection = concept_data.get('detected', False)
                    
                    # Apply threshold-based detection
                    threshold_detection = confidence >= threshold
                    
                    # Update detection status
                    concept_data['detected'] = threshold_detection
                    concept_data['original_llm_detection'] = original_detection
                    concept_data['threshold_applied'] = threshold
                    concept_data['threshold_override'] = (original_detection != threshold_detection)
                    
                    # Add threshold info to reasoning
                    if concept_data.get('overall_reasoning'):
                        concept_data['overall_reasoning'] += f" [Threshold {threshold}: {'DETECTED' if threshold_detection else 'NOT DETECTED'}]"
        
        return analysis_result

    def _format_conversation_for_analysis(self, df: pd.DataFrame) -> str:
        """Format conversation DataFrame into text for LLM analysis."""
        
        conversation_parts = []
        for _, row in df.iterrows():
            if row['role'] == 'user':
                conversation_parts.append(f"User: {row['text']}")
            elif row['role'] == 'assistant':
                conversation_parts.append(f"Assistant: {row['text']}")
        
        return "\n\n".join(conversation_parts)
       
    def _analyze_concept_attribution(self, user_analysis: Dict, gpt_analysis: Dict) -> Dict[str, str]:
        """Analyze which participant introduced each concept."""
        
        attribution = {}
        
        all_concepts = set()
        if 'energy_concepts' in user_analysis:
            all_concepts.update(user_analysis['energy_concepts'].keys())
        if 'engagement_patterns' in user_analysis:
            all_concepts.update(user_analysis['engagement_patterns'].keys())
        if 'energy_concepts' in gpt_analysis:
            all_concepts.update(gpt_analysis['energy_concepts'].keys())
        if 'engagement_patterns' in gpt_analysis:
            all_concepts.update(gpt_analysis['engagement_patterns'].keys())
        
        for concept in all_concepts:
            user_detected = self._get_concept_detection(user_analysis, concept)
            gpt_detected = self._get_concept_detection(gpt_analysis, concept)
            
            if user_detected and gpt_detected:
                attribution[concept] = "both"
            elif user_detected:
                attribution[concept] = "user_guided"
            elif gpt_detected:
                attribution[concept] = "gpt_introduced"
            else:
                attribution[concept] = "not_detected"
        
        return attribution
    
    def _get_concept_detection(self, analysis: Dict, concept: str) -> bool:
        """Helper to get concept detection status from analysis result."""
        
        for category in ['energy_concepts', 'engagement_patterns']:
            if category in analysis and concept in analysis[category]:
                return analysis[category][concept].get('detected', False)
        return False
    
    def export_results(self, results: Dict[str, Any], output_folder: str = "Result"):
        """Export analysis results to JSON file."""
        
        os.makedirs(output_folder, exist_ok=True)
        
        subject_id = results['subject_id']
        number_match = re.search(r'(\d+)', subject_id)
        if number_match:
            subject_number = number_match.group(1).zfill(3)
        else:
            subject_number = "001"
        
        output_file = os.path.join(output_folder, "SCALE", f"llm_analysis_results_{subject_number}.json")
        
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(results, f, indent=2, default=str)
        
        print(f"💾 Results exported to: {output_file}")
    
    def create_detailed_report(self, results: Dict[str, Any]) -> str:
        """Generate a detailed human-readable report with multi-factor explanations."""
        
        report = f"""
CONVERSATION ANALYSIS REPORT (Multi-Factor LLM Analysis)
=======================================================

Subject ID: {results['subject_id']}
Analysis Method: Multi-Factor LLM Analysis using {self.analysis_model}
Scoring Framework: Weighted combination of Explicitness (30%), Depth (25%), Consideration (25%), Evidence (20%)
Total Conversation Turns: {results['conversation_metadata']['total_turns']}
Total Words: {results['conversation_metadata']['total_words']}

USER ENGAGEMENT ANALYSIS
------------------------
- Number of prompts: {results['user_engagement']['total_prompts']}
- Average prompt length: {results['user_engagement']['avg_prompt_length']:.1f} words
- Total user words: {results['user_engagement']['total_user_words']}
- Total GPT words: {results['user_engagement']['total_gpt_words']}
- Prompt-response ratio: {results['user_engagement']['prompt_response_ratio']:.3f}

DETECTED ENERGY CONCEPTS (Multi-Factor Analysis)
===============================================
User-detected concepts:
"""
        
        for concept, data in results['user_concepts'].items():
            if concept.startswith('energy_') and data.get('detected', False):
                concept_name = concept.replace('energy_', '').replace('_', ' ').title()
                confidence = data.get('confidence', 0)
                
                report += f"\n✅ {concept_name} (Overall Confidence: {confidence:.3f})\n"
                report += f"   Evidence: \"{data.get('evidence_quote', 'N/A')[:120]}...\"\n"
                report += f"   Reasoning: {data.get('overall_reasoning', 'N/A')[:150]}...\n"
                
                # Add factor breakdown
                factor_scores = data.get('factor_scores', {})
                if factor_scores:
                    report += f"   Factor Breakdown:\n"
                    for factor_name, factor_data in factor_scores.items():
                        score = factor_data.get('score', 0)
                        weight = self.scoring_factors.get(factor_name, {}).get('weight', 0)
                        weighted_contribution = score * weight
                        report += f"     • {factor_name.title()}: {score:.2f} (×{weight:.2f} = {weighted_contribution:.3f}) - {factor_data.get('justification', '')[:80]}...\n"
        
        report += "\nGPT-detected concepts:\n"
        for concept, data in results['gpt_concepts'].items():
            if concept.startswith('energy_') and data.get('detected', False):
                concept_name = concept.replace('energy_', '').replace('_', ' ').title()
                confidence = data.get('confidence', 0)
                
                report += f"\n✅ {concept_name} (Overall Confidence: {confidence:.3f})\n"
                report += f"   Evidence: \"{data.get('evidence_quote', 'N/A')[:120]}...\"\n"
                report += f"   Reasoning: {data.get('overall_reasoning', 'N/A')[:150]}...\n"
                
                # Add factor breakdown
                factor_scores = data.get('factor_scores', {})
                if factor_scores:
                    report += f"   Factor Breakdown:\n"
                    for factor_name, factor_data in factor_scores.items():
                        score = factor_data.get('score', 0)
                        weight = self.scoring_factors.get(factor_name, {}).get('weight', 0)
                        weighted_contribution = score * weight
                        report += f"     • {factor_name.title()}: {score:.2f} (×{weight:.2f} = {weighted_contribution:.3f}) - {factor_data.get('justification', '')[:80]}...\n"
        
        report += f"\nDETECTED INTERACTION PATTERNS\n"
        report += f"=============================\n"
        
        for concept, data in results['user_concepts'].items():
            if concept.startswith('interaction_') and data.get('detected', False):
                concept_name = concept.replace('interaction_', '').replace('_', ' ').title()
                confidence = data.get('confidence', 0)
                
                report += f"\n✅ {concept_name} (Confidence: {confidence:.3f})\n"
                report += f"   Evidence: \"{data.get('evidence_quote', 'N/A')[:100]}...\"\n"
                
                # Add factor breakdown for interaction patterns too
                factor_scores = data.get('factor_scores', {})
                if factor_scores:
                    report += f"   Factor Breakdown:\n"
                    for factor_name, factor_data in factor_scores.items():
                        score = factor_data.get('score', 0)
                        report += f"     • {factor_name.title()}: {score:.2f} - {factor_data.get('justification', '')[:60]}...\n"
        
        # Add analysis summary
        if 'analysis_summary' in results:
            user_summary = results['analysis_summary'].get('user_analysis_summary', {})
            gpt_summary = results['analysis_summary'].get('gpt_analysis_summary', {})
            
            report += f"\nMULTI-FACTOR ANALYSIS SUMMARY\n"
            report += f"============================\n"
            report += f"User energy concepts detected: {user_summary.get('total_energy_concepts_detected', 0)}\n"
            report += f"GPT energy concepts detected: {gpt_summary.get('total_energy_concepts_detected', 0)}\n"
            report += f"Average user confidence: {user_summary.get('avg_confidence_energy', 0):.3f}\n"
            report += f"Average GPT confidence: {gpt_summary.get('avg_confidence_energy', 0):.3f}\n"
            report += f"Conversation focus: {user_summary.get('conversation_focus', 'Not specified')}\n"
            report += f"Scoring method: {user_summary.get('scoring_method', 'multi_factor_weighted')}\n"
        
        report += f"\nSCORING FRAMEWORK WEIGHTS\n"
        report += f"========================\n"
        for factor_name, factor_info in self.scoring_factors.items():
            report += f"• {factor_name.title()}: {factor_info['weight']:.0%} - {factor_info['description']}\n"
        
        return report

In [4]:
def analyze_single_conversation_llm(file_path: str, api_key: str = None, analysis_model: str = "gpt-4o-mini", detection_threshold: float = 0.4) -> Dict[str, Any]:
    """
    Run the full analysis pipeline on one conversation file.

    A fresh ``LLMChatAnalyzer`` analyzes the file, the detailed report is
    printed, the results are exported and a completion banner is printed.
    Any exception raised along the way is reported on stdout instead of
    being propagated.

    Args:
        file_path (str): Path to the conversation JSON file
        api_key (str): OpenAI API key, forwarded to ``LLMChatAnalyzer`` (optional)
        analysis_model (str): OpenAI chat model to use for analysis
        detection_threshold (float): Confidence threshold for concept detection (0.0-1.0)
                                   Higher values = more conservative detection

    Returns:
        Dict[str, Any]: Complete analysis results, or ``None`` when any step failed
    """
    rule = "=" * 70

    try:
        analyzer = LLMChatAnalyzer(api_key=api_key, analysis_model=analysis_model)
        results = analyzer.comprehensive_analysis(file_path, detection_threshold=detection_threshold)

        # Show the human-readable report, then persist the raw results
        print(analyzer.create_detailed_report(results))
        analyzer.export_results(results)

        completion_banner = (
            "\n" + rule,
            "✅ Experimental Context-Aware Multi-Factor LLM Analysis completed successfully!",
            f"🧠 Model used: {analysis_model}",
            "💾 Results exported to Result folder with subject numbering",
            "📊 Includes experimental context-aware multi-factor confidence scoring",
            "🔍 Distinguishes authentic user engagement from data-enhanced assistant analysis",
            "🎯 Each concept scored with participant-appropriate criteria",
            rule,
        )
        for banner_line in completion_banner:
            print(banner_line)

        return results

    except Exception as e:
        print(f"❌ Error during experimental context-aware LLM analysis: {e}")
        return None

In [24]:
if __name__ == "__main__":
    # Startup banner describing the scoring framework used by the analyzer
    print("🧠 Experimental Context-Aware Multi-Factor LLM-Based GPT Chat Analyzer Ready!")
    print("📊 This version distinguishes authentic user engagement from data-enhanced assistant analysis")
    print("🔑 Make sure you have a .env file with your OpenAI API key")
    print("💰 Note: Experimental context-aware LLM analysis provides systematic confidence scores")
    # A single backslash so that a blank line is printed; the doubled backslash
    # printed the two characters "\n" literally in front of the heading.
    print("\n📋 Experimental Context-Aware Scoring Framework:")
    print("   • USER: Authentic engagement (personal insights, constraints, genuine questions)")
    print("   • ASSISTANT: Data analysis quality (synthesis, domain expertise, practical guidance)")
    print("   • Explicitness (30%): Appropriate terminology for participant role")
    print("   • Depth (25%): Authentic engagement vs data analysis quality")
    print("   • Consideration (25%): Role-appropriate conversation focus")
    print("   • Evidence (20%): Personal examples vs quantitative analysis")
    
    # Analyze one sample conversation. This call is active (not commented out);
    # the path is relative to the notebook's working directory.
    results = analyze_single_conversation_llm("./Data/020/EntireConversation_extracted.json", detection_threshold=0.4)

🧠 Experimental Context-Aware Multi-Factor LLM-Based GPT Chat Analyzer Ready!
📊 This version distinguishes authentic user engagement from data-enhanced assistant analysis
🔑 Make sure you have a .env file with your OpenAI API key
💰 Note: Experimental context-aware LLM analysis provides systematic confidence scores
\n📋 Experimental Context-Aware Scoring Framework:
   • USER: Authentic engagement (personal insights, constraints, genuine questions)
   • ASSISTANT: Data analysis quality (synthesis, domain expertise, practical guidance)
   • Explicitness (30%): Appropriate terminology for participant role
   • Depth (25%): Authentic engagement vs data analysis quality
   • Consideration (25%): Role-appropriate conversation focus
   • Evidence (20%): Personal examples vs quantitative analysis
🧠 Initialized with OpenAI chat model: gpt-4o-mini
🔑 API key loaded successfully from parameter
📊 Using experimental context-aware multi-factor confidence scoring framework
📖 Loading conversation from: ./D

In [25]:
def analyze_all_conversations_llm(
    data_folder: str = "./Data", 
    api_key: str = None, 
    analysis_model: str = "gpt-4o-mini", 
    detection_threshold: float = 0.4,
    file_pattern: str = "EntireConversation_extracted.json",
    max_files: int = None,
    resume_from: str = None,
    start_subject: int = None,
    end_subject: int = None
) -> Dict[str, Any]:
    """
    Batch process all conversation files using experimental context-aware LLM analysis.

    Files matching ``<data_folder>/*/<file_pattern>`` are processed in sorted
    order. The selection is narrowed in this order: subject range, then
    ``resume_from``, then ``max_files``. A failure on one file is recorded and
    processing continues with the next file.

    Args:
        data_folder (str): Root folder containing subject directories
        api_key (str): OpenAI API key, forwarded to ``LLMChatAnalyzer`` (optional)
        analysis_model (str): OpenAI chat model to use for analysis
        detection_threshold (float): Confidence threshold for concept detection (0.0-1.0)
        file_pattern (str): Name pattern of conversation files to process
        max_files (int): Maximum number of files to process (for testing);
                         ``None`` or 0 means no limit
        resume_from (str): Subject folder name to resume from (useful for interrupted runs)
        start_subject (int): Starting subject number (e.g., 20 to start from subject 020)
        end_subject (int): Ending subject number (e.g., 85 to end at subject 085)

    Returns:
        Dict[str, Any]: Batch processing results with aggregated statistics, or
                        ``None`` when no file is selected or the analyzer
                        cannot be initialized
    """
    # glob is only needed here; the notebook's import cell does not provide it
    import glob

    range_specified = start_subject is not None or end_subject is not None
    # Explicit None checks so that a start of 0 is not displayed as 1
    range_start_label = start_subject if start_subject is not None else 1
    range_end_label = end_subject if end_subject is not None else 'end'

    print("🚀 Starting Batch Processing of All Conversation Files")
    print(f"📂 Data folder: {data_folder}")
    print(f"🧠 Analysis model: {analysis_model}")
    print(f"🎯 Detection threshold: {detection_threshold}")
    print(f"📄 File pattern: {file_pattern}")
    if range_specified:
        print(f"📊 Subject range: {range_start_label} to {range_end_label}")
    print("="*70)

    # Find all conversation files
    search_pattern = os.path.join(data_folder, "*", file_pattern)
    conversation_files = glob.glob(search_pattern)
    conversation_files.sort()  # Process in order

    if not conversation_files:
        print(f"❌ No conversation files found matching pattern: {search_pattern}")
        return None

    print(f"📋 Found {len(conversation_files)} conversation files total")

    # Filter by subject number range if specified
    if range_specified:
        filtered_files = []
        for file_path in conversation_files:
            subject_folder = os.path.basename(os.path.dirname(file_path))
            if not subject_folder.isdigit():
                print(f"⚠️ Skipping non-numeric subject folder: {subject_folder}")
                continue

            try:
                # int() already ignores leading zeros ("016" -> 16). Stripping
                # them by hand turned "000" into "" and made subject 0 unparseable.
                subject_num = int(subject_folder)
            except ValueError:
                # isdigit() accepts a few characters int() rejects (e.g. superscripts)
                print(f"⚠️ Could not parse subject number from folder: {subject_folder}")
                continue

            # Check if subject number is in range
            if start_subject is not None and subject_num < start_subject:
                continue
            if end_subject is not None and subject_num > end_subject:
                continue

            filtered_files.append(file_path)

        conversation_files = filtered_files
        print(f"🎯 Filtered to {len(conversation_files)} files in subject range {range_start_label}-{range_end_label}")

    if not conversation_files:
        print(f"❌ No conversation files found in specified range")
        return None

    # Resume functionality. This runs before the max_files limit: limiting first
    # could cut the resume subject out of the list, which silently restarted the
    # run from the first file.
    if resume_from:
        start_index = None
        for i, file_path in enumerate(conversation_files):
            if os.path.basename(os.path.dirname(file_path)) == resume_from:
                start_index = i
                break
        if start_index is None:
            print(f"⚠️ Resume subject {resume_from} not found; processing all selected files")
            start_index = 0
        conversation_files = conversation_files[start_index:]
        print(f"🔄 Resuming from subject {resume_from} ({len(conversation_files)} files remaining)")

    # Apply max_files limit for testing
    if max_files:
        conversation_files = conversation_files[:max_files]
        print(f"🔬 Processing first {len(conversation_files)} files for testing")

    # Initialize batch tracking
    batch_results = {
        "processing_summary": {
            "total_files": len(conversation_files),
            "successful_analyses": 0,
            "failed_analyses": 0,
            "skipped_files": 0,
            "start_time": datetime.now().isoformat(),
            "analysis_model": analysis_model,
            "detection_threshold": detection_threshold,
            "subject_range": {
                "start_subject": start_subject,
                "end_subject": end_subject,
                "range_specified": range_specified
            }
        },
        "individual_results": {},
        "failed_files": {},
        "aggregated_statistics": {},
        "concept_frequency": {},
        "subject_summaries": []
    }
    summary = batch_results["processing_summary"]

    # Initialize analyzer once for the batch
    try:
        analyzer = LLMChatAnalyzer(api_key=api_key, analysis_model=analysis_model)
        print(f"✅ LLM Analyzer initialized successfully")
    except Exception as e:
        print(f"❌ Failed to initialize LLM Analyzer: {e}")
        return None

    # Process each conversation file
    for file_index, file_path in enumerate(conversation_files, 1):
        subject_folder = os.path.basename(os.path.dirname(file_path))

        print(f"\n{'='*50}")
        print(f"📁 Processing {file_index}/{len(conversation_files)}: Subject {subject_folder}")
        print(f"📄 File: {file_path}")

        if not os.path.exists(file_path):
            # The file can disappear between the glob and this point
            print(f"⚠️ File not found, skipping: {file_path}")
            summary["skipped_files"] += 1
        else:
            try:
                print(f"🧠 Starting experimental context-aware analysis...")
                start_time = time.time()

                # Call the individual file analysis
                individual_results = analyzer.comprehensive_analysis(file_path, detection_threshold=detection_threshold)

                processing_time = time.time() - start_time

                if individual_results:
                    # Store results
                    subject_id = individual_results.get("subject_id", subject_folder)
                    batch_results["individual_results"][subject_id] = individual_results

                    # Export individual results
                    analyzer.export_results(individual_results)

                    # Track success
                    summary["successful_analyses"] += 1

                    # Collect summary for aggregation
                    summary_data = {
                        "subject_id": subject_id,
                        "processing_time": processing_time,
                        "user_concepts_detected": sum(1 for concept_data in individual_results.get("user_concepts", {}).values()
                                                    if concept_data.get("detected", False)),
                        "gpt_concepts_detected": sum(1 for concept_data in individual_results.get("gpt_concepts", {}).values()
                                                   if concept_data.get("detected", False)),
                        "total_turns": individual_results.get("conversation_metadata", {}).get("total_turns", 0),
                        "total_words": individual_results.get("conversation_metadata", {}).get("total_words", 0),
                        "user_prompts": individual_results.get("user_engagement", {}).get("total_prompts", 0)
                    }
                    batch_results["subject_summaries"].append(summary_data)

                    print(f"✅ Subject {subject_id} analyzed successfully in {processing_time:.1f}s")
                    print(f"   User concepts: {summary_data['user_concepts_detected']}, GPT concepts: {summary_data['gpt_concepts_detected']}")

                else:
                    print(f"⚠️ Analysis returned empty results for {subject_folder}")
                    summary["failed_analyses"] += 1

            except Exception as e:
                # Record the failure and carry on with the next file
                print(f"❌ Error analyzing {subject_folder}: {str(e)}")
                batch_results["failed_files"][subject_folder] = str(e)
                summary["failed_analyses"] += 1

        # Progress update every 10 files. It sits outside the branches above so
        # that a failed or skipped 10th file still triggers the report.
        if file_index % 10 == 0:
            success_rate = (summary["successful_analyses"] / file_index) * 100
            print(f"\n📊 Progress Update: {file_index}/{len(conversation_files)} files processed")
            print(f"   Success rate: {success_rate:.1f}%")
            print(f"   Successful: {summary['successful_analyses']}")
            print(f"   Failed: {summary['failed_analyses']}")

    # Calculate final aggregated statistics
    print(f"\n{'='*70}")
    print("📊 Calculating aggregated statistics...")

    batch_results["aggregated_statistics"] = calculate_batch_statistics(batch_results)
    batch_results["concept_frequency"] = calculate_concept_frequency(batch_results)
    summary["end_time"] = datetime.now().isoformat()
    summary["total_processing_time"] = sum(s["processing_time"] for s in batch_results["subject_summaries"])

    # Export batch results
    export_batch_results(batch_results)

    # Print final summary
    print_batch_summary(batch_results)

    return batch_results


def calculate_batch_statistics(batch_results: Dict[str, Any]) -> Dict[str, Any]:
    """
    Aggregate the per-subject summaries of a batch run into overall statistics.

    Args:
        batch_results (Dict[str, Any]): Batch container; only its
            "subject_summaries" list is read. Each summary must provide
            "total_turns", "total_words", "user_prompts", "processing_time",
            "user_concepts_detected" and "gpt_concepts_detected".

    Returns:
        Dict[str, Any]: An empty dict when there are no summaries; otherwise a
        dict with two sections, "conversation_metrics" (means plus the number
        of conversations) and "concept_detection" (means, maxima and counts of
        conversations with at least one detected concept).
    """
    subject_rows = batch_results["subject_summaries"]

    # Without any successful analysis there is nothing to average.
    if not subject_rows:
        return {}

    def column(field: str) -> List[Any]:
        # Pull one field out of every subject summary, preserving order.
        return [row[field] for row in subject_rows]

    conversation_metrics = {
        "avg_turns": np.mean(column("total_turns")),
        "avg_words": np.mean(column("total_words")),
        "avg_user_prompts": np.mean(column("user_prompts")),
        "avg_processing_time": np.mean(column("processing_time")),
        "total_conversations": len(subject_rows),
    }

    user_hits = column("user_concepts_detected")
    gpt_hits = column("gpt_concepts_detected")

    concept_detection = {
        "avg_user_concepts": np.mean(user_hits),
        "avg_gpt_concepts": np.mean(gpt_hits),
        "max_user_concepts": max(user_hits),
        "max_gpt_concepts": max(gpt_hits),
        # Number of conversations in which at least one concept was detected.
        "users_with_concepts": len([hits for hits in user_hits if hits > 0]),
        "conversations_with_gpt_concepts": len([hits for hits in gpt_hits if hits > 0]),
    }

    return {
        "conversation_metrics": conversation_metrics,
        "concept_detection": concept_detection,
    }


def calculate_concept_frequency(batch_results: Dict[str, Any]) -> Dict[str, Any]:
    """
    Compute how often each concept was detected across the analysed conversations.

    Args:
        batch_results (Dict[str, Any]): Batch container; only its
            "individual_results" mapping (subject id -> analysis results) is
            read. Each result may hold "user_concepts" and "gpt_concepts"
            mappings of concept name -> dict with a "detected" flag.

    Returns:
        Dict[str, Any]: Four categories ("user_energy_concepts",
        "user_interaction_patterns", "gpt_energy_concepts",
        "gpt_interaction_patterns"), each mapping a concept name to the
        percentage of conversations in which it was detected. Concepts whose
        name starts with neither "energy_" nor "interaction_" are ignored.
    """
    per_subject = batch_results["individual_results"]
    n_conversations = len(per_subject)

    # All four categories are present in the result even if nothing was detected.
    tallies = {
        "user_energy_concepts": {},
        "user_interaction_patterns": {},
        "gpt_energy_concepts": {},
        "gpt_interaction_patterns": {},
    }

    # (key inside a subject's results, category prefix) for each speaker.
    speakers = (("user_concepts", "user"), ("gpt_concepts", "gpt"))

    for subject_results in per_subject.values():
        for results_key, speaker in speakers:
            for name, info in subject_results.get(results_key, {}).items():
                if not info.get("detected", False):
                    continue

                if name.startswith("energy_"):
                    bucket = tallies[f"{speaker}_energy_concepts"]
                elif name.startswith("interaction_"):
                    bucket = tallies[f"{speaker}_interaction_patterns"]
                else:
                    # Unknown concept family: not part of the frequency report.
                    continue

                bucket[name] = bucket.get(name, 0) + 1

    # Turn raw counts into percentages of all analysed conversations.
    return {
        category: {
            name: (hits / n_conversations) * 100
            for name, hits in bucket.items()
        }
        for category, bucket in tallies.items()
    }


def export_batch_results(batch_results: Dict[str, Any], output_folder: str = "Result"):
    """
    Export batch processing results to files.

    Three files are written into ``<output_folder>/SCALE`` (the directory is
    created if needed):

    - ``batch_analysis_results.json``: the full ``batch_results`` structure;
      values that are not JSON-serialisable are written via ``str()``.
    - ``batch_summary.csv``: one row per entry of
      ``batch_results["subject_summaries"]``.
    - ``concept_frequencies.csv``: one row per (category, concept) pair of
      ``batch_results["concept_frequency"]``.

    The frequencies CSV is always written, with only its header row when no
    concept was detected. This keeps the reported path valid and prevents a
    file left over from a previous run from being mistaken for current output.

    Args:
        batch_results (Dict[str, Any]): Batch container; must provide the
            "subject_summaries" and "concept_frequency" keys.
        output_folder (str): Root folder under which the "SCALE" sub-folder is
            created. Defaults to "Result".
    """
    scale_dir = os.path.join(output_folder, "SCALE")
    os.makedirs(scale_dir, exist_ok=True)

    # Export main batch results
    batch_file = os.path.join(scale_dir, "batch_analysis_results.json")
    with open(batch_file, 'w', encoding='utf-8') as f:
        json.dump(batch_results, f, indent=2, default=str)

    # Export summary CSV for easy analysis
    summary_file = os.path.join(scale_dir, "batch_summary.csv")
    pd.DataFrame(batch_results["subject_summaries"]).to_csv(summary_file, index=False)

    # Export concept frequency CSV (flattened to one row per concept)
    freq_file = os.path.join(scale_dir, "concept_frequencies.csv")
    freq_columns = [
        "category",
        "concept",
        "frequency_percent",
        "participant_type",
        "concept_type",
    ]
    freq_data = [
        {
            "category": category,
            "concept": concept,
            "frequency_percent": freq,
            # Category names are "<speaker>_<family>_..."; derive both parts from them.
            "participant_type": "user" if "user_" in category else "gpt",
            "concept_type": "energy" if "energy" in category else "interaction",
        }
        for category, frequencies in batch_results["concept_frequency"].items()
        for concept, freq in frequencies.items()
    ]
    # Explicit columns so an empty run still yields a well-formed, header-only CSV.
    pd.DataFrame(freq_data, columns=freq_columns).to_csv(freq_file, index=False)

    print(f"💾 Batch results exported:")
    print(f"   📄 Main results: {batch_file}")
    print(f"   📊 Summary CSV: {summary_file}")
    print(f"   📈 Frequencies CSV: {freq_file}")


def print_batch_summary(batch_results: Dict[str, Any]):
    """
    Print a human-readable report of a finished batch run to stdout.

    The report contains the processing counts, the subject range (only when one
    was specified), the success rate (only when at least one analysis
    succeeded), the aggregated conversation and concept statistics (only when
    present) and the list of failed files (only when there are any).

    Args:
        batch_results (Dict[str, Any]): Batch container. "processing_summary"
            is required and must hold the "total_files", "successful_analyses",
            "failed_analyses" and "skipped_files" counts;
            "aggregated_statistics" and "failed_files" are optional.
    """
    banner = "=" * 70

    run_info = batch_results["processing_summary"]
    aggregated = batch_results.get("aggregated_statistics", {})
    range_info = run_info.get("subject_range", {})

    print(f"\n{banner}")
    print("🎉 BATCH PROCESSING COMPLETED!")
    print(banner)

    print("📊 PROCESSING SUMMARY:")
    count_lines = (
        ("Total files found", "total_files"),
        ("Successfully analyzed", "successful_analyses"),
        ("Failed analyses", "failed_analyses"),
        ("Skipped files", "skipped_files"),
    )
    for label, key in count_lines:
        print(f"   {label}: {run_info[key]}")

    # Show subject range if specified
    if range_info.get("range_specified", False):
        first = range_info.get("start_subject", "start")
        last = range_info.get("end_subject", "end")
        print(f"   Subject range processed: {first} to {last}")

    succeeded = run_info['successful_analyses']
    if succeeded > 0:
        rate = (succeeded / run_info['total_files']) * 100
        print(f"   Success rate: {rate:.1f}%")

    if aggregated:
        conv = aggregated.get("conversation_metrics", {})
        concepts = aggregated.get("concept_detection", {})
        n_conversations = conv.get('total_conversations', 0)

        print("\n📈 CONVERSATION STATISTICS:")
        print(f"   Average turns per conversation: {conv.get('avg_turns', 0):.1f}")
        print(f"   Average words per conversation: {conv.get('avg_words', 0):.0f}")
        print(f"   Average user prompts: {conv.get('avg_user_prompts', 0):.1f}")
        print(f"   Average processing time: {conv.get('avg_processing_time', 0):.1f}s")

        print("\n🎯 CONCEPT DETECTION STATISTICS:")
        print(f"   Average user concepts detected: {concepts.get('avg_user_concepts', 0):.1f}")
        print(f"   Average GPT concepts detected: {concepts.get('avg_gpt_concepts', 0):.1f}")
        print(f"   Users with detected concepts: {concepts.get('users_with_concepts', 0)}/{n_conversations}")
        print(f"   Conversations with GPT concepts: {concepts.get('conversations_with_gpt_concepts', 0)}/{n_conversations}")

    failures = batch_results.get("failed_files")
    if failures:
        print("\n⚠️ FAILED FILES:")
        for subject, error in failures.items():
            print(f"   {subject}: {error}")

    # The export location below is printed verbatim; it is not derived from the run.
    print("\n💾 Results exported to Result/SCALE/ folder")
    print("🔍 Individual analysis reports available for each subject")
    print("📊 Aggregated statistics available in CSV format")
    print(banner)


# Convenience function for quick batch processing
def run_batch_analysis(
    data_folder: str = "./Data",
    detection_threshold: float = 0.4,
    analysis_model: str = "gpt-4o-mini",
    max_files: int = None,
    start_subject: int = None,
    end_subject: int = None
):
    """
    Announce the chosen settings and run the batch analysis with them.

    This is a thin wrapper: after printing the configuration it forwards every
    argument unchanged to analyze_all_conversations_llm and returns that
    function's result.

    Args:
        data_folder (str): Path to data folder containing subject directories
        detection_threshold (float): Confidence threshold for concept detection
        analysis_model (str): OpenAI model to use
        max_files (int): Limit number of files for testing (None = process all)
        start_subject (int): Starting subject number (e.g., 20 to start from subject 020)
        end_subject (int): Ending subject number (e.g., 85 to end at subject 085)

    Returns:
        Whatever analyze_all_conversations_llm returns for these settings.
    """
    range_requested = not (start_subject is None and end_subject is None)

    print("🚀 Starting Quick Batch Analysis")
    print(f"📂 Data folder: {data_folder}")
    print(f"🎯 Detection threshold: {detection_threshold}")
    print(f"🧠 Analysis model: {analysis_model}")

    # A falsy limit (None or 0) means no testing-mode notice is shown.
    if max_files:
        print(f"🔬 Testing mode: Processing only {max_files} files")

    if range_requested:
        # Open-ended bounds are displayed as 1 / 'end'; the values passed on stay untouched.
        shown_start = start_subject or 1
        shown_end = end_subject or 'end'
        print(f"📊 Subject range: {shown_start} to {shown_end}")

    return analyze_all_conversations_llm(
        data_folder=data_folder,
        detection_threshold=detection_threshold,
        analysis_model=analysis_model,
        max_files=max_files,
        start_subject=start_subject,
        end_subject=end_subject
    )


def process_subject_range(
    start_subject: int,
    end_subject: int,
    data_folder: str = "./Data",
    detection_threshold: float = 0.4,
    analysis_model: str = "gpt-4o-mini"
):
    """
    Run the batch analysis for one contiguous range of subject numbers.

    Prints the requested range, delegates to analyze_all_conversations_llm and,
    when that call returns a non-empty result, reports how many subject
    summaries it contains.

    Args:
        start_subject (int): Starting subject number (e.g., 20 for subject 020)
        end_subject (int): Ending subject number (e.g., 85 for subject 085)
        data_folder (str): Path to data folder containing subject directories
        detection_threshold (float): Confidence threshold for concept detection
        analysis_model (str): OpenAI model to use

    Returns:
        Whatever analyze_all_conversations_llm returns for this range.

    Example:
        # Process subjects 20 through 85
        results = process_subject_range(20, 85)

        # Process subjects 1 through 30
        results = process_subject_range(1, 30)
    """
    print(f"🎯 Processing Subject Range: {start_subject} to {end_subject}")

    # Zero-padded three-digit labels, as used in the folder hint below.
    first_folder = f"{start_subject:03d}"
    second_folder = f"{start_subject+1:03d}"
    last_folder = f"{end_subject:03d}"
    print(f"📂 Looking for folders like: {first_folder}, {second_folder}, ..., {last_folder}")

    batch = analyze_all_conversations_llm(
        data_folder=data_folder,
        detection_threshold=detection_threshold,
        analysis_model=analysis_model,
        start_subject=start_subject,
        end_subject=end_subject
    )

    if batch:
        n_processed = len(batch["subject_summaries"])
        print(f"\n✅ Successfully processed {n_processed} subjects in range {start_subject}-{end_subject}")

    return batch

In [32]:
# Run the batch analysis for subjects 66 through 85 only.
# data_folder and detection_threshold are spelled out for clarity but equal the
# run_batch_analysis defaults; analysis_model and max_files keep their defaults.
results = run_batch_analysis(
    data_folder="./Data",
    start_subject=66,
    end_subject=85,
    detection_threshold=0.4
)

🚀 Starting Quick Batch Analysis
📂 Data folder: ./Data
🎯 Detection threshold: 0.4
🧠 Analysis model: gpt-4o-mini
📊 Subject range: 66 to 85
🚀 Starting Batch Processing of All Conversation Files
📂 Data folder: ./Data
🧠 Analysis model: gpt-4o-mini
🎯 Detection threshold: 0.4
📄 File pattern: EntireConversation_extracted.json
📊 Subject range: 66 to 85
📋 Found 85 conversation files total
🎯 Filtered to 20 files in subject range 66-85
🧠 Initialized with OpenAI chat model: gpt-4o-mini
🔑 API key loaded successfully from parameter
📊 Using experimental context-aware multi-factor confidence scoring framework
✅ LLM Analyzer initialized successfully

📁 Processing 1/20: Subject 066
📄 File: ./Data/066/EntireConversation_extracted.json
🧠 Starting experimental context-aware analysis...
📖 Loading conversation from: ./Data/066/EntireConversation_extracted.json
🧠 Analyzing user concepts (authentic engagement focus)...
🧠 Analyzing concepts with gpt-4o-mini (experimental context-aware)...
🧠 Analyzing GPT concept