In [None]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random

# Seed the stdlib and NumPy generators with the same fixed value so that every
# run of this cell regenerates an identical synthetic dataset.
random.seed(42)
np.random.seed(42)

# Semester calendar: 12 weeks of consecutive days starting Monday 2024-01-15.
start_date = datetime(2024, 1, 15)
dates = [start_date + timedelta(days=day_offset) for day_offset in range(12 * 7)]

## 1. Student Profiles
# One dict per simulated student. `academic_risk_base` is read by the grade
# simulation further down (a higher value lowers the student's grade bonus).
students = [
    dict(
        student_id=sid,
        name=full_name,
        major=major,
        year=class_year,
        academic_risk_base=academic_risk,
        wellbeing_risk_base=wellbeing_risk,
    )
    for sid, full_name, major, class_year, academic_risk, wellbeing_risk in (
        ('S001', 'Alex Johnson', 'Computer Engineering', 'Sophomore', 0.2, 0.3),
        ('S002', 'Maria Garcia', 'Mechanical Engineering', 'Junior', 0.1, 0.2),
        ('S003', 'Jordan Smith', 'Psychology', 'Freshman', 0.4, 0.5),
        ('S004', 'Taylor Chen', 'Biology', 'Senior', 0.3, 0.4),
        ('S005', 'Casey Williams', 'Civil Engineering', 'Sophomore', 0.25, 0.35),
    )
]

# Tabular view of the same records (one row per student).
students_df = pd.DataFrame(students)

## 2. Academic Data - Courses and Assignments
# Course catalogue. `difficulty` scales how far the simulated base grade is
# pulled down for assignments in that course.
courses = [
    dict(course_id=code, name=title, difficulty=difficulty, department=dept)
    for code, title, difficulty, dept in (
        ('ECE2514', 'Digital Design', 0.8, 'Engineering'),
        ('MATH2214', 'Linear Algebra', 0.7, 'Math'),
        ('PHYS2305', 'Foundations of Physics', 0.75, 'Physics'),
        ('CS2114', 'Software Design', 0.8, 'Computer Science'),
        ('PSYC2004', 'Intro to Psychology', 0.5, 'Psychology'),
        ('BIOL2005', 'Cell Biology', 0.7, 'Biology'),
    )
]

# Generate academic records
# One row per (student, course, assignment). Grades start from a course/student
# baseline and fall as "stress" builds over the semester; S003 and S001 decline
# faster than the others and also accumulate extra late days.
academic_records = []
assignment_id = 1

for student in students:
    student_courses = random.sample(courses, 4)  # Each student takes 4 courses

    for course in student_courses:
        # Generate 3-5 assignments per course
        num_assignments = random.randint(3, 5)
        base_performance = 85 - (course['difficulty'] * 20)  # Base performance adjusted by difficulty
        student_factor = (1 - student['academic_risk_base']) * 15  # Student's capability

        for i in range(num_assignments):
            # Assignments are three weeks apart with up to a week of jitter.
            # The jitter is applied in DAYS: adding it to `weeks` (as before)
            # pushed due dates up to 19 weeks out, far beyond the 12-week semester.
            due_date = start_date + timedelta(weeks=i * 3, days=random.randint(0, 7))
            # Baseline hand-in offset relative to the due date (negative = early).
            base_offset_days = random.randint(-2, 5)

            # Create performance trends based on student risk profile
            week_progress = (due_date - start_date).days / 7
            stress_factor = max(0, min(1, week_progress / 6))  # Stress increases over semester

            if student['student_id'] == 'S003':  # Jordan - struggling student pattern
                performance_decline = stress_factor * 25
                late_submission_bias = max(0, stress_factor * 4)
            elif student['student_id'] == 'S001':  # Alex - moderate struggle
                performance_decline = stress_factor * 15
                late_submission_bias = max(0, stress_factor * 2)
            else:  # Other students - minimal decline
                performance_decline = stress_factor * 5
                late_submission_bias = 0

            grade = base_performance + student_factor - performance_decline + random.randint(-5, 5)
            grade = max(50, min(100, grade))  # Keep grades reasonable

            # Fold the stress-driven extra lateness into the hand-in offset BEFORE
            # deriving the date, so that `submission_date` and
            # `submission_delay_days` always describe the same event.
            submission_offset_days = base_offset_days + random.randint(0, int(late_submission_bias))
            submission_date = due_date + timedelta(days=submission_offset_days)
            submission_delay = max(0, submission_offset_days)  # early hand-ins count as 0 delay

            academic_records.append({
                'assignment_id': f'A{assignment_id:03d}',
                'student_id': student['student_id'],
                'course_id': course['course_id'],
                'course_name': course['name'],
                'assignment_name': f'{course["name"]} Assignment {i+1}',
                'due_date': due_date.strftime('%Y-%m-%d'),
                'submission_date': submission_date.strftime('%Y-%m-%d'),
                'grade': round(grade),
                'submission_delay_days': submission_delay,
                'difficulty_level': course['difficulty']
            })
            assignment_id += 1

academic_df = pd.DataFrame(academic_records)

## 3. Well-being Data - Daily metrics
# Simulation profile per student:
#   (sleep base, steps base, wellbeing base, sleep loss, steps loss, wellbeing loss)
# Each "loss" value is scaled by stress_factor and subtracted from its baseline,
# so it applies in full once stress_factor reaches 1.
wellbeing_profiles = {
    'S003': (6.0, 5000, 3.0, 2.5, 3000, 1.5),  # Jordan - poor wellbeing, deteriorating pattern
    'S001': (7.0, 7000, 3.5, 1.5, 1500, 0.8),  # Alex - declining wellbeing, moderate decline
}
stable_wellbeing_profile = (7.5, 8000, 4.0, 0.5, 500, 0.2)  # everyone else

# (low, high) ranges for the daily sleep / steps / wellbeing modifiers, in that order.
weekend_modifier_ranges = ((0.5, 2.0), (1.2, 2.0), (0.1, 0.5))
weekday_modifier_ranges = ((-1.0, 0.5), (0.8, 1.2), (-0.3, 0.2))

wellbeing_data = []

for student in students:
    (sleep_base, steps_base, wellbeing_base,
     sleep_loss, steps_loss, wellbeing_loss) = wellbeing_profiles.get(
        student['student_id'], stable_wellbeing_profile)

    for day_index, date in enumerate(dates):
        week = day_index // 7
        is_weekend = date.weekday() >= 5

        # Draw the three modifiers in a fixed order (sleep, steps, wellbeing) so
        # the random stream is consumed exactly as a straight-line version would.
        modifier_ranges = weekend_modifier_ranges if is_weekend else weekday_modifier_ranges
        sleep_modifier, steps_modifier, wellbeing_modifier = [
            random.uniform(low, high) for low, high in modifier_ranges
        ]

        # Stress ramps linearly with the week index and saturates at 1 from week index 10.
        stress_factor = max(0, min(1, week / 10))

        # Baseline + day-type modifier - stress-driven loss + noise, floored at a minimum.
        sleep = max(4.0, sleep_base + sleep_modifier - stress_factor * sleep_loss + random.uniform(-0.5, 0.5))
        steps = max(1000, steps_base + (steps_modifier * 1000) - stress_factor * steps_loss + random.randint(-500, 500))
        wellbeing = max(1.0, wellbeing_base + wellbeing_modifier - stress_factor * wellbeing_loss + random.uniform(-0.3, 0.3))

        wellbeing_data.append(dict(
            student_id=student['student_id'],
            date=date.strftime('%Y-%m-%d'),
            sleep_duration=round(sleep, 1),
            step_count=int(steps),
            wellbeing_score=round(wellbeing, 1),
            week_of_semester=week + 1,
            day_type='Weekend' if is_weekend else 'Weekday',
        ))

wellbeing_df = pd.DataFrame(wellbeing_data)

## 4. Environmental Data - Campus engagement
# Simulation profile per student:
#   (meals base, library base, gym base, meals loss, library shift, gym loss)
# "loss" values are scaled by stress_factor and subtracted; "library shift" is
# scaled and ADDED, so a negative shift means fewer library hours under stress.
engagement_profiles = {
    'S003': (1.2, 0.8, 0.1, 0.8, -0.5, 0.2),   # Jordan - disengaging, goes to library less
    'S001': (1.8, 2.0, 0.2, 0.6, 1.0, 0.15),   # Alex - fewer meals but studies more when stressed
}
stable_engagement_profile = (2.0, 1.5, 0.3, 0.2, 0.3, 0.05)  # everyone else

environmental_data = []

for student in students:
    (meals_base, library_base, gym_base,
     meals_loss, library_shift, gym_loss) = engagement_profiles.get(
        student['student_id'], stable_engagement_profile)

    for day_index, date in enumerate(dates):
        week = day_index // 7

        # Stress ramps linearly with the week index and saturates at 1 from week index 10.
        stress_factor = max(0, min(1, week / 10))

        meals = max(0, meals_base - stress_factor * meals_loss + random.uniform(-0.3, 0.3))
        library_hours = max(0, library_base + stress_factor * library_shift + random.uniform(-0.5, 0.5))
        # A gym visit is recorded when the noisy propensity clears 0.2.
        gym_visit = int((gym_base - stress_factor * gym_loss + random.uniform(-0.1, 0.1)) > 0.2)

        # No gym on weekends for most students: a weekend visit is cancelled
        # whenever the extra draw exceeds 0.3 (the draw only happens on weekends).
        if date.weekday() >= 5:
            if random.random() > 0.3:
                gym_visit = 0

        environmental_data.append(dict(
            student_id=student['student_id'],
            date=date.strftime('%Y-%m-%d'),
            meals_on_campus=round(meals, 1),
            library_hours=round(library_hours, 1),
            gym_visit=gym_visit,
            # Mean of the three (unrounded) engagement signals.
            campus_engagement_score=round((meals + library_hours + gym_visit) / 3, 2),
        ))

environmental_df = pd.DataFrame(environmental_data)

## 5. Resources Database
# Campus support resources that recommendations can point to. Each row below
# follows the column order given in the `zip` call.
resources = [
    dict(zip(
        ('resource_id', 'name', 'type', 'description',
         'department', 'location', 'contact', 'keywords'),
        row,
    ))
    for row in (
        ('R001', 'Engineering Tutoring Center', 'Academic Support',
         'Free tutoring for engineering courses',
         'Engineering', 'Randolph Hall 204', 'engtutor@vt.edu',
         'engineering, homework, tutoring, STEM'),
        ('R002', 'Cook Counseling Center', 'Mental Health',
         'Professional counseling and mental health services',
         'Student Affairs', 'McComas Hall', 'counseling@vt.edu',
         'mental health, stress, anxiety, counseling'),
        ('R003', 'Sleep & Wellness Workshop', 'Wellness',
         'Weekly workshop on sleep hygiene and stress management',
         'Health Center', 'Squires Student Center', 'wellness@vt.edu',
         'sleep, stress, wellness, self-care'),
        ('R004', 'Math Emporium', 'Academic Support',
         'Open lab for mathematics assistance',
         'Mathematics', 'University Mall', 'mathelp@vt.edu',
         'math, homework, calculus, algebra'),
        ('R005', 'Student Success Center', 'Academic Support',
         'Study skills, time management, and academic coaching',
         'Student Affairs', 'Newman Library', 'success@vt.edu',
         'study skills, time management, academic coaching'),
    )
]

resources_df = pd.DataFrame(resources)

## 6. AI Interventions (Proactive Recommendations)
# Hand-written example interventions. `resources_recommended` holds a
# comma-separated list of resource ids from the resources table.
interventions = [
    dict(zip(
        ('intervention_id', 'student_id', 'date', 'trigger_reason',
         'recommendation', 'resources_recommended', 'student_response', 'outcome'),
        row,
    ))
    for row in (
        ('I001', 'S003', '2024-02-28',
         'Academic performance decline correlated with sleep deprivation and social isolation',
         'Attend Sleep & Wellness Workshop and connect with Engineering Tutoring',
         'R003, R001', 'Viewed', 'Pending'),
        ('I002', 'S001', '2024-03-15',
         'Increased library hours with declining meal consistency suggests academic burnout',
         'Schedule break times and meet with academic coach',
         'R005', 'Accepted', 'Scheduled appointment'),
        ('I003', 'S003', '2024-03-22',
         'Continued sleep pattern disruption affecting academic engagement',
         'Connect with Cook Counseling Center for stress management',
         'R002', 'Declined', 'Student preferred self-management'),
    )
]

interventions_df = pd.DataFrame(interventions)

## Save all datasets to CSV files
# Each frame is written to the current working directory without its index.
for csv_name, frame in (
    ('students.csv', students_df),
    ('academic_data.csv', academic_df),
    ('wellbeing_data.csv', wellbeing_df),
    ('environmental_data.csv', environmental_df),
    ('resources.csv', resources_df),
    ('interventions.csv', interventions_df),
):
    frame.to_csv(csv_name, index=False)

# Summary: one line per dataset with its row count.
print("✅ Synthetic dataset generated successfully!")
for label, frame in (
    ("📊 Students", students_df),
    ("📚 Academic records", academic_df),
    ("😴 Well-being records", wellbeing_df),
    ("🏫 Environmental records", environmental_df),
    ("🛟 Resources", resources_df),
    ("💡 Interventions", interventions_df),
):
    print(f"{label}: {len(frame)}")

#### 1. Real Predictive Model (Replaces Rule-Based Detection)


In [None]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

class AIWellbeingPredictor:
    """Random-forest classifier that flags at-risk students.

    Each student is reduced to one row of seven aggregate features (see
    ``FEATURE_NAMES``) built from the academic, well-being and environmental
    frames. Labels come from the student's mean grade.
    """

    # Column order of the matrix returned by prepare_features(). Shared by
    # train() and explain_prediction() so the two cannot drift apart.
    FEATURE_NAMES = [
        'grade_trend', 'sleep_consistency', 'engagement_trend',
        'current_performance', 'sleep_quality', 'study_habits', 'late_submissions'
    ]

    def __init__(self):
        self.model = RandomForestClassifier(n_estimators=100, random_state=42)
        self.scaler = StandardScaler()
        # Populated by train(): {feature name: importance}.
        self.feature_importance = None

    def prepare_features(self, academic_df, wellbeing_df, environmental_df):
        """Create ML-ready features from raw data.

        Args:
            academic_df: frame with ``student_id``, ``grade`` and
                ``submission_delay_days`` columns (``due_date`` is used for
                ordering when present).
            wellbeing_df: frame with ``student_id`` and ``sleep_duration``.
            environmental_df: frame with ``student_id``,
                ``campus_engagement_score`` and ``library_hours``.

        Returns:
            ``np.ndarray`` with one row per student, in the order the ids first
            appear in ``academic_df``, and one column per ``FEATURE_NAMES`` entry.
        """
        features = []

        for student_id in academic_df['student_id'].unique():
            student_academic = academic_df[academic_df['student_id'] == student_id]
            student_wellbeing = wellbeing_df[wellbeing_df['student_id'] == student_id]
            student_env = environmental_df[environmental_df['student_id'] == student_id]

            # A trend only means something over time: order the assignments
            # chronologically instead of relying on row order (which may be
            # grouped by course). Stable sort keeps same-day rows in input order.
            if 'due_date' in student_academic.columns:
                student_academic = student_academic.sort_values('due_date', kind='stable')

            # Slope of a straight-line fit through the grades. A single
            # assignment has no trend, and fitting a line to one point is
            # degenerate, so report a flat 0.0 in that case.
            if len(student_academic) >= 2:
                grade_trend = np.polyfit(range(len(student_academic)),
                                         student_academic['grade'], 1)[0]
            else:
                grade_trend = 0.0
            sleep_consistency = student_wellbeing['sleep_duration'].std()
            engagement_trend = student_env['campus_engagement_score'].mean()

            # Order must match FEATURE_NAMES.
            feature_vector = [
                grade_trend,  # Academic trend
                sleep_consistency,  # Sleep stability
                engagement_trend,  # Social engagement
                student_academic['grade'].mean(),  # Current performance
                student_wellbeing['sleep_duration'].mean(),  # Sleep quality
                student_env['library_hours'].mean(),  # Study habits
                int((student_academic['submission_delay_days'] > 0).sum())  # Late submissions
            ]

            features.append(feature_vector)

        return np.array(features)

    def create_labels(self, academic_df, threshold=70):
        """Create target labels (at-risk vs not-at-risk).

        Returns an array of 0/1 in the same student order as
        prepare_features(); 1 means the student's mean grade is below
        ``threshold``.
        """
        labels = []
        for student_id in academic_df['student_id'].unique():
            student_grades = academic_df[academic_df['student_id'] == student_id]['grade']
            labels.append(1 if student_grades.mean() < threshold else 0)
        return np.array(labels)

    def train(self, academic_df, wellbeing_df, environmental_df):
        """Train the model on student data and print train/test accuracy.

        Returns ``self`` so calls can be chained.
        """
        X = self.prepare_features(academic_df, wellbeing_df, environmental_df)
        y = self.create_labels(academic_df)

        # Split first, then fit the scaler on the training rows only so the
        # held-out rows do not leak into the scaling statistics.
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
        X_train_scaled = self.scaler.fit_transform(X_train)

        self.model.fit(X_train_scaled, y_train)

        # Store feature importance for explainability
        self.feature_importance = dict(zip(self.FEATURE_NAMES, self.model.feature_importances_))

        train_score = self.model.score(X_train_scaled, y_train)
        test_score = self.model.score(self.scaler.transform(X_test), y_test)

        print(f"✅ Model trained! Train accuracy: {train_score:.3f}, Test accuracy: {test_score:.3f}")
        return self

    def predict_risk(self, student_data):
        """Predict risk for every student in ``student_data``.

        Args:
            student_data: ``(academic_df, wellbeing_df, environmental_df)``,
                unpacked into prepare_features().

        Returns:
            ``(predictions, probabilities)`` from the fitted model.
        """
        features = self.prepare_features(*student_data)
        features_scaled = self.scaler.transform(features)

        predictions = self.model.predict(features_scaled)
        probabilities = self.model.predict_proba(features_scaled)

        return predictions, probabilities

    def explain_prediction(self, student_features):
        """Describe which features of the first row fall outside +/-0.5.

        Args:
            student_features: 2-D sequence; only row 0 is inspected.
                NOTE(review): the +/-0.5 thresholds presumably expect scaled
                features - confirm with callers.

        Returns:
            Phrases joined by " and ", or a default sentence when every value
            is within the thresholds.
        """
        # Iterate over the feature names directly: the explanation does not use
        # the importance values, so it must not depend on train() having run.
        contributions = []
        for name, value in zip(self.FEATURE_NAMES, student_features[0]):
            label = name.replace('_', ' ')
            if value < -0.5:  # Negative trend threshold
                contributions.append(f"{label} is declining")
            elif value > 0.5:  # Positive trend
                contributions.append(f"{label} is improving")

        return " and ".join(contributions) if contributions else "Patterns are within normal range"

2.1 Create the Causal Inference Engine (Simplified)


In [None]:
class HokieWellAgent:
    """Turns detected risk patterns into a supportive message plus campus resources.

    Args:
        resources_df: resource table with at least a ``resource_id`` column.
        students_df: optional roster with ``student_id`` and ``name`` columns
            (a DataFrame or a list of dicts). When omitted, the notebook-level
            ``students`` object is used.
        pattern_detector: optional callable ``student_id -> list of pattern
            names``. When omitted, the notebook-level
            ``detect_student_patterns`` function is used.
    """

    def __init__(self, resources_df, students_df=None, pattern_detector=None):
        self.resources = resources_df
        self.students = students_df
        self.pattern_detector = pattern_detector
        # Canned response per pattern: message, resource ids and follow-up question.
        self.pattern_responses = {
            'academic_burnout': {
                'message': "I notice you've been working really hard and your sleep has been affected. Balancing coursework can be challenging.",
                'resources': ['R001', 'R005'],  # Tutoring + Success Center
                'action': "Would you like me to schedule a tutoring session or a study skills workshop?"
            },
            'social_isolation': {
                'message': "It looks like you might be feeling a bit disconnected. Campus engagement can really help with overall well-being.",
                'resources': ['R003', 'R002'],  # Wellness + Counseling
                'action': "There's a wellness workshop this week, or we could explore some student clubs?"
            },
            'high_risk': {
                'message': "I'm seeing several areas where you might need some support. Your well-being is really important.",
                'resources': ['R002', 'R001', 'R003'],  # Counseling + Tutoring + Wellness
                'action': "I strongly recommend connecting with Cook Counseling Center. Would you like me to help you schedule an appointment?"
            }
        }

    def _student_name(self, student_id):
        """Look up a student's display name; raise ValueError for an unknown id."""
        roster = self.students if self.students is not None else students
        # The notebook defines `students` as a list of dicts, which cannot be
        # filtered with a boolean mask - normalise to a DataFrame first.
        if not isinstance(roster, pd.DataFrame):
            roster = pd.DataFrame(roster)
        matches = roster.loc[roster['student_id'] == student_id, 'name']
        if matches.empty:
            raise ValueError(f"Unknown student_id: {student_id!r}")
        return matches.iloc[0]

    def analyze_student(self, student_id):
        """Return a status dict for one student.

        Returns:
            ``{'status': 'healthy', 'message': ...}`` when no pattern is
            detected, otherwise a dict with status ``'intervention_needed'``,
            the student's name, the primary pattern, its message/action and
            the matching resource rows as records.

        Raises:
            ValueError: ``student_id`` is not in the roster.
            KeyError: the primary pattern has no entry in ``pattern_responses``.
        """
        detector = self.pattern_detector or detect_student_patterns
        patterns = detector(student_id)
        student_name = self._student_name(student_id)

        if not patterns:
            return {
                'status': 'healthy',
                'message': f"Hi {student_name}! You're doing great. Keep up the good balance!"
            }

        # The first pattern is treated as the primary one.
        # NOTE(review): this assumes the detector returns patterns ordered by
        # severity - confirm against the detector implementation.
        primary_pattern = patterns[0]
        response = self.pattern_responses[primary_pattern]

        # Resource rows come back in resource-table order, not in the order
        # listed in the response.
        resource_details = self.resources[self.resources['resource_id'].isin(response['resources'])]

        return {
            'status': 'intervention_needed',
            'student_name': student_name,
            'pattern': primary_pattern,
            'message': response['message'],
            'action': response['action'],
            'resources': resource_details.to_dict('records')
        }

In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor

class CausalInferenceEngine:
    """Heuristic scorer that ranks candidate interventions for a student.

    NOTE(review): infer_root_cause() calls extract_causal_features(),
    estimate_sleep_effect() and identify_likely_cause(), none of which are
    defined in this class as shown; it raises AttributeError until they exist.
    """

    def __init__(self):
        # Not read by any method visible in this class.
        self.causal_models = dict()

    def infer_root_cause(self, student_data, historical_data):
        """Estimate likely causes using a supported model.

        Scores each intervention from the extracted features and recommends
        the highest-scoring one (ties go to the earliest entry).
        ``historical_data`` is accepted but not used.
        """
        features = self.extract_causal_features(student_data)

        intervention_effects = {}
        intervention_effects['tutoring'] = self.estimate_tutoring_effect(features)
        intervention_effects['counseling'] = self.estimate_counseling_effect(features)
        intervention_effects['sleep_intervention'] = self.estimate_sleep_effect(features)

        best_name, best_effect = max(intervention_effects.items(), key=lambda pair: pair[1])

        result = {'likely_cause': self.identify_likely_cause(features)}
        result['recommended_intervention'] = best_name
        result['expected_impact'] = best_effect
        result['confidence'] = 0.85  # fixed value, not derived from the data
        return result

    def estimate_tutoring_effect(self, features):
        """Twice the academic deficit, capped at 0.9 and floored at 0.3."""
        capped = min(0.9, features['academic_deficit'] * 2)
        return max(0.3, capped)

    def estimate_counseling_effect(self, features):
        """1.5x the stress indicator, capped at 0.8 and floored at 0.2."""
        capped = min(0.8, features['stress_indicator'] * 1.5)
        return max(0.2, capped)

# Databricks-powered AI Agent

In [None]:
%pip install databricks-sdk mlflow

Collecting databricks-sdk
  Downloading databricks_sdk-0.67.0-py3-none-any.whl.metadata (39 kB)
Collecting mlflow
  Downloading mlflow-3.4.0-py3-none-any.whl.metadata (30 kB)
Collecting mlflow-skinny==3.4.0 (from mlflow)
  Downloading mlflow_skinny-3.4.0-py3-none-any.whl.metadata (31 kB)
Collecting mlflow-tracing==3.4.0 (from mlflow)
  Downloading mlflow_tracing-3.4.0-py3-none-any.whl.metadata (19 kB)
Collecting docker<8,>=4.0.0 (from mlflow)
  Downloading docker-7.1.0-py3-none-any.whl.metadata (3.8 kB)
Collecting fastmcp<3,>=2.0.0 (from mlflow)
  Downloading fastmcp-2.12.4-py3-none-any.whl.metadata (19 kB)
Collecting graphene<4 (from mlflow)
  Downloading graphene-3.4.3-py2.py3-none-any.whl.metadata (6.9 kB)
Collecting gunicorn<24 (from mlflow)
  Downloading gunicorn-23.0.0-py3-none-any.whl.metadata (4.4 kB)
Collecting opentelemetry-proto<3,>=1.9.0 (from mlflow-skinny==3.4.0->mlflow)
  Downloading opentelemetry_proto-1.37.0-py3-none-any.whl.metadata (2.3 kB)
Collecting cyclopts>=3.0.0

In [None]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import json
from databricks.sdk import WorkspaceClient
import mlflow
from mlflow.models import infer_signature
import warnings
warnings.filterwarnings('ignore')

class DatabricksHokieWellAgent:
    """
    Advanced AI Agent using Databricks SDK and MLflow

    Runs academic, well-being, environmental and causal analyses for one
    student and turns the combined risk profile into an intervention plan.

    NOTE(review): most `_`-prefixed helpers called by the public methods (for
    example `_extract_academic_features` or `_assess_overall_risk`) are not
    defined in the class as shown; they must be provided before the public
    methods can run end to end.
    """
    def __init__(self, agent_id="HokieWell_Agent_V1"):
        self.agent_id = agent_id
        self.workspace_client = self._initialize_databricks_client()
        self.model_registry = ModelRegistryManager()
        self.feature_store = FeatureStoreManager()
        self.mlflow_tracker = MLflowTracker()
        print(f"🚀 Databricks AI Agent {agent_id} initialized with enterprise capabilities")

    def _initialize_databricks_client(self):
        """Return a verified Databricks client, or a simulated one on any failure.

        Credentials are never hard-coded in the notebook: ``WorkspaceClient()``
        resolves them through the SDK's default authentication (for example
        the DATABRICKS_HOST / DATABRICKS_TOKEN environment variables or a
        configuration profile).
        """
        try:
            client = WorkspaceClient()
            # Constructing the client does not prove the credentials work;
            # make one authenticated call before reporting success.
            client.current_user.me()
            print("✅ Connected to Databricks workspace")
            return client
        except Exception as e:
            print(f"⚠️ Using simulated Databricks client: {e}")
            return SimulatedDatabricksClient()

    # Tool implementations as methods
    def analyze_academic_performance(self, student_data):
        """Score academic risk and summarise the grade trend.

        Tries the registered MLflow model first and falls back to
        ``_calculate_academic_risk`` when it cannot be loaded or applied.

        Returns:
            dict with ``risk_score``, ``trend_direction``, ``confidence``,
            ``key_insights`` and ``model_version``.
        """
        with self.mlflow_tracker.start_run("academic_analysis"):
            features = self._extract_academic_features(student_data)
            try:
                model = mlflow.pyfunc.load_model("models:/academic_risk_predictor/production")
                risk_score = model.predict([features])[0]
            except Exception as e:
                # `except Exception` (not a bare except) so KeyboardInterrupt and
                # SystemExit still propagate; report why the fallback is used.
                print(f"⚠️ Registry model unavailable, using heuristic risk score: {e}")
                risk_score = self._calculate_academic_risk(features)
            trends = self._analyze_academic_trends(student_data)
            return {
                "risk_score": float(risk_score),
                "trend_direction": trends['direction'],
                "confidence": trends['confidence'],
                "key_insights": self._generate_academic_insights(student_data, risk_score),
                "model_version": "dbrx-1.0"
            }

    def assess_student_wellbeing(self, wellbeing_data):
        """Score four well-being dimensions and average them.

        Each dimension helper must return a dict containing a numeric
        ``'score'``; the overall score is their unweighted mean.
        """
        with self.mlflow_tracker.start_run("wellbeing_assessment"):
            dimensions = {
                'sleep_health': self._analyze_sleep_patterns(wellbeing_data),
                'stress_levels': self._assess_stress_indicators(wellbeing_data),
                'social_engagement': self._evaluate_social_metrics(wellbeing_data),
                'physical_activity': self._analyze_activity_patterns(wellbeing_data)
            }
            wellbeing_score = np.mean([d['score'] for d in dimensions.values()])
            return {
                "overall_score": float(wellbeing_score),
                "dimensions": dimensions,
                "risk_factors": self._identify_wellbeing_risks(dimensions),
                "recommendations": self._generate_wellbeing_recommendations(dimensions)
            }

    def plan_personalized_intervention(self, student_profile, risk_assessment):
        """Build an intervention plan from a risk assessment.

        Args:
            student_profile: mapping with a ``'student_id'`` key.
            risk_assessment: mapping with ``'overall_risk'`` and ``'key_factors'``.

        Returns:
            dict describing the plan; ``planned_actions`` holds one entry per
            action returned by ``_select_optimal_interventions``, while
            ``expected_outcomes`` and ``success_metrics`` are left empty.
        """
        intervention_strategy = {
            "student_id": student_profile['student_id'],
            # Timestamp-based id with one-second resolution.
            "intervention_id": f"INT_{datetime.now().strftime('%Y%m%d_%H%M%S')}",
            "risk_level": risk_assessment['overall_risk'],
            "primary_factors": risk_assessment['key_factors'],
            "planned_actions": [],
            "expected_outcomes": [],
            "success_metrics": []
        }
        optimal_actions = self._select_optimal_interventions(student_profile, risk_assessment)
        intervention_strategy['planned_actions'].extend(
            {
                "type": action['type'],
                "description": action['description'],
                "resources_needed": action['resources'],
                "timeline": action['timeline'],
                "confidence": action['confidence_score']
            }
            for action in optimal_actions
        )
        return intervention_strategy

    def recommend_university_resources(self, student_needs, preferences=None):
        """Match university resources to a student's needs.

        ``preferences`` defaults to an empty dict. Every personalised match
        must carry a ``'match_score'`` key.
        """
        resources = self._load_university_resources()
        matches = self._match_resources_to_needs(student_needs, resources)
        personalized_matches = self._personalize_recommendations(matches, preferences or {})
        return {
            "recommended_resources": personalized_matches,
            "matching_algorithm": "collaborative_filtering_v2",
            "confidence_scores": [match['match_score'] for match in personalized_matches]
        }

    def perform_causal_analysis(self, student_data, outcome_metric):
        """Estimate which factors drive ``outcome_metric``.

        Falls back to ``_fallback_causal_analysis`` when the causal model
        cannot be loaded or applied.
        """
        try:
            causal_model = self._load_causal_model()
            analysis = causal_model.analyze(
                treatment_variables=['sleep_duration', 'study_hours', 'social_engagement'],
                outcome_variable=outcome_metric,
                data=student_data
            )
            return {
                "causal_factors": analysis['significant_treatments'],
                "effect_sizes": analysis['effect_sizes'],
                "confidence_intervals": analysis['confidence_intervals'],
                "recommended_interventions": analysis['suggested_actions']
            }
        except Exception as e:
            # Best-effort by design, but say why the fallback was taken.
            print(f"⚠️ Causal model unavailable, using fallback analysis: {e}")
            return self._fallback_causal_analysis(student_data, outcome_metric)

    def run_holistic_analysis(self, student_id, data_sources):
        """Run every analysis for one student inside a single MLflow run.

        Args:
            student_id: identifier logged to MLflow and echoed in the result.
            data_sources: mapping with ``'academic'``, ``'wellbeing'`` and
                ``'environmental'`` entries.

        Returns:
            dict bundling the individual analyses, the risk profile and the
            intervention plan.
        """
        print(f"🔍 Databricks Agent running holistic analysis for {student_id}")
        with mlflow.start_run(run_name=f"student_analysis_{student_id}"):
            academic_analysis = self.analyze_academic_performance(data_sources['academic'])
            wellbeing_assessment = self.assess_student_wellbeing(data_sources['wellbeing'])
            environmental_analysis = self._analyze_environmental_factors(data_sources['environmental'])
            causal_analysis = self.perform_causal_analysis(
                self._integrate_data_sources(data_sources),
                outcome_metric='academic_performance'
            )
            risk_profile = self._assess_overall_risk(
                academic_analysis,
                wellbeing_assessment,
                environmental_analysis
            )
            intervention_plan = self.plan_personalized_intervention(
                {'student_id': student_id},
                risk_profile
            )
            # One timestamp for both the MLflow params and the returned record,
            # so the two can be matched exactly.
            analysis_timestamp = datetime.now().isoformat()
            mlflow.log_metric("overall_risk_score", risk_profile['overall_risk'])
            mlflow.log_params({
                "student_id": student_id,
                "analysis_timestamp": analysis_timestamp
            })
            return {
                "agent_id": self.agent_id,
                "student_id": student_id,
                "analysis_timestamp": analysis_timestamp,
                "academic_analysis": academic_analysis,
                "wellbeing_assessment": wellbeing_assessment,
                "causal_analysis": causal_analysis,
                "risk_profile": risk_profile,
                "intervention_plan": intervention_plan,
                "databricks_features_used": [
                    "mlflow_tracking",
                    "model_registry",
                    "causal_ml",
                    "feature_store"
                ]
            }

# Supporting manager classes used by DatabricksHokieWellAgent
class ModelRegistryManager:
    """Stand-in model registry: returns placeholder strings instead of models."""

    def __init__(self):
        # Model name -> version/stage label. Not consulted by load_model().
        self.registered_models = dict(
            academic_risk_predictor='v3_production',
            wellbeing_assessor='v2_staging',
            causal_inference_engine='v1_experimental',
        )

    def load_model(self, model_name, version="latest"):
        """Return the placeholder handle ``loaded_model_<name>_<version>``.

        NOTE(review): the except branch calls ``_load_fallback_model``, which
        is not defined in this class as shown; it would raise AttributeError
        if ever reached.
        """
        try:
            handle = "loaded_model_{}_{}".format(model_name, version)
            return handle
        except Exception as e:
            print(f"Model loading failed: {e}")
            return self._load_fallback_model(model_name)

class FeatureStoreManager:
    """Stand-in feature store that serves one fixed feature vector."""

    def __init__(self):
        # Logical feature-table name -> fully qualified table name.
        self.feature_tables = dict(
            student_academic_features='hokiewell.student_academic_features',
            student_wellbeing_features='hokiewell.student_wellbeing_features',
            intervention_outcomes='hokiewell.intervention_outcomes',
        )

    def get_features(self, table_name, student_id):
        """Return a canned feature record stamped with the current time.

        Both arguments are accepted but ignored: the same vector and names are
        returned for every table and student.
        """
        record = {}
        record['feature_vector'] = [0.1, 0.5, 0.3]
        record['feature_names'] = ['grade_trend', 'sleep_consistency', 'engagement_level']
        record['timestamp'] = datetime.now().isoformat()
        return record

class MLflowTracker:
    """Factory for lightweight run contexts that only print start/end messages."""

    def __init__(self):
        # Not updated by start_run().
        self.active_runs = dict()

    def start_run(self, run_name):
        """Return a new MLflowRunContext for ``run_name`` (use it in a ``with`` block)."""
        run_context = MLflowRunContext(run_name)
        return run_context

class MLflowRunContext:
    """Context manager that announces the start and end of a named run."""

    def __init__(self, run_name):
        self.run_name = run_name

    def __enter__(self):
        """Print the start message and return this context."""
        print("📊 MLflow tracking started: {}".format(self.run_name))
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Print the completion message; exceptions from the block propagate."""
        print("📊 MLflow tracking completed: {}".format(self.run_name))

class SimulatedDatabricksClient:
    """Offline stand-in used when no real Databricks workspace is reachable."""

    def __init__(self):
        # Marker attribute identifying this object as the simulated client.
        self.simulated = True

    def query(self, sql):
        """Ignore ``sql`` and return an empty result flagged as simulated."""
        return dict(rows=[], simulated=True)

# Gradio Interface

In [None]:
!pip install gradio



In [None]:
import gradio as gr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from datetime import datetime
import json

class GradioHokieWellApp:
    def __init__(self):
        """Create the simulated analysis agent, then load the datasets."""
        analysis_agent = SimulatedDatabricksAgent()
        self.agent = analysis_agent
        self.load_data()

    def load_data(self):
        """Load the synthetic dataset"""
        try:
            self.students = pd.read_csv('students.csv')
            self.academic = pd.read_csv('academic_data.csv')
            self.wellbeing = pd.read_csv('wellbeing_data.csv')
            self.environmental = pd.read_csv('environmental_data.csv')
            self.resources = pd.read_csv('resources.csv')
            print("✅ Data loaded successfully")
        except Exception as e:
            print(f"❌ Data loading failed: {e}")
            # Create minimal data if files don't exist
            self.create_minimal_data()

    def create_minimal_data(self):
        """Create minimal data if files are missing"""
        self.students = pd.DataFrame([
            {'student_id': 'S001', 'name': 'Alex Johnson', 'major': 'Computer Engineering', 'year': 'Sophomore'},
            {'student_id': 'S003', 'name': 'Jordan Smith', 'major': 'Psychology', 'year': 'Freshman'}
        ])

    def get_student_data(self, student_id):
        """Get current data for selected student"""
        return {
            'academic': self.academic[self.academic['student_id'] == student_id] if hasattr(self, 'academic') else pd.DataFrame(),
            'wellbeing': self.wellbeing[self.wellbeing['student_id'] == student_id] if hasattr(self, 'wellbeing') else pd.DataFrame(),
            'environmental': self.environmental[self.environmental['student_id'] == student_id] if hasattr(self, 'environmental') else pd.DataFrame()
        }

    def analyze_student(self, student_id):
        """Run AI analysis for a student"""
        student_data = self.get_student_data(student_id)
        return self.agent.run_holistic_analysis(student_id, student_data)

    def create_risk_gauge(self, risk_score):
        """Build a Plotly indicator gauge showing ``risk_score`` on a 0-1 axis.

        The gauge is banded green/yellow/red at 0.3 and 0.7, draws a red
        threshold line at 0.7 and shows the delta against a 0.5 reference.

        Returns:
            The configured ``go.Figure``.
        """
        colour_bands = [
            {'range': [0, 0.3], 'color': 'lightgreen'},
            {'range': [0.3, 0.7], 'color': 'yellow'},
            {'range': [0.7, 1], 'color': 'red'},
        ]
        threshold_marker = {
            'line': {'color': "red", 'width': 4},
            'thickness': 0.75,
            'value': 0.7,
        }
        gauge_style = {
            'axis': {'range': [0, 1], 'tickwidth': 1, 'tickcolor': "darkblue"},
            'bar': {'color': "darkblue"},
            'bgcolor': "white",
            'borderwidth': 2,
            'bordercolor': "gray",
            'steps': colour_bands,
            'threshold': threshold_marker,
        }

        indicator = go.Indicator(
            mode="gauge+number+delta",
            value=risk_score,
            domain={'x': [0, 1], 'y': [0, 1]},
            title={'text': "Academic Risk Score", 'font': {'size': 20, 'color': 'black'}},
            delta={'reference': 0.5, 'increasing': {'color': "red"}, 'decreasing': {'color': "green"}},
            gauge=gauge_style,
        )
        fig = go.Figure(indicator)

        # All figure text is forced to black.
        fig.update_layout(
            height=300,
            margin=dict(l=20, r=20, t=50, b=20),
            font=dict(color='black'),
        )
        return fig

    def create_academic_trend_chart(self, student_id):
        """Create academic trend chart"""
        if not hasattr(self, 'academic'):
            return None

        student_academic = self.academic[self.academic['student_id'] == student_id]
        if student_academic.empty:
            return None

        student_academic = student_academic.sort_values('due_date')
        fig = go.Figure()

        fig.add_trace(go.Scatter(
            x=student_academic['due_date'],
            y=student_academic['grade'],
            mode='lines+markers',
            name='Grades',
            line=dict(color='#861F41', width=3),
            marker=dict(size=8)
        ))

        fig.update_layout(
            title="Academic Performance Trend",
            xaxis_title="Assignment Date",
            yaxis_title="Grade",
            height=300,
            showlegend=False,
            font=dict(color='black'),  # Black text for chart
            title_font=dict(color='black'),
            xaxis=dict(tickfont=dict(color='black')),
            yaxis=dict(tickfont=dict(color='black'))
        )

        return fig

    def create_wellbeing_chart(self, student_id):
        """Create wellbeing metrics chart"""
        if not hasattr(self, 'wellbeing'):
            return None

        student_wellbeing = self.wellbeing[self.wellbeing['student_id'] == student_id]
        if student_wellbeing.empty:
            return None

        student_wellbeing = student_wellbeing.sort_values('date')
        fig = go.Figure()

        fig.add_trace(go.Scatter(
            x=student_wellbeing['date'],
            y=student_wellbeing['sleep_duration'],
            mode='lines',
            name='Sleep Hours',
            line=dict(color='#E87722', width=2)
        ))

        fig.add_trace(go.Scatter(
            x=student_wellbeing['date'],
            y=student_wellbeing['wellbeing_score'],
            mode='lines',
            name='Wellbeing Score',
            line=dict(color='#861F41', width=2),
            yaxis='y2'
        ))

        fig.update_layout(
            title="Wellbeing Metrics",
            xaxis_title="Date",
            yaxis_title="Sleep Hours",
            yaxis2=dict(title="Wellbeing Score", overlaying='y', side='right'),
            height=300,
            showlegend=True,
            font=dict(color='black'),  # Black text for chart
            title_font=dict(color='black'),
            xaxis=dict(tickfont=dict(color='black')),
            yaxis=dict(tickfont=dict(color='black'))
        )

        return fig

    def format_analysis_results(self, analysis_result):
        """Format analysis results for display with BLACK TEXT"""
        academic = analysis_result['academic_analysis']
        causal = analysis_result['causal_analysis']
        plan = analysis_result['intervention_plan']

        # Academic insights - ALL BLACK TEXT
        academic_html = f"""
        <div style='background: #f8f9fa; padding: 15px; border-radius: 10px; margin: 10px 0;'>
            <h3 style='color: black; margin-bottom: 10px;'>📚 Academic Analysis</h3>
            <p style='color: black; margin: 5px 0;'><strong style='color: black;'>Risk Score:</strong> <span style='color: black;'>{academic['risk_score']:.2f}</span></p>
            <p style='color: black; margin: 5px 0;'><strong style='color: black;'>Trend:</strong> <span style='color: black;'>{academic['trend_direction'].title()}</span></p>
            <p style='color: black; margin: 5px 0;'><strong style='color: black;'>Key Insights:</strong></p>
            <ul style='color: black;'>
        """
        for insight in academic['key_insights']:
            academic_html += f"<li style='color: black;'>{insight}</li>"
        academic_html += "</ul></div>"

        # Causal analysis - ALL BLACK TEXT
        causal_html = f"""
        <div style='background: #fff3cd; padding: 15px; border-radius: 10px; margin: 10px 0;'>
            <h3 style='color: black; margin-bottom: 10px;'>🔍 Root Cause Analysis</h3>
            <p style='color: black; margin: 5px 0;'><strong style='color: black;'>Identified Factors:</strong></p>
            <ul style='color: black;'>
        """
        for factor in causal['causal_factors']:
            effect = causal['effect_sizes'].get(factor, 0)
            causal_html += f"<li style='color: black;'>{factor.replace('_', ' ').title()} (effect size: {effect:.3f})</li>"
        causal_html += "</ul></div>"

        # Intervention plan - ALL BLACK TEXT
        risk_level_color = "red" if plan["risk_level"] == "high" else "orange" if plan["risk_level"] == "medium" else "green"
        plan_html = f"""
        <div style='background: #d1ecf1; padding: 15px; border-radius: 10px; margin: 10px 0;'>
            <h3 style='color: black; margin-bottom: 10px;'>🎯 Intervention Plan</h3>
            <p style='color: black; margin: 5px 0;'><strong style='color: black;'>Risk Level:</strong> <span style='color: {risk_level_color}; font-weight: bold;'>{plan["risk_level"].upper()}</span></p>
            <p style='color: black; margin: 5px 0;'><strong style='color: black;'>Recommended Actions:</strong></p>
            <ul style='color: black;'>
        """
        for action in plan['planned_actions']:
            plan_html += f"""
            <li style='color: black; margin-bottom: 10px;'>
                <strong style='color: black;'>{action['type'].replace('_', ' ').title()}:</strong><br>
                <span style='color: black;'>{action['description']}</span><br>
                <em style='color: black;'>Confidence: {action['confidence']:.0%}</em>
            </li>
            """
        plan_html += "</ul></div>"

        return academic_html + causal_html + plan_html

    def get_resource_recommendations(self, student_id):
        """Get personalized resource recommendations"""
        analysis = self.analyze_student(student_id)
        risk_factors = analysis['causal_analysis']['causal_factors']

        recommendations = []
        if 'sleep_deprivation' in risk_factors:
            recommendations.append({
                'resource': 'Sleep & Wellness Workshop',
                'match': 0.95,
                'reason': 'Addresses identified sleep patterns'
            })
        if 'academic_overload' in risk_factors:
            recommendations.append({
                'resource': 'Engineering Tutoring Center',
                'match': 0.88,
                'reason': 'Targeted academic support'
            })
        if 'social_isolation' in risk_factors:
            recommendations.append({
                'resource': 'Student Clubs & Organizations',
                'match': 0.82,
                'reason': 'Community engagement opportunities'
            })

        # Default recommendations
        if not recommendations:
            recommendations = [
                {'resource': 'Academic Success Center', 'match': 0.75, 'reason': 'General academic support'},
                {'resource': 'Counseling Services', 'match': 0.70, 'reason': 'Wellbeing support'}
            ]

        return recommendations

    def create_interface(self):
        """Assemble the Gradio Blocks UI and register its event handlers.

        Layout: a narrow left column (student picker, run button, risk gauge,
        quick stats) beside a wide right column with four tabs. The run button
        fills every output; changing the student, and the initial page load,
        refresh only the two charts.

        Returns:
            The ``gr.Blocks`` app, not yet launched.
        """
        # Dropdown entries have the form "<student_id> - <name>".
        student_choices = []
        for _, student in self.students.iterrows():
            student_choices.append(f"{student['student_id']} - {student['name']}")

        banner_markdown = """
                # 🎓 SchoolDaddy
                ### *From Reactive Support to Proactive Thriving*
                **Powered by Databricks AI Agent Framework**
                """

        with gr.Blocks(theme=gr.themes.Soft(), title="SchoolDaddy", css=".gradio-container {color: black !important;}") as demo:
            gr.Markdown(banner_markdown)

            with gr.Row():
                # Left column: controls and headline numbers.
                with gr.Column(scale=1):
                    student_dropdown = gr.Dropdown(
                        choices=student_choices,
                        label="👤 Select Student",
                        value="S003 - Jordan Smith",
                        elem_classes=["black-text"]
                    )

                    analyze_btn = gr.Button("🚀 Run AI Analysis", variant="primary", elem_classes=["black-text"])
                    risk_gauge = gr.Plot(label="Academic Risk Assessment")

                    gr.Markdown("### 📊 Quick Stats", elem_classes=["black-text"])
                    risk_score = gr.Textbox(label="Risk Score", interactive=False, elem_classes=["black-text"])
                    trend_direction = gr.Textbox(label="Trend Direction", interactive=False, elem_classes=["black-text"])
                    primary_factor = gr.Textbox(label="Primary Factor", interactive=False, elem_classes=["black-text"])

                # Right column: tabbed detail views.
                with gr.Column(scale=2):
                    with gr.Tab("📈 Analysis Results"):
                        analysis_output = gr.HTML(label="AI Analysis Results", elem_classes=["black-text"])

                    with gr.Tab("📊 Visual Analytics"):
                        with gr.Row():
                            academic_chart = gr.Plot(label="Academic Performance")
                            wellbeing_chart = gr.Plot(label="Wellbeing Metrics")

                    with gr.Tab("🛟 Resource Recommendations"):
                        resources_output = gr.HTML(label="Personalized Recommendations", elem_classes=["black-text"])

                    with gr.Tab("🤖 Agent Details"):
                        agent_info = gr.JSON(label="Raw Analysis Data")

            # The order here must match the tuple returned by run_complete_analysis.
            analysis_outputs = [
                risk_gauge,
                risk_score,
                trend_direction,
                primary_factor,
                analysis_output,
                academic_chart,
                wellbeing_chart,
                resources_output,
                agent_info,
            ]
            analyze_btn.click(
                fn=self.run_complete_analysis,
                inputs=[student_dropdown],
                outputs=analysis_outputs
            )

            # Picking another student redraws only the two charts.
            student_dropdown.change(
                fn=self.update_student_charts,
                inputs=[student_dropdown],
                outputs=[academic_chart, wellbeing_chart]
            )

            # Draw the charts for the default student when the page opens.
            demo.load(
                fn=self.initial_load,
                inputs=[student_dropdown],
                outputs=[academic_chart, wellbeing_chart]
            )

        return demo

    def run_complete_analysis(self, student_selection):
        """Run complete analysis and return all outputs"""
        student_id = student_selection.split(' - ')[0]
        analysis_result = self.analyze_student(student_id)

        # Risk gauge
        risk_gauge = self.create_risk_gauge(analysis_result['academic_analysis']['risk_score'])

        # Text outputs - Ensure black text
        risk_score = f"{analysis_result['academic_analysis']['risk_score']:.2f}"
        trend_direction = analysis_result['academic_analysis']['trend_direction'].title()
        primary_factor = analysis_result['causal_analysis']['causal_factors'][0].replace('_', ' ').title() if analysis_result['causal_analysis']['causal_factors'] else "No significant factors"

        # Analysis results HTML
        analysis_html = self.format_analysis_results(analysis_result)

        # Charts
        academic_chart = self.create_academic_trend_chart(student_id)
        wellbeing_chart = self.create_wellbeing_chart(student_id)

        # Resource recommendations
        resources_html = self.format_resource_recommendations(student_id)

        return risk_gauge, risk_score, trend_direction, primary_factor, analysis_html, academic_chart, wellbeing_chart, resources_html, analysis_result

    def update_student_charts(self, student_selection):
        """Update charts when student changes"""
        student_id = student_selection.split(' - ')[0]
        academic_chart = self.create_academic_trend_chart(student_id)
        wellbeing_chart = self.create_wellbeing_chart(student_id)
        return academic_chart, wellbeing_chart

    def initial_load(self, student_selection):
        """Initial load of charts"""
        return self.update_student_charts(student_selection)

    def format_resource_recommendations(self, student_id):
        """Format resource recommendations as HTML with BLACK TEXT"""
        recommendations = self.get_resource_recommendations(student_id)

        html = "<div style='padding: 20px; color: black;'>"
        html += "<h3 style='color: black;'>🛟 Personalized Resource Recommendations</h3>"

        for rec in recommendations:
            html += f"""
            <div style='background: #e8f5e8; padding: 15px; margin: 10px 0; border-radius: 10px; border-left: 5px solid #4CAF50; color: black;'>
                <h4 style='color: black;'>{rec['resource']} <span style='float: right; background: #4CAF50; color: white; padding: 2px 8px; border-radius: 10px; font-size: 12px;'>{rec['match']:.0%} match</span></h4>
                <p style='color: black;'>{rec['reason']}</p>
            </div>
            """

        html += "</div>"
        return html

# Simulated Databricks Agent
class SimulatedDatabricksAgent:
    """Rule-based stand-in for the analysis agent.

    Results are derived from the student id alone; no model is called.
    """

    def __init__(self):
        self.agent_id = "HokieWell_Simulated"

    def run_holistic_analysis(self, student_id, data_sources):
        """Return a canned analysis for ``student_id``.

        Args:
            student_id: "S003" and "S001" have dedicated risk profiles; every
                other id gets the lower-risk default profile.
            data_sources: Accepted but not read by this simulation.

        Returns:
            Dict with the agent and student ids, a timestamp, the academic,
            wellbeing and causal sections, an intervention plan, and the list
            of feature names. Every score is derived from the profile's risk
            score.
        """
        # student_id -> (risk score, causal factors, insights)
        profiles = {
            "S003": (  # Jordan - high risk
                0.75,
                ["sleep_deprivation", "academic_overload", "social_isolation"],
                [
                    "Grade trend showing significant decline over past 3 weeks",
                    "Sleep duration consistently below 6 hours",
                    "Assignment submission delays increasing"
                ],
            ),
            "S001": (  # Alex - medium risk
                0.65,
                ["sleep_deprivation", "academic_overload"],
                [
                    "Moderate grade decline detected",
                    "Irregular sleep patterns affecting performance",
                    "Increased library hours suggesting cramming behavior"
                ],
            ),
        }
        lower_risk_profile = (
            0.35,
            ["minor_adjustments_needed"],
            [
                "Stable academic performance",
                "Healthy wellbeing patterns detected",
                "Minor optimizations possible"
            ],
        )
        risk_score, factors, insights = profiles.get(student_id, lower_risk_profile)

        # Trends flip once the risk score passes 0.6.
        elevated = risk_score > 0.6
        academic_trend = "declining" if elevated else "stable"
        sleep_trend = "declining" if elevated else "stable"
        stress_trend = "increasing" if elevated else "stable"

        if risk_score > 0.7:
            risk_level = "high"
        elif risk_score > 0.4:
            risk_level = "medium"
        else:
            risk_level = "low"

        if risk_score > 0.4:
            planned_actions = [
                {
                    "type": "academic_support",
                    "description": "Schedule targeted tutoring sessions with engineering specialists",
                    "confidence": risk_score
                },
                {
                    "type": "wellbeing_intervention",
                    "description": "Proactive wellbeing check-in and sleep hygiene workshop",
                    "confidence": max(0.3, risk_score - 0.1)
                }
            ]
        else:
            planned_actions = [
                {
                    "type": "preventive_maintenance",
                    "description": "Regular check-ins to maintain healthy patterns",
                    "confidence": 0.9
                }
            ]

        # Every factor shares the same effect size: an even split plus 0.1.
        per_factor_effect = risk_score/len(factors) + 0.1
        effect_sizes = dict.fromkeys(factors, per_factor_effect)

        return {
            "agent_id": self.agent_id,
            "student_id": student_id,
            "analysis_timestamp": datetime.now().isoformat(),
            "academic_analysis": {
                "risk_score": risk_score,
                "trend_direction": academic_trend,
                "confidence": 0.82,
                "key_insights": insights,
                "model_version": "databricks_dbrx_instruct"
            },
            "wellbeing_assessment": {
                "overall_score": max(0.3, 1 - risk_score + 0.1),
                "dimensions": {
                    "sleep_health": {"score": max(0.3, 1 - risk_score), "trend": sleep_trend},
                    "stress_levels": {"score": risk_score, "trend": stress_trend}
                }
            },
            "causal_analysis": {
                "causal_factors": factors,
                "effect_sizes": effect_sizes
            },
            "intervention_plan": {
                "risk_level": risk_level,
                "planned_actions": planned_actions
            },
            "databricks_features_used": [
                "mlflow_tracking",
                "feature_store",
                "model_registry",
                "causal_ml",
                "collaborative_filtering"
            ]
        }

# Launch the application
def launch_gradio_app(server_name="0.0.0.0", server_port=7867, share=True, debug=True, show_error=True):
    """Build the HokieWell app and launch its Gradio interface.

    Every default reproduces the previously hard-coded launch settings, so a
    bare ``launch_gradio_app()`` call behaves as before.

    Args:
        server_name: Host/interface passed to ``demo.launch``.
        server_port: Port passed to ``demo.launch``.
        share: Passed to ``demo.launch``; pass ``False`` to keep the app local.
        debug: Passed to ``demo.launch``.
        show_error: Passed to ``demo.launch``.
    """
    app = GradioHokieWellApp()
    demo = app.create_interface()

    # NOTE(review): the defaults bind to all interfaces and request a share
    # link while the app displays student data; confirm this is intended
    # outside of demos.
    demo.launch(
        server_name=server_name,
        server_port=server_port,
        share=share,
        debug=debug,
        show_error=show_error
    )

# Launch the app only when this code runs as the main program, not on import.
if __name__ == "__main__":
    launch_gradio_app()

✅ Data loaded successfully
Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://66f5bb5554704c92ef.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Keyboard interruption in main thread... closing server.
Killing tunnel 0.0.0.0:7867 <> https://66f5bb5554704c92ef.gradio.live


# Copy of the above code with flexible querying

In [None]:
import gradio as gr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from datetime import datetime
import json
from groq import Groq # Import Groq client
import os # Import os module to read environment variables

# Define the AgenticQueryProcessor class here
class AgenticQueryProcessor:
    """Answer free-text questions about a student from the agent's analysis.

    When a Groq client is available, the question is sent to the LLM together
    with a summary of the analysis. Otherwise, or when the LLM call fails or
    returns nothing, a keyword-driven summary is produced instead.
    """

    def __init__(self, student_data_getter, analysis_agent):
        """Store the collaborators and try to create a Groq client.

        Args:
            student_data_getter: Callable taking a student id; its result is
                passed unchanged to the agent.
            analysis_agent: Object exposing
                ``run_holistic_analysis(student_id, student_data)``.
        """
        self.student_data_getter = student_data_getter
        self.analysis_agent = analysis_agent
        self.groq_client = None  # Stays None whenever the LLM is unavailable.

        groq_api_key = self._resolve_api_key()
        if not groq_api_key:
            print("⚠️ GROQ_API_KEY not found in Colab secrets or environment variables. Groq LLM is disabled.")
            return

        try:
            self.groq_client = Groq(api_key=groq_api_key)
            print("✅ Groq client initialized.")
        except Exception as e:
            print(f"⚠️ Groq client initialization failed: {e}. Please ensure your API key is correct.")
            self.groq_client = None  # Set client to None if initialization fails

    @staticmethod
    def _resolve_api_key():
        """Return the Groq API key from Colab secrets or the environment, else None."""
        try:
            from google.colab import userdata
        except ImportError:
            print("google.colab not found, attempting to get API key from environment variable.")
            return os.getenv('GROQ_API_KEY')

        print("Attempting to get API key from Colab secrets.")
        try:
            secret = userdata.get('GROQ_API_KEY')
        except Exception as e:
            # NOTE(review): userdata.get is understood to raise (rather than
            # return None) when the secret is missing or notebook access was
            # not granted - confirm against the Colab docs. Either way, treat
            # a failure as "no key" so the processor degrades to keyword mode.
            print(f"⚠️ Could not read GROQ_API_KEY from Colab secrets: {e}")
            secret = None
        return secret or os.getenv('GROQ_API_KEY')

    @staticmethod
    def _format_number(value, spec):
        """Format ``value`` with ``spec``; return 'N/A' when it is not formattable."""
        try:
            return format(value, spec)
        except (TypeError, ValueError):
            return 'N/A'

    def process(self, student_id, query):
        """
        Processes a user query using a simulated agentic approach,
        enhanced by a Groq LLM for query understanding and response generation.

        Args:
            student_id: Id of the student the question is about.
            query: The user's free-text question.

        Returns:
            The LLM's answer when one was produced; otherwise the keyword-based
            summary, with its parts joined by blank lines.
        """
        print(f"Agentic processor received query for {student_id}: '{query}'")

        student_data = self.student_data_getter(student_id)
        analysis_result = self.analysis_agent.run_holistic_analysis(student_id, student_data)

        llm_response = self._ask_llm(student_id, query, analysis_result)
        if llm_response:
            return llm_response

        # Fallback to simulated reasoning if LLM is not available or failed
        return "\n\n".join(self._keyword_response(student_id, query, analysis_result))

    def _ask_llm(self, student_id, query, analysis_result):
        """Return the LLM's answer, or None when no client exists or the call fails."""
        if not self.groq_client:
            return None
        try:
            context = self._build_llm_context(student_id, query, analysis_result)
            chat_completion = self.groq_client.chat.completions.create(
                messages=[
                    {
                        "role": "system",
                        "content": "You are an AI assistant analyzing student data to answer questions and suggest actions."
                    },
                    {
                        "role": "user",
                        "content": context,
                    }
                ],
                model="llama3-8b-8192", # You can experiment with other Groq models
                temperature=0.5,
                max_tokens=250,
            )
            return chat_completion.choices[0].message.content
        except Exception as e:
            print(f"⚠️ Error processing query with LLM: {e}. Falling back to keyword analysis.")
            self.groq_client = None # Disable LLM if it fails
            return None

    def _build_llm_context(self, student_id, query, analysis_result):
        """Summarise the analysis and the question as the LLM user prompt."""
        academic = analysis_result.get('academic_analysis', {})
        wellbeing = analysis_result.get('wellbeing_assessment', {})
        dimensions = wellbeing.get('dimensions', {})
        causal = analysis_result.get('causal_analysis', {})
        plan = analysis_result.get('intervention_plan', {})

        risk_score = self._format_number(academic.get('risk_score'), '.2f')
        academic_trend = str(academic.get('trend_direction', 'N/A')).title()
        insights = ', '.join(map(str, academic.get('key_insights', [])))
        wellbeing_score = self._format_number(wellbeing.get('overall_score'), '.2f')
        sleep_trend = dimensions.get('sleep_health', {}).get('trend', 'N/A')
        stress_trend = dimensions.get('stress_levels', {}).get('trend', 'N/A')
        factors = ', '.join(map(str, causal.get('causal_factors', [])))
        actions = '; '.join(
            f"{action.get('type', 'N/A')}: {action.get('description', 'N/A')}"
            for action in plan.get('planned_actions', [])
        )

        return f"""
                Analyze the following student data and insights:
                Academic Risk Score: {risk_score}
                Academic Trend: {academic_trend}
                Key Academic Insights: {insights}
                Wellbeing Score: {wellbeing_score}
                Sleep Health Trend: {sleep_trend}
                Stress Levels Trend: {stress_trend}
                Identified Causal Factors: {factors}
                Recommended Actions: {actions}

                Based on this information, answer the following question about student {student_id}: "{query}"

                Keep the answer concise and focused on the student's data and potential actions.
                """

    def _keyword_response(self, student_id, query, analysis_result):
        """Build the keyword-driven answer as a list of text parts."""
        query_lower = query.lower()

        def mentions(*keywords):
            return any(keyword in query_lower for keyword in keywords)

        response_parts = []

        if mentions("academic", "grade", "performance", "study"):
            academic_analysis = analysis_result.get('academic_analysis', {})
            if academic_analysis:
                risk_score = self._format_number(academic_analysis.get('risk_score'), '.2f')
                trend = str(academic_analysis.get('trend_direction', 'N/A')).title()
                response_parts.append(f"Regarding academic performance for {student_id}:")
                response_parts.append(f"- Risk Score: {risk_score}")
                response_parts.append(f"- Trend: {trend}")
                response_parts.append("- Key Insights:")
                for insight in academic_analysis.get('key_insights', []):
                    response_parts.append(f"  - {insight}")
            else:
                response_parts.append(f"Could not retrieve academic analysis for {student_id}.")

        if mentions("wellbeing", "sleep", "stress", "health"):
            wellbeing_assessment = analysis_result.get('wellbeing_assessment', {})
            if wellbeing_assessment:
                dimensions = wellbeing_assessment.get('dimensions', {})
                sleep_health = dimensions.get('sleep_health', {})
                stress_levels = dimensions.get('stress_levels', {})
                overall = self._format_number(wellbeing_assessment.get('overall_score'), '.2f')
                sleep_score = self._format_number(sleep_health.get('score'), '.2f')
                stress_score = self._format_number(stress_levels.get('score'), '.2f')
                response_parts.append(f"Regarding wellbeing for {student_id}:")
                response_parts.append(f"- Overall Wellbeing Score: {overall}")
                response_parts.append(f"- Sleep Health Score: {sleep_score}, Trend: {sleep_health.get('trend', 'N/A')}")
                response_parts.append(f"- Stress Levels Score: {stress_score}, Trend: {stress_levels.get('trend', 'N/A')}")
            else:
                response_parts.append(f"Could not retrieve wellbeing assessment for {student_id}.")

        if mentions("recommendations", "actions", "support", "resource"):
            intervention_plan = analysis_result.get('intervention_plan', {})
            if intervention_plan:
                risk_level = str(intervention_plan.get('risk_level', 'N/A')).upper()
                response_parts.append(f"Recommended actions and resources for {student_id}:")
                response_parts.append(f"- Overall Risk Level: {risk_level}")
                response_parts.append("- Planned Actions:")
                for action in intervention_plan.get('planned_actions', []):
                    action_type = str(action.get('type', 'N/A')).replace('_', ' ').title()
                    confidence = self._format_number(action.get('confidence', 0), '.0%')
                    response_parts.append(f"  - Type: {action_type}")
                    response_parts.append(f"    Description: {action.get('description', 'N/A')}")
                    response_parts.append(f"    Confidence: {confidence}")
            else:
                response_parts.append(f"Could not retrieve intervention plan for {student_id}.")

        if mentions("pattern", "why", "cause", "factors"):
            causal_analysis = analysis_result.get('causal_analysis', {})
            if causal_analysis:
                response_parts.append(f"Likely causal factors for observed patterns in {student_id}:")
                effect_sizes = causal_analysis.get('effect_sizes', {})
                for factor in causal_analysis.get('causal_factors', []):
                    effect = self._format_number(effect_sizes.get(factor, 0), '.3f')
                    factor_label = str(factor).replace('_', ' ').title()
                    response_parts.append(f"- {factor_label} (estimated effect size: {effect})")
            else:
                response_parts.append(f"Could not retrieve causal analysis for {student_id}.")

        if not response_parts:
            response_parts.append(f"I am processing your query about {student_id}. Please run the main analysis for detailed insights or try a different question.")

        return response_parts


class GradioHokieWellApp:
    """Gradio front end for the student-support demo.

    Loads the synthetic CSV tables, delegates analysis to ``SimulatedDatabricksAgent``,
    answers free-text questions through ``AgenticQueryProcessor`` and renders the
    results as Plotly figures and HTML snippets.
    """

    # Dropdown entry preselected when it exists in the loaded roster.
    DEFAULT_STUDENT_SELECTION = "S003 - Jordan Smith"

    def __init__(self):
        """Create the agent, load the data tables and wire up the query processor."""
        self.agent = SimulatedDatabricksAgent()
        self.load_data()
        self.query_processor = AgenticQueryProcessor(self.get_student_data, self.agent) # Instantiate the processor

    def load_data(self):
        """Load the synthetic dataset from CSV files in the working directory.

        On any failure the error is printed and ``create_minimal_data`` installs a
        small fallback roster; tables read before the failure stay on the instance,
        tables after it are simply absent (the other methods check for that).
        """
        try:
            self.students = pd.read_csv('students.csv')
            self.academic = pd.read_csv('academic_data.csv')
            self.wellbeing = pd.read_csv('wellbeing_data.csv')
            self.environmental = pd.read_csv('environmental_data.csv')
            self.resources = pd.read_csv('resources.csv')
            print("✅ Data loaded successfully")
        except Exception as e:
            print(f"❌ Data loading failed: {e}")
            # Create minimal data if files don't exist
            self.create_minimal_data()

    def create_minimal_data(self):
        """Create minimal data if files are missing (only the student roster is set)."""
        self.students = pd.DataFrame([
            {'student_id': 'S001', 'name': 'Alex Johnson', 'major': 'Computer Engineering', 'year': 'Sophomore'},
            {'student_id': 'S003', 'name': 'Jordan Smith', 'major': 'Psychology', 'year': 'Freshman'}
        ])

    def get_student_data(self, student_id):
        """Get current data for selected student.

        Returns:
            dict with keys 'academic', 'wellbeing' and 'environmental'; each value is
            the rows of that table for ``student_id``, or an empty DataFrame when the
            table was never loaded.
        """
        def rows_for(table_name):
            if not hasattr(self, table_name):
                return pd.DataFrame()
            table = getattr(self, table_name)
            return table[table['student_id'] == student_id]

        return {name: rows_for(name) for name in ('academic', 'wellbeing', 'environmental')}

    def analyze_student(self, student_id):
        """Run AI analysis for a student and return the agent's result dict."""
        student_data = self.get_student_data(student_id)
        return self.agent.run_holistic_analysis(student_id, student_data)

    def process_query(self, student_id, query):
        """Process user query and generate response using the AgenticQueryProcessor"""
        return self.query_processor.process(student_id, query)

    @staticmethod
    def _extract_student_id(student_selection):
        """Return the ID part of a "<id> - <name>" dropdown entry.

        Returns None when nothing usable is selected (Gradio passes None for a
        cleared dropdown), so handlers can respond instead of raising.
        """
        if not isinstance(student_selection, str) or not student_selection.strip():
            return None
        return student_selection.split(' - ')[0]

    def _student_choices(self):
        """Build the dropdown entries and pick a default that is guaranteed to be one of them.

        Returns:
            (choices, default): ``default`` is DEFAULT_STUDENT_SELECTION when present,
            otherwise the first entry, otherwise None for an empty roster.
        """
        choices = [
            f"{student_id} - {name}"
            for student_id, name in zip(self.students['student_id'], self.students['name'])
        ]
        if self.DEFAULT_STUDENT_SELECTION in choices:
            default = self.DEFAULT_STUDENT_SELECTION
        else:
            default = choices[0] if choices else None
        return choices, default

    def create_risk_gauge(self, risk_score):
        """Create a risk gauge chart using Plotly.

        The gauge spans 0..1 with green/yellow/red bands split at 0.3 and 0.7 and
        shows the delta against a 0.5 reference.
        """
        fig = go.Figure(go.Indicator(
            mode = "gauge+number+delta",
            value = risk_score,
            domain = {'x': [0, 1], 'y': [0, 1]},
            title = {'text': "Academic Risk Score", 'font': {'size': 20}},
            delta = {'reference': 0.5, 'increasing': {'color': "red"}, 'decreasing': {'color': "green"}},
            gauge = {
                'axis': {'range': [0, 1], 'tickwidth': 1, 'tickcolor': "darkblue"},
                'bar': {'color': "darkblue"},
                'bgcolor': "white",
                'borderwidth': 2,
                'bordercolor': "gray",
                'steps': [
                    {'range': [0, 0.3], 'color': 'lightgreen'},
                    {'range': [0.3, 0.7], 'color': 'yellow'},
                    {'range': [0.7, 1], 'color': 'red'}],
                'threshold': {
                    'line': {'color': "red", 'width': 4},
                    'thickness': 0.75,
                    'value': 0.7}
            }
        ))

        fig.update_layout(height=300, margin=dict(l=20, r=20, t=50, b=20))
        return fig

    def create_academic_trend_chart(self, student_id):
        """Create academic trend chart (grade by due date); None when there is no data."""
        if not hasattr(self, 'academic'):
            return None

        student_academic = self.academic[self.academic['student_id'] == student_id]
        if student_academic.empty:
            return None

        student_academic = student_academic.sort_values('due_date')
        fig = go.Figure()

        fig.add_trace(go.Scatter(
            x=student_academic['due_date'],
            y=student_academic['grade'],
            mode='lines+markers',
            name='Grades',
            line=dict(color='#861F41', width=3),
            marker=dict(size=8)
        ))

        fig.update_layout(
            title="Academic Performance Trend",
            xaxis_title="Assignment Date",
            yaxis_title="Grade",
            height=300,
            showlegend=False
        )

        return fig

    def create_wellbeing_chart(self, student_id):
        """Create wellbeing metrics chart; None when there is no data.

        Sleep duration is drawn on the left axis and the wellbeing score on a
        second, right-hand axis.
        """
        if not hasattr(self, 'wellbeing'):
            return None

        student_wellbeing = self.wellbeing[self.wellbeing['student_id'] == student_id]
        if student_wellbeing.empty:
            return None

        student_wellbeing = student_wellbeing.sort_values('date')
        fig = go.Figure()

        fig.add_trace(go.Scatter(
            x=student_wellbeing['date'],
            y=student_wellbeing['sleep_duration'],
            mode='lines',
            name='Sleep Hours',
            line=dict(color='#E87722', width=2)
        ))

        fig.add_trace(go.Scatter(
            x=student_wellbeing['date'],
            y=student_wellbeing['wellbeing_score'],
            mode='lines',
            name='Wellbeing Score',
            line=dict(color='#861F41', width=2),
            yaxis='y2'
        ))

        fig.update_layout(
            title="Wellbeing Metrics",
            xaxis_title="Date",
            yaxis_title="Sleep Hours",
            yaxis2=dict(title="Wellbeing Score", overlaying='y', side='right'),
            height=300,
            showlegend=True
        )

        return fig

    def format_analysis_results(self, analysis_result):
        """Format analysis results for display.

        Reads the 'academic_analysis', 'causal_analysis' and 'intervention_plan'
        sections of ``analysis_result`` and returns three HTML cards concatenated
        into one string.
        """
        academic = analysis_result['academic_analysis']
        causal = analysis_result['causal_analysis']
        plan = analysis_result['intervention_plan']

        # Academic insights
        academic_html = f"""
        <div style='background: #f8f9fa; padding: 15px; border-radius: 10px; margin: 10px 0; color: black;'>
            <h3 style='color: black;'>📚 Academic Analysis</h3>
            <p style='color: black;'><strong>Risk Score:</strong> {academic['risk_score']:.2f}</p>
            <p style='color: black;'><strong>Trend:</strong> {academic['trend_direction'].title()}</p>
            <p style='color: black;'><strong>Key Insights:</strong></p>
            <ul style='color: black;'>
        """
        for insight in academic['key_insights']:
            academic_html += f"<li style='color: black;'>{insight}</li>"
        academic_html += "</ul></div>"

        # Causal analysis
        causal_html = f"""
        <div style='background: #fff3cd; padding: 15px; border-radius: 10px; margin: 10px 0; color: black;'>
            <h3 style='color: black;'>🔍 Root Cause Analysis</h3>
            <p style='color: black;'><strong>Identified Factors:</strong></p>
            <ul style='color: black;'>
        """
        for factor in causal['causal_factors']:
            effect = causal['effect_sizes'].get(factor, 0)
            causal_html += f"<li style='color: black;'>{factor.replace('_', ' ').title()} (effect size: {effect:.3f})</li>"
        causal_html += "</ul></div>"

        # Intervention plan
        risk_level = plan["risk_level"]
        level_color = "red" if risk_level == "high" else "orange" if risk_level == "medium" else "green"
        plan_html = f"""
        <div style='background: #d1ecf1; padding: 15px; border-radius: 10px; margin: 10px 0; color: black;'>
            <h3 style='color: black;'>🎯 Intervention Plan</h3>
            <p style='color: black;'><strong>Risk Level:</strong> <span style='color: {level_color}'>{risk_level.upper()}</span></p>
            <p style='color: black;'><strong>Recommended Actions:</strong></p>
            <ul style='color: black;'>
        """
        for action in plan['planned_actions']:
            plan_html += f"""
            <li style='color: black;'>
                <strong>{action['type'].replace('_', ' ').title()}:</strong><br>
                {action['description']}<br>
                <em>Confidence: {action['confidence']:.0%}</em>
            </li>
            """
        plan_html += "</ul></div>"

        return academic_html + causal_html + plan_html

    def get_resource_recommendations(self, student_id, analysis=None):
        """Get personalized resource recommendations.

        Args:
            student_id: ID of the student to recommend for.
            analysis: optional result of ``analyze_student`` for this student. When
                given it is reused, so the agent is not run a second time.

        Returns:
            list of dicts with 'resource', 'match' and 'reason'; two generic entries
            when none of the known causal factors is present.
        """
        if analysis is None:
            analysis = self.analyze_student(student_id)
        risk_factors = analysis['causal_analysis']['causal_factors']

        recommendations = []
        if 'sleep_deprivation' in risk_factors:
            recommendations.append({
                'resource': 'Sleep & Wellness Workshop',
                'match': 0.95,
                'reason': 'Addresses identified sleep patterns'
            })
        if 'academic_overload' in risk_factors:
            recommendations.append({
                'resource': 'Engineering Tutoring Center',
                'match': 0.88,
                'reason': 'Targeted academic support'
            })
        if 'social_isolation' in risk_factors:
            recommendations.append({
                'resource': 'Student Clubs & Organizations',
                'match': 0.82,
                'reason': 'Community engagement opportunities'
            })

        # Default recommendations
        if not recommendations:
            recommendations = [
                {'resource': 'Academic Success Center', 'match': 0.75, 'reason': 'General academic support'},
                {'resource': 'Counseling Services', 'match': 0.70, 'reason': 'Wellbeing support'}
            ]

        return recommendations

    def create_interface(self):
        """Create the Gradio interface and return the ``gr.Blocks`` app (not yet launched)."""
        # The default must be one of the choices, otherwise the dropdown starts in an invalid state.
        student_choices, default_student = self._student_choices()

        with gr.Blocks(theme=gr.themes.Soft(), title="SchoolDaddy") as demo:
            gr.Markdown(
                """
                # 🎓 SchoolDaddy
                ### *From Reactive Support to Proactive Thriving*
                **Powered by Databricks AI Agent Framework**
                """
            )

            with gr.Row():
                with gr.Column(scale=1):
                    student_dropdown = gr.Dropdown(
                        choices=student_choices,
                        label="👤 Select Student",
                        value=default_student
                    )

                    analyze_btn = gr.Button("🚀 Run AI Analysis", variant="primary")
                    risk_gauge = gr.Plot(label="Academic Risk Assessment")

                    gr.Markdown("### 📊 Quick Stats")
                    risk_score = gr.Textbox(label="Risk Score", interactive=False)
                    trend_direction = gr.Textbox(label="Trend Direction", interactive=False)
                    primary_factor = gr.Textbox(label="Primary Factor", interactive=False)

                with gr.Column(scale=2):
                    with gr.Tab("📈 Analysis Results"):
                        analysis_output = gr.HTML(label="AI Analysis Results")

                    with gr.Tab("📊 Visual Analytics"):
                        with gr.Row():
                            academic_chart = gr.Plot(label="Academic Performance")
                            wellbeing_chart = gr.Plot(label="Wellbeing Metrics")

                    with gr.Tab("🛟 Resource Recommendations"):
                        resources_output = gr.HTML(label="Personalized Recommendations")

                    with gr.Tab("🤖 Agent Details"):
                        agent_info = gr.JSON(label="Raw Analysis Data")

                    with gr.Tab("❓ Ask the Agent"):
                        query_input = gr.Textbox(label="Ask a question about the student", placeholder="e.g., How is their academic performance trending?", lines=2)
                        query_button = gr.Button("Ask")
                        query_output = gr.Textbox(label="Agent Response", interactive=False, lines=5)

            # Event handlers
            analyze_btn.click(
                fn=self.run_complete_analysis,
                inputs=[student_dropdown],
                outputs=[risk_gauge, risk_score, trend_direction, primary_factor, analysis_output, academic_chart, wellbeing_chart, resources_output, agent_info]
            )

            student_dropdown.change(
                fn=self.update_student_charts,
                inputs=[student_dropdown],
                outputs=[academic_chart, wellbeing_chart]
            )

            query_button.click(
                fn=self.run_query_analysis,
                inputs=[student_dropdown, query_input],
                outputs=[query_output]
            )


            # Initial load
            demo.load(
                fn=self.initial_load,
                inputs=[student_dropdown],
                outputs=[academic_chart, wellbeing_chart]
            )

        return demo

    def run_complete_analysis(self, student_selection):
        """Run complete analysis and return all outputs.

        Returns a 9-tuple in the order wired up in ``create_interface``: risk gauge,
        risk score text, trend text, primary factor text, analysis HTML, academic
        chart, wellbeing chart, resources HTML, raw analysis dict. With no student
        selected, empty placeholders and a short prompt are returned instead.
        """
        student_id = self._extract_student_id(student_selection)
        if student_id is None:
            return None, "", "", "", "<p>Please select a student to analyze.</p>", None, None, "", None

        analysis_result = self.analyze_student(student_id)
        academic = analysis_result['academic_analysis']
        causal_factors = analysis_result['causal_analysis']['causal_factors']

        # Risk gauge
        risk_gauge = self.create_risk_gauge(academic['risk_score'])

        # Text outputs
        risk_score = f"{academic['risk_score']:.2f}"
        trend_direction = academic['trend_direction'].title()
        primary_factor = causal_factors[0].replace('_', ' ').title() if causal_factors else "No significant factors"

        # Analysis results HTML
        analysis_html = self.format_analysis_results(analysis_result)

        # Charts
        academic_chart = self.create_academic_trend_chart(student_id)
        wellbeing_chart = self.create_wellbeing_chart(student_id)

        # Resource recommendations (reuse the analysis instead of running the agent again)
        resources_html = self.format_resource_recommendations(student_id, analysis_result)

        return risk_gauge, risk_score, trend_direction, primary_factor, analysis_html, academic_chart, wellbeing_chart, resources_html, analysis_result

    def run_query_analysis(self, student_selection, query):
        """Run analysis based on user query; asks for a selection when none is made."""
        student_id = self._extract_student_id(student_selection)
        if student_id is None:
            return "Please select a student before asking a question."
        return self.query_processor.process(student_id, query)

    def update_student_charts(self, student_selection):
        """Update charts when student changes; clears both charts when nothing is selected."""
        student_id = self._extract_student_id(student_selection)
        if student_id is None:
            return None, None
        academic_chart = self.create_academic_trend_chart(student_id)
        wellbeing_chart = self.create_wellbeing_chart(student_id)
        return academic_chart, wellbeing_chart

    def initial_load(self, student_selection):
        """Initial load of charts"""
        return self.update_student_charts(student_selection)

    def format_resource_recommendations(self, student_id, analysis=None):
        """Format resource recommendations as HTML.

        ``analysis`` is forwarded to ``get_resource_recommendations`` so an already
        computed result can be reused.
        """
        recommendations = self.get_resource_recommendations(student_id, analysis)

        html = "<div style='padding: 20px;'>"
        html += "<h3>🛟 Personalized Resource Recommendations</h3>"

        for rec in recommendations:
            html += f"""
            <div style='background: #e8f5e8; padding: 15px; margin: 10px 0; border-radius: 10px; border-left: 5px solid #4CAF50;'>
                <h4>{rec['resource']} <span style='float: right; background: #4CAF50; color: white; padding: 2px 8px; border-radius: 10px; font-size: 12px;'>{rec['match']:.0%} match</span></h4>
                <p>{rec['reason']}</p>
            </div>
            """

        html += "</div>"
        return html

# Simulated Databricks Agent (same as before)
class SimulatedDatabricksAgent:
    """Stand-in agent that returns canned, per-student analysis results."""

    def __init__(self):
        self.agent_id = "HokieWell_Simulated"

    def run_holistic_analysis(self, student_id, data_sources):
        """Simulated analysis with student-specific patterns.

        ``data_sources`` is accepted for interface compatibility but not read; the
        result depends only on ``student_id`` ("S003" high risk, "S001" medium risk,
        anyone else lower risk) plus the current timestamp.
        """
        # (risk score, causal factors, key insights) for the students with a custom profile
        custom_profiles = {
            "S003": (  # Jordan - high risk
                0.75,
                ["sleep_deprivation", "academic_overload", "social_isolation"],
                [
                    "Grade trend showing significant decline over past 3 weeks",
                    "Sleep duration consistently below 6 hours",
                    "Assignment submission delays increasing",
                ],
            ),
            "S001": (  # Alex - medium risk
                0.65,
                ["sleep_deprivation", "academic_overload"],
                [
                    "Moderate grade decline detected",
                    "Irregular sleep patterns affecting performance",
                    "Increased library hours suggesting cramming behavior",
                ],
            ),
        }
        lower_risk_profile = (  # Others - lower risk
            0.35,
            ["minor_adjustments_needed"],
            [
                "Stable academic performance",
                "Healthy wellbeing patterns detected",
                "Minor optimizations possible",
            ],
        )
        risk_score, factors, insights = custom_profiles.get(student_id, lower_risk_profile)

        # Above 0.6 every trend flips from "stable" to its worsening label.
        worsening = risk_score > 0.6

        if risk_score > 0.7:
            risk_level = "high"
        elif risk_score > 0.4:
            risk_level = "medium"
        else:
            risk_level = "low"

        if risk_score > 0.4:
            planned_actions = [
                {
                    "type": "academic_support",
                    "description": "Schedule targeted tutoring sessions with engineering specialists",
                    "confidence": risk_score
                },
                {
                    "type": "wellbeing_intervention",
                    "description": "Proactive wellbeing check-in and sleep hygiene workshop",
                    "confidence": max(0.3, risk_score - 0.1)
                },
            ]
        else:
            planned_actions = [
                {
                    "type": "preventive_maintenance",
                    "description": "Regular check-ins to maintain healthy patterns",
                    "confidence": 0.9
                },
            ]

        academic_analysis = {
            "risk_score": risk_score,
            "trend_direction": "declining" if worsening else "stable",
            "confidence": 0.82,
            "key_insights": insights,
            "model_version": "databricks_dbrx_instruct"
        }
        wellbeing_assessment = {
            "overall_score": max(0.3, 1 - risk_score + 0.1),
            "dimensions": {
                "sleep_health": {"score": max(0.3, 1 - risk_score), "trend": "declining" if worsening else "stable"},
                "stress_levels": {"score": risk_score, "trend": "increasing" if worsening else "stable"}
            }
        }
        causal_analysis = {
            "causal_factors": factors,
            # The risk is split evenly across the factors, plus a constant offset.
            "effect_sizes": {name: risk_score/len(factors) + 0.1 for name in factors}
        }

        return {
            "agent_id": self.agent_id,
            "student_id": student_id,
            "analysis_timestamp": datetime.now().isoformat(),
            "academic_analysis": academic_analysis,
            "wellbeing_assessment": wellbeing_assessment,
            "causal_analysis": causal_analysis,
            "intervention_plan": {
                "risk_level": risk_level,
                "planned_actions": planned_actions
            },
            "databricks_features_used": [
                "mlflow_tracking",
                "feature_store",
                "model_registry",
                "causal_ml",
                "collaborative_filtering"
            ]
        }


# Launch the application
def launch_gradio_app():
    """Build the dashboard and start serving it with Gradio."""
    dashboard = GradioHokieWellApp().create_interface()

    # NOTE(review): binding to all interfaces together with a public share link
    # makes the student dashboard reachable from outside this machine — confirm
    # that is intended before running against real data.
    launch_options = {
        "server_name": "0.0.0.0",  # Listen on every network interface
        "server_port": 7867,       # Fixed port chosen for this app
        "share": True,             # Also create a public share link
        "debug": True,             # Show errors
        "show_error": True,
    }
    dashboard.launch(**launch_options)

# Start the dashboard only when this code is executed directly, not when it is imported.
if __name__ == "__main__":
    launch_gradio_app()

ModuleNotFoundError: No module named 'groq'

In [None]:
%pip install groq

Collecting groq
  Downloading groq-0.32.0-py3-none-any.whl.metadata (16 kB)
Downloading groq-0.32.0-py3-none-any.whl (135 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m135.4/135.4 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: groq
Successfully installed groq-0.32.0


# Instead of the Groq LLM, use a Databricks foundation model

#####...final 01

In [None]:
import gradio as gr
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from datetime import datetime
import json

class DatabricksAIAgent:
    """Use intelligent response generation without external APIs.

    Answers are produced from fixed text templates: the query is routed to a topic
    by keyword, and the wording inside each template is chosen from the student's
    risk score.
    """

    def __init__(self):
        print("✅ Databricks AI Agent initialized")

    def get_enhanced_response(self, user_query, context_data):
        """Get enhanced response using intelligent pattern matching.

        Args:
            user_query: free-text question from the user.
            context_data: dict that may hold 'risk_score', 'factors', 'actions',
                'student_id' and 'trend'; missing keys fall back to defaults.

        Returns:
            Markdown-formatted answer. If the routed template raises, the
            comprehensive template is used instead.
        """
        try:
            return self._generate_intelligent_response(user_query, context_data)
        except Exception:
            return self._get_smart_fallback(user_query, context_data)

    @staticmethod
    def _mentions_any(query_lower, keywords):
        """Return True when any keyword starts a word of the query.

        Matching at word starts keeps stems working ('recommend' matches
        'recommendations') without firing inside unrelated words ('rest' must not
        match 'interest', 'how' must not match 'show').
        """
        import re
        return any(re.search(r'\b' + re.escape(keyword), query_lower) for keyword in keywords)

    @staticmethod
    def _severity_index(risk_score, band_width, level_count):
        """Pick the index into a worst-first list of ``level_count`` phrases.

        The score is cut into bands of ``band_width``; the phrase lists put the most
        severe wording at index 0, so the highest band maps to index 0 and the
        lowest band to the last index.
        """
        band = int(max(risk_score, 0) // band_width)
        band = min(level_count - 1, band)
        return (level_count - 1) - band

    def _generate_intelligent_response(self, user_query, context_data):
        """Generate intelligent, context-aware responses by routing on keywords."""
        query_lower = user_query.lower()

        # Checked in order: specific topics first, the catch-all question words last,
        # because 'how'/'what'/'why' occur in almost every question.
        routes = (
            # Study-related questions
            (['study', 'studying', 'homework', 'assignments', 'learn', 'academic'], self._get_study_analysis),
            # Sleep-related questions
            (['sleep', 'rest', 'tired', 'fatigue', 'energy', 'bed'], self._get_sleep_analysis),
            # Stress-related questions
            (['stress', 'overwhelm', 'pressure', 'anxiety', 'worry', 'burnout'], self._get_stress_analysis),
            # Social-related questions
            (['social', 'friends', 'lonely', 'isolated', 'community', 'friendship'], self._get_social_analysis),
            # Resource questions
            (['resource', 'help', 'support', 'recommend', 'suggest', 'advice'], self._get_resource_analysis),
            # General analysis questions
            (['how', 'what', 'why', 'explain', 'tell me', 'analyze'], self._get_general_analysis),
        )
        for keywords, handler in routes:
            if self._mentions_any(query_lower, keywords):
                return handler(user_query, context_data)

        # Default intelligent response
        return self._get_comprehensive_analysis(user_query, context_data)

    def _get_study_analysis(self, user_query, context_data):
        """Return the study-habits template, worded according to the risk score."""
        risk_score = context_data.get('risk_score', 0.5)
        student_id = context_data.get('student_id', 'Student')
        tier = self._severity_index(risk_score, 0.3, 3)

        return f"""
**📚 Detailed Study Analysis for {student_id}**

**Current Study Patterns:**
Based on the academic data, {student_id}'s study habits show {['concerning patterns requiring immediate attention', 'areas for significant improvement', 'some opportunities for optimization'][tier]}.

**Key Findings:**
- **Study Consistency**: {['Highly irregular patterns detected', 'Inconsistent study sessions', 'Generally stable routine'][tier]}
- **Learning Efficiency**: {['Significantly impacted by external factors', 'Moderately affected', 'Reasonably effective'][tier]}
- **Time Management**: {['Major challenges with scheduling', 'Some difficulties in planning', 'Adequate time allocation'][tier]}

**Specific Issues Identified:**
- Assignment submission patterns suggest {['last-minute cramming', 'rushed completion', 'planned approach'][tier]}
- Grade trends indicate {['conceptual understanding gaps', 'inconsistent preparation', 'steady comprehension'][tier]}
- Engagement data shows {['declining participation', 'variable involvement', 'consistent engagement'][tier]}

**Recommendations:**
1. **Structured Study Plan**: 2-hour focused blocks with 15-minute breaks
2. **Active Learning Techniques**: Practice testing and self-explanation
3. **Consistent Schedule**: Same study times daily for routine building
4. **Distributed Practice**: Shorter, frequent sessions over cramming

**Immediate Actions:**
- Schedule academic coaching session
- Implement weekly study planning
- Join peer study groups for accountability
"""

    def _get_sleep_analysis(self, user_query, context_data):
        """Return the sleep template, worded according to the risk score."""
        risk_score = context_data.get('risk_score', 0.5)
        student_id = context_data.get('student_id', 'Student')
        tier4 = self._severity_index(risk_score, 0.25, 4)
        tier3 = self._severity_index(risk_score, 0.3, 3)

        return f"""
**😴 Comprehensive Sleep Analysis for {student_id}**

**Sleep Health Assessment:**
The data indicates {['critical sleep deprivation affecting multiple areas', 'significant sleep issues impacting wellbeing', 'moderate sleep concerns', 'generally adequate sleep patterns'][tier4]}.

**Impact Analysis:**
- **Cognitive Function**: Sleep quality affects {['memory consolidation, focus, and academic performance', 'learning efficiency and information retention', 'daily energy levels'][tier3]}
- **Emotional Regulation**: {['Significant impact on stress management and mood', 'Moderate effect on emotional stability', 'Minor influence on daily temperament'][tier3]}
- **Academic Correlation**: Research shows sleep deprivation can reduce academic performance by {['30-40%', '20-30%', '10-20%'][tier3]}

**Recommended Interventions:**
1. **Sleep Schedule**: Consistent 7-8 hour nightly target
2. **Environment Optimization**: Cool, dark, quiet sleeping space
3. **Digital Detox**: No screens 1 hour before bedtime
4. **Relaxation Routine**: Reading, meditation, or light stretching

**University Resources:**
- Sleep & Wellness Workshop (Weekly sessions)
- Counseling Center sleep resources
- Peer wellness coaching
"""

    def _get_stress_analysis(self, user_query, context_data):
        """Return the stress template, listing the supplied factors as stressors."""
        risk_score = context_data.get('risk_score', 0.5)
        factors = context_data.get('factors', [])
        student_id = context_data.get('student_id', 'Student')
        tier4 = self._severity_index(risk_score, 0.25, 4)

        return f"""
**😥 Stress and Wellbeing Analysis for {student_id}**

**Stress Level Assessment:**
Current data shows {['critical stress levels requiring immediate support', 'elevated stress needing proactive management', 'moderate stress with improvement opportunities', 'generally manageable stress levels'][tier4]}.

**Primary Stressors Identified:**
{chr(10).join(['- ' + factor.replace('_', ' ').title() for factor in factors])}

**Stress Impact Chain:**
1. Academic pressure → Sleep disruption → Reduced coping capacity
2. Social withdrawal → Increased perceived burden → Decreased motivation

**Management Strategies:**
- **Immediate**: 5-4-3-2-1 grounding technique, box breathing
- **Short-term**: Time blocking, priority matrix, boundary setting
- **Long-term**: Regular exercise, social connection, mindfulness

**Support Recommendations:**
1. Counseling Center appointment
2. Stress management workshop
3. Mindfulness and meditation resources
"""

    def _get_social_analysis(self, user_query, context_data):
        """Return the social-connection template, worded according to the risk score."""
        risk_score = context_data.get('risk_score', 0.5)
        student_id = context_data.get('student_id', 'Student')
        tier4 = self._severity_index(risk_score, 0.25, 4)

        return f"""
**👥 Social Connection Analysis for {student_id}**

**Social Wellbeing Assessment:**
The data suggests {['significant social isolation requiring intervention', 'notable social connection challenges', 'moderate opportunities for social engagement', 'generally healthy social patterns'][tier4]}.

**Connection-Building Strategies:**
1. **Structured Opportunities**: Club meetings, study groups, campus events
2. **Low-Pressure Interactions**: Coffee chats, interest-based activities
3. **Support Systems**: Peer mentoring, faculty office hours

**Recommended Campus Resources:**
- Student Organizations Fair
- Peer Connection Program
- Community Engagement Office
"""

    def _get_general_analysis(self, user_query, context_data):
        """Return the general overview template with the risk score and factor list."""
        risk_score = context_data.get('risk_score', 0.5)
        factors = context_data.get('factors', [])
        student_id = context_data.get('student_id', 'Student')
        tier = self._severity_index(risk_score, 0.3, 3)

        return f"""
**🔍 Comprehensive Analysis for {student_id}**

**Overall Assessment:**
{student_id} shows a {risk_score:.2f} risk level, indicating {['significant challenges requiring proactive support', 'moderate concerns needing attention', 'generally positive patterns with minor enhancements'][tier]}.

**Key Factors Identified:**
{chr(10).join(['- ' + factor.replace('_', ' ').title() for factor in factors])}

**Pattern Analysis:**
The data reveals interconnected challenges where each factor influences the others. Addressing the primary issues can create positive ripple effects across all areas.

**Recommended Approach:**
1. Start with the most impactful interventions
2. Monitor progress through regular check-ins
3. Adjust strategies based on response and feedback
"""

    def _get_resource_analysis(self, user_query, context_data):
        """Return the resource template built from the supplied 'actions' list."""
        actions = context_data.get('actions', [])
        student_id = context_data.get('student_id', 'Student')

        return f"""
**🛟 Resource Recommendations for {student_id}**

**Personalized Support Plan:**
Based on the specific challenges identified, these resources are tailored to address the root causes:

**Recommended Interventions:**
{chr(10).join([f'- {action}' for action in actions])}

**Implementation Strategy:**
1. **Immediate Action**: {actions[0] if actions else 'Academic consultation'}
2. **Short-term Follow-up**: Regular support sessions
3. **Long-term Support**: Ongoing monitoring and adjustment

**Expected Outcomes:**
- Improvement in key challenge areas within 4-6 weeks
- Enhanced coping skills and resilience
- Sustainable academic success habits
"""

    def _get_comprehensive_analysis(self, user_query, context_data):
        """Return the catch-all template: risk level, factors, trend and up to three actions."""
        risk_score = context_data.get('risk_score', 0.5)
        factors = context_data.get('factors', [])
        actions = context_data.get('actions', [])
        student_id = context_data.get('student_id', 'Student')

        # Factor keys are snake_case identifiers; make them readable inside the sentence.
        leading_factor = factors[0].replace('_', ' ') if factors else 'academic pressures'
        secondary_factor = factors[1].replace('_', ' ') if len(factors) > 1 else 'overall challenges'

        return f"""
**🤖 AI Analysis for {student_id}**

**Response to: "{user_query}"**

Based on my comprehensive analysis of {student_id}'s data, here are the key insights:

**Current Situation:**
- **Risk Level**: {risk_score:.2f} ({'High' if risk_score > 0.7 else 'Medium' if risk_score > 0.4 else 'Low'} concern)
- **Primary Challenges**: {', '.join([f.replace('_', ' ').title() for f in factors])}
- **Overall Trend**: {context_data.get('trend', 'stable').title()}

**Detailed Assessment:**
The data indicates that {student_id} is experiencing a pattern where {leading_factor} are contributing to {secondary_factor}. This creates a cycle that affects multiple areas of academic and personal wellbeing.

**Evidence-Based Recommendations:**
{chr(10).join([f'• {action}' for action in actions][:3])}

**Next Steps:**
I recommend discussing these findings with {student_id} and developing a collaborative action plan. The university's support systems are well-equipped to help address these challenges.

*Analysis generated using advanced pattern recognition and educational research principles.*
"""

    def _get_smart_fallback(self, user_query, context_data):
        """Fallback used when the routed template fails: the comprehensive template."""
        return self._get_comprehensive_analysis(user_query, context_data)

class GradioHokieWellApp:
    def __init__(self):
        """Create both agents, then load the datasets the UI reads from."""
        # agent: produces the structured analysis report (see analyze_student).
        # ai_agent: answers free-text questions (see handle_user_query).
        self.agent, self.ai_agent = SimulatedDatabricksAgent(), DatabricksAIAgent()
        self.load_data()

    def load_data(self):
        """Load the synthetic dataset"""
        try:
            self.students = pd.read_csv('students.csv')
            self.academic = pd.read_csv('academic_data.csv')
            self.wellbeing = pd.read_csv('wellbeing_data.csv')
            self.environmental = pd.read_csv('environmental_data.csv')
            self.resources = pd.read_csv('resources.csv')
            print("✅ Data loaded successfully")
        except Exception as e:
            print(f"❌ Data loading failed: {e}")
            self.create_minimal_data()

    def create_minimal_data(self):
        """Create minimal data if files are missing"""
        self.students = pd.DataFrame([
            {'student_id': 'S001', 'name': 'Alex Johnson', 'major': 'Computer Engineering', 'year': 'Sophomore'},
            {'student_id': 'S003', 'name': 'Jordan Smith', 'major': 'Psychology', 'year': 'Freshman'}
        ])

    def get_student_data(self, student_id):
        """Get current data for selected student"""
        return {
            'academic': self.academic[self.academic['student_id'] == student_id] if hasattr(self, 'academic') else pd.DataFrame(),
            'wellbeing': self.wellbeing[self.wellbeing['student_id'] == student_id] if hasattr(self, 'wellbeing') else pd.DataFrame(),
            'environmental': self.environmental[self.environmental['student_id'] == student_id] if hasattr(self, 'environmental') else pd.DataFrame()
        }

    def analyze_student(self, student_id):
        """Run AI analysis for a student"""
        student_data = self.get_student_data(student_id)
        return self.agent.run_holistic_analysis(student_id, student_data)

    def create_risk_gauge(self, risk_score):
        """Build a Plotly gauge for the risk score.

        The dial spans 0 to 1 with green (0-0.3), yellow (0.3-0.7) and red
        (0.7-1) bands, a red threshold line at 0.7, and a delta shown
        relative to 0.5.
        """
        colour_bands = [
            {'range': [0, 0.3], 'color': 'lightgreen'},
            {'range': [0.3, 0.7], 'color': 'yellow'},
            {'range': [0.7, 1], 'color': 'red'},
        ]
        alert_marker = {
            'line': {'color': "red", 'width': 4},
            'thickness': 0.75,
            'value': 0.7,
        }
        gauge_spec = {
            'axis': {'range': [0, 1], 'tickwidth': 1, 'tickcolor': "darkblue"},
            'bar': {'color': "darkblue"},
            'bgcolor': "white",
            'borderwidth': 2,
            'bordercolor': "gray",
            'steps': colour_bands,
            'threshold': alert_marker,
        }

        indicator = go.Indicator(
            mode="gauge+number+delta",
            value=risk_score,
            domain={'x': [0, 1], 'y': [0, 1]},
            title={'text': "Academic Risk Score", 'font': {'size': 20, 'color': 'black'}},
            delta={'reference': 0.5, 'increasing': {'color': "red"}, 'decreasing': {'color': "green"}},
            gauge=gauge_spec,
        )

        fig = go.Figure(indicator)
        fig.update_layout(
            height=300,
            margin=dict(l=20, r=20, t=50, b=20),
            font=dict(color='black'),
        )
        return fig

    def create_academic_trend_chart(self, student_id):
        """Create academic trend chart"""
        if not hasattr(self, 'academic'):
            return None

        student_academic = self.academic[self.academic['student_id'] == student_id]
        if student_academic.empty:
            return None

        student_academic = student_academic.sort_values('due_date')
        fig = go.Figure()

        fig.add_trace(go.Scatter(
            x=student_academic['due_date'],
            y=student_academic['grade'],
            mode='lines+markers',
            name='Grades',
            line=dict(color='#861F41', width=3),
            marker=dict(size=8)
        ))

        fig.update_layout(
            title="Academic Performance Trend",
            xaxis_title="Assignment Date",
            yaxis_title="Grade",
            height=300,
            showlegend=False,
            font=dict(color='black'),
            title_font=dict(color='black'),
            xaxis=dict(tickfont=dict(color='black')),
            yaxis=dict(tickfont=dict(color='black'))
        )

        return fig

    def create_wellbeing_chart(self, student_id):
        """Create wellbeing metrics chart"""
        if not hasattr(self, 'wellbeing'):
            return None

        student_wellbeing = self.wellbeing[self.wellbeing['student_id'] == student_id]
        if student_wellbeing.empty:
            return None

        student_wellbeing = student_wellbeing.sort_values('date')
        fig = go.Figure()

        fig.add_trace(go.Scatter(
            x=student_wellbeing['date'],
            y=student_wellbeing['sleep_duration'],
            mode='lines',
            name='Sleep Hours',
            line=dict(color='#E87722', width=2)
        ))

        fig.add_trace(go.Scatter(
            x=student_wellbeing['date'],
            y=student_wellbeing['wellbeing_score'],
            mode='lines',
            name='Wellbeing Score',
            line=dict(color='#861F41', width=2),
            yaxis='y2'
        ))

        fig.update_layout(
            title="Wellbeing Metrics",
            xaxis_title="Date",
            yaxis_title="Sleep Hours",
            yaxis2=dict(title="Wellbeing Score", overlaying='y', side='right'),
            height=300,
            showlegend=True,
            font=dict(color='black'),
            title_font=dict(color='black'),
            xaxis=dict(tickfont=dict(color='black')),
            yaxis=dict(tickfont=dict(color='black'))
        )

        return fig

    def format_analysis_results(self, analysis_result):
        """Format analysis results for display with BLACK TEXT"""
        academic = analysis_result['academic_analysis']
        causal = analysis_result['causal_analysis']
        plan = analysis_result['intervention_plan']

        # Academic insights - ALL BLACK TEXT
        academic_html = f"""
        <div style='background: #f8f9fa; padding: 15px; border-radius: 10px; margin: 10px 0;'>
            <h3 style='color: black; margin-bottom: 10px;'>📚 Academic Analysis</h3>
            <p style='color: black; margin: 5px 0;'><strong style='color: black;'>Risk Score:</strong> <span style='color: black;'>{academic['risk_score']:.2f}</span></p>
            <p style='color: black; margin: 5px 0;'><strong style='color: black;'>Trend:</strong> <span style='color: black;'>{academic['trend_direction'].title()}</span></p>
            <p style='color: black; margin: 5px 0;'><strong style='color: black;'>Key Insights:</strong></p>
            <ul style='color: black;'>
        """
        for insight in academic['key_insights']:
            academic_html += f"<li style='color: black;'>{insight}</li>"
        academic_html += "</ul></div>"

        # Causal analysis - ALL BLACK TEXT
        causal_html = f"""
        <div style='background: #fff3cd; padding: 15px; border-radius: 10px; margin: 10px 0;'>
            <h3 style='color: black; margin-bottom: 10px;'>🔍 Root Cause Analysis</h3>
            <p style='color: black; margin: 5px 0;'><strong style='color: black;'>Identified Factors:</strong></p>
            <ul style='color: black;'>
        """
        for factor in causal['causal_factors']:
            effect = causal['effect_sizes'].get(factor, 0)
            causal_html += f"<li style='color: black;'>{factor.replace('_', ' ').title()} (effect size: {effect:.3f})</li>"
        causal_html += "</ul></div>"

        # Intervention plan - ALL BLACK TEXT
        risk_level_color = "red" if plan["risk_level"] == "high" else "orange" if plan["risk_level"] == "medium" else "green"
        plan_html = f"""
        <div style='background: #d1ecf1; padding: 15px; border-radius: 10px; margin: 10px 0;'>
            <h3 style='color: black; margin-bottom: 10px;'>🎯 Intervention Plan</h3>
            <p style='color: black; margin: 5px 0;'><strong style='color: black;'>Risk Level:</strong> <span style='color: {risk_level_color}; font-weight: bold;'>{plan["risk_level"].upper()}</span></p>
            <p style='color: black; margin: 5px 0;'><strong style='color: black;'>Recommended Actions:</strong></p>
            <ul style='color: black;'>
        """
        for action in plan['planned_actions']:
            plan_html += f"""
            <li style='color: black; margin-bottom: 10px;'>
                <strong style='color: black;'>{action['type'].replace('_', ' ').title()}:</strong><br>
                <span style='color: black;'>{action['description']}</span><br>
                <em style='color: black;'>Confidence: {action['confidence']:.0%}</em>
            </li>
            """
        plan_html += "</ul></div>"

        return academic_html + causal_html + plan_html

    def get_resource_recommendations(self, student_id):
        """Get personalized resource recommendations"""
        analysis = self.analyze_student(student_id)
        risk_factors = analysis['causal_analysis']['causal_factors']

        recommendations = []
        if 'sleep_deprivation' in risk_factors:
            recommendations.append({
                'resource': 'Sleep & Wellness Workshop',
                'match': 0.95,
                'reason': 'Addresses identified sleep patterns'
            })
        if 'academic_overload' in risk_factors:
            recommendations.append({
                'resource': 'Engineering Tutoring Center',
                'match': 0.88,
                'reason': 'Targeted academic support'
            })
        if 'social_isolation' in risk_factors:
            recommendations.append({
                'resource': 'Student Clubs & Organizations',
                'match': 0.82,
                'reason': 'Community engagement opportunities'
            })

        if not recommendations:
            recommendations = [
                {'resource': 'Academic Success Center', 'match': 0.75, 'reason': 'General academic support'},
                {'resource': 'Counseling Services', 'match': 0.70, 'reason': 'Wellbeing support'}
            ]

        return recommendations

    def handle_user_query(self, user_query, student_selection):
        """Handle natural language queries with intelligent responses"""
        if not user_query.strip():
            return "Please enter a question about the student's analysis."

        student_id = student_selection.split(' - ')[0]
        analysis_result = self.analyze_student(student_id)

        # Prepare context for AI agent
        academic = analysis_result['academic_analysis']
        causal = analysis_result['causal_analysis']
        plan = analysis_result['intervention_plan']

        context_data = {
            'risk_score': academic['risk_score'],
            'trend': academic['trend_direction'],
            'factors': causal['causal_factors'],
            'actions': [action['description'] for action in plan['planned_actions']],
            'risk_level': plan['risk_level'],
            'student_id': student_id
        }

        # Get ENHANCED response from AI agent
        enhanced_response = self.ai_agent.get_enhanced_response(user_query, context_data)

        return f"""
        <div style='background: #f8f9fa; padding: 20px; border-radius: 10px; border-left: 5px solid #4CAF50; margin: 10px 0;'>
            <h4 style='color: black; margin-top: 0;'>💬 AI Response to: "{user_query}"</h4>
            <div style='color: black; line-height: 1.6; font-size: 14px; white-space: pre-line;'>
                {enhanced_response}
            </div>
            <div style='margin-top: 15px; padding: 10px; background: #e8f5e8; border-radius: 5px;'>
                <small style='color: #666;'>
                    <strong>Analysis Context:</strong> Student {student_id} | Risk Score: {academic['risk_score']:.2f} | Primary Factors: {', '.join(causal['causal_factors'])}
                </small>
            </div>
        </div>
        """

    def create_interface(self):
        """Create the Gradio interface with AGENT RESPONSE tab.

        Layout: a left column with the student selector, the analysis button,
        the risk gauge and three quick-stat boxes; a right column with four
        tabs (agent response, analysis results, visual analytics, resource
        recommendations).

        Returns:
            The configured ``gr.Blocks`` app (not yet launched).
        """
        # Build the dropdown entries straight from the roster columns.
        student_choices = [
            f"{student_id} - {name}"
            for student_id, name in zip(self.students['student_id'], self.students['name'])
        ]
        # Prefer the demo student, but never preselect a value that is not an
        # actual choice (e.g. when a different students.csv is loaded).
        preferred_choice = "S003 - Jordan Smith"
        if preferred_choice in student_choices:
            default_choice = preferred_choice
        else:
            default_choice = student_choices[0] if student_choices else None

        with gr.Blocks(theme=gr.themes.Soft(), title="SchoolDaddy", css=".gradio-container {color: black !important;}") as demo:
            gr.Markdown(
                """
                # 🎓 SchoolDaddy
                ### *From Reactive Support to Proactive Thriving*
                **Powered by Databricks AI Agent Framework**
                """
            )

            with gr.Row():
                with gr.Column(scale=1):
                    student_dropdown = gr.Dropdown(
                        choices=student_choices,
                        label="👤 Select Student",
                        value=default_choice,
                        elem_classes=["black-text"]
                    )

                    analyze_btn = gr.Button("🚀 Run AI Analysis", variant="primary", elem_classes=["black-text"])
                    risk_gauge = gr.Plot(label="Academic Risk Assessment")

                    gr.Markdown("### 📊 Quick Stats", elem_classes=["black-text"])
                    risk_score = gr.Textbox(label="Risk Score", interactive=False, elem_classes=["black-text"])
                    trend_direction = gr.Textbox(label="Trend Direction", interactive=False, elem_classes=["black-text"])
                    primary_factor = gr.Textbox(label="Primary Factor", interactive=False, elem_classes=["black-text"])

                with gr.Column(scale=2):
                    with gr.Tab("🤖 Agent Response"):
                        gr.Markdown("### 💬 Ask Anything About the Student")
                        gr.Markdown("""
                        **Example questions to try:**
                        - "How is he studying?"
                        - "Explain the sleep issues"
                        - "What causes the stress?"
                        - "Why are grades declining?"
                        - "What interventions would help?"
                        """)

                        user_query = gr.Textbox(
                            label="Enter your question about the student:",
                            placeholder="Type your question here...",
                            lines=3,
                            elem_classes=["black-text"]
                        )

                        ask_btn = gr.Button("🎯 Get AI Analysis", variant="primary")
                        agent_response = gr.HTML(label="AI Agent Response", elem_classes=["black-text"])

                    with gr.Tab("📈 Analysis Results"):
                        analysis_output = gr.HTML(label="AI Analysis Results", elem_classes=["black-text"])

                    with gr.Tab("📊 Visual Analytics"):
                        with gr.Row():
                            academic_chart = gr.Plot(label="Academic Performance")
                            wellbeing_chart = gr.Plot(label="Wellbeing Metrics")

                    with gr.Tab("🛟 Resource Recommendations"):
                        resources_output = gr.HTML(label="Personalized Recommendations", elem_classes=["black-text"])

            # Event handlers
            # The output order must match the tuple returned by run_complete_analysis.
            analyze_btn.click(
                fn=self.run_complete_analysis,
                inputs=[student_dropdown],
                outputs=[risk_gauge, risk_score, trend_direction, primary_factor, analysis_output, academic_chart, wellbeing_chart, resources_output]
            )

            # The button click and pressing Enter in the textbox share one handler.
            ask_btn.click(
                fn=self.handle_user_query,
                inputs=[user_query, student_dropdown],
                outputs=[agent_response]
            )

            user_query.submit(
                fn=self.handle_user_query,
                inputs=[user_query, student_dropdown],
                outputs=[agent_response]
            )

            # Switching student refreshes only the two charts; the analysis
            # outputs keep their previous content until the button is pressed.
            student_dropdown.change(
                fn=self.update_student_charts,
                inputs=[student_dropdown],
                outputs=[academic_chart, wellbeing_chart]
            )

            # Initial load
            demo.load(
                fn=self.initial_load,
                inputs=[student_dropdown],
                outputs=[academic_chart, wellbeing_chart]
            )

        return demo

    def run_complete_analysis(self, student_selection):
        """Run complete analysis and return all outputs"""
        student_id = student_selection.split(' - ')[0]
        analysis_result = self.analyze_student(student_id)

        # Risk gauge
        risk_gauge = self.create_risk_gauge(analysis_result['academic_analysis']['risk_score'])

        # Text outputs
        risk_score = f"{analysis_result['academic_analysis']['risk_score']:.2f}"
        trend_direction = analysis_result['academic_analysis']['trend_direction'].title()
        primary_factor = analysis_result['causal_analysis']['causal_factors'][0].replace('_', ' ').title() if analysis_result['causal_analysis']['causal_factors'] else "No significant factors"

        # Analysis results HTML
        analysis_html = self.format_analysis_results(analysis_result)

        # Charts
        academic_chart = self.create_academic_trend_chart(student_id)
        wellbeing_chart = self.create_wellbeing_chart(student_id)

        # Resource recommendations
        resources_html = self.format_resource_recommendations_html(student_id)

        return risk_gauge, risk_score, trend_direction, primary_factor, analysis_html, academic_chart, wellbeing_chart, resources_html

    def format_resource_recommendations_html(self, student_id):
        """Format resource recommendations as HTML with BLACK TEXT"""
        recommendations = self.get_resource_recommendations(student_id)

        html = "<div style='padding: 20px; color: black;'>"
        html += "<h3 style='color: black;'>🛟 Personalized Resource Recommendations</h3>"

        for rec in recommendations:
            html += f"""
            <div style='background: #e8f5e8; padding: 15px; margin: 10px 0; border-radius: 10px; border-left: 5px solid #4CAF50; color: black;'>
                <h4 style='color: black;'>{rec['resource']} <span style='float: right; background: #4CAF50; color: white; padding: 2px 8px; border-radius: 10px; font-size: 12px;'>{rec['match']:.0%} match</span></h4>
                <p style='color: black;'>{rec['reason']}</p>
            </div>
            """

        html += "</div>"
        return html

    def update_student_charts(self, student_selection):
        """Update charts when student changes"""
        student_id = student_selection.split(' - ')[0]
        academic_chart = self.create_academic_trend_chart(student_id)
        wellbeing_chart = self.create_wellbeing_chart(student_id)
        return academic_chart, wellbeing_chart

    def initial_load(self, student_selection):
        """Initial load of charts"""
        return self.update_student_charts(student_selection)

# Simulated Databricks Agent (keep your existing one)
class SimulatedDatabricksAgent:
    """Stand-in analysis agent that returns canned, rule-based reports.

    The report depends only on the student id: S003 and S001 have dedicated
    profiles and every other id gets the lower-risk default. The supplied
    data sources are not inspected.
    """

    def __init__(self):
        self.agent_id = "HokieWell_Simulated"

    def run_holistic_analysis(self, student_id, data_sources):
        """Return the simulated holistic analysis report for ``student_id``.

        Args:
            student_id: Student identifier used to pick the canned profile.
            data_sources: Accepted for interface compatibility; unused.

        Returns:
            Dict with agent metadata plus ``academic_analysis``,
            ``wellbeing_assessment``, ``causal_analysis``,
            ``intervention_plan`` and ``databricks_features_used``.
        """
        # Profiles are rebuilt on every call so callers never share lists.
        canned_profiles = [
            ("S003", 0.75,  # Jordan - high risk
             ["sleep_deprivation", "academic_overload", "social_isolation"],
             ["Grade trend showing significant decline over past 3 weeks",
              "Sleep duration consistently below 6 hours",
              "Assignment submission delays increasing"]),
            ("S001", 0.65,  # Alex - medium risk
             ["sleep_deprivation", "academic_overload"],
             ["Moderate grade decline detected",
              "Irregular sleep patterns affecting performance",
              "Increased library hours suggesting cramming behavior"]),
        ]
        for known_id, risk_score, factors, insights in canned_profiles:
            if student_id == known_id:
                break
        else:
            # Others - lower risk
            risk_score = 0.35
            factors = ["minor_adjustments_needed"]
            insights = [
                "Stable academic performance",
                "Healthy wellbeing patterns detected",
                "Minor optimizations possible"
            ]

        elevated = risk_score > 0.6

        if risk_score > 0.7:
            risk_level = "high"
        elif risk_score > 0.4:
            risk_level = "medium"
        else:
            risk_level = "low"

        # Above 0.4 the plan has two active interventions; otherwise a single
        # preventive check-in.
        if risk_score > 0.4:
            planned_actions = [
                {
                    "type": "academic_support",
                    "description": "Schedule targeted tutoring sessions with engineering specialists",
                    "confidence": risk_score
                },
                {
                    "type": "wellbeing_intervention",
                    "description": "Proactive wellbeing check-in and sleep hygiene workshop",
                    "confidence": max(0.3, risk_score - 0.1)
                }
            ]
        else:
            planned_actions = [
                {
                    "type": "preventive_maintenance",
                    "description": "Regular check-ins to maintain healthy patterns",
                    "confidence": 0.9
                }
            ]

        academic_analysis = {
            "risk_score": risk_score,
            "trend_direction": "declining" if elevated else "stable",
            "confidence": 0.82,
            "key_insights": insights,
            "model_version": "databricks_dbrx_instruct"
        }
        wellbeing_assessment = {
            "overall_score": max(0.3, 1 - risk_score + 0.1),
            "dimensions": {
                "sleep_health": {"score": max(0.3, 1 - risk_score), "trend": "declining" if elevated else "stable"},
                "stress_levels": {"score": risk_score, "trend": "increasing" if elevated else "stable"}
            }
        }
        # Each factor gets an equal share of the risk score plus a 0.1 offset.
        causal_analysis = {
            "causal_factors": factors,
            "effect_sizes": {factor: risk_score/len(factors) + 0.1 for factor in factors}
        }

        return {
            "agent_id": self.agent_id,
            "student_id": student_id,
            "analysis_timestamp": datetime.now().isoformat(),
            "academic_analysis": academic_analysis,
            "wellbeing_assessment": wellbeing_assessment,
            "causal_analysis": causal_analysis,
            "intervention_plan": {
                "risk_level": risk_level,
                "planned_actions": planned_actions
            },
            "databricks_features_used": [
                "mlflow_tracking",
                "feature_store",
                "model_registry",
                "causal_ml",
                "collaborative_filtering"
            ]
        }

def launch_gradio_app(server_name="0.0.0.0", server_port=7881, share=True,
                      debug=True, show_error=True):
    """Build the app and launch the Gradio interface.

    All arguments are forwarded to ``demo.launch``; the defaults reproduce the
    previously hard-coded configuration, so calling this with no arguments
    behaves as before.

    Args:
        server_name: Interface to bind; ``"0.0.0.0"`` listens on all interfaces.
        server_port: Local port to serve on.
        share: When true, Gradio also publishes a temporary public URL.
            Pass ``False`` to keep the app local.
        debug: When true, the call keeps running so errors and logs stay
            visible in the notebook cell.
        show_error: Forwarded unchanged to ``demo.launch``.
    """
    app = GradioHokieWellApp()
    demo = app.create_interface()

    demo.launch(
        server_name=server_name,
        server_port=server_port,
        share=share,
        debug=debug,
        show_error=show_error
    )

# Entry point: launch the app when this cell/script is executed directly.
if __name__ == "__main__":
    launch_gradio_app()

✅ Databricks AI Agent initialized
✅ Data loaded successfully
Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://edb31307938eb0c1a7.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


## Databricks AI Capabilities in SchoolDaddy (Simulated)

While this prototype utilizes simulated components, the architecture is designed to showcase how a real SchoolDaddy could leverage Databricks AI features:

*   **MLflow Tracking:** (Simulated in `MLflowTracker` class)
    *   Track experiments, log parameters, metrics (e.g., risk scores), and potentially model artifacts for student analysis runs.
    *   Provides a centralized platform for experiment management and reproducibility.

*   **Model Registry:** (Simulated in `ModelRegistryManager` class)
    *   Manage the lifecycle of AI models (e.g., academic risk predictor, wellbeing assessor).
    *   Version control, stage transitions (Staging, Production), and serve models consistently.

*   **Feature Store:** (Simulated in `FeatureStoreManager` class)
    *   Centralize and manage curated features (e.g., aggregated academic trends, wellbeing metrics).
    *   Ensure consistency between training and inference data, improving model reliability.

*   **Causal ML:** (Simulated in `CausalInferenceEngine` class)
    *   Estimate the causal impact of different factors (e.g., sleep, study habits) on outcomes (e.g., grades).
    *   Inform more effective intervention strategies by identifying root causes.

*   **Foundation Models / LLMs:** (Represented by the potential Groq integration in `AgenticQueryProcessor` and the structure of `DatabricksAIAgent`)
    *   Utilize large language models (LLMs) for natural language query understanding and response generation in the "Ask the Agent" feature.
    *   Potentially use Databricks' own Foundation Models for tasks like text summarization of student notes or generating personalized communication.

*   **AI Agent Framework:** (Conceptualized in `DatabricksHokieWellAgent` and `AgenticQueryProcessor`)
    *   Orchestrate different AI tools and data sources to perform complex tasks like holistic student analysis and intervention planning.
    *   Enable more intelligent and autonomous decision-making within the system.

**This project demonstrates the architectural pattern for building AI-powered applications on a platform like Databricks, leveraging its integrated MLOps and AI capabilities.**

# Test for flexible querying

In [None]:
class AgenticQueryProcessor:
    """Keyword-routed responder that simulates an agentic query pipeline.

    Args:
        student_data_getter: Callable taking a student id and returning that
            student's data sources.
        analysis_agent: Object exposing
            ``run_holistic_analysis(student_id, data_sources)``.
    """

    def __init__(self, student_data_getter, analysis_agent):
        self.student_data_getter = student_data_getter
        self.analysis_agent = analysis_agent
        # In a real scenario, you might initialize an LLM or other agent components here

    @staticmethod
    def _format_score(value):
        """Format a score with two decimals, or 'N/A' when it is missing or non-numeric."""
        try:
            return f"{value:.2f}"
        except (TypeError, ValueError):
            # The 'N/A' placeholder (or None) cannot take a float format spec.
            return 'N/A'

    def process(self, student_id, query):
        """
        Processes a user query using a simulated agentic approach.
        In a real implementation, this would involve:
        1.  Understanding the query (using an LLM or NLP)
        2.  Determining which data sources and analysis tools are needed
        3.  Calling the appropriate analysis functions
        4.  Synthesizing insights and recommendations
        5.  Generating a natural language response

        Here the query is matched against fixed keyword groups (academic,
        wellbeing, recommendations, causes); each matching group appends its
        section of the analysis. Missing scores are rendered as 'N/A' instead
        of raising a formatting error.

        Returns:
            The response lines joined by blank lines, or a generic message
            when no keyword group matched.
        """
        print(f"Agentic processor received query for {student_id}: '{query}'")

        student_data = self.student_data_getter(student_id)
        analysis_result = self.analysis_agent.run_holistic_analysis(student_id, student_data)

        response_parts = []

        # Simulate agentic reasoning and response generation
        query_lower = query.lower()

        def mentions(*keywords):
            """True when any keyword occurs as a substring of the query."""
            return any(keyword in query_lower for keyword in keywords)

        if mentions("academic", "grade", "performance", "study"):
            academic_analysis = analysis_result.get('academic_analysis', {})
            if academic_analysis:
                response_parts.append(f"Regarding academic performance for {student_id}:")
                response_parts.append(f"- Risk Score: {self._format_score(academic_analysis.get('risk_score', 'N/A'))}")
                response_parts.append(f"- Trend: {academic_analysis.get('trend_direction', 'N/A').title()}")
                response_parts.append("- Key Insights:")
                for insight in academic_analysis.get('key_insights', []):
                    response_parts.append(f"  - {insight}")
            else:
                response_parts.append(f"Could not retrieve academic analysis for {student_id}.")

        if mentions("wellbeing", "sleep", "stress", "health"):
            wellbeing_assessment = analysis_result.get('wellbeing_assessment', {})
            if wellbeing_assessment:
                dimensions = wellbeing_assessment.get('dimensions', {})
                sleep_health = dimensions.get('sleep_health', {})
                stress_levels = dimensions.get('stress_levels', {})
                response_parts.append(f"Regarding wellbeing for {student_id}:")
                response_parts.append(f"- Overall Wellbeing Score: {self._format_score(wellbeing_assessment.get('overall_score', 'N/A'))}")
                response_parts.append(f"- Sleep Health Score: {self._format_score(sleep_health.get('score', 'N/A'))}, Trend: {sleep_health.get('trend', 'N/A')}")
                response_parts.append(f"- Stress Levels Score: {self._format_score(stress_levels.get('score', 'N/A'))}, Trend: {stress_levels.get('trend', 'N/A')}")
            else:
                response_parts.append(f"Could not retrieve wellbeing assessment for {student_id}.")

        if mentions("recommendations", "actions", "support", "resource"):
            intervention_plan = analysis_result.get('intervention_plan', {})
            if intervention_plan:
                response_parts.append(f"Recommended actions and resources for {student_id}:")
                response_parts.append(f"- Overall Risk Level: {intervention_plan.get('risk_level', 'N/A').upper()}")
                response_parts.append("- Planned Actions:")
                for action in intervention_plan.get('planned_actions', []):
                    response_parts.append(f"  - Type: {action.get('type', 'N/A').replace('_', ' ').title()}")
                    response_parts.append(f"    Description: {action.get('description', 'N/A')}")
                    response_parts.append(f"    Confidence: {action.get('confidence', 0):.0%}")
            else:
                response_parts.append(f"Could not retrieve intervention plan for {student_id}.")

        if mentions("pattern", "why", "cause", "factors"):
            causal_analysis = analysis_result.get('causal_analysis', {})
            if causal_analysis:
                response_parts.append(f"Likely causal factors for observed patterns in {student_id}:")
                effect_sizes = causal_analysis.get('effect_sizes', {})
                for factor in causal_analysis.get('causal_factors', []):
                    effect = effect_sizes.get(factor, 0)
                    response_parts.append(f"- {factor.replace('_', ' ').title()} (estimated effect size: {effect:.3f})")
            else:
                response_parts.append(f"Could not retrieve causal analysis for {student_id}.")

        if not response_parts:
            return f"I am processing your query about {student_id}. Please run the main analysis for detailed insights or try a different question."

        return "\n\n".join(response_parts)

# This class will be instantiated and used in the Gradio app
# processor = AgenticQueryProcessor(app_instance.get_student_data, app_instance.agent)
# Then call processor.process(student_id, query)

# Merged interface

final02

In [None]:
import os
import requests
import numpy as np
import pandas as pd
import json
import gradio as gr
import plotly.graph_objects as go
from datetime import datetime

# Databricks Model Scoring Functions
def create_tf_serving_json(data):
    """Wrap ``data`` in an ``{'inputs': ...}`` request payload.

    A dict has ``.tolist()`` applied to each of its values; anything else has
    ``.tolist()`` applied to the object itself.
    """
    if isinstance(data, dict):
        inputs = {name: data[name].tolist() for name in data.keys()}
    else:
        inputs = data.tolist()
    return {'inputs': inputs}

def score_model(dataset):
    """Return a simulated model-serving response for ``dataset``.

    No request is sent anywhere. The prediction is the first value of the
    ``academic_risk_score`` column (0.5 when the frame is empty or lacks that
    column) plus Gaussian noise (std 0.1), clamped to ``[0.0, 1.0]``.

    Returns a dict of the form ``{'predictions': [value]}``. Both the start of
    the simulation and the resulting response are printed.
    """
    # NOTE: To call a real Databricks Model Serving endpoint, replace the
    # simulation below with a POST request carrying a
    # 'Authorization: Bearer <token>' header (token read from an environment
    # variable or Colab secret, never hardcoded), raise on any non-200 status,
    # and return response.json().
    print("Simulating call to Databricks Model Serving Endpoint...")

    # Use the caller-supplied risk feature as the base when it is available.
    has_risk_feature = not dataset.empty and 'academic_risk_score' in dataset.columns
    if has_risk_feature:
        base_risk = dataset['academic_risk_score'].iloc[0]
    else:
        base_risk = 0.5

    # Add some noise, then keep the value inside the valid score range.
    noisy_risk = base_risk + np.random.normal(0, 0.1)
    clamped_risk = min(1.0, max(0.0, noisy_risk))

    response = {'predictions': [clamped_risk]}
    print(f"Simulated model response: {response}")
    return response


class DatabricksAIAgent:
    """Rule-based stand-in for an LLM agent answering questions about a student.

    Replies are produced locally: the query is keyword-matched to a topic and a
    markdown template is filled from ``context_data``. No external API is called.

    ``context_data`` keys read by the templates (all optional):
    ``risk_score`` (default 0.5), ``factors`` (default ``[]``), ``actions``
    (default ``[]``), ``student_id`` (default ``'the student'``) and ``trend``
    (default ``'stable'``).
    """

    # (keywords, handler method name), checked in order; the first topic with
    # any keyword occurring in the lower-cased query wins. Matching is by
    # substring, so e.g. 'rest' also matches inside 'interest'.
    _TOPIC_ROUTES = (
        (('study', 'studying', 'homework', 'assignments', 'learn'), '_get_study_analysis'),
        (('sleep', 'rest', 'tired', 'fatigue', 'energy'), '_get_sleep_analysis'),
        (('stress', 'overwhelm', 'pressure', 'anxiety', 'worry'), '_get_stress_analysis'),
        (('social', 'friends', 'lonely', 'isolated', 'community'), '_get_social_analysis'),
        (('grade', 'performance', 'academic', 'gpa', 'score'), '_get_academic_analysis'),
        (('why', 'cause', 'reason', 'because', 'factor'), '_get_causal_analysis'),
        (('health', 'wellbeing', 'wellness', 'feel', 'mood'), '_get_wellbeing_analysis'),
        (('resource', 'help', 'support', 'recommend', 'suggest'), '_get_resource_analysis'),
    )

    # Risk cut-offs per number of wording tiers. The 3-tier cut-offs are the
    # same ones the templates use for their High/Medium/Low labels; the 4-tier
    # cut-offs split the 0-1 range into quarters.
    _TIER_THRESHOLDS = {3: (0.7, 0.4), 4: (0.75, 0.5, 0.25)}

    def __init__(self):
        self.databricks_enabled = True # Placeholder for actual Databricks connection status
        print("✅ Databricks AI Agent initialized")

    @staticmethod
    def _pick_by_risk(risk_score, options):
        """Pick the wording that matches ``risk_score``.

        ``options`` must be ordered from most severe to least severe and have
        3 or 4 entries. The first option is returned for the highest risk band
        and the last one for the lowest (or for a NaN score, which exceeds no
        cut-off). Previously the templates indexed these lists with
        ``int(risk_score // step)``, which handed the mildest wording to the
        highest-risk students.
        """
        thresholds = DatabricksAIAgent._TIER_THRESHOLDS[len(options)]
        for option, threshold in zip(options, thresholds):
            if risk_score > threshold:
                return option
        return options[-1]

    def get_enhanced_response(self, user_query, context_data):
        """Answer ``user_query`` about the student described by ``context_data``.

        Returns a markdown string. If template generation raises, the error is
        printed and a minimal summary from ``_get_smart_fallback`` is returned
        instead.
        """
        try:
            # In a real Databricks environment, this could use:
            # - Databricks Foundation Models API
            # - Custom LLM served via MLflow
            # - Agentic orchestration logic accessing multiple tools

            # For this simulation, we'll use the intelligent response generation logic
            return self._generate_intelligent_response(user_query, context_data)
        except Exception as e:
            print(f"❌ Databricks LLM failed: {e}")
            return self._get_smart_fallback(user_query, context_data)

    def _generate_intelligent_response(self, user_query, context_data):
        """Route the query to the first matching topic template.

        Topics are tried in the order of ``_TOPIC_ROUTES``; when no keyword
        matches, the general analysis template is used.
        """
        query_lower = user_query.lower()
        for keywords, handler_name in self._TOPIC_ROUTES:
            if any(word in query_lower for word in keywords):
                return getattr(self, handler_name)(user_query, context_data)

        # Default intelligent response
        return self._get_general_analysis(user_query, context_data)

    def _get_study_analysis(self, user_query, context_data):
        """Generate detailed study analysis"""
        risk_score = context_data.get('risk_score', 0.5)
        factors = context_data.get('factors', [])
        student_id = context_data.get('student_id', 'the student')

        study_insights = {
            'high_risk': "The data indicates significant challenges in study habits. There's evidence of cramming, inconsistent study schedules, and potential burnout affecting learning efficiency.",
            'medium_risk': "Study patterns show some concerning trends, including irregular study sessions and possible time management issues that could be optimized.",
            'low_risk': "Study habits appear generally healthy with minor areas for improvement in consistency and technique."
        }

        risk_level = 'high_risk' if risk_score > 0.7 else 'medium_risk' if risk_score > 0.4 else 'low_risk'

        return f"""
**📚 Detailed Study Pattern Analysis for {student_id}**

**Current Assessment:**
{study_insights[risk_level]}

**Specific Study Challenges Identified:**
- **Academic Performance Trend**: {context_data.get('trend', 'stable')}
- **Primary Factors Affecting Studies**: {', '.join(factors)}
- **Risk Level Impact**: {risk_score:.2f} ({(risk_score*100):.0f}% concern level)

**Study Pattern Breakdown:**
1. **Consistency**: The data suggests {self._pick_by_risk(risk_score, ['highly irregular', 'somewhat irregular', 'relatively consistent'])} study patterns
2. **Efficiency**: Learning efficiency appears to be {self._pick_by_risk(risk_score, ['significantly impacted', 'moderately affected', 'generally effective'])}
3. **Balance**: Study-life balance shows {self._pick_by_risk(risk_score, ['concerning imbalance', 'some imbalance', 'reasonable balance'])}

**Evidence-Based Recommendations:**
- Implement spaced repetition technique for better retention
- Establish consistent daily study blocks (2-3 hours with breaks)
- Utilize active recall methods instead of passive reading
- Schedule weekly review sessions to reinforce learning

**Immediate Action Steps:**
1. Visit the Academic Success Center for personalized study strategy
2. Download a study planning app to track and schedule sessions
3. Form a study group for accountability and collaborative learning

*This analysis is based on comprehensive academic data patterns and proven educational psychology principles.*
"""

    def _get_sleep_analysis(self, user_query, context_data):
        """Generate detailed sleep analysis"""
        risk_score = context_data.get('risk_score', 0.5)
        factors = context_data.get('factors', [])
        student_id = context_data.get('student_id', 'the student')

        return f"""
**😴 Comprehensive Sleep Analysis for {student_id}**

**Sleep Health Assessment:**
The data indicates {self._pick_by_risk(risk_score, ['critical sleep deprivation requiring immediate attention', 'significant sleep issues', 'moderate sleep concerns', 'generally healthy sleep patterns'])}.

**Impact Analysis:**
- **Cognitive Function**: Sleep quality directly impacts {['memory consolidation, learning efficiency, and problem-solving abilities', 'academic performance and information retention'][int(risk_score > 0.5)]}
- **Academic Correlation**: Research shows sleep deprivation can reduce academic performance by up to 30%
- **Wellbeing Connection**: Poor sleep exacerbates stress and reduces coping capacity

**Identified Sleep Patterns:**
- Primary sleep-related factors: {', '.join([f for f in factors if 'sleep' in f.lower()] or ['Sleep duration and consistency'])}
- Risk level: {risk_score:.2f} ({'High' if risk_score > 0.7 else 'Medium' if risk_score > 0.4 else 'Low'} concern)

**Recommended Sleep Interventions:**
1. **Sleep Schedule**: Establish consistent bedtime/wake-time (7-8 hours target)
2. **Environment Optimization**: Cool, dark, quiet sleeping environment
3. **Digital Hygiene**: No screens 1 hour before bedtime
4. **Relaxation Techniques**: Mindfulness, reading, or light stretching before sleep

**University Resources:**
- Sleep & Wellness Workshop (Tuesdays, 3-4 PM)
- Counseling Center sleep resources
- Peer wellness coaching program

*Analysis based on sleep science research and student wellness data patterns.*
"""

    def _get_stress_analysis(self, user_query, context_data):
        """Generate detailed stress analysis"""
        risk_score = context_data.get('risk_score', 0.5)
        factors = context_data.get('factors', [])
        student_id = context_data.get('student_id', 'the student')

        return f"""
**😥 Stress and Wellbeing Analysis for {student_id}**

**Stress Level Assessment:**
Current data indicates {self._pick_by_risk(risk_score, ['critical stress levels requiring immediate attention', 'elevated stress levels needing proactive management', 'moderate stress with opportunities for improvement', 'generally manageable stress levels'])}.

**Stress Factor Analysis:**
- **Primary Stressors**: {', '.join(factors)}
- **Academic Pressure**: {risk_score:.2f} risk score suggests significant academic-related stress
- **Compounding Effects**: Stress appears to be affecting {self._pick_by_risk(risk_score, ['multiple areas of functioning', 'key academic performance indicators', 'overall wellbeing'])}

**Stress Impact Chain Identified:**
1. Academic demands → Sleep disruption → Reduced coping capacity → Further academic challenges
2. Social withdrawal → Increased perceived burden → Decreased motivation → Continued isolation

**Evidence-Based Stress Management:**
- **Immediate Relief**: 5-4-3-2-1 grounding technique, box breathing
- **Short-term Strategy**: Time blocking, priority matrix, saying 'no' to non-essentials
- **Long-term Resilience**: Regular exercise, social connection, mindfulness

**Proactive Support Recommendations:**
1. Schedule appointment with Counseling Center (confidential, professional support)
2. Attend stress management workshop (weekly sessions available)
3. Utilize campus mindfulness resources (guided meditations, yoga classes)

*This analysis integrates stress physiology research with student-specific data patterns.*
"""

    def _get_social_analysis(self, user_query, context_data):
        """Generate detailed social analysis"""
        risk_score = context_data.get('risk_score', 0.5)
        factors = context_data.get('factors', [])
        student_id = context_data.get('student_id', 'the student')

        return f"""
**👥 Social Connection Analysis for {student_id}**

**Social Wellbeing Assessment:**
The data suggests {self._pick_by_risk(risk_score, ['significant social isolation requiring intervention', 'notable social connection challenges', 'moderate opportunities for social engagement', 'generally healthy social patterns'])}.

**Social Factor Analysis:**
- **Isolation Indicators**: {', '.join([f for f in factors if 'social' in f.lower()] or ['Campus engagement metrics'])}
- **Academic Impact**: Social connection correlates with {['persistence, motivation, and academic success', 'overall student satisfaction and performance'][int(risk_score > 0.5)]}
- **Risk Level**: {risk_score:.2f} indicates {self._pick_by_risk(risk_score, ['urgent need for social support', 'important opportunity for connection building', 'moderate focus on social wellness'])}

**Connection-Building Strategies:**
1. **Structured Social Opportunities**: Club meetings, study groups, campus events
2. **Low-Pressure Interactions**: Coffee chats, interest-based activities
3. **Support Systems**: Peer mentoring, faculty office hours, counseling groups

**Recommended Campus Resources:**
- Student Organizations Fair (weekly)
- Peer Connection Program
- Community Engagement Office
- Cultural and Identity Centers

*Analysis based on social connection research and student engagement data.*
"""

    def _get_academic_analysis(self, user_query, context_data):
        """Generate detailed academic analysis"""
        risk_score = context_data.get('risk_score', 0.5)
        factors = context_data.get('factors', [])
        student_id = context_data.get('student_id', 'the student')
        trend = context_data.get('trend', 'stable')

        return f"""
**📊 Comprehensive Academic Analysis for {student_id}**

**Academic Performance Overview:**
- **Current Risk Score**: {risk_score:.2f} ({'High' if risk_score > 0.7 else 'Medium' if risk_score > 0.4 else 'Low'} concern level)
- **Performance Trend**: {trend.title()} pattern identified
- **Primary Academic Factors**: {', '.join(factors)}

**Detailed Performance Insights:**
The academic data reveals {self._pick_by_risk(risk_score, ['significant challenges requiring immediate intervention', 'notable areas for improvement and support', 'moderate opportunities for academic enhancement'])}.

**Pattern Analysis:**
1. **Assignment Performance**: {self._pick_by_risk(risk_score, ['Concerning decline in recent submissions', 'Some variability in assignment quality', 'Generally consistent performance'])}
2. **Learning Progression**: {self._pick_by_risk(risk_score, ['Evidence of cumulative knowledge gaps', 'Some challenges with concept integration', 'Steady learning progression'])}
3. **Engagement Metrics**: {self._pick_by_risk(risk_score, ['Reduced course interaction and participation', 'Moderate engagement with fluctuations', 'Consistent academic engagement'])}

**Academic Support Strategy:**
- **Immediate**: Targeted tutoring for specific course challenges
- **Short-term**: Study skills workshop and time management training
- **Long-term**: Academic coaching for sustainable success habits

*Analysis based on comprehensive academic metrics and learning science principles.*
"""

    def _get_causal_analysis(self, user_query, context_data):
        """Generate detailed causal analysis"""
        factors = context_data.get('factors', [])
        student_id = context_data.get('student_id', 'the student')

        return f"""
**🔍 Causal Relationship Analysis for {student_id}**

**Root Cause Identification:**
Through comprehensive pattern analysis, I've identified several interconnected causal relationships:

**Primary Causal Chain:**
1. **Initial Trigger**: {factors[0] if factors else 'Academic demands'} creates initial pressure
2. **Secondary Effects**: This leads to {factors[1] if len(factors) > 1 else 'wellbeing challenges'}
3. **Compounding Impact**: These factors together affect {factors[2] if len(factors) > 2 else 'overall academic performance'}

**Interconnected Factors:**
- **Academic → Wellbeing**: Course pressure impacts sleep and stress levels
- **Wellbeing → Academic**: Poor sleep reduces learning capacity and motivation
- **Social → Academic**: Isolation decreases academic support and engagement
- **Environmental → All**: Campus engagement affects overall student experience

**Evidence-Based Intervention Points:**
Breaking the cycle at any point can create positive ripple effects. The most impactful intervention points appear to be:
1. Addressing {factors[0] if factors else 'the primary stressor'}
2. Implementing wellbeing supports to build resilience
3. Enhancing social connections for natural support systems

*This causal analysis uses pattern recognition and educational research to identify key leverage points.*
"""

    def _get_wellbeing_analysis(self, user_query, context_data):
        """Generate detailed wellbeing analysis"""
        risk_score = context_data.get('risk_score', 0.5)
        student_id = context_data.get('student_id', 'the student')

        return f"""
**🌱 Comprehensive Wellbeing Analysis for {student_id}**

**Holistic Wellbeing Assessment:**
The data indicates {self._pick_by_risk(risk_score, ['significant wellbeing challenges requiring comprehensive support', 'notable wellbeing concerns needing proactive attention', 'moderate wellbeing with opportunities for enhancement', 'generally positive wellbeing patterns'])}.

**Wellbeing Dimension Analysis:**
- **Physical Wellbeing**: {self._pick_by_risk(risk_score, ['Concerning sleep and activity patterns', 'Some areas for physical health improvement', 'Generally healthy physical habits'])}
- **Emotional Wellbeing**: {self._pick_by_risk(risk_score, ['Elevated stress and emotional challenges', 'Moderate emotional fluctuations', 'Generally stable emotional patterns'])}
- **Social Wellbeing**: {self._pick_by_risk(risk_score, ['Significant social connection challenges', 'Moderate social engagement opportunities', 'Healthy social support systems'])}
- **Academic Wellbeing**: {self._pick_by_risk(risk_score, ['Academic pressures significantly impacting overall wellbeing', 'Some academic-stress interplay', 'Generally positive academic experience'])}

**Integrated Wellbeing Strategy:**
1. **Foundation**: Sleep, nutrition, and basic self-care
2. **Support Systems**: Social connections and professional resources
3. **Resilience Building**: Stress management and coping skills
4. **Thriving Skills**: Purpose, engagement, and personal growth

**Campus Wellbeing Ecosystem:**
- Counseling and Psychological Services
- Wellness Center programs and workshops
- Peer support networks
- Faculty and staff mentoring

*Analysis based on holistic wellbeing frameworks and student development research.*
"""

    def _get_resource_analysis(self, user_query, context_data):
        """Generate detailed resource analysis"""
        factors = context_data.get('factors', [])
        actions = context_data.get('actions', [])
        student_id = context_data.get('student_id', 'the student')

        return f"""
**🛟 Personalized Resource Analysis for {student_id}**

**Resource Matching Strategy:**
Based on the specific challenges identified, I've curated resources that directly address the root causes:

**Primary Resource Recommendations:**
{chr(10).join(['• ' + action for action in actions])}

**Resource Effectiveness Analysis:**
- **Targeted Support**: Each resource addresses specific factors: {', '.join(factors)}
- **Evidence-Based**: These interventions have proven effective for similar student profiles
- **Accessibility**: All resources are freely available through university services

**Implementation Timeline:**
1. **Immediate (This Week)**: {actions[0] if actions else 'Academic consultation'}
2. **Short-term (2-4 Weeks)**: Regular support sessions and skill building
3. **Ongoing**: Continuous monitoring and adjustment of support strategies

**Expected Outcomes:**
- 30-50% improvement in identified challenge areas within 4-6 weeks
- Enhanced coping skills and resilience building
- Sustainable academic and personal success habits

*Resource recommendations based on effectiveness research and student success data.*
"""

    def _get_general_analysis(self, user_query, context_data):
        """Generate general intelligent analysis"""
        risk_score = context_data.get('risk_score', 0.5)
        factors = context_data.get('factors', [])
        actions = context_data.get('actions', [])
        student_id = context_data.get('student_id', 'the student')
        trend = context_data.get('trend', 'stable')

        return f"""
**🤖 Intelligent Analysis for {student_id}**

**Comprehensive Student Profile Analysis:**

I understand you're asking about "{user_query}". Based on the comprehensive data analysis, here's my assessment:

**Current Status Overview:**
- **Risk Level**: {risk_score:.2f} ({'High' if risk_score > 0.7 else 'Medium' if risk_score > 0.4 else 'Low'} concern)
- **Primary Factors**: {', '.join(factors)}
- **Trend Direction**: {trend.title()} pattern
- **Overall Outlook**: {self._pick_by_risk(risk_score, ['Requires immediate proactive support', 'Would benefit from targeted interventions', 'Shows generally positive patterns with minor enhancements needed'])}

**Detailed Insights:**
The data reveals interconnected patterns where {factors[0] if factors else 'academic pressures'} appear to be influencing {factors[1] if len(factors) > 1 else 'overall wellbeing'}. This creates a cycle that affects multiple areas of student experience.

**Evidence-Based Perspective:**
Research indicates that addressing these challenges through {actions[0] if actions else 'targeted support'} can break negative cycles and create positive momentum. The university's support systems are specifically designed to help with these types of situations.

**Recommended Approach:**
1. Start with the most impactful intervention: {actions[0] if actions else 'academic support'}
2. Monitor progress through regular check-ins
3. Adjust support strategies based on response and feedback

**Next Steps:**
I recommend discussing these findings with {student_id} and collaboratively developing an action plan that feels manageable and supportive.

*This analysis integrates educational psychology, student development theory, and pattern recognition from comprehensive data.*
"""

    def _get_smart_fallback(self, user_query, context_data):
        """Build a minimal reply after normal response generation has raised.

        The primary generator is deterministic, so calling it again (as this
        method used to) would raise the same error. This reply is therefore
        assembled only from defensively read values: a non-dict
        ``context_data`` is treated as empty, the risk score is shown only when
        numeric, and list entries are converted with ``str`` before joining.
        """
        context = context_data if isinstance(context_data, dict) else {}
        student_id = context.get('student_id', 'the student')

        lines = [f"**🤖 Summary for {student_id}**", ""]

        risk_score = context.get('risk_score')
        if isinstance(risk_score, (int, float)):
            lines.append(f"- **Risk Level**: {risk_score:.2f}")

        for label, key in (("Primary Factors", 'factors'), ("Suggested Actions", 'actions')):
            entries = context.get(key)
            if isinstance(entries, (list, tuple)) and entries:
                lines.append(f"- **{label}**: {', '.join(str(entry) for entry in entries)}")

        lines.append("")
        lines.append(
            "A detailed analysis could not be generated for this question. "
            "Please try rephrasing it or re-run the main analysis."
        )
        return "\n".join(lines)


class DatabricksModelAgent:
    """Agent that integrates with Databricks model endpoint.

    Builds a one-row feature frame from the student's data sources, scores it
    through ``score_model`` and reshapes the prediction into the nested
    analysis dict returned by ``run_holistic_analysis``.
    """

    def __init__(self):
        self.agent_id = "Databricks_Model_Agent"
        print("✅ Databricks Model Agent initialized")

    def run_holistic_analysis(self, student_id, data_sources):
        """Run analysis using Databricks model endpoint.

        ``data_sources`` is a dict that may hold 'academic', 'wellbeing' and
        'environmental' DataFrames. Any exception raised while preparing
        features or scoring is printed and the simulated agent's response is
        returned instead.
        """
        try:
            # Prepare data for Databricks model
            features = self._prepare_features(student_id, data_sources)

            # Score model with Databricks endpoint
            model_result = score_model(features)

            return self._format_model_response(model_result, student_id)

        except Exception as e:
            print(f"❌ Databricks model failed: {e}")
            # Fallback to simulated agent
            return self._get_simulated_response(student_id)

    def _prepare_features(self, student_id, data_sources):
        """Prepare features for Databricks model.

        Returns a single-row DataFrame with the columns ``student_id``,
        ``academic_risk_score``, ``wellbeing_score`` and
        ``environmental_factors``. Missing data sources are treated as empty
        frames.
        """
        # Extract relevant features from data sources
        academic_data = data_sources.get('academic', pd.DataFrame())
        wellbeing_data = data_sources.get('wellbeing', pd.DataFrame())
        environmental_data = data_sources.get('environmental', pd.DataFrame())

        # Create feature vector (adjust based on your model's expected input)
        # This is a placeholder - you would extract meaningful features here
        features = {
            'student_id': [student_id],
            'academic_risk_score': [self._calculate_academic_risk(academic_data)], # Example feature
            'wellbeing_score': [self._calculate_wellbeing_score(wellbeing_data)],   # Example feature
            'environmental_factors': [self._assess_environmental_factors(environmental_data)] # Example feature
            # Add more relevant features based on your Databricks model
        }

        return pd.DataFrame(features)

    @staticmethod
    def _mean_or_default(frame, column, default):
        """Mean of the non-missing values in ``frame[column]``, or ``default`` if there are none.

        A column of a non-empty frame is never empty, so checking ``.empty`` on
        the raw column can never trigger the default; an all-NaN column would
        then produce a NaN mean. Dropping missing values first makes the
        default reachable. A missing column still raises ``KeyError``.
        """
        values = frame[column].dropna()
        return values.mean() if not values.empty else default

    @staticmethod
    def _clamp_unit(value):
        """Clamp ``value`` to the closed interval [0.0, 1.0]."""
        return min(1.0, max(0.0, value))

    def _calculate_academic_risk(self, academic_data):
        """Calculate a simple academic risk score for simulation/feature prep.

        Returns 0.5 for an empty frame. Otherwise the score is
        ``1 - mean(grade) / 100`` (mean defaults to 75 when no grade is
        present) plus Gaussian noise (std 0.05), clamped to [0, 1].
        """
        if academic_data.empty:
            return 0.5
        # Simple logic: lower average grade = higher risk
        avg_grade = self._mean_or_default(academic_data, 'grade', 75)
        risk = 1.0 - (avg_grade / 100.0)
        return self._clamp_unit(risk + np.random.normal(0, 0.05)) # Add some noise

    def _calculate_wellbeing_score(self, wellbeing_data):
        """Calculate a simple wellbeing score for simulation/feature prep.

        Returns 0.5 for an empty frame. Otherwise the score is
        ``mean(wellbeing_score) / 5`` (mean defaults to 4.0 when no value is
        present) plus Gaussian noise (std 0.05), clamped to [0, 1].
        """
        if wellbeing_data.empty:
            return 0.5
        # Simple logic: lower wellbeing score = lower score (higher risk)
        avg_wellbeing = self._mean_or_default(wellbeing_data, 'wellbeing_score', 4.0)
        # Assuming wellbeing_score is on a scale (e.g., 1-5)
        score = avg_wellbeing / 5.0 # Normalize to 0-1
        return self._clamp_unit(score + np.random.normal(0, 0.05)) # Add some noise

    def _assess_environmental_factors(self, environmental_data):
        """Assess a simple environmental factor score for simulation/feature prep.

        Returns 0.5 for an empty frame. Otherwise the score is
        ``1 - mean(campus_engagement_score) / 2`` (mean defaults to 1.0 when no
        value is present) plus Gaussian noise (std 0.05), clamped to [0, 1].
        """
        if environmental_data.empty:
            return 0.5
        # Simple logic: lower campus engagement = higher risk
        avg_engagement = self._mean_or_default(environmental_data, 'campus_engagement_score', 1.0)
        # Assuming engagement_score is on a scale (e.g., 0-2)
        score = avg_engagement / 2.0 # Normalize to 0-1
        return self._clamp_unit(1.0 - score + np.random.normal(0, 0.05)) # Higher risk for lower engagement


    def _format_model_response(self, model_result, student_id):
        """Format Databricks model response to match expected structure.

        The risk score is the first entry of ``model_result['predictions']``;
        0.5 is used when ``model_result`` is not a dict or has no (non-empty)
        predictions. All other fields are derived from that score.
        """
        # Extract predictions from model result (adjust based on your model's output format)
        # Assuming the model returns a 'predictions' key with a list of scores
        predictions = model_result.get('predictions') if isinstance(model_result, dict) else None
        risk_score = predictions[0] if predictions else 0.5

        # Shared by the academic trend and the sleep/stress dimension trends.
        elevated = risk_score > 0.6

        # Map to expected analysis structure
        return {
            "agent_id": self.agent_id,
            "student_id": student_id,
            "analysis_timestamp": datetime.now().isoformat(),
            "academic_analysis": {
                "risk_score": risk_score,
                "trend_direction": "declining" if elevated else "stable",
                "confidence": 0.85,
                "key_insights": [
                    f"Model predicted risk score: {risk_score:.2f}",
                    "Analysis powered by Databricks ML model",
                    "Prediction based on multiple student features"
                ],
                "model_version": "databricks_endpoint"
            },
            "wellbeing_assessment": {
                # Simulate wellbeing inversely related to risk. The +0.1 offset
                # would push the score above 1.0 for risk < 0.1, so cap it.
                "overall_score": min(1.0, max(0.3, 1 - risk_score + 0.1)),
                "dimensions": {
                    "sleep_health": {"score": max(0.3, 1 - risk_score), "trend": "declining" if elevated else "stable"},
                    "stress_levels": {"score": risk_score, "trend": "increasing" if elevated else "stable"}
                }
            },
            "causal_analysis": {
                # Simulate causal factors based on risk score
                "causal_factors": ["academic_pressure", "sleep_issues", "time_management"] if risk_score > 0.5 else ["minor_adjustments_needed"],
                "effect_sizes": {"academic_pressure": risk_score * 0.6, "sleep_issues": risk_score * 0.3, "time_management": risk_score * 0.1} if risk_score > 0.5 else {"minor_adjustments_needed": 0.4}
            },
            "intervention_plan": {
                "risk_level": "high" if risk_score > 0.7 else "medium" if risk_score > 0.4 else "low",
                "planned_actions": [
                    {
                        "type": "ai_recommendation",
                        "description": "Personalized intervention based on model analysis",
                        "confidence": risk_score
                    },
                    {
                        "type": "monitoring",
                        "description": "Continuous assessment using AI models",
                        "confidence": 0.9
                    }
                ] if risk_score > 0.4 else [
                     {
                        "type": "preventive_maintenance",
                        "description": "Regular check-ins to maintain healthy patterns",
                        "confidence": 0.9
                    }
                ]
            },
            "databricks_features_used": [
                "mlflow_model",
                "serving_endpoint",
                "real_time_scoring"
            ]
        }


    def _get_simulated_response(self, student_id):
        """Fallback simulated response"""
        # This uses the original SimulatedDatabricksAgent as a fallback
        simulated_agent = SimulatedDatabricksAgent()
        # Call with empty data_sources as the fallback agent doesn't need real data here
        return simulated_agent.run_holistic_analysis(student_id, {})


# Keep the original SimulatedDatabricksAgent as a potential fallback or alternative
class SimulatedDatabricksAgent:
    """Offline agent returning canned, student-specific analysis results.

    No data is read and no model is called: the risk score, causal factors and
    insights are looked up from ``student_id`` alone.
    """

    # student_id -> (risk_score, causal factors, key insights)
    _PROFILES = {
        "S003": (  # Jordan - high risk
            0.75,
            ("sleep_deprivation", "academic_overload", "social_isolation"),
            (
                "Grade trend showing significant decline over past 3 weeks",
                "Sleep duration consistently below 6 hours",
                "Assignment submission delays increasing",
            ),
        ),
        "S001": (  # Alex - medium risk
            0.65,
            ("sleep_deprivation", "academic_overload"),
            (
                "Moderate grade decline detected",
                "Irregular sleep patterns affecting performance",
                "Increased library hours suggesting cramming behavior",
            ),
        ),
    }

    # Used for every other student - lower risk
    _DEFAULT_PROFILE = (
        0.35,
        ("minor_adjustments_needed",),
        (
            "Stable academic performance",
            "Healthy wellbeing patterns detected",
            "Minor optimizations possible",
        ),
    )

    def __init__(self):
        self.agent_id = "HokieWell_Simulated"
        print("✅ Simulated Databricks Agent initialized")

    def run_holistic_analysis(self, student_id, data_sources):
        """Return the simulated analysis dict for ``student_id``.

        ``data_sources`` is accepted for interface compatibility but is not
        read. The result has the keys ``agent_id``, ``student_id``,
        ``analysis_timestamp``, ``academic_analysis``, ``wellbeing_assessment``,
        ``causal_analysis``, ``intervention_plan`` and
        ``databricks_features_used``.
        """
        risk_score, factor_names, insight_lines = self._PROFILES.get(student_id, self._DEFAULT_PROFILE)
        # Fresh lists per call so callers can mutate the result freely.
        factors = list(factor_names)
        insights = list(insight_lines)

        worsening = risk_score > 0.6

        if risk_score > 0.7:
            risk_level = "high"
        elif risk_score > 0.4:
            risk_level = "medium"
        else:
            risk_level = "low"

        if factors:
            effect_sizes = {}
            for factor in factors:
                effect_sizes[factor] = risk_score/len(factors) + 0.1
        else:
            effect_sizes = {}

        if risk_score > 0.4:
            planned_actions = [
                {
                    "type": "academic_support",
                    "description": "Schedule targeted tutoring sessions with engineering specialists",
                    "confidence": risk_score
                },
                {
                    "type": "wellbeing_intervention",
                    "description": "Proactive wellbeing check-in and sleep hygiene workshop",
                    "confidence": max(0.3, risk_score - 0.1)
                }
            ]
        else:
            planned_actions = [
                {
                    "type": "preventive_maintenance",
                    "description": "Regular check-ins to maintain healthy patterns",
                    "confidence": 0.9
                }
            ]

        academic_analysis = {
            "risk_score": risk_score,
            "trend_direction": "declining" if worsening else "stable",
            "confidence": 0.82,
            "key_insights": insights,
            "model_version": "databricks_dbrx_instruct_simulated"
        }

        wellbeing_assessment = {
            "overall_score": max(0.3, 1 - risk_score + 0.1),
            "dimensions": {
                "sleep_health": {"score": max(0.3, 1 - risk_score), "trend": "declining" if worsening else "stable"},
                "stress_levels": {"score": risk_score, "trend": "increasing" if worsening else "stable"}
            }
        }

        return {
            "agent_id": self.agent_id,
            "student_id": student_id,
            "analysis_timestamp": datetime.now().isoformat(),
            "academic_analysis": academic_analysis,
            "wellbeing_assessment": wellbeing_assessment,
            "causal_analysis": {
                "causal_factors": factors,
                "effect_sizes": effect_sizes
            },
            "intervention_plan": {
                "risk_level": risk_level,
                "planned_actions": planned_actions
            },
            "databricks_features_used": [
                "mlflow_tracking",
                "feature_store",
                "model_registry",
                "causal_ml",
                "collaborative_filtering"
            ]
        }


class GradioHokieWellApp:
    def __init__(self):
        """Create the agents used by the app and load the student datasets.

        Three agents are instantiated: a simulated one (kept as a possible
        fallback), one that answers natural-language questions, and one that
        performs structured analysis through a model endpoint. The model
        agent is the one wired in for structured analysis; no automatic
        fallback to the simulated agent is implemented here.
        """
        self.simulated_agent = SimulatedDatabricksAgent()
        self.databricks_ai_agent = DatabricksAIAgent()  # natural-language queries
        model_agent = DatabricksModelAgent()  # structured analysis via model endpoint
        self.databricks_model_agent = model_agent

        # The model-endpoint agent is the active structured-analysis backend.
        self.structured_analysis_agent = model_agent

        self.load_data()


    def load_data(self):
        """Load the synthetic dataset"""
        try:
            # Check if files exist before loading
            if os.path.exists('students.csv') and os.path.exists('academic_data.csv') and \
               os.path.exists('wellbeing_data.csv') and os.path.exists('environmental_data.csv') and \
               os.path.exists('resources.csv'):
                self.students = pd.read_csv('students.csv')
                self.academic = pd.read_csv('academic_data.csv')
                self.wellbeing = pd.read_csv('wellbeing_data.csv')
                self.environmental = pd.read_csv('environmental_data.csv')
                self.resources = pd.read_csv('resources.csv')
                print("✅ Data loaded successfully from CSV files")
            else:
                 print("⚠️ CSV files not found. Creating minimal data.")
                 self.create_minimal_data()

        except Exception as e:
            print(f"❌ Data loading failed: {e}")
            # Create minimal data if any error occurs during loading
            self.create_minimal_data()

    def create_minimal_data(self):
        """Create minimal data if files are missing or loading fails"""
        self.students = pd.DataFrame([
            {'student_id': 'S001', 'name': 'Alex Johnson', 'major': 'Computer Engineering', 'year': 'Sophomore'},
            {'student_id': 'S003', 'name': 'Jordan Smith', 'major': 'Psychology', 'year': 'Freshman'}
        ])
        # Add minimal data for academic, wellbeing, and environmental dataframes
        self.academic = pd.DataFrame({
            'student_id': ['S001', 'S001', 'S003', 'S003'],
            'assignment_id': ['A001', 'A002', 'A003', 'A004'],
            'course_id': ['CS101', 'CS101', 'PSYC101', 'PSYC101'],
            'course_name': ['Intro to CS', 'Intro to CS', 'Intro to Psych', 'Intro to Psych'],
            'assignment_name': ['Assignment 1', 'Assignment 2', 'Assignment 1', 'Assignment 2'],
            'due_date': ['2024-01-20', '2024-02-05', '2024-01-20', '2024-02-05'],
            'submission_date': ['2024-01-20', '2024-02-06', '2024-01-21', '2024-02-08'],
            'grade': [85, 80, 70, 65],
            'submission_delay_days': [0, 1, 1, 3],
            'difficulty_level': [0.5, 0.5, 0.4, 0.4]
        })
        self.wellbeing = pd.DataFrame({
            'student_id': ['S001', 'S001', 'S003', 'S003'],
            'date': ['2024-01-20', '2024-01-21', '2024-01-20', '2024-01-21'],
            'sleep_duration': [7.5, 7.0, 6.0, 5.5],
            'step_count': [8000, 8500, 5000, 4500],
            'wellbeing_score': [4.0, 3.8, 3.0, 2.8],
            'week_of_semester': [1, 1, 1, 1],
            'day_type': ['Weekday', 'Weekday', 'Weekday', 'Weekday']
        })
        self.environmental = pd.DataFrame({
            'student_id': ['S001', 'S001', 'S003', 'S003'],
            'date': ['2024-01-20', '2024-01-21', '2024-01-20', '2024-01-21'],
            'meals_on_campus': [2.0, 1.0, 1.0, 1.0],
            'library_hours': [1.0, 1.5, 0.5, 0.2],
            'gym_visit': [1, 0, 0, 0],
            'campus_engagement_score': [0.7, 0.6, 0.5, 0.4]
        })
        print("✅ Minimal data created.")


    def get_student_data(self, student_id):
        """Get current data for selected student"""
        return {
            'academic': self.academic[self.academic['student_id'] == student_id] if hasattr(self, 'academic') else pd.DataFrame(),
            'wellbeing': self.wellbeing[self.wellbeing['student_id'] == student_id] if hasattr(self, 'wellbeing') else pd.DataFrame(),
            'environmental': self.environmental[self.environmental['student_id'] == student_id] if hasattr(self, 'environmental') else pd.DataFrame()
        }

    def analyze_student(self, student_id):
        """Run AI analysis for a student using the selected structured analysis agent"""
        student_data = self.get_student_data(student_id)
        return self.structured_analysis_agent.run_holistic_analysis(student_id, student_data)

    def handle_user_query(self, user_query, student_selection):
        """Handle natural language queries with intelligent responses"""
        if not user_query.strip():
            return "Please enter a question about the student's analysis."

        student_id = student_selection.split(' - ')[0]
        # Run the full analysis first to get the latest data and insights
        analysis_result = self.analyze_student(student_id)


        # Prepare context for AI agent
        academic = analysis_result['academic_analysis']
        causal = analysis_result['causal_analysis']
        plan = analysis_result['intervention_plan']

        context_data = {
            'risk_score': academic.get('risk_score', 0.5),
            'trend': academic.get('trend_direction', 'stable'),
            'factors': causal.get('causal_factors', []),
            'actions': [action.get('description', '') for action in plan.get('planned_actions', [])],
            'risk_level': plan.get('risk_level', 'low'),
            'student_id': student_id,
            'raw_analysis_result': analysis_result # Pass full result for potential detailed lookup
        }

        # Get ENHANCED response from AI agent
        # Use the DatabricksAIAgent for natural language query processing
        enhanced_response = self.databricks_ai_agent.get_enhanced_response(user_query, context_data)

        return f"""
        <div style='background: #f8f9fa; padding: 20px; border-radius: 10px; border-left: 5px solid #4CAF50; margin: 10px 0;'>
            <h4 style='color: black; margin-top: 0;'>💬 AI Response to: "{user_query}"</h4>
            <div style='color: black; line-height: 1.6; font-size: 14px; white-space: pre-line;'>
                {enhanced_response}
            </div>
            <div style='margin-top: 15px; padding: 10px; background: #e8f5e8; border-radius: 5px;'>
                <small style='color: #666;'>
                    <strong>Analysis Context:</strong> Student {student_id} | Risk Score: {academic.get('risk_score', 0.5):.2f} | Primary Factors: {', '.join(causal.get('causal_factors', []))}
                </small>
            </div>
        </div>
        """

    def create_risk_gauge(self, risk_score):
        """Build a Plotly gauge indicator showing ``risk_score`` on a 0-1 axis.

        The gauge has three coloured bands (0-0.3 light green, 0.3-0.7 yellow,
        0.7-1 red), a red threshold line at 0.7, and a delta shown relative
        to 0.5. Returns the configured figure.
        """
        colour_bands = [
            {'range': [0, 0.3], 'color': 'lightgreen'},
            {'range': [0.3, 0.7], 'color': 'yellow'},
            {'range': [0.7, 1], 'color': 'red'},
        ]
        threshold_marker = {
            'line': {'color': "red", 'width': 4},
            'thickness': 0.75,
            'value': 0.7,
        }
        gauge_style = {
            'axis': {'range': [0, 1], 'tickwidth': 1, 'tickcolor': "darkblue"},
            'bar': {'color': "darkblue"},
            'bgcolor': "white",
            'borderwidth': 2,
            'bordercolor': "gray",
            'steps': colour_bands,
            'threshold': threshold_marker,
        }

        indicator = go.Indicator(
            mode="gauge+number+delta",
            value=risk_score,
            domain={'x': [0, 1], 'y': [0, 1]},
            title={'text': "Academic Risk Score", 'font': {'size': 20, 'color': 'black'}},
            # Movement above the 0.5 reference is drawn red, below it green.
            delta={'reference': 0.5, 'increasing': {'color': "red"}, 'decreasing': {'color': "green"}},
            gauge=gauge_style,
        )

        fig = go.Figure(indicator)
        fig.update_layout(
            height=300,
            margin=dict(l=20, r=20, t=50, b=20),
            font=dict(color='black'),
        )
        return fig

    def create_academic_trend_chart(self, student_id):
        """Create academic trend chart"""
        if not hasattr(self, 'academic') or self.academic.empty:
            return None

        student_academic = self.academic[self.academic['student_id'] == student_id].copy() # Use .copy() to avoid SettingWithCopyWarning
        if student_academic.empty:
            return None

        # Ensure 'due_date' is datetime type for sorting and plotting
        student_academic['due_date'] = pd.to_datetime(student_academic['due_date'], errors='coerce')
        student_academic = student_academic.dropna(subset=['due_date']).sort_values('due_date')


        fig = go.Figure()

        fig.add_trace(go.Scatter(
            x=student_academic['due_date'],
            y=student_academic['grade'],
            mode='lines+markers',
            name='Grades',
            line=dict(color='#861F41', width=3),
            marker=dict(size=8)
        ))

        fig.update_layout(
            title="Academic Performance Trend",
            xaxis_title="Assignment Date",
            yaxis_title="Grade",
            height=300,
            showlegend=False,
            font=dict(color='black'),
            title_font=dict(color='black'),
            xaxis=dict(tickfont=dict(color='black')),
            yaxis=dict(tickfont=dict(color='black'))
        )

        return fig

    def create_wellbeing_chart(self, student_id):
        """Create wellbeing metrics chart"""
        if not hasattr(self, 'wellbeing') or self.wellbeing.empty:
            return None

        student_wellbeing = self.wellbeing[self.wellbeing['student_id'] == student_id].copy() # Use .copy()
        if student_wellbeing.empty:
            return None

        # Ensure 'date' is datetime type
        student_wellbeing['date'] = pd.to_datetime(student_wellbeing['date'], errors='coerce')
        student_wellbeing = student_wellbeing.dropna(subset=['date']).sort_values('date')


        fig = go.Figure()

        fig.add_trace(go.Scatter(
            x=student_wellbeing['date'],
            y=student_wellbeing['sleep_duration'],
            mode='lines',
            name='Sleep Hours',
            line=dict(color='#E87722', width=2)
        ))

        fig.add_trace(go.Scatter(
            x=student_wellbeing['date'],
            y=student_wellbeing['wellbeing_score'],
            mode='lines',
            name='Wellbeing Score',
            line=dict(color='#861F41', width=2),
            yaxis='y2'
        ))

        fig.update_layout(
            title="Wellbeing Metrics",
            xaxis_title="Date",
            yaxis_title="Sleep Hours",
            yaxis2=dict(title="Wellbeing Score", overlaying='y', side='right'),
            height=300,
            showlegend=True,
            font=dict(color='black'),
            title_font=dict(color='black'),
            xaxis=dict(tickfont=dict(color='black')),
            yaxis=dict(tickfont=dict(color='black'))
        )

        return fig


    def format_analysis_results(self, analysis_result):
        """Format analysis results for display with BLACK TEXT"""
        # Ensure keys exist with default empty dicts
        academic = analysis_result.get('academic_analysis', {})
        causal = analysis_result.get('causal_analysis', {})
        plan = analysis_result.get('intervention_plan', {})

        # Academic insights - ALL BLACK TEXT
        academic_html = f"""
        <div style='background: #f8f9fa; padding: 15px; border-radius: 10px; margin: 10px 0;'>
            <h3 style='color: black; margin-bottom: 10px;'>📚 Academic Analysis</h3>
            <p style='color: black; margin: 5px 0;'><strong style='color: black;'>Risk Score:</strong> <span style='color: black;'>{academic.get('risk_score', 0.0):.2f}</span></p>
            <p style='color: black; margin: 5px 0;'><strong style='color: black;'>Trend:</strong> <span style='color: black;'>{academic.get('trend_direction', 'N/A').title()}</span></p>
            <p style='color: black; margin: 5px 0;'><strong style='color: black;'>Key Insights:</strong></p>
            <ul style='color: black;'>
        """
        for insight in academic.get('key_insights', []):
            academic_html += f"<li style='color: black;'>{insight}</li>"
        academic_html += "</ul></div>"

        # Causal analysis - ALL BLACK TEXT
        causal_html = f"""
        <div style='background: #fff3cd; padding: 15px; border-radius: 10px; margin: 10px 0;'>
            <h3 style='color: black; margin-bottom: 10px;'>🔍 Root Cause Analysis</h3>
            <p style='color: black; margin: 5px 0;'><strong style='color: black;'>Identified Factors:</strong></p>
            <ul style='color: black;'>
        """
        for factor in causal.get('causal_factors', []):
            effect = causal.get('effect_sizes', {}).get(factor, 0)
            causal_html += f"<li style='color: black;'>{factor.replace('_', ' ').title()} (effect size: {effect:.3f})</li>"
        causal_html += "</ul></div>"

        # Intervention plan - ALL BLACK TEXT
        risk_level = plan.get('risk_level', 'low')
        risk_level_color = "red" if risk_level == "high" else "orange" if risk_level == "medium" else "green"
        plan_html = f"""
        <div style='background: #d1ecf1; padding: 15px; border-radius: 10px; margin: 10px 0;'>
            <h3 style='color: black; margin-bottom: 10px;'>🎯 Intervention Plan</h3>
            <p style='color: black; margin: 5px 0;'><strong style='color: black;'>Risk Level:</strong> <span style='color: {risk_level_color}; font-weight: bold;'>{risk_level.upper()}</span></p>
            <p style='color: black; margin: 5px 0;'><strong style='color: black;'>Recommended Actions:</strong></p>
            <ul style='color: black;'>
        """
        for action in plan.get('planned_actions', []):
            plan_html += f"""
            <li style='color: black; margin-bottom: 10px;'>
                <strong style='color: black;'>{action.get('type', 'N/A').replace('_', ' ').title()}:</strong><br>
                <span style='color: black;'>{action.get('description', 'N/A')}</span><br>
                <em style='color: black;'>Confidence: {action.get('confidence', 0):.0%}</em>
            </li>
            """
        plan_html += "</ul></div>"

        return academic_html + causal_html + plan_html

    def get_resource_recommendations(self, student_id):
        """Get personalized resource recommendations"""
        # Use the structured analysis agent to get the latest analysis for factors
        analysis = self.analyze_student(student_id)
        risk_factors = analysis.get('causal_analysis', {}).get('causal_factors', [])

        recommendations = []
        # Match recommendations based on identified factors
        if 'sleep_deprivation' in risk_factors:
            recommendations.append({
                'resource': 'Sleep & Wellness Workshop',
                'match': 0.95,
                'reason': 'Addresses identified sleep patterns'
            })
        if 'academic_overload' in risk_factors or 'academic_pressure' in risk_factors or 'time_management' in risk_factors:
            recommendations.append({
                'resource': 'Academic Success Center / Tutoring',
                'match': 0.88,
                'reason': 'Targeted academic support'
            })
        if 'social_isolation' in risk_factors:
            recommendations.append({
                'resource': 'Student Clubs & Organizations / Peer Programs',
                'match': 0.82,
                'reason': 'Community engagement opportunities'
            })
        if 'stress' in risk_factors or 'anxiety' in risk_factors:
             recommendations.append({
                'resource': 'Counseling Services / Stress Management Workshop',
                'match': 0.90,
                'reason': 'Provides coping strategies and support'
            })


        # Default recommendations if no specific factors match
        if not recommendations:
            recommendations = [
                {'resource': 'Academic Success Center', 'match': 0.75, 'reason': 'General academic support'},
                {'resource': 'Counseling Services', 'match': 0.70, 'reason': 'Wellbeing support'}
            ]

        return recommendations

    def format_resource_recommendations_html(self, student_id):
        """Format resource recommendations as HTML with BLACK TEXT"""
        recommendations = self.get_resource_recommendations(student_id)

        html = "<div style='padding: 20px; color: black;'>"
        html += "<h3 style='color: black;'>🛟 Personalized Resource Recommendations</h3>"

        if not recommendations:
            html += "<p style='color: black;'>No specific recommendations available based on current analysis. General support resources are always available.</p>"
        else:
            for rec in recommendations:
                html += f"""
                <div style='background: #e8f5e8; padding: 15px; margin: 10px 0; border-radius: 10px; border-left: 5px solid #4CAF50; color: black;'>
                    <h4 style='color: black;'>{rec.get('resource', 'N/A')} <span style='float: right; background: #4CAF50; color: white; padding: 2px 8px; border-radius: 10px; font-size: 12px;'>{rec.get('match', 0):.0%} match</span></h4>
                    <p style='color: black;'>{rec.get('reason', 'No reason provided.')}</p>
                </div>
                """

        html += "</div>"
        return html


    def create_interface(self):
        """Build and return the Gradio Blocks UI for the app.

        Layout: a left column with the student selector, the "run analysis"
        button, the risk gauge and three read-only quick-stat boxes; a wider
        right column with four tabs (agent Q&A, analysis results, charts,
        resource recommendations). Event handlers are bound to methods of
        this instance. The returned object is not launched here.
        """
        # Build the dropdown entries from whatever student table was loaded and
        # pick a default that is guaranteed to be one of them: the demo student
        # when present, otherwise the first entry (or nothing for an empty table).
        student_choices = [f"{row['student_id']} - {row['name']}" for _, row in self.students.iterrows()]
        preferred_choice = "S003 - Jordan Smith"
        if preferred_choice in student_choices:
            default_choice = preferred_choice
        else:
            default_choice = student_choices[0] if student_choices else None

        with gr.Blocks(theme=gr.themes.Soft(), title="HokieWell Navigator", css=".gradio-container {color: black !important;}") as demo:
            gr.Markdown(
                """
                # 🎓 HokieWell Navigator
                ### *From Reactive Support to Proactive Thriving*
                **Powered by Databricks AI Agent Framework**
                """
            )

            with gr.Row():
                # Left column: selection, trigger and headline numbers.
                with gr.Column(scale=1):
                    student_dropdown = gr.Dropdown(
                        choices=student_choices,
                        label="👤 Select Student",
                        value=default_choice,
                        elem_classes=["black-text"]
                    )

                    analyze_btn = gr.Button("🚀 Run AI Analysis", variant="primary", elem_classes=["black-text"])
                    risk_gauge = gr.Plot(label="Academic Risk Assessment")

                    gr.Markdown("### 📊 Quick Stats", elem_classes=["black-text"])
                    risk_score = gr.Textbox(label="Risk Score", interactive=False, elem_classes=["black-text"])
                    trend_direction = gr.Textbox(label="Trend Direction", interactive=False, elem_classes=["black-text"])
                    primary_factor = gr.Textbox(label="Primary Factor", interactive=False, elem_classes=["black-text"])

                # Right column: tabbed detail views.
                with gr.Column(scale=2):
                    with gr.Tab("🤖 Agent Response"):
                        gr.Markdown("### 💬 Ask Anything About the Student")
                        gr.Markdown("""
                        **Example questions to try:**
                        - "How is he studying?"
                        - "Explain the sleep issues"
                        - "What causes the stress?"
                        - "Why are grades declining?"
                        - "What interventions would help?"
                        """)

                        user_query = gr.Textbox(
                            label="Enter your question about the student:",
                            placeholder="Type your question here...",
                            lines=3,
                            elem_classes=["black-text"]
                        )

                        ask_btn = gr.Button("🎯 Get AI Analysis", variant="primary")
                        agent_response = gr.HTML(label="AI Agent Response", elem_classes=["black-text"])

                    with gr.Tab("📈 Analysis Results"):
                        analysis_output = gr.HTML(label="AI Analysis Results", elem_classes=["black-text"])

                    with gr.Tab("📊 Visual Analytics"):
                        with gr.Row():
                            academic_chart = gr.Plot(label="Academic Performance")
                            wellbeing_chart = gr.Plot(label="Wellbeing Metrics")

                    with gr.Tab("🛟 Resource Recommendations"):
                        resources_output = gr.HTML(label="Personalized Recommendations", elem_classes=["black-text"])

            # Event handlers. The output order must match the tuple returned by
            # run_complete_analysis.
            analyze_btn.click(
                fn=self.run_complete_analysis,
                inputs=[student_dropdown],
                outputs=[risk_gauge, risk_score, trend_direction, primary_factor, analysis_output, academic_chart, wellbeing_chart, resources_output]
            )

            ask_btn.click(
                fn=self.handle_user_query,
                inputs=[user_query, student_dropdown],
                outputs=[agent_response]
            )

            # Submitting the textbox triggers the same handler as the button.
            user_query.submit(
                fn=self.handle_user_query,
                inputs=[user_query, student_dropdown],
                outputs=[agent_response]
            )

            # Changing the student refreshes only the two charts; the analysis
            # itself is re-run only via the button.
            student_dropdown.change(
                fn=self.update_student_charts,
                inputs=[student_dropdown],
                outputs=[academic_chart, wellbeing_chart]
            )

            # Draw the charts for the default student when the page loads.
            demo.load(
                fn=self.initial_load,
                inputs=[student_dropdown],
                outputs=[academic_chart, wellbeing_chart]
            )

        return demo

    def run_complete_analysis(self, student_selection):
        """Run complete analysis and return all outputs"""
        student_id = student_selection.split(' - ')[0]
        analysis_result = self.analyze_student(student_id)

        # Risk gauge
        risk_gauge = self.create_risk_gauge(analysis_result.get('academic_analysis', {}).get('risk_score', 0.5))

        # Text outputs
        academic_analysis = analysis_result.get('academic_analysis', {})
        causal_analysis = analysis_result.get('causal_analysis', {})

        risk_score_val = academic_analysis.get('risk_score', 0.5)
        risk_score = f"{risk_score_val:.2f}"
        trend_direction = academic_analysis.get('trend_direction', 'N/A').title()
        primary_factor = causal_analysis.get('causal_factors', [None])[0]
        primary_factor = primary_factor.replace('_', ' ').title() if primary_factor else "No significant factors"


        # Analysis results HTML
        analysis_html = self.format_analysis_results(analysis_result)

        # Charts
        academic_chart = self.create_academic_trend_chart(student_id)
        wellbeing_chart = self.create_wellbeing_chart(student_id)

        # Resource recommendations
        resources_html = self.format_resource_recommendations_html(student_id)

        # Removed agent_info from outputs

        return risk_gauge, risk_score, trend_direction, primary_factor, analysis_html, academic_chart, wellbeing_chart, resources_html

    def update_student_charts(self, student_selection):
        """Update charts when student changes"""
        student_id = student_selection.split(' - ')[0]
        academic_chart = self.create_academic_trend_chart(student_id)
        wellbeing_chart = self.create_wellbeing_chart(student_id)
        return academic_chart, wellbeing_chart

    def initial_load(self, student_selection):
        """Initial load of charts"""
        # This will also trigger the data loading and initial chart creation
        return self.update_student_charts(student_selection)

# Keep the original SimulatedDatabricksAgent as a potential fallback or alternative
class SimulatedDatabricksAgent:
    """Offline stand-in for the structured-analysis agent.

    It ignores the supplied data and returns canned, student-specific
    results in the same dictionary layout the app consumes.
    """

    def __init__(self):
        self.agent_id = "HokieWell_Simulated"
        print("✅ Simulated Databricks Agent initialized")

    def run_holistic_analysis(self, student_id, data_sources):
        """Return a simulated analysis for ``student_id``.

        ``data_sources`` is accepted for interface compatibility but is not
        read. "S003" and "S001" have dedicated profiles; every other id gets
        the lower-risk default. All derived fields (trend, risk level,
        wellbeing scores, effect sizes, planned actions) are computed from
        the profile's risk score.
        """
        # (student id, (risk score, causal factors, key insights))
        known_profiles = (
            ("S003", (  # Jordan - high risk
                0.75,
                ["sleep_deprivation", "academic_overload", "social_isolation"],
                [
                    "Grade trend showing significant decline over past 3 weeks",
                    "Sleep duration consistently below 6 hours",
                    "Assignment submission delays increasing"
                ],
            )),
            ("S001", (  # Alex - medium risk
                0.65,
                ["sleep_deprivation", "academic_overload"],
                [
                    "Moderate grade decline detected",
                    "Irregular sleep patterns affecting performance",
                    "Increased library hours suggesting cramming behavior"
                ],
            )),
        )
        default_profile = (  # Others - lower risk
            0.35,
            ["minor_adjustments_needed"],
            [
                "Stable academic performance",
                "Healthy wellbeing patterns detected",
                "Minor optimizations possible"
            ],
        )
        risk_score, factors, insights = next(
            (profile for known_id, profile in known_profiles if student_id == known_id),
            default_profile,
        )

        # Thresholds: > 0.6 marks a worsening trend, > 0.4 triggers active
        # interventions, > 0.7 is labelled high risk.
        worsening = risk_score > 0.6
        needs_intervention = risk_score > 0.4

        if risk_score > 0.7:
            risk_level = "high"
        elif needs_intervention:
            risk_level = "medium"
        else:
            risk_level = "low"

        if needs_intervention:
            planned_actions = [
                {
                    "type": "academic_support",
                    "description": "Schedule targeted tutoring sessions with engineering specialists",
                    "confidence": risk_score
                },
                {
                    "type": "wellbeing_intervention",
                    "description": "Proactive wellbeing check-in and sleep hygiene workshop",
                    "confidence": max(0.3, risk_score - 0.1)
                }
            ]
        else:
            planned_actions = [
                {
                    "type": "preventive_maintenance",
                    "description": "Regular check-ins to maintain healthy patterns",
                    "confidence": 0.9
                }
            ]

        # The risk score is split evenly across the factors, plus a 0.1 offset.
        if factors:
            effect_sizes = {factor: risk_score/len(factors) + 0.1 for factor in factors}
        else:
            effect_sizes = {}

        return {
            "agent_id": self.agent_id,
            "student_id": student_id,
            "analysis_timestamp": datetime.now().isoformat(),
            "academic_analysis": {
                "risk_score": risk_score,
                "trend_direction": "declining" if worsening else "stable",
                "confidence": 0.82,
                "key_insights": insights,
                "model_version": "databricks_dbrx_instruct_simulated"
            },
            "wellbeing_assessment": {
                "overall_score": max(0.3, 1 - risk_score + 0.1),
                "dimensions": {
                    "sleep_health": {"score": max(0.3, 1 - risk_score), "trend": "declining" if worsening else "stable"},
                    "stress_levels": {"score": risk_score, "trend": "increasing" if worsening else "stable"}
                }
            },
            "causal_analysis": {
                "causal_factors": factors,
                "effect_sizes": effect_sizes
            },
            "intervention_plan": {
                "risk_level": risk_level,
                "planned_actions": planned_actions
            },
            "databricks_features_used": [
                "mlflow_tracking",
                "feature_store",
                "model_registry",
                "causal_ml",
                "collaborative_filtering"
            ]
        }

def launch_gradio_app(server_name="0.0.0.0", server_port=7867, share=True, debug=True, show_error=True):
    """Build the HokieWell app and launch its Gradio server.

    All arguments are forwarded to ``demo.launch``; the defaults reproduce
    the previous hard-coded configuration, so calling the function with no
    arguments behaves as before.

    Args:
        server_name: Interface to bind. ``"0.0.0.0"`` listens on all
            interfaces; pass ``"127.0.0.1"`` to keep the app local.
        server_port: TCP port for the server.
        share: Whether to request a public Gradio share link. The app shows
            per-student academic and wellbeing data, so pass ``False``
            unless a public link is really intended.
        debug: Passed through as ``debug``.
        show_error: Passed through as ``show_error``.
    """
    app = GradioHokieWellApp()
    demo = app.create_interface()

    demo.launch(
        server_name=server_name,
        server_port=server_port,
        share=share,
        debug=debug,
        show_error=show_error
    )

# Launch immediately when this cell/script is executed directly.
if __name__ == "__main__":
    launch_gradio_app()

# Isolated AI chatbot

In [None]:
import os
from getpass import getpass

# SECURITY: never commit a personal access token to the notebook. The token
# that used to be hard-coded on this line must be considered leaked and should
# be revoked in the Databricks workspace.
# If the environment does not already provide one, ask for it interactively;
# getpass keeps the value out of the cell source and the rendered output.
if not os.environ.get("DATABRICKS_TOKEN"):
    os.environ["DATABRICKS_TOKEN"] = getpass("Databricks personal access token: ")

# SchoolDaddy chatbot - general

final004..

In [None]:
import os
import requests
import gradio as gr

# Databricks model-serving endpoint that backs the chat agent.
DATABRICKS_URL = "https://dbc-c6db0812-b5cc.cloud.databricks.com"
ENDPOINT = "Agent-op"
API_URL = f"{DATABRICKS_URL}/serving-endpoints/{ENDPOINT}/invocations"

# SECURITY: the token comes from the environment only. No secret is embedded as
# a fallback; the previously hard-coded value should be revoked.
TOKEN = os.environ.get("DATABRICKS_TOKEN", "")
if not TOKEN:
    print("⚠️ DATABRICKS_TOKEN is not set; requests to the agent endpoint will be rejected.")

# Store conversation history (module-level, so shared by every browser session)
conversation_history = []

def query_agent_chat(user_message):
    """Send one user message to the Databricks agent and update the chat log.

    Only the current message is sent to the endpoint; earlier turns are kept
    for display but are not forwarded as context.

    Args:
        user_message: Text from the message box.

    Returns:
        A ``(textbox_value, history)`` pair matching the UI outputs
        ``[msg, chat]``. On success the textbox is cleared (``""``) and the
        history gains one ``(user_message, assistant_text)`` pair, which is
        the pair layout a tuple-format ``gr.Chatbot`` renders as a user
        bubble followed by an assistant bubble. On failure the textbox
        receives an error description and the history is left unchanged.
    """
    # Nothing to send: leave the conversation untouched.
    if not user_message or not user_message.strip():
        return "", conversation_history

    headers = {
        "Authorization": f"Bearer {TOKEN}",
        "Content-Type": "application/json"
    }

    payload = {
        "input": [{"role": "user", "content": user_message}],
        "max_output_tokens": 200,
        "temperature": 0.7
    }

    # A timeout keeps the UI from hanging forever on a stalled endpoint;
    # connection problems are reported the same way as HTTP errors.
    try:
        r = requests.post(API_URL, headers=headers, json=payload, timeout=60)
    except requests.RequestException as e:
        return f"❌ Request failed: {e}", conversation_history

    if r.status_code != 200:
        return f"❌ Error {r.status_code}: {r.text}", conversation_history

    try:
        data = r.json()
        assistant_text = "Sorry, I couldn't get a response."

        # Keep the last non-empty output_text part of the last message item.
        for item in data.get("output", []):
            if item.get("type") == "message":
                for part in item.get("content", []):
                    if part.get("type") == "output_text":
                        text = (part.get("text") or "").strip()
                        if text:
                            assistant_text = text
    except Exception as e:
        return f"Parsing error: {e}", conversation_history

    # One (user, assistant) pair per turn. Storing ("User", msg) and
    # ("Agent", text) as separate rows made the labels show up as messages.
    conversation_history.append((user_message, assistant_text))

    return "", conversation_history


# --- Gradio Chat Interface ---
# Minimal chat page: a chat log, a message box and a send button.
with gr.Blocks() as demo:
    gr.Markdown("## 🧠 SchoolDaddy")

    chat = gr.Chatbot()
    msg = gr.Textbox(placeholder="Type your message here...", label="Your Message")
    send_btn = gr.Button("Send")

    # When user sends a message (button click or Enter in the textbox), the
    # handler's two return values are written to the textbox and the chat log.
    send_btn.click(query_agent_chat, inputs=msg, outputs=[msg, chat])
    msg.submit(query_agent_chat, inputs=msg, outputs=[msg, chat])

# Start the server when the cell/script is run directly; share=True asks
# Gradio for a public link in addition to binding all interfaces on port 7877.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7877, share=True)

In [None]:
import os
import requests
import gradio as gr

# Databricks model-serving endpoint that backs the chat agent.
DATABRICKS_URL = "https://dbc-c6db0812-b5cc.cloud.databricks.com"
ENDPOINT = "Agent-op"
API_URL = f"{DATABRICKS_URL}/serving-endpoints/{ENDPOINT}/invocations"

# SECURITY: the token comes from the environment only. No secret is embedded as
# a fallback; the previously hard-coded value should be revoked.
TOKEN = os.environ.get("DATABRICKS_TOKEN", "")
if not TOKEN:
    print("⚠️ DATABRICKS_TOKEN is not set; requests to the agent endpoint will be rejected.")

# Store conversation history (module-level, so shared by every browser session)
conversation_history = []

def query_agent_chat(user_message):
    """
    Send the user message to the Databricks agent endpoint and update the chat.

    Args:
        user_message: Text typed by the user in the Gradio textbox.

    Returns:
        A ``(textbox_value, history)`` tuple matching the ``[msg, chat]`` outputs.
        ``textbox_value`` is ``""`` on success (which clears the textbox) or an
        error description on failure. ``history`` is the module-level
        ``conversation_history`` list of ``(user_message, assistant_reply)`` pairs.
    """
    # Nothing to send: leave the transcript untouched instead of calling the endpoint.
    if not user_message or not user_message.strip():
        return "", conversation_history

    headers = {
        "Authorization": f"Bearer {TOKEN}",
        "Content-Type": "application/json"
    }

    # Only the current message is sent; earlier turns are not replayed to the endpoint.
    input_payload = [{"role": "user", "content": user_message}]

    payload = {
        "input": input_payload,
        "max_output_tokens": 200,
        "temperature": 0.7
    }

    request_timeout_seconds = 60
    try:
        # Without a timeout an unresponsive endpoint would block the UI forever.
        r = requests.post(API_URL, headers=headers, json=payload, timeout=request_timeout_seconds)
    except requests.RequestException as e:
        # Connection errors, DNS failures and timeouts are reported like HTTP errors.
        return f"❌ Request failed: {e}", conversation_history

    if r.status_code != 200:
        return f"❌ Error {r.status_code}: {r.text}", conversation_history

    try:
        data = r.json()
        assistant_text = "Sorry, I couldn't get a response."

        # Extract the assistant's final message (the last non-empty output_text wins)
        for item in data.get("output", []):
            if item.get("type") == "message":
                for part in item.get("content", []):
                    if part.get("type") == "output_text":
                        # "text" can be missing or null; keep the previous value then.
                        candidate = (part.get("text") or "").strip()
                        if candidate:
                            assistant_text = candidate

        # gr.Chatbot (tuple format) renders each entry as a (user, bot) pair, so one
        # exchange is stored as a single pair rather than two labelled rows.
        conversation_history.append((user_message, assistant_text))

        return "", conversation_history

    except Exception as e:
        return f"Parsing error: {e}", conversation_history


# --- Gradio Chat Interface with SchoolDaddy Theme ---
# The CSS forces black body text and maroon headings. The selector list starts at
# h1 so that the "# 🎓 SchoolDaddy" title (rendered as <h1>) is styled as well.
with gr.Blocks(theme=gr.themes.Soft(), title="SchoolDaddy Chat", css="""
    .gradio-container {color: black !important;}
    h1, h2, h3, h4 {color: #861F41 !important;}
""") as demo:

    gr.Markdown(
        """
        # 🎓 SchoolDaddy
        ### *AI-Powered Holistic Performance Agent*
        **Powered by Databricks Agent**
        """
    )

    chat = gr.Chatbot()
    msg = gr.Textbox(placeholder="Type your message here...", label="Your Message")
    send_btn = gr.Button("Send", variant="primary")

    # When user sends a message (button click or Enter in the textbox)
    send_btn.click(query_agent_chat, inputs=msg, outputs=[msg, chat])
    msg.submit(query_agent_chat, inputs=msg, outputs=[msg, chat])

if __name__ == "__main__":
    # NOTE(review): share=True together with server_name="0.0.0.0" makes the app
    # reachable beyond this machine; confirm that is intended before launching.
    demo.launch(server_name="0.0.0.0", server_port=7880, share=True)

### Next Steps: Refine Query Processing and Test

Now that the Gradio interface has a text box for user queries and a basic `AgenticQueryProcessor` is in place, the next steps are to enhance the `process` method in the `AgenticQueryProcessor` class (in cell `OVvHSQiAhRBI`) and thoroughly test the functionality.

1.  **Enhance `AgenticQueryProcessor.process`:**
    *   Improve the logic to better understand the user's intent from the query. This could involve more sophisticated keyword matching, or integrating a small language model (LLM) to parse the query.
    *   Map the identified intent to relevant data analysis functions or insights from the `run_holistic_analysis` output.
    *   Generate more nuanced and context-aware responses.

2.  **Test the Query Feature:**
    *   Run the Gradio application (by executing the cell containing `launch_gradio_app`).
    *   Select a student.
    *   Enter various types of questions in the "Ask the Agent" tab (e.g., "How is Jordan doing academically?", "What resources are available for Alex's stress?", "Tell me about Casey's wellbeing.").
    *   Observe the responses in the "Agent Response" box.
3.  **Refine Based on Testing:**
    *   If the responses are not accurate or helpful, revisit the `AgenticQueryProcessor.process` method to improve its logic.
    *   Consider adding more specific conditions or using more advanced NLP techniques if needed.

By iteratively enhancing the `process` method and testing the Gradio interface, you can build a more robust and intelligent free-form query capability for SchoolDaddy.

## Dataset Description

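The SchoolDaddy project (also referred to as HokieWell Navigator; class names such as `GradioHokieWellApp` still use the HokieWell name) utilizes a **synthetic dataset** generated specifically for this prototype. This dataset simulates realistic student data and includes several interconnected components: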

1.  **Student Profiles:** Contains basic information about each simulated student, such as:
    *   `student_id`
    *   `name`
    *   `major`
    *   `year`
    *   `academic_risk_base` (simulated base risk level)
    *   `wellbeing_risk_base` (simulated base risk level)

2.  **Academic Data:** Simulates academic performance and engagement, including:
    *   `student_id`
    *   `assignment_id`, `course_id`, `course_name`, `assignment_name`
    *   `due_date`, `submission_date`
    *   `grade` (simulated grade for each assignment)
    *   `submission_delay_days`
    *   `difficulty_level` (simulated course difficulty)

3.  **Well-being Data:** Tracks daily well-being metrics:
    *   `student_id`
    *   `date`
    *   `sleep_duration` (simulated hours of sleep)
    *   `step_count` (simulated steps)
    *   `wellbeing_score` (a general simulated well-being score)
    *   `week_of_semester`
    *   `day_type` (Weekday/Weekend)

4.  **Environmental Data:** Simulates campus engagement and environmental factors:
    *   `student_id`
    *   `date`
    *   `meals_on_campus`
    *   `library_hours`
    *   `gym_visit`
    *   `campus_engagement_score`

5.  **Resources Database:** A static list of simulated university resources:
    *   `resource_id`
    *   `name`
    *   `type`
    *   `description`
    *   `department`
    *   `location`
    *   `contact`
    *   `keywords`

6.  **AI Interventions:** Sample records of simulated AI-triggered interventions and student responses.

This synthetic data is designed to exhibit patterns and correlations between academic, well-being, and environmental factors, allowing the simulated AI agent to identify potential risks and recommend interventions. It serves as a realistic proxy for real-world student data for the purpose of this prototype.

# Project Report: SchoolDaddy

This project, "SchoolDaddy," is a prototype AI-powered system designed to proactively identify and support students who may be at academic or well-being risk. It simulates an end-to-end workflow, from synthetic data generation to a Gradio-based user interface, incorporating concepts of AI agents and potential Databricks integration.

Here's a breakdown of the key components with more technical details:

## 1. Synthetic Data Generation (Cell `uvd-HhMcZA5g`)

*   **Purpose:** To create a realistic, albeit simplified, dataset representing various facets of student life and potential risk factors.
*   **Technical Details:**
    *   Uses `pandas`, `numpy`, `datetime`, and `random` for data generation.
    *   A fixed 12-week semester is simulated using `datetime` and `timedelta`.
    *   **Student Profiles:** Stored in a pandas DataFrame (`students_df`). Includes base risk probabilities (`academic_risk_base`, `wellbeing_risk_base`) that influence subsequent data generation.
    *   **Academic Data:** Stored in `academic_df`. Generated by iterating through students and assigning a random subset of courses. Assignment due dates and submission dates are simulated with random variations and delays influenced by student risk profiles and a simulated "stress factor" that increases over the semester. Grades are calculated based on course difficulty, student base risk, and simulated performance decline due to stress.
    *   **Well-being Data:** Stored in `wellbeing_df`. Daily metrics (sleep duration, step count, wellbeing score) are generated for each student over the 12-week period. Base values are adjusted by student risk profiles, daily random modifiers, and a semester-long stress factor. Weekend effects are also simulated.
    *   **Environmental Data:** Stored in `environmental_df`. Daily campus engagement metrics (meals on campus, library hours, gym visits) are generated. Base values and trends are influenced by student risk profiles and the semester stress factor.
    *   **Resources Database:** Stored in `resources_df`. A static list of simulated university resources with attributes for matching.
    *   **AI Interventions:** Stored in `interventions_df`. Manually created sample records demonstrating potential AI-triggered interventions.
    *   All generated DataFrames are saved to CSV files using `df.to_csv()`.

## 2. AI Wellbeing Predictor (Simulated) (Cell `CbHWA2YrZU8P`)

*   **Purpose:** To simulate a machine learning approach for predicting academic risk.
*   **Technical Details:**
    *   Defines an `AIWellbeingPredictor` class.
    *   Uses `sklearn.ensemble.RandomForestClassifier` as the core model.
    *   `prepare_features`: Extracts aggregated features per student. Includes calculating the slope of grades over time (`np.polyfit`), standard deviation of sleep duration (`.std()`), mean campus engagement (`.mean()`), mean grade, mean sleep, mean library hours, and count of late submissions.
    *   `create_labels`: Creates a binary target variable (at-risk vs. not-at-risk) based on whether a student's average grade falls below a threshold (default 70).
    *   `train`: Splits data using `train_test_split`, scales features using `StandardScaler`, trains the `RandomForestClassifier`, and stores feature importances.
    *   `predict_risk`: Simulates prediction using the trained model.
    *   `explain_prediction`: Provides a basic, rule-based explanation of the prediction based on the most influential features (simulated interpretation of feature importance).
*   **Note:** This is a simplified simulation for demonstrating the concept of a predictive model; the actual model training and evaluation are basic.

## 3. Causal Inference Engine (Simulated) (Cell `cKl9HIVtZdxN`)

*   **Purpose:** To conceptually simulate identifying the root causes of student risk factors.
*   **Technical Details:**
    *   Defines a `CausalInferenceEngine` class with placeholder methods like `infer_root_cause`, `estimate_tutoring_effect`, etc.
    *   The `infer_root_cause` method returns a dictionary with simulated `likely_cause`, `recommended_intervention`, `expected_impact`, and `confidence` based on simple hardcoded rules or basic input checks, not actual causal modeling.
    *   The `estimate_..._effect` methods return simulated impact scores.
*   **Note:** This component is purely a conceptual simulation and does not perform rigorous causal inference.

## 4. Databricks Powered AI Agent (Simulated) (Cell `PZtPE-S1ZsB4`)

*   **Purpose:** To illustrate how a comprehensive AI agent might be structured and interact with components within a platform like Databricks.
*   **Technical Details:**
    *   Defines a `DatabricksHokieWellAgent` class.
    *   Includes placeholder classes (`ModelRegistryManager`, `FeatureStoreManager`, `MLflowTracker`, `SimulatedDatabricksClient`) to simulate interaction with Databricks services.
    *   `_initialize_databricks_client`: Attempts to initialize a real `WorkspaceClient` but falls back to a `SimulatedDatabricksClient` if connection fails.
    *   Methods like `analyze_academic_performance`, `assess_student_wellbeing`, `plan_personalized_intervention`, `recommend_university_resources`, and `perform_causal_analysis` are defined as "tools" the agent can use. Their implementation is largely simulated, often returning hardcoded or simply calculated results.
    *   `run_holistic_analysis`: Orchestrates calls to the simulated tool methods, integrates their results, assesses overall risk (simulated), and logs parameters and metrics using the simulated `MLflowTracker`.
*   **Note:** This section focuses on the architectural concept of an agent orchestrating tasks and interacting with platform services, with the actual task execution being simulated.

## 5. Gradio Interface (Cell `Ng1zVId-5Eu2`)

*   **Purpose:** To provide a user-friendly web interface for interacting with the SchoolDaddy system.
*   **Technical Details:**
    *   Built using the `gradio` library (`gr.Blocks`).
    *   Includes interactive components: `gr.Dropdown` for student selection, `gr.Button` for triggering analysis, `gr.Plot` for visualizations (`plotly.graph_objects`), `gr.Textbox` for quick stats and query input/output, `gr.HTML` for detailed analysis results and recommendations, and `gr.JSON` for raw agent details.
    *   Uses CSS styling (`css=".gradio-container {color: black !important;}"`) to attempt to control text color globally, with additional inline styles in HTML outputs for specific elements.
    *   Event handlers (`.click()`, `.change()`, `.load()`) connect user actions to backend Python functions (`run_complete_analysis`, `update_student_charts`, `handle_user_query`, `initial_load`).
    *   `run_complete_analysis`: Orchestrates the full analysis workflow for a selected student, calls the simulated agent, generates visualizations, formats results, and updates the Gradio output components.
    *   `create_risk_gauge`, `create_academic_trend_chart`, `create_wellbeing_chart`: Generate Plotly figures for display.
    *   `format_analysis_results`, `format_resource_recommendations_html`: Generate HTML strings with embedded data and styling for display.
    *   `handle_user_query`: (Added in recent modifications) Processes user queries from the "Ask the Agent" tab, calls the `DatabricksAIAgent.get_enhanced_response` (or the `AgenticQueryProcessor.process` if used), and formats the response as HTML.
    *   `launch_gradio_app`: Configures and launches the Gradio server.

## 6. Agentic Query Processing (Cell `iAIecdmlafu0`)

*   **Purpose:** To enable natural language interaction with the AI agent through the Gradio interface.
*   **Technical Details:**
    *   Defines the `AgenticQueryProcessor` class (initially in a separate cell, later moved into `iAIecdmlafu0` to resolve import errors).
    *   `__init__`: Initializes the Groq client (if `groq` library is installed and `GROQ_API_KEY` is available in Colab Secrets or environment variables) using a `try-except` block to handle potential `ImportError` or `SecretNotFoundError`.
    *   `process`: Takes student ID and user query as input. Calls the `analysis_agent.run_holistic_analysis` to get the student's data and analysis results.
    *   Includes logic to use the Groq LLM (`self.groq_client.chat.completions.create`) with a specific model (`llama3-8b-8192`) by providing context from the analysis results and the user's query. It attempts to generate a nuanced response using the LLM.
    *   Includes a fallback mechanism (simulated reasoning based on keyword matching) if the Groq client is not initialized or the LLM call fails. It checks for keywords related to academic, wellbeing, recommendations, and causal factors in the query and extracts relevant information from the `analysis_result` dictionary to construct a response.
    *   The `GradioHokieWellApp` instantiates this processor and calls its `process` method when a user submits a query in the Gradio interface.

## Summary

The "SchoolDaddy" project provides a foundational structure for a student support system, demonstrating an end-to-end workflow from synthetic data generation to a user-interactive Gradio interface with an agentic query capability. While many components, including the AI models and Databricks integration, are simulated for demonstration purposes, the architecture reflects principles used in real-world AI-powered applications for student success. The project incorporates concepts of data analysis, predictive modeling (simulated), causal inference (simulated), and natural language processing (via keyword matching and optional LLM integration) to provide insights and recommendations.

# Task
Modify the Gradio application code to handle user queries differently based on whether a specific student is selected. If a student is selected, the AI agent should provide responses related to that student's data. If no student is selected, the AI agent should provide responses based on the entire dataset. Ensure the Gradio interface and the underlying AI agent/processor logic are updated to support this conditional behavior.

## Modify the `handle_user_query` function

### Subtask:
Update the `handle_user_query` function in the `GradioHokieWellApp` class (in cell `YDj-uomyatea`) to check if a student has been selected in the `student_selection` input.


**Reasoning**:
The subtask is to modify the `handle_user_query` function to check if a student is selected. I will add a check at the beginning of the function to handle the case where no student is selected.



In [None]:
# Update the handle_user_query function in the GradioHokieWellApp class
def handle_user_query(self, user_query, student_selection):
    """Handle natural language queries with intelligent responses.

    Args:
        self: App instance; must provide ``analyze_student(student_id)`` and
            ``databricks_ai_agent.get_enhanced_response(query, context)``.
        user_query: Free-form question typed by the user.
        student_selection: Dropdown value; the text before the first ``' - '`` is
            used as the student id. ``None`` or a blank string means that no
            student is selected.

    Returns:
        A plain prompt string when the query is empty or no student is selected;
        otherwise an HTML snippet containing the agent's answer and a short
        analysis-context footer.
    """
    import html  # local import so this cell does not depend on another cell's imports

    # None (cleared textbox) is treated like an empty question.
    if not user_query or not user_query.strip():
        return "Please enter a question."

    # Check if a student is selected
    if student_selection is None or student_selection.strip() == "":
        # Placeholder for handling queries without a selected student
        print("No student selected. Query needs to be processed based on the entire dataset (TODO).")
        return "Please select a student to ask about, or I can provide general insights (feature coming soon)."
    else:
        student_id = student_selection.split(' - ')[0]
        # Run the full analysis first to get the latest data and insights
        analysis_result = self.analyze_student(student_id)

        # Prepare context for AI agent
        academic = analysis_result.get('academic_analysis', {})
        causal = analysis_result.get('causal_analysis', {})
        plan = analysis_result.get('intervention_plan', {})

        context_data = {
            'risk_score': academic.get('risk_score', 0.5),
            'trend': academic.get('trend_direction', 'stable'),
            'factors': causal.get('causal_factors', []),
            'actions': [action.get('description', '') for action in plan.get('planned_actions', [])],
            'risk_level': plan.get('risk_level', 'low'),
            'student_id': student_id,
            'raw_analysis_result': analysis_result # Pass full result for potential detailed lookup
        }

        # Get ENHANCED response from AI agent
        # Use the DatabricksAIAgent for natural language query processing
        enhanced_response = self.databricks_ai_agent.get_enhanced_response(user_query, context_data)

        # Values that originate from user input are escaped before being placed
        # into the HTML so that typed markup cannot be injected into the page.
        safe_query = html.escape(user_query)
        safe_student_id = html.escape(student_id)
        # "or []" tolerates a null factor list; str() tolerates non-string factors.
        safe_factors = ', '.join(html.escape(str(factor)) for factor in (causal.get('causal_factors') or []))
        # NOTE(review): enhanced_response is inserted unescaped because the agent may
        # return markup on purpose; escape it too if the agent only returns plain text.

        return f"""
        <div style='background: #f8f9fa; padding: 20px; border-radius: 10px; border-left: 5px solid #4CAF50; margin: 10px 0;'>
            <h4 style='color: black; margin-top: 0;'>💬 AI Response to: "{safe_query}"</h4>
            <div style='color: black; line-height: 1.6; font-size: 14px; white-space: pre-line;'>
                {enhanced_response}
            </div>
            <div style='margin-top: 15px; padding: 10px; background: #e8f5e8; border-radius: 5px;'>
                <small style='color: #666;'>
                    <strong>Analysis Context:</strong> Student {safe_student_id} | Risk Score: {academic.get('risk_score', 0.5):.2f} | Primary Factors: {safe_factors}
                </small>
            </div>
        </div>
        """

# Re-define the GradioHokieWellApp class with the modified handle_user_query function
class GradioHokieWellApp:
    def __init__(self) -> None:
        """Create the agents used by the app and load the datasets.

        Instantiates ``SimulatedDatabricksAgent``, ``DatabricksAIAgent`` and
        ``DatabricksModelAgent`` (none of them defined in this cell), selects the
        model agent as the one used for structured analysis, and finally calls
        ``load_data()``.
        """
        # Initialize both simulated and potentially real agents
        self.simulated_agent = SimulatedDatabricksAgent()
        self.databricks_ai_agent = DatabricksAIAgent() # Agent for NL queries
        self.databricks_model_agent = DatabricksModelAgent() # Agent for structured analysis via model endpoint

        # Decide which structured analysis agent to use
        self.structured_analysis_agent = self.databricks_model_agent # Use model agent first
        # Optionally add logic to fallback to self.simulated_agent if model agent fails init
        # (no such fallback exists yet: simulated_agent is created but not selected here)


        self.load_data()


    def load_data(self):
        """Load the synthetic dataset from CSV files in the working directory.

        All five files must exist for the CSV path to be taken; otherwise, or if
        anything raises while loading, ``create_minimal_data()`` is used instead.
        """
        # (attribute name, CSV file) in the order the files are checked and read
        csv_sources = (
            ('students', 'students.csv'),
            ('academic', 'academic_data.csv'),
            ('wellbeing', 'wellbeing_data.csv'),
            ('environmental', 'environmental_data.csv'),
            ('resources', 'resources.csv'),
        )

        try:
            # Check if files exist before loading
            if not all(os.path.exists(csv_path) for _, csv_path in csv_sources):
                print("⚠️ CSV files not found. Creating minimal data.")
                self.create_minimal_data()
                return

            for attribute, csv_path in csv_sources:
                setattr(self, attribute, pd.read_csv(csv_path))
            print("✅ Data loaded successfully from CSV files")

        except Exception as e:
            print(f"❌ Data loading failed: {e}")
            # Create minimal data if any error occurs during loading
            self.create_minimal_data()

    def create_minimal_data(self):
        """Create minimal data if files are missing or loading fails.

        Sets ``students``, ``academic``, ``wellbeing``, ``environmental`` and
        ``resources`` on the instance: two students (S001, S003) with two rows
        each in the academic/wellbeing/environmental frames, and an empty
        ``resources`` frame.
        """
        self.students = pd.DataFrame([
            {'student_id': 'S001', 'name': 'Alex Johnson', 'major': 'Computer Engineering', 'year': 'Sophomore'},
            {'student_id': 'S003', 'name': 'Jordan Smith', 'major': 'Psychology', 'year': 'Freshman'}
        ])
        # Add minimal data for academic, wellbeing, and environmental dataframes
        self.academic = pd.DataFrame({
            'student_id': ['S001', 'S001', 'S003', 'S003'],
            'assignment_id': ['A001', 'A002', 'A003', 'A004'],
            'course_id': ['CS101', 'CS101', 'PSYC101', 'PSYC101'],
            'course_name': ['Intro to CS', 'Intro to CS', 'Intro to Psych', 'Intro to Psych'],
            'assignment_name': ['Assignment 1', 'Assignment 2', 'Assignment 1', 'Assignment 2'],
            'due_date': ['2024-01-20', '2024-02-05', '2024-01-20', '2024-02-05'],
            'submission_date': ['2024-01-20', '2024-02-06', '2024-01-21', '2024-02-08'],
            'grade': [85, 80, 70, 65],
            'submission_delay_days': [0, 1, 1, 3],
            'difficulty_level': [0.5, 0.5, 0.4, 0.4]
        })
        self.wellbeing = pd.DataFrame({
            'student_id': ['S001', 'S001', 'S003', 'S003'],
            'date': ['2024-01-20', '2024-01-21', '2024-01-20', '2024-01-21'],
            'sleep_duration': [7.5, 7.0, 6.0, 5.5],
            'step_count': [8000, 8500, 5000, 4500],
            'wellbeing_score': [4.0, 3.8, 3.0, 2.8],
            'week_of_semester': [1, 1, 1, 1],
            'day_type': ['Weekday', 'Weekday', 'Weekday', 'Weekday']
        })
        self.environmental = pd.DataFrame({
            'student_id': ['S001', 'S001', 'S003', 'S003'],
            'date': ['2024-01-20', '2024-01-21', '2024-01-20', '2024-01-21'],
            'meals_on_campus': [2.0, 1.0, 1.0, 1.0],
            'library_hours': [1.0, 1.5, 0.5, 0.2],
            'gym_visit': [1, 0, 0, 0],
            'campus_engagement_score': [0.7, 0.6, 0.5, 0.4]
        })
        # The CSV loading path also sets `resources`; define it here too (empty, with
        # the columns listed in the notebook's dataset description) so the attribute
        # exists no matter which path populated the app.
        self.resources = pd.DataFrame(columns=[
            'resource_id', 'name', 'type', 'description',
            'department', 'location', 'contact', 'keywords'
        ])
        print("✅ Minimal data created.")


    def get_student_data(self, student_id):
        """Return the selected student's rows from each loaded dataset.

        The result maps ``'academic'``, ``'wellbeing'`` and ``'environmental'`` to
        the rows whose ``student_id`` equals the argument; a dataset that was never
        loaded onto the instance is represented by an empty DataFrame.
        """
        student_frames = {}
        for dataset in ('academic', 'wellbeing', 'environmental'):
            if hasattr(self, dataset):
                frame = getattr(self, dataset)
                student_frames[dataset] = frame[frame['student_id'] == student_id]
            else:
                student_frames[dataset] = pd.DataFrame()
        return student_frames

    def analyze_student(self, student_id):
        """Run the holistic analysis for one student.

        Gathers the student's data with ``get_student_data`` and passes it, together
        with the id, to ``structured_analysis_agent.run_holistic_analysis``; that
        call's result is returned unchanged.
        """
        data_for_student = self.get_student_data(student_id)
        analysis_agent = self.structured_analysis_agent
        return analysis_agent.run_holistic_analysis(student_id, data_for_student)


    def create_risk_gauge(self, risk_score):
        """Build the Plotly gauge that displays a risk score.

        The indicator shows ``risk_score`` on a 0-1 axis with a delta against 0.5,
        green/yellow/red bands split at 0.3 and 0.7, and a red threshold line at 0.7.
        Returns the figure.
        """
        # Background bands of the dial, from low to high risk
        color_bands = [
            {'range': [0, 0.3], 'color': 'lightgreen'},
            {'range': [0.3, 0.7], 'color': 'yellow'},
            {'range': [0.7, 1], 'color': 'red'},
        ]
        dial = {
            'axis': {'range': [0, 1], 'tickwidth': 1, 'tickcolor': "darkblue"},
            'bar': {'color': "darkblue"},
            'bgcolor': "white",
            'borderwidth': 2,
            'bordercolor': "gray",
            'steps': color_bands,
            'threshold': {
                'line': {'color': "red", 'width': 4},
                'thickness': 0.75,
                'value': 0.7,
            },
        }
        indicator = go.Indicator(
            mode="gauge+number+delta",
            value=risk_score,
            domain={'x': [0, 1], 'y': [0, 1]},
            title={'text': "Academic Risk Score", 'font': {'size': 20, 'color': 'black'}},
            delta={'reference': 0.5, 'increasing': {'color': "red"}, 'decreasing': {'color': "green"}},
            gauge=dial,
        )

        gauge_fig = go.Figure(indicator)
        gauge_fig.update_layout(
            height=300,
            margin={'l': 20, 'r': 20, 't': 50, 'b': 20},
            font={'color': 'black'},
        )
        return gauge_fig

    def create_academic_trend_chart(self, student_id):
        """Plot the selected student's grades against assignment due dates.

        Returns ``None`` when no academic data is loaded, when it is empty, or when
        the student has no rows. Rows whose ``due_date`` cannot be parsed are
        dropped; the remaining rows are plotted in date order.
        """
        if not hasattr(self, 'academic'):
            return None
        if self.academic.empty:
            return None

        belongs_to_student = self.academic['student_id'] == student_id
        # Work on a copy so the date conversion below never writes into self.academic
        grades = self.academic[belongs_to_student].copy()
        if grades.empty:
            return None

        # Unparseable dates become NaT (errors='coerce') and are removed before sorting
        grades['due_date'] = pd.to_datetime(grades['due_date'], errors='coerce')
        grades = grades.dropna(subset=['due_date']).sort_values('due_date')

        trend_fig = go.Figure()
        grade_line = go.Scatter(
            x=grades['due_date'],
            y=grades['grade'],
            mode='lines+markers',
            name='Grades',
            line={'color': '#861F41', 'width': 3},
            marker={'size': 8},
        )
        trend_fig.add_trace(grade_line)

        black = {'color': 'black'}
        trend_fig.update_layout(
            title="Academic Performance Trend",
            xaxis_title="Assignment Date",
            yaxis_title="Grade",
            height=300,
            showlegend=False,
            font=dict(black),
            title_font=dict(black),
            xaxis={'tickfont': dict(black)},
            yaxis={'tickfont': dict(black)},
        )

        return trend_fig

    def create_wellbeing_chart(self, student_id):
        """Plot the selected student's sleep hours and wellbeing score over time.

        Returns ``None`` when no wellbeing data is loaded, when it is empty, or when
        the student has no rows. Sleep hours use the left axis; the wellbeing score
        is drawn against a second y-axis on the right.
        """
        if not hasattr(self, 'wellbeing'):
            return None
        if self.wellbeing.empty:
            return None

        belongs_to_student = self.wellbeing['student_id'] == student_id
        # Copy first so the date conversion below never writes into self.wellbeing
        daily = self.wellbeing[belongs_to_student].copy()
        if daily.empty:
            return None

        # Unparseable dates become NaT (errors='coerce') and are removed before sorting
        daily['date'] = pd.to_datetime(daily['date'], errors='coerce')
        daily = daily.dropna(subset=['date']).sort_values('date')

        wellbeing_fig = go.Figure()

        # (column, legend label, line colour, extra trace options), drawn in this order
        series_specs = (
            ('sleep_duration', 'Sleep Hours', '#E87722', {}),
            ('wellbeing_score', 'Wellbeing Score', '#861F41', {'yaxis': 'y2'}),
        )
        for column, label, colour, extra_options in series_specs:
            wellbeing_fig.add_trace(go.Scatter(
                x=daily['date'],
                y=daily[column],
                mode='lines',
                name=label,
                line={'color': colour, 'width': 2},
                **extra_options
            ))

        black = {'color': 'black'}
        wellbeing_fig.update_layout(
            title="Wellbeing Metrics",
            xaxis_title="Date",
            yaxis_title="Sleep Hours",
            yaxis2={'title': "Wellbeing Score", 'overlaying': 'y', 'side': 'right'},
            height=300,
            showlegend=True,
            font=dict(black),
            title_font=dict(black),
            xaxis={'tickfont': dict(black)},
            yaxis={'tickfont': dict(black)},
        )

        return wellbeing_fig


    def format_analysis_results(self, analysis_result):
        """Format analysis results for display with BLACK TEXT"""
        # Ensure keys exist with default empty dicts
        academic = analysis_result.get('academic_analysis', {})
        causal = analysis_result.get('causal_analysis', {})
        plan = analysis_result.get('intervention_plan', {})

        # Academic insights - ALL BLACK TEXT
        academic_html = f"""
        <div style='background: #f8f9fa; padding: 15px; border-radius: 10px; margin: 10px 0;'>
            <h3 style='color: black; margin-bottom: 10px;'>📚 Academic Analysis</h3>
            <p style='color: black; margin: 5px 0;'><strong style='color: black;'>Risk Score:</strong> <span style='color: black;'>{academic.get('risk_score', 0.0):.2f}</span></p>
            <p style='color: black; margin: 5px 0;'><strong style='color: black;'>Trend:</strong> <span style='color: black;'>{academic.get('trend_direction', 'N/A').title()}</span></p>
            <p style='color: black; margin: 5px 0;'><strong style='color: black;'>Key Insights:</strong></p>
            <ul style='color: black;'>
        """
        for insight in academic.get('key_insights', []):
            academic_html += f"<li style='color: black;'>{insight}</li>"
        academic_html += "</ul></div>"

        # Causal analysis - ALL BLACK TEXT
        causal_html = f"""
        <div style='background: #fff3cd; padding: 15px; border-radius: 10px; margin: 10px 0;'>
            <h3 style='color: black; margin-bottom: 10px;'>🔍 Root Cause Analysis</h3>
            <p style='color: black; margin: 5px 0;'><strong style='color: black;'>Identified Factors:</strong></p>
            <ul style='color: black;'>
        """
        for factor in causal.get('causal_factors', []):
            effect = causal.get('effect_sizes', {}).get(factor, 0)
            causal_html += f"<li style='color: black;'>{factor.replace('_', ' ').title()} (effect size: {effect:.3f})</li>"
        causal_html += "</ul></div>"

        # Intervention plan - ALL BLACK TEXT
        risk_level = plan.get('risk_level', 'low')
        risk_level_color = "red" if risk_level == "high" else "orange" if risk_level == "medium" else "green"
        plan_html = f"""
        <div style='background: #d1ecf1; padding: 15px; border-radius: 10px; margin: 10px 0;'>
            <h3 style='color: black; margin-bottom: 10px;'>🎯 Intervention Plan</h3>
            <p style='color: black; margin: 5px 0;'><strong style='color: black;'>Risk Level:</strong> <span style='color: {risk_level_color}; font-weight: bold;'>{risk_level.upper()}</span></p>
            <p style='color: black; margin: 5px 0;'><strong style='color: black;'>Recommended Actions:</strong></p>
            <ul style='color: black;'>
        """
        for action in plan.get('planned_actions', []):
            plan_html += f"""
            <li style='color: black; margin-bottom: 10px;'>
                <strong style='color: black;'>{action.get('type', 'N/A').replace('_', ' ').title()}:</strong><br>
                <span style='color: black;'>{action.get('description', 'N/A')}</span><br>
                <em style='color: black;'>Confidence: {action.get('confidence', 0):.0%}</em>
            </li>
            """
        plan_html += "</ul></div>"

        return academic_html + causal_html + plan_html

    def get_resource_recommendations(self, student_id):
        """Return resource recommendations matched to the student's causal factors.

        Runs ``analyze_student`` and compares the ``causal_factors`` of its
        ``causal_analysis`` section against a fixed rule table. Each matching rule
        yields one ``{'resource', 'match', 'reason'}`` dict, in rule order; when no
        rule matches, two general recommendations are returned instead.
        """
        # Use the structured analysis agent to get the latest analysis for factors
        causal_section = self.analyze_student(student_id).get('causal_analysis', {})
        flagged_factors = causal_section.get('causal_factors', [])

        # (factors that trigger the rule, resource, match score, reason)
        rules = (
            (('sleep_deprivation',),
             'Sleep & Wellness Workshop', 0.95, 'Addresses identified sleep patterns'),
            (('academic_overload', 'academic_pressure', 'time_management'),
             'Academic Success Center / Tutoring', 0.88, 'Targeted academic support'),
            (('social_isolation',),
             'Student Clubs & Organizations / Peer Programs', 0.82, 'Community engagement opportunities'),
            (('stress', 'anxiety'),
             'Counseling Services / Stress Management Workshop', 0.90, 'Provides coping strategies and support'),
        )
        matched = [
            {'resource': resource, 'match': match_score, 'reason': reason}
            for triggers, resource, match_score, reason in rules
            if any(trigger in flagged_factors for trigger in triggers)
        ]
        if matched:
            return matched

        # Default recommendations if no specific factors match
        return [
            {'resource': 'Academic Success Center', 'match': 0.75, 'reason': 'General academic support'},
            {'resource': 'Counseling Services', 'match': 0.70, 'reason': 'Wellbeing support'},
        ]

    def format_resource_recommendations_html(self, student_id):
        """Format resource recommendations as HTML with BLACK TEXT"""
        recommendations = self.get_resource_recommendations(student_id)

        html = "<div style='padding: 20px; color: black;'>"
        html += "<h3 style='color: black;'>🛟 Personalized Resource Recommendations</h3>"

        if not recommendations:
            html += "<p style='color: black;'>No specific recommendations available based on current analysis. General support resources are always available.</p>"
        else:
            for rec in recommendations:
                html += f"""
                <div style='background: #e8f5e8; padding: 15px; margin: 10px 0; border-radius: 10px; border-left: 5px solid #4CAF50; color: black;'>
                    <h4 style='color: black;'>{rec.get('resource', 'N/A')} <span style='float: right; background: #4CAF50; color: white; padding: 2px 8px; border-radius: 10px; font-size: 12px;'>{rec.get('match', 0):.0%} match</span></h4>
                    <p style='color: black;'>{rec.get('reason', 'No reason provided.')}</p>
                </div>
                """

        html += "</div>"
        return html


    def create_interface(self):
        """Build the Gradio Blocks UI, wire its event handlers, and return it.

        Layout: a narrow left column (student dropdown, analysis button, risk
        gauge plot and three read-only "quick stats" textboxes) and a wider
        right column holding four tabs (agent Q&A, analysis results, charts,
        resource recommendations).

        Returns:
            The ``gr.Blocks`` instance (``demo``); this method does not launch it.
        """
        # NOTE(review): the CSS string only targets `.gradio-container`; no rule for the
        # `black-text` class used via `elem_classes` below is visible here — confirm it is
        # defined elsewhere or is intentionally a no-op.
        with gr.Blocks(theme=gr.themes.Soft(), title="HokieWell Navigator", css=".gradio-container {color: black !important;}") as demo:
            gr.Markdown(
                """
                # 🎓 HokieWell Navigator
                ### *From Reactive Support to Proactive Thriving*
                **Powered by Databricks AI Agent Framework**
                """
            )

            with gr.Row():
                # Left column: student selection, the analysis trigger and summary widgets.
                with gr.Column(scale=1):
                    # Choices are "<student_id> - <name>" strings; handlers split on ' - '
                    # to recover the id.
                    # NOTE(review): the default value is hard-coded and is only valid if
                    # S003 / Jordan Smith is present in self.students — confirm for real data.
                    student_dropdown = gr.Dropdown(
                        choices=[f"{row['student_id']} - {row['name']}" for _, row in self.students.iterrows()],
                        label="👤 Select Student",
                        value="S003 - Jordan Smith",
                        elem_classes=["black-text"]
                    )

                    analyze_btn = gr.Button("🚀 Run AI Analysis", variant="primary", elem_classes=["black-text"])
                    risk_gauge = gr.Plot(label="Academic Risk Assessment")

                    gr.Markdown("### 📊 Quick Stats", elem_classes=["black-text"])
                    risk_score = gr.Textbox(label="Risk Score", interactive=False, elem_classes=["black-text"])
                    trend_direction = gr.Textbox(label="Trend Direction", interactive=False, elem_classes=["black-text"])
                    primary_factor = gr.Textbox(label="Primary Factor", interactive=False, elem_classes=["black-text"])

                # Right column: tabbed detail views.
                with gr.Column(scale=2):
                    with gr.Tab("🤖 Agent Response"):
                        gr.Markdown("### 💬 Ask Anything About the Student")
                        gr.Markdown("""
                        **Example questions to try:**
                        - "How is he studying?"
                        - "Explain the sleep issues"
                        - "What causes the stress?"
                        - "Why are grades declining?"
                        - "What interventions would help?"
                        """)

                        user_query = gr.Textbox(
                            label="Enter your question about the student:",
                            placeholder="Type your question here...",
                            lines=3,
                            elem_classes=["black-text"]
                        )

                        ask_btn = gr.Button("🎯 Get AI Analysis", variant="primary")
                        agent_response = gr.HTML(label="AI Agent Response", elem_classes=["black-text"])

                    with gr.Tab("📈 Analysis Results"):
                        analysis_output = gr.HTML(label="AI Analysis Results", elem_classes=["black-text"])

                    with gr.Tab("📊 Visual Analytics"):
                        with gr.Row():
                            academic_chart = gr.Plot(label="Academic Performance")
                            wellbeing_chart = gr.Plot(label="Wellbeing Metrics")

                    with gr.Tab("🛟 Resource Recommendations"):
                        resources_output = gr.HTML(label="Personalized Recommendations", elem_classes=["black-text"])

            # Event handlers
            # The outputs list must stay in the same order as the tuple returned by
            # run_complete_analysis (eight values).
            analyze_btn.click(
                fn=self.run_complete_analysis,
                inputs=[student_dropdown],
                outputs=[risk_gauge, risk_score, trend_direction, primary_factor, analysis_output, academic_chart, wellbeing_chart, resources_output] # Removed agent_info output
            )

            # The question can be sent either with the button or from the textbox itself;
            # both routes call the same handler with the same inputs.
            ask_btn.click(
                fn=self.handle_user_query,
                inputs=[user_query, student_dropdown],
                outputs=[agent_response]
            )

            user_query.submit( # Allow submitting query by pressing Enter
                fn=self.handle_user_query,
                inputs=[user_query, student_dropdown],
                outputs=[agent_response]
            )


            # Changing the student refreshes only the two charts; the gauge, quick stats,
            # analysis and resources keep their previous content until the analysis
            # button is clicked again.
            student_dropdown.change(
                fn=self.update_student_charts,
                inputs=[student_dropdown],
                outputs=[academic_chart, wellbeing_chart]
            )

            # Initial load
            # Draw the charts for the default dropdown value when the page opens.
            demo.load(
                fn=self.initial_load,
                inputs=[student_dropdown],
                outputs=[academic_chart, wellbeing_chart]
            )

        return demo

    def run_complete_analysis(self, student_selection):
        """Run the full analysis for the selected student and build every dashboard output.

        Args:
            student_selection: Dropdown value of the form ``"<student_id> - <name>"``.

        Returns:
            An 8-tuple: (risk gauge figure, risk score text, trend text,
            primary factor text, analysis HTML, academic chart, wellbeing chart,
            resource recommendations HTML).
        """
        student_id = student_selection.split(' - ')[0]
        analysis_result = self.analyze_student(student_id)

        academic_analysis = analysis_result.get('academic_analysis', {})
        causal_analysis = analysis_result.get('causal_analysis', {})

        # Read the score once so the gauge and the text box can never disagree.
        risk_score_val = academic_analysis.get('risk_score', 0.5)
        risk_gauge = self.create_risk_gauge(risk_score_val)

        # Text outputs
        risk_score = f"{risk_score_val:.2f}"
        trend_direction = academic_analysis.get('trend_direction', 'N/A').title()

        # An agent may legitimately report an empty factor list.  Indexing [0] on it
        # raised IndexError because the `[None]` default only applied when the key
        # was missing altogether, so treat "missing", None and "empty" the same way.
        causal_factors = causal_analysis.get('causal_factors') or []
        primary_factor = causal_factors[0] if causal_factors else None
        primary_factor = primary_factor.replace('_', ' ').title() if primary_factor else "No significant factors"

        # Analysis results HTML
        analysis_html = self.format_analysis_results(analysis_result)

        # Charts
        academic_chart = self.create_academic_trend_chart(student_id)
        wellbeing_chart = self.create_wellbeing_chart(student_id)

        # Resource recommendations
        resources_html = self.format_resource_recommendations_html(student_id)

        return risk_gauge, risk_score, trend_direction, primary_factor, analysis_html, academic_chart, wellbeing_chart, resources_html

    def update_student_charts(self, student_selection):
        """Rebuild the academic and wellbeing charts for the selected student.

        Args:
            student_selection: Dropdown value of the form ``"<student_id> - <name>"``,
                or ``None`` / blank when nothing is selected.

        Returns:
            ``(academic_chart, wellbeing_chart)``.  Both are ``None`` when no
            student is selected, which is the same value the chart builders
            return when they have no data to plot.
        """
        # A cleared dropdown delivers None (or an empty string); calling .split on it
        # would raise inside the change/load callbacks, so return empty charts instead.
        if not student_selection or not student_selection.strip():
            return None, None

        student_id = student_selection.split(' - ')[0]
        academic_chart = self.create_academic_trend_chart(student_id)
        wellbeing_chart = self.create_wellbeing_chart(student_id)
        return academic_chart, wellbeing_chart

    def initial_load(self, student_selection):
        """Produce the two charts for the dropdown's starting value.

        Delegates to ``update_student_charts`` and returns its result unchanged.
        """
        starting_charts = self.update_student_charts(student_selection)
        return starting_charts

# Keep the original SimulatedDatabricksAgent as a potential fallback or alternative
class SimulatedDatabricksAgent:
    """Stand-in agent that returns canned analysis results keyed on the student id."""

    def __init__(self):
        self.agent_id = "HokieWell_Simulated"
        print("✅ Simulated Databricks Agent initialized")

    def run_holistic_analysis(self, student_id, data_sources):
        """Return a simulated analysis report for ``student_id``.

        ``data_sources`` is accepted but never read: the report is derived only
        from a preset (risk score, factors, insights) chosen by student id.
        "S003" and "S001" have dedicated presets; any other id gets the
        low-risk preset.
        """
        presets = (
            ("S003", (  # Jordan - high risk
                0.75,
                ["sleep_deprivation", "academic_overload", "social_isolation"],
                [
                    "Grade trend showing significant decline over past 3 weeks",
                    "Sleep duration consistently below 6 hours",
                    "Assignment submission delays increasing"
                ],
            )),
            ("S001", (  # Alex - medium risk
                0.65,
                ["sleep_deprivation", "academic_overload"],
                [
                    "Moderate grade decline detected",
                    "Irregular sleep patterns affecting performance",
                    "Increased library hours suggesting cramming behavior"
                ],
            )),
        )
        low_risk_preset = (
            0.35,
            ["minor_adjustments_needed"],
            [
                "Stable academic performance",
                "Healthy wellbeing patterns detected",
                "Minor optimizations possible"
            ],
        )
        risk_score, factors, insights = next(
            (preset for known_id, preset in presets if student_id == known_id),
            low_risk_preset,
        )

        # Scores above 0.6 are reported as a worsening trend.
        worsening = risk_score > 0.6
        academic_trend = "declining" if worsening else "stable"
        stress_trend = "increasing" if worsening else "stable"

        # Each factor receives an equal share of the risk score plus a fixed 0.1.
        effect_sizes = {}
        if factors:
            for factor in factors:
                effect_sizes[factor] = risk_score/len(factors) + 0.1

        if risk_score > 0.7:
            risk_level = "high"
        elif risk_score > 0.4:
            risk_level = "medium"
        else:
            risk_level = "low"

        if risk_score > 0.4:
            planned_actions = [
                {
                    "type": "academic_support",
                    "description": "Schedule targeted tutoring sessions with engineering specialists",
                    "confidence": risk_score
                },
                {
                    "type": "wellbeing_intervention",
                    "description": "Proactive wellbeing check-in and sleep hygiene workshop",
                    "confidence": max(0.3, risk_score - 0.1)
                }
            ]
        else:
            planned_actions = [
                {
                    "type": "preventive_maintenance",
                    "description": "Regular check-ins to maintain healthy patterns",
                    "confidence": 0.9
                }
            ]

        report = {
            "agent_id": self.agent_id,
            "student_id": student_id,
            "analysis_timestamp": datetime.now().isoformat(),
        }
        report["academic_analysis"] = {
            "risk_score": risk_score,
            "trend_direction": academic_trend,
            "confidence": 0.82,
            "key_insights": insights,
            "model_version": "databricks_dbrx_instruct_simulated"
        }
        report["wellbeing_assessment"] = {
            "overall_score": max(0.3, 1 - risk_score + 0.1),
            "dimensions": {
                "sleep_health": {"score": max(0.3, 1 - risk_score), "trend": academic_trend},
                "stress_levels": {"score": risk_score, "trend": stress_trend}
            }
        }
        report["causal_analysis"] = {
            "causal_factors": factors,
            "effect_sizes": effect_sizes
        }
        report["intervention_plan"] = {
            "risk_level": risk_level,
            "planned_actions": planned_actions
        }
        report["databricks_features_used"] = [
            "mlflow_tracking",
            "feature_store",
            "model_registry",
            "causal_ml",
            "collaborative_filtering"
        ]
        return report

## Implement logic for no student selected

### Subtask:
Implement the logic within the `handle_user_query` function to process queries based on the entire dataset when no specific student is selected. This involves adding methods to analyze aggregate data and generating appropriate responses.


**Reasoning**:
Define a method to analyze aggregate data within the GradioHokieWellApp class.



In [None]:
# Add a new method to GradioHokieWellApp to analyze aggregate data
def analyze_aggregate_data(self):
    """Summarise the whole dataset (all students) into a dict of per-domain summaries.

    A summary is added for each of ``self.academic``, ``self.wellbeing`` and
    ``self.environmental`` that exists and is non-empty.  Each summary holds
    the computed averages plus a ready-to-display ``insight`` sentence.  When
    none of the three is usable, the result contains only a top-level
    ``insight`` message.
    """
    def has_rows(attribute):
        # True only when the attribute exists on the object and holds a non-empty frame.
        return hasattr(self, attribute) and not getattr(self, attribute).empty

    summaries = {}

    if has_rows('academic'):
        mean_grade = self.academic['grade'].mean()
        assignment_count = len(self.academic)
        summaries['academic_summary'] = {
            'average_grade_overall': mean_grade,
            'total_assignments': assignment_count,
            'insight': f"Across all students, the average assignment grade is {mean_grade:.2f} based on {assignment_count} assignments."
        }

    if has_rows('wellbeing'):
        mean_sleep = self.wellbeing['sleep_duration'].mean()
        mean_wellbeing = self.wellbeing['wellbeing_score'].mean()
        summaries['wellbeing_summary'] = {
            'average_sleep_hours_overall': mean_sleep,
            'average_wellbeing_score_overall': mean_wellbeing,
            'insight': f"Average daily sleep duration is {mean_sleep:.1f} hours, and the average wellbeing score is {mean_wellbeing:.1f} across all students."
        }

    if has_rows('environmental'):
        mean_engagement = self.environmental['campus_engagement_score'].mean()
        summaries['environmental_summary'] = {
            'average_campus_engagement_overall': mean_engagement,
            'insight': f"The average daily campus engagement score is {mean_engagement:.2f} across all students."
        }

    if len(summaries) == 0:
        summaries['insight'] = "No data available to perform aggregate analysis."

    return summaries

# Re-define the GradioHokieWellApp class with the new analyze_aggregate_data method and the modified handle_user_query
class GradioHokieWellApp:
    def __init__(self):
        """Create the agents, pick the structured-analysis agent, and load the dataset.

        Three agents are instantiated: a simulated one, one used for
        natural-language queries, and one used for structured analysis.  The
        structured-analysis agent is currently always the model agent; the
        simulated agent is created but not selected here.
        """
        # Initialize both simulated and potentially real agents
        self.simulated_agent = SimulatedDatabricksAgent()
        self.databricks_ai_agent = DatabricksAIAgent() # Agent for NL queries
        self.databricks_model_agent = DatabricksModelAgent() # Agent for structured analysis via model endpoint

        # Decide which structured analysis agent to use
        self.structured_analysis_agent = self.databricks_model_agent # Use model agent first
        # Optionally add logic to fallback to self.simulated_agent if model agent fails init
        # NOTE(review): no such fallback exists yet — an exception raised by either
        # Databricks agent constructor propagates out of __init__.


        # Populates self.students / academic / wellbeing / environmental (and
        # self.resources when the CSV files are present).
        self.load_data()


    def load_data(self):
        """Fill the dataset attributes from CSV files in the working directory.

        All five files must be present for the CSVs to be read.  If any is
        missing, or if anything fails while checking or reading them, the
        built-in minimal dataset is created instead.
        """
        # attribute name -> CSV file that feeds it (checked and read in this order)
        csv_sources = {
            'students': 'students.csv',
            'academic': 'academic_data.csv',
            'wellbeing': 'wellbeing_data.csv',
            'environmental': 'environmental_data.csv',
            'resources': 'resources.csv',
        }

        try:
            if not all(os.path.exists(csv_path) for csv_path in csv_sources.values()):
                print("⚠️ CSV files not found. Creating minimal data.")
                self.create_minimal_data()
                return

            for attribute, csv_path in csv_sources.items():
                setattr(self, attribute, pd.read_csv(csv_path))
            print("✅ Data loaded successfully from CSV files")
        except Exception as e:
            print(f"❌ Data loading failed: {e}")
            # Any failure above falls back to the minimal in-memory dataset.
            self.create_minimal_data()

    def create_minimal_data(self):
        """Install a tiny built-in dataset (two students, two rows each per table).

        Sets ``self.students``, ``self.academic``, ``self.wellbeing`` and
        ``self.environmental``.  ``self.resources`` is not set by this method.
        """
        # Row keys shared by the per-student tables: two rows for S001, then two for S003.
        row_student_ids = ['S001', 'S001', 'S003', 'S003']
        row_dates = ['2024-01-20', '2024-01-21', '2024-01-20', '2024-01-21']

        self.students = pd.DataFrame(
            [
                ('S001', 'Alex Johnson', 'Computer Engineering', 'Sophomore'),
                ('S003', 'Jordan Smith', 'Psychology', 'Freshman'),
            ],
            columns=['student_id', 'name', 'major', 'year'],
        )

        self.academic = pd.DataFrame(dict(
            student_id=row_student_ids,
            assignment_id=['A001', 'A002', 'A003', 'A004'],
            course_id=['CS101', 'CS101', 'PSYC101', 'PSYC101'],
            course_name=['Intro to CS', 'Intro to CS', 'Intro to Psych', 'Intro to Psych'],
            assignment_name=['Assignment 1', 'Assignment 2', 'Assignment 1', 'Assignment 2'],
            due_date=['2024-01-20', '2024-02-05', '2024-01-20', '2024-02-05'],
            submission_date=['2024-01-20', '2024-02-06', '2024-01-21', '2024-02-08'],
            grade=[85, 80, 70, 65],
            submission_delay_days=[0, 1, 1, 3],
            difficulty_level=[0.5, 0.5, 0.4, 0.4],
        ))

        self.wellbeing = pd.DataFrame(dict(
            student_id=row_student_ids,
            date=row_dates,
            sleep_duration=[7.5, 7.0, 6.0, 5.5],
            step_count=[8000, 8500, 5000, 4500],
            wellbeing_score=[4.0, 3.8, 3.0, 2.8],
            week_of_semester=[1, 1, 1, 1],
            day_type=['Weekday', 'Weekday', 'Weekday', 'Weekday'],
        ))

        self.environmental = pd.DataFrame(dict(
            student_id=row_student_ids,
            date=row_dates,
            meals_on_campus=[2.0, 1.0, 1.0, 1.0],
            library_hours=[1.0, 1.5, 0.5, 0.2],
            gym_visit=[1, 0, 0, 0],
            campus_engagement_score=[0.7, 0.6, 0.5, 0.4],
        ))

        print("✅ Minimal data created.")


    def get_student_data(self, student_id):
        """Collect the rows belonging to one student from each dataset.

        Returns a dict keyed ``'academic'``, ``'wellbeing'`` and
        ``'environmental'``.  Each value is the matching dataset filtered on
        ``student_id``, or an empty DataFrame when that dataset attribute does
        not exist on the instance.
        """
        per_student = {}
        for dataset_name in ('academic', 'wellbeing', 'environmental'):
            if hasattr(self, dataset_name):
                frame = getattr(self, dataset_name)
                per_student[dataset_name] = frame[frame['student_id'] == student_id]
            else:
                per_student[dataset_name] = pd.DataFrame()
        return per_student

    def analyze_student(self, student_id):
        """Analyse one student with the configured structured-analysis agent.

        Gathers the student's rows via ``get_student_data`` and passes them,
        together with the id, to the agent's ``run_holistic_analysis``; the
        agent's result is returned as-is.
        """
        per_student_frames = self.get_student_data(student_id)
        agent = self.structured_analysis_agent
        return agent.run_holistic_analysis(student_id, per_student_frames)

    # Modified handle_user_query to include aggregate data analysis
    def handle_user_query(self, user_query, student_selection):
        """Answer a natural-language question and return the reply as an HTML card.

        Args:
            user_query: Free text typed by the user.  ``None`` or a blank string
                yields the plain prompt ``"Please enter a question."``.
            student_selection: Dropdown value ``"<student_id> - <name>"``.  When it is
                ``None`` or blank the question is answered against the aggregate
                analysis of the whole dataset instead of a single student.

        Returns:
            An HTML string containing the echoed question, the agent's response
            and a one-line analysis context.
        """
        # Local import so the method does not depend on the notebook's import cell.
        from html import escape

        if not user_query or not user_query.strip():
            return "Please enter a question."

        # Check if a student is selected
        if student_selection is None or student_selection.strip() == "":
            print("No student selected. Processing query based on the entire dataset.")

            # analyze_aggregate_data is defined at module level in this notebook; use it
            # as a bound method when one exists on the class, otherwise call the
            # module-level function directly so this branch cannot fail with
            # AttributeError.
            aggregate_method = getattr(self, 'analyze_aggregate_data', None)
            if callable(aggregate_method):
                aggregate_analysis_result = aggregate_method()
            else:
                aggregate_analysis_result = analyze_aggregate_data(self)

            # Prepare context for AI agent with aggregate data
            context_data = {
                'user_query': user_query,
                'aggregate_analysis': aggregate_analysis_result,
                'is_aggregate': True
            }
            scope_label = "Overall Dataset"
            context_line = "Entire Dataset"
        else:
            student_id = student_selection.split(' - ')[0]
            # Run the full analysis first to get the latest data and insights
            analysis_result = self.analyze_student(student_id)

            # Prepare context for AI agent with student-specific data
            academic = analysis_result.get('academic_analysis', {})
            causal = analysis_result.get('causal_analysis', {})
            plan = analysis_result.get('intervention_plan', {})

            context_data = {
                'risk_score': academic.get('risk_score', 0.5),
                'trend': academic.get('trend_direction', 'stable'),
                'factors': causal.get('causal_factors', []),
                'actions': [action.get('description', '') for action in plan.get('planned_actions', [])],
                'risk_level': plan.get('risk_level', 'low'),
                'student_id': student_id,
                'raw_analysis_result': analysis_result, # Pass full result for potential detailed lookup
                'is_aggregate': False
            }
            scope_label = f"Student {student_id}"
            context_line = (
                f"Student {student_id} | Risk Score: {academic.get('risk_score', 0.5):.2f} | "
                f"Primary Factors: {', '.join(causal.get('causal_factors', []))}"
            )

        # The agent receives the raw question; only the copy echoed into HTML is escaped.
        enhanced_response = self.databricks_ai_agent.get_enhanced_response(user_query, context_data)

        # The question is untrusted free text and the result is rendered as HTML, so
        # escape it (and the derived labels) to prevent markup/script injection.
        # NOTE(review): enhanced_response is inserted unescaped because the agent may
        # return intentional markup — confirm, and escape it too if it is plain text.
        safe_query = escape(user_query)
        safe_scope = escape(scope_label)
        safe_context = escape(context_line)

        return f"""
            <div style='background: #f8f9fa; padding: 20px; border-radius: 10px; border-left: 5px solid #4CAF50; margin: 10px 0;'>
                <h4 style='color: black; margin-top: 0;'>💬 AI Response to: "{safe_query}" ({safe_scope})</h4>
                <div style='color: black; line-height: 1.6; font-size: 14px; white-space: pre-line;'>
                    {enhanced_response}
                </div>
                <div style='margin-top: 15px; padding: 10px; background: #e8f5e8; border-radius: 5px;'>
                    <small style='color: #666;'>
                        <strong>Analysis Context:</strong> {safe_context}
                    </small>
                </div>
            </div>
            """


    def create_risk_gauge(self, risk_score):
        """Build a Plotly indicator (gauge + number + delta) figure for a risk score.

        The gauge axis spans 0 to 1, the delta is measured against 0.5, and a
        red threshold line is drawn at 0.7.
        """
        # Background bands: green up to 0.3, yellow up to 0.7, red above.
        colour_bands = [
            {'range': [0, 0.3], 'color': 'lightgreen'},
            {'range': [0.3, 0.7], 'color': 'yellow'},
            {'range': [0.7, 1], 'color': 'red'},
        ]
        gauge_spec = {
            'axis': {'range': [0, 1], 'tickwidth': 1, 'tickcolor': "darkblue"},
            'bar': {'color': "darkblue"},
            'bgcolor': "white",
            'borderwidth': 2,
            'bordercolor': "gray",
            'steps': colour_bands,
            'threshold': {
                'line': {'color': "red", 'width': 4},
                'thickness': 0.75,
                'value': 0.7,
            },
        }

        indicator = go.Indicator(
            mode="gauge+number+delta",
            value=risk_score,
            domain={'x': [0, 1], 'y': [0, 1]},
            title={'text': "Academic Risk Score", 'font': {'size': 20, 'color': 'black'}},
            delta={'reference': 0.5, 'increasing': {'color': "red"}, 'decreasing': {'color': "green"}},
            gauge=gauge_spec,
        )

        fig = go.Figure(indicator)
        fig.update_layout(
            height=300,
            margin={'l': 20, 'r': 20, 't': 50, 'b': 20},
            font={'color': 'black'},
        )
        return fig

    def create_academic_trend_chart(self, student_id):
        """Plot one student's grades against assignment due dates.

        Returns ``None`` when there is no academic dataset, when it is empty,
        or when it has no rows for ``student_id``; otherwise a Plotly figure
        with a single lines+markers trace.
        """
        has_academic = hasattr(self, 'academic') and not self.academic.empty
        if not has_academic:
            return None

        # Work on a copy so the date conversion below never touches self.academic.
        grades = self.academic[self.academic['student_id'] == student_id].copy()
        if grades.empty:
            return None

        # Unparseable due dates become NaT and are dropped before sorting.
        grades['due_date'] = pd.to_datetime(grades['due_date'], errors='coerce')
        grades = grades.dropna(subset=['due_date']).sort_values('due_date')

        grade_trace = go.Scatter(
            x=grades['due_date'],
            y=grades['grade'],
            mode='lines+markers',
            name='Grades',
            line={'color': '#861F41', 'width': 3},
            marker={'size': 8},
        )
        fig = go.Figure(data=[grade_trace])

        black_ticks = {'tickfont': {'color': 'black'}}
        fig.update_layout(
            title="Academic Performance Trend",
            xaxis_title="Assignment Date",
            yaxis_title="Grade",
            height=300,
            showlegend=False,
            font={'color': 'black'},
            title_font={'color': 'black'},
            xaxis=dict(black_ticks),
            yaxis=dict(black_ticks),
        )

        return fig

    def create_wellbeing_chart(self, student_id):
        """Plot one student's sleep hours and wellbeing score over time.

        Returns ``None`` when there is no wellbeing dataset, when it is empty,
        or when it has no rows for ``student_id``.  Otherwise returns a Plotly
        figure with two line traces; the wellbeing score uses a secondary
        y-axis on the right.
        """
        has_wellbeing = hasattr(self, 'wellbeing') and not self.wellbeing.empty
        if not has_wellbeing:
            return None

        # Copy so the date conversion below never touches self.wellbeing.
        daily = self.wellbeing[self.wellbeing['student_id'] == student_id].copy()
        if daily.empty:
            return None

        # Unparseable dates become NaT and are dropped before sorting.
        daily['date'] = pd.to_datetime(daily['date'], errors='coerce')
        daily = daily.dropna(subset=['date']).sort_values('date')

        fig = go.Figure()

        # (column, legend label, line colour, y-axis id or None for the primary axis)
        series = (
            ('sleep_duration', 'Sleep Hours', '#E87722', None),
            ('wellbeing_score', 'Wellbeing Score', '#861F41', 'y2'),
        )
        for column, label, colour, axis_id in series:
            trace_options = {
                'x': daily['date'],
                'y': daily[column],
                'mode': 'lines',
                'name': label,
                'line': {'color': colour, 'width': 2},
            }
            if axis_id is not None:
                trace_options['yaxis'] = axis_id
            fig.add_trace(go.Scatter(**trace_options))

        fig.update_layout(
            title="Wellbeing Metrics",
            xaxis_title="Date",
            yaxis_title="Sleep Hours",
            yaxis2={'title': "Wellbeing Score", 'overlaying': 'y', 'side': 'right'},
            height=300,
            showlegend=True,
            font={'color': 'black'},
            title_font={'color': 'black'},
            xaxis={'tickfont': {'color': 'black'}},
            yaxis={'tickfont': {'color': 'black'}},
        )

        return fig


    def format_analysis_results(self, analysis_result):
        """Format analysis results for display with BLACK TEXT"""
        # Ensure keys exist with default empty dicts
        academic = analysis_result.get('academic_analysis', {})
        causal = analysis_result.get('causal_analysis', {})
        plan = analysis_result.get('intervention_plan', {})

        # Academic insights - ALL BLACK TEXT
        academic_html = f"""
        <div style='background: #f8f9fa; padding: 15px; border-radius: 10px; margin: 10px 0;'>
            <h3 style='color: black; margin-bottom: 10px;'>📚 Academic Analysis</h3>
            <p style='color: black; margin: 5px 0;'><strong style='color: black;'>Risk Score:</strong> <span style='color: black;'>{academic.get('risk_score', 0.0):.2f}</span></p>
            <p style='color: black; margin: 5px 0;'><strong style='color: black;'>Trend:</strong> <span style='color: black;'>{academic.get('trend_direction', 'N/A').title()}</span></p>
            <p style='color: black; margin: 5px 0;'><strong style='color: black;'>Key Insights:</strong></p>
            <ul style='color: black;'>
        """
        for insight in academic.get('key_insights', []):
            academic_html += f"<li style='color: black;'>{insight}</li>"
        academic_html += "</ul></div>"

        # Causal analysis - ALL BLACK TEXT
        causal_html = f"""
        <div style='background: #fff3cd; padding: 15px; border-radius: 10px; margin: 10px 0;'>
            <h3 style='color: black; margin-bottom: 10px;'>🔍 Root Cause Analysis</h3>
            <p style='color: black; margin: 5px 0;'><strong style='color: black;'>Identified Factors:</strong></p>
            <ul style='color: black;'>
        """
        for factor in causal.get('causal_factors', []):
            effect = causal.get('effect_sizes', {}).get(factor, 0)
            causal_html += f"<li style='color: black;'>{factor.replace('_', ' ').title()} (effect size: {effect:.3f})</li>"
        causal_html += "</ul></div>"

        # Intervention plan - ALL BLACK TEXT
        risk_level = plan.get('risk_level', 'low')
        risk_level_color = "red" if risk_level == "high" else "orange" if risk_level == "medium" else "green"
        plan_html = f"""
        <div style='background: #d1ecf1; padding: 15px; border-radius: 10px; margin: 10px 0;'>
            <h3 style='color: black; margin-bottom: 10px;'>🎯 Intervention Plan</h3>
            <p style='color: black; margin: 5px 0;'><strong style='color: black;'>Risk Level:</strong> <span style='color: {risk_level_color}; font-weight: bold;'>{risk_level.upper()}</span></p>
            <p style='color: black; margin: 5px 0;'><strong style='color: black;'>Recommended Actions:</strong></p>
            <ul style='color: black;'>
        """
        for action in plan.get('planned_actions', []):
            plan_html += f"""
            <li style='color: black; margin-bottom: 10px;'>
                <strong style='color: black;'>{action.get('type', 'N/A').replace('_', ' ').title()}:</strong><br>
                <span style='color: black;'>{action.get('description', 'N/A')}</span><br>
                <em style='color: black;'>Confidence: {action.get('confidence', 0):.0%}</em>
            </li>
            """
        plan_html += "</ul></div>"

        return academic_html + causal_html + plan_html

    def get_resource_recommendations(self, student_id):
        """Return resource suggestions derived from the student's causal factors.

        The student is analysed via ``analyze_student``; every known factor
        group found in the reported ``causal_factors`` contributes one entry
        (keys ``resource``, ``match``, ``reason``), in a fixed order.  If no
        group matches, two general-purpose entries are returned.
        """
        analysis = self.analyze_student(student_id)
        risk_factors = analysis.get('causal_analysis', {}).get('causal_factors', [])

        def reported(*factor_names):
            # True when at least one of the given factor names was reported.
            return any(name in risk_factors for name in factor_names)

        def entry(resource, match, reason):
            return {'resource': resource, 'match': match, 'reason': reason}

        suggestions = []
        if reported('sleep_deprivation'):
            suggestions.append(entry(
                'Sleep & Wellness Workshop', 0.95, 'Addresses identified sleep patterns'))
        if reported('academic_overload', 'academic_pressure', 'time_management'):
            suggestions.append(entry(
                'Academic Success Center / Tutoring', 0.88, 'Targeted academic support'))
        if reported('social_isolation'):
            suggestions.append(entry(
                'Student Clubs & Organizations / Peer Programs', 0.82, 'Community engagement opportunities'))
        if reported('stress', 'anxiety'):
            suggestions.append(entry(
                'Counseling Services / Stress Management Workshop', 0.90, 'Provides coping strategies and support'))

        if suggestions:
            return suggestions

        # Default recommendations when no specific factor matched.
        return [
            entry('Academic Success Center', 0.75, 'General academic support'),
            entry('Counseling Services', 0.70, 'Wellbeing support'),
        ]

    def format_resource_recommendations_html(self, student_id):
        """Format resource recommendations as HTML with BLACK TEXT"""
        recommendations = self.get_resource_recommendations(student_id)

        html = "<div style='padding: 20px; color: black;'>"
        html += "<h3 style='color: black;'>🛟 Personalized Resource Recommendations</h3>"

        if not recommendations:
            html += "<p style='color: black;'>No specific recommendations available based on current analysis. General support resources are always available.</p>"
        else:
            for rec in recommendations:
                html += f"""
                <div style='background: #e8f5e8; padding: 15px; margin: 10px 0; border-radius: 10px; border-left: 5px solid #4CAF50; color: black;'>
                    <h4 style='color: black;'>{rec.get('resource', 'N/A')} <span style='float: right; background: #4CAF50; color: white; padding: 2px 8px; border-radius: 10px; font-size: 12px;'>{rec.get('match', 0):.0%} match</span></h4>
                    <p style='color: black;'>{rec.get('reason', 'No reason provided.')}</p>
                </div>
                """

        html += "</div>"
        return html


    def create_interface(self):
        """Build the Gradio Blocks UI for the navigator and wire its events.

        Layout: a narrow left column (student selector, analysis button, risk
        gauge plot and three read-only quick-stat text boxes) beside a wider
        right column holding four tabs (agent Q&A, analysis results, visual
        analytics, resource recommendations).

        Event wiring:
            * analyze button click -> ``self.run_complete_analysis`` (fills all eight outputs)
            * ask button click / Enter in the query box -> ``self.handle_user_query``
            * dropdown change -> ``self.update_student_charts`` (both charts only)
            * page load -> ``self.initial_load`` (both charts only)

        Returns:
            The ``gr.Blocks`` instance; this method does not launch it.
        """
        with gr.Blocks(theme=gr.themes.Soft(), title="HokieWell Navigator", css=".gradio-container {color: black !important;}") as demo:
            # Page header.
            gr.Markdown(
                """
                # 🎓 HokieWell Navigator
                ### *From Reactive Support to Proactive Thriving*
                **Powered by Databricks AI Agent Framework**
                """
            )

            with gr.Row():
                # Left column: student picker, analysis trigger, gauge and quick stats.
                with gr.Column(scale=1):
                    student_dropdown = gr.Dropdown(
                        # Choices are "<student_id> - <name>"; handlers split on ' - ' to recover the id.
                        choices=[f"{row['student_id']} - {row['name']}" for _, row in self.students.iterrows()],
                        label="👤 Select Student",
                        # NOTE(review): hard-coded default; assumes this entry exists in self.students -- confirm.
                        value="S003 - Jordan Smith",
                        # NOTE(review): the css string passed to gr.Blocks above defines no `.black-text` rule.
                        elem_classes=["black-text"]
                    )

                    analyze_btn = gr.Button("🚀 Run AI Analysis", variant="primary", elem_classes=["black-text"])
                    risk_gauge = gr.Plot(label="Academic Risk Assessment")

                    gr.Markdown("### 📊 Quick Stats", elem_classes=["black-text"])
                    risk_score = gr.Textbox(label="Risk Score", interactive=False, elem_classes=["black-text"])
                    trend_direction = gr.Textbox(label="Trend Direction", interactive=False, elem_classes=["black-text"])
                    primary_factor = gr.Textbox(label="Primary Factor", interactive=False, elem_classes=["black-text"])

                # Right column: tabbed detail views.
                with gr.Column(scale=2):
                    with gr.Tab("🤖 Agent Response"):
                        gr.Markdown("### 💬 Ask Anything About the Student")
                        gr.Markdown("""
                        **Example questions to try:**
                        - "How is he studying?"
                        - "Explain the sleep issues"
                        - "What causes the stress?"
                        - "Why are grades declining?"
                        - "What interventions would help?"
                        """)

                        user_query = gr.Textbox(
                            label="Enter your question about the student:",
                            placeholder="Type your question here...",
                            lines=3,
                            elem_classes=["black-text"]
                        )

                        ask_btn = gr.Button("🎯 Get AI Analysis", variant="primary")
                        agent_response = gr.HTML(label="AI Agent Response", elem_classes=["black-text"])

                    with gr.Tab("📈 Analysis Results"):
                        analysis_output = gr.HTML(label="AI Analysis Results", elem_classes=["black-text"])

                    with gr.Tab("📊 Visual Analytics"):
                        with gr.Row():
                            academic_chart = gr.Plot(label="Academic Performance")
                            wellbeing_chart = gr.Plot(label="Wellbeing Metrics")

                    with gr.Tab("🛟 Resource Recommendations"):
                        resources_output = gr.HTML(label="Personalized Recommendations", elem_classes=["black-text"])

            # Event handlers
            # The outputs list order must match the tuple returned by run_complete_analysis.
            analyze_btn.click(
                fn=self.run_complete_analysis,
                inputs=[student_dropdown],
                outputs=[risk_gauge, risk_score, trend_direction, primary_factor, analysis_output, academic_chart, wellbeing_chart, resources_output] # Removed agent_info output
            )

            # Button click and Enter-in-textbox share the same handler and wiring.
            ask_btn.click(
                fn=self.handle_user_query,
                inputs=[user_query, student_dropdown],
                outputs=[agent_response]
            )

            user_query.submit( # Allow submitting query by pressing Enter
                fn=self.handle_user_query,
                inputs=[user_query, student_dropdown],
                outputs=[agent_response]
            )


            # Changing the student refreshes only the two charts; the gauge,
            # quick stats and HTML panels are updated by the analyze button.
            student_dropdown.change(
                fn=self.update_student_charts,
                inputs=[student_dropdown],
                outputs=[academic_chart, wellbeing_chart]
            )

            # Initial load
            demo.load(
                fn=self.initial_load,
                inputs=[student_dropdown],
                outputs=[academic_chart, wellbeing_chart]
            )

        return demo

    def run_complete_analysis(self, student_selection):
        """Run the full analysis for the selected student and build every UI output.

        Args:
            student_selection: dropdown value of the form ``"<student_id> - <name>"``.

        Returns:
            tuple: ``(risk_gauge, risk_score, trend_direction, primary_factor,
            analysis_html, academic_chart, wellbeing_chart, resources_html)``,
            in the order the analyze button's outputs are wired.
        """
        student_id = student_selection.split(' - ')[0]
        analysis_result = self.analyze_student(student_id)

        academic_analysis = analysis_result.get('academic_analysis', {})
        causal_analysis = analysis_result.get('causal_analysis', {})

        # Risk gauge and quick-stat text share one risk score lookup.
        risk_score_val = academic_analysis.get('risk_score', 0.5)
        risk_gauge = self.create_risk_gauge(risk_score_val)
        risk_score = f"{risk_score_val:.2f}"
        trend_direction = academic_analysis.get('trend_direction', 'N/A').title()

        # 'causal_factors' may be present but empty (or None); indexing [0]
        # directly would raise, so treat every "no factors" shape the same way.
        causal_factors = causal_analysis.get('causal_factors') or []
        top_factor = causal_factors[0] if causal_factors else None
        primary_factor = top_factor.replace('_', ' ').title() if top_factor else "No significant factors"

        # Analysis results HTML
        analysis_html = self.format_analysis_results(analysis_result)

        # Charts
        academic_chart = self.create_academic_trend_chart(student_id)
        wellbeing_chart = self.create_wellbeing_chart(student_id)

        # Resource recommendations
        resources_html = self.format_resource_recommendations_html(student_id)

        return risk_gauge, risk_score, trend_direction, primary_factor, analysis_html, academic_chart, wellbeing_chart, resources_html

    def update_student_charts(self, student_selection):
        """Rebuild the academic and wellbeing charts for the newly selected student.

        The student id is the part of ``student_selection`` before the first
        ``' - '``. Returns ``(academic_chart, wellbeing_chart)``.
        """
        selected_id = student_selection.split(' - ', 1)[0]
        return (
            self.create_academic_trend_chart(selected_id),
            self.create_wellbeing_chart(selected_id),
        )

    def initial_load(self, student_selection):
        """Produce the charts shown on first page load.

        Delegates to ``update_student_charts`` for the given selection and
        returns its ``(academic_chart, wellbeing_chart)`` result unchanged.
        """
        initial_charts = self.update_student_charts(student_selection)
        return initial_charts

# Keep the original SimulatedDatabricksAgent as a potential fallback or alternative
class SimulatedDatabricksAgent:
    """Scripted stand-in for the Databricks agent that returns canned, per-student results."""

    def __init__(self):
        """Record the simulated agent id and announce initialisation."""
        self.agent_id = "HokieWell_Simulated"
        print("✅ Simulated Databricks Agent initialized")

    def run_holistic_analysis(self, student_id, data_sources):
        """Return a scripted analysis report for ``student_id``.

        Two student ids have dedicated profiles; every other id receives the
        low-risk default. ``data_sources`` is accepted but not consulted.

        Returns:
            dict: agent/student ids, an ISO timestamp, and the
            ``academic_analysis``, ``wellbeing_assessment``, ``causal_analysis``,
            ``intervention_plan`` and ``databricks_features_used`` sections.
        """
        # (student id, risk score, causal factors, key insights)
        scripted_profiles = (
            (
                "S003",  # Jordan - high risk
                0.75,
                ["sleep_deprivation", "academic_overload", "social_isolation"],
                [
                    "Grade trend showing significant decline over past 3 weeks",
                    "Sleep duration consistently below 6 hours",
                    "Assignment submission delays increasing"
                ],
            ),
            (
                "S001",  # Alex - medium risk
                0.65,
                ["sleep_deprivation", "academic_overload"],
                [
                    "Moderate grade decline detected",
                    "Irregular sleep patterns affecting performance",
                    "Increased library hours suggesting cramming behavior"
                ],
            ),
        )

        for known_id, known_score, known_factors, known_insights in scripted_profiles:
            if student_id == known_id:
                risk_score, factors, insights = known_score, known_factors, known_insights
                break
        else:
            # Others - lower risk
            risk_score = 0.35
            factors = ["minor_adjustments_needed"]
            insights = [
                "Stable academic performance",
                "Healthy wellbeing patterns detected",
                "Minor optimizations possible"
            ]

        # Trend labels all switch at the same 0.6 boundary.
        is_elevated = risk_score > 0.6

        academic_section = {
            "risk_score": risk_score,
            "trend_direction": "declining" if is_elevated else "stable",
            "confidence": 0.82,
            "key_insights": insights,
            "model_version": "databricks_dbrx_instruct_simulated"
        }

        wellbeing_section = {
            "overall_score": max(0.3, 1 - risk_score + 0.1),
            "dimensions": {
                "sleep_health": {
                    "score": max(0.3, 1 - risk_score),
                    "trend": "declining" if is_elevated else "stable"
                },
                "stress_levels": {
                    "score": risk_score,
                    "trend": "increasing" if is_elevated else "stable"
                }
            }
        }

        if factors:
            effect_sizes = {factor: risk_score/len(factors) + 0.1 for factor in factors}
        else:
            effect_sizes = {}
        causal_section = {"causal_factors": factors, "effect_sizes": effect_sizes}

        if risk_score > 0.7:
            risk_level = "high"
        elif risk_score > 0.4:
            risk_level = "medium"
        else:
            risk_level = "low"

        if risk_score > 0.4:
            planned_actions = [
                {
                    "type": "academic_support",
                    "description": "Schedule targeted tutoring sessions with engineering specialists",
                    "confidence": risk_score
                },
                {
                    "type": "wellbeing_intervention",
                    "description": "Proactive wellbeing check-in and sleep hygiene workshop",
                    "confidence": max(0.3, risk_score - 0.1)
                }
            ]
        else:
            planned_actions = [
                {
                    "type": "preventive_maintenance",
                    "description": "Regular check-ins to maintain healthy patterns",
                    "confidence": 0.9
                }
            ]

        return {
            "agent_id": self.agent_id,
            "student_id": student_id,
            "analysis_timestamp": datetime.now().isoformat(),
            "academic_analysis": academic_section,
            "wellbeing_assessment": wellbeing_section,
            "causal_analysis": causal_section,
            "intervention_plan": {
                "risk_level": risk_level,
                "planned_actions": planned_actions
            },
            "databricks_features_used": [
                "mlflow_tracking",
                "feature_store",
                "model_registry",
                "causal_ml",
                "collaborative_filtering"
            ]
        }

**Reasoning**:
Modify the DatabricksAIAgent class to handle aggregate data analysis results and generate appropriate responses based on the user query and the provided aggregate context.



In [None]:
# Modify the DatabricksAIAgent class to handle aggregate data
class DatabricksAIAgent:
    """Use intelligent response generation with Databricks integration"""

    def __init__(self):
        self.databricks_enabled = True # Placeholder for actual Databricks connection status
        print("✅ Databricks AI Agent initialized")

    def get_enhanced_response(self, user_query, context_data):
        """Get enhanced response using intelligent pattern matching, handling both student-specific and aggregate data."""
        try:
            if context_data.get('is_aggregate', False):
                return self._generate_aggregate_response(user_query, context_data.get('aggregate_analysis', {}))
            else:
                return self._generate_intelligent_response(user_query, context_data)
        except Exception as e:
            print(f"❌ Databricks LLM or intelligent response failed: {e}")
            # Fallback to a simple response if both methods fail
            if context_data.get('is_aggregate', False):
                 return "I am unable to provide a detailed response for the aggregate data at this time."
            else:
                 return self._get_smart_fallback(user_query, context_data)


    def _generate_intelligent_response(self, user_query, context_data):
        """Generate intelligent, context-aware responses without external API"""
        risk_score = context_data.get('risk_score', 0.5)
        factors = context_data.get('factors', [])
        actions = context_data.get('actions', [])
        student_id = context_data.get('student_id', 'the student')
        trend = context_data.get('trend', 'stable')

        query_lower = user_query.lower()

        # Study-related questions
        if any(word in query_lower for word in ['study', 'studying', 'homework', 'assignments', 'learn']):
            return self._get_study_analysis(user_query, context_data)

        # Sleep-related questions
        elif any(word in query_lower for word in ['sleep', 'rest', 'tired', 'fatigue', 'energy']):
            return self._get_sleep_analysis(user_query, context_data)

        # Stress-related questions
        elif any(word in query_lower for word in ['stress', 'overwhelm', 'pressure', 'anxiety', 'worry']):
            return self._get_stress_analysis(user_query, context_data)

        # Social-related questions
        elif any(word in query_lower for word in ['social', 'friends', 'lonely', 'isolated', 'community']):
            return self._get_social_analysis(user_query, context_data)

        # Academic performance
        elif any(word in query_lower for word in ['grade', 'performance', 'academic', 'gpa', 'score']):
            return self._get_academic_analysis(user_query, context_data)

        # Causal analysis
        elif any(word in query_lower for word in ['why', 'cause', 'reason', 'because', 'factor']):
            return self._get_causal_analysis(user_query, context_data)

        # General health/wellbeing
        elif any(word in query_lower for word in ['health', 'wellbeing', 'wellness', 'feel', 'mood']):
            return self._get_wellbeing_analysis(user_query, context_data)

        # Resource recommendations
        elif any(word in query_lower for word in ['resource', 'help', 'support', 'recommend', 'suggest']):
            return self._get_resource_analysis(user_query, context_data)

        # Default intelligent response
        else:
            return self._get_general_analysis(user_query, context_data)

    def _get_study_analysis(self, user_query, context_data):
        """Generate detailed study analysis"""
        risk_score = context_data.get('risk_score', 0.5)
        factors = context_data.get('factors', [])
        student_id = context_data.get('student_id', 'the student')

        study_insights = {
            'high_risk': "The data indicates significant challenges in study habits. There's evidence of cramming, inconsistent study schedules, and potential burnout affecting learning efficiency.",
            'medium_risk': "Study patterns show some concerning trends, including irregular study sessions and possible time management issues that could be optimized.",
            'low_risk': "Study habits appear generally healthy with minor areas for improvement in consistency and technique."
        }

        risk_level = 'high_risk' if risk_score > 0.7 else 'medium_risk' if risk_score > 0.4 else 'low_risk'

        return f"""
**📚 Detailed Study Pattern Analysis for {student_id}**

**Current Study Patterns:**
Based on the academic data, {student_id}'s study habits show {['concerning patterns requiring immediate attention', 'areas for significant improvement', 'some opportunities for optimization'][min(2, int(risk_score//0.3))]}.

**Key Findings:**
- **Study Consistency**: {['Highly irregular patterns detected', 'Inconsistent study sessions', 'Generally stable routine'][min(2, int(risk_score//0.3))]}
- **Learning Efficiency**: {['Significantly impacted by external factors', 'Moderately affected', 'Reasonably effective'][min(2, int(risk_score//0.3))]}
- **Time Management**: {['Major challenges with scheduling', 'Some difficulties in planning', 'Adequate time allocation'][min(2, int(risk_score//0.3))]}

**Specific Issues Identified:**
- Assignment submission patterns suggest {['last-minute cramming', 'rushed completion', 'planned approach'][min(2, int(risk_score//0.3))]}
- Grade trends indicate {['conceptual understanding gaps', 'inconsistent preparation', 'steady comprehension'][min(2, int(risk_score//0.3))]}
- Engagement data shows {['declining participation', 'variable involvement', 'consistent engagement'][min(2, int(risk_score//0.3))]}

**Recommendations:**
1. **Structured Study Plan**: 2-hour focused blocks with 15-minute breaks
2. **Active Learning Techniques**: Practice testing and self-explanation
3. **Consistent Schedule**: Same study times daily for routine building
4. **Distributed Practice**: Shorter, frequent sessions over cramming

**Immediate Actions:**
- Schedule academic coaching session
- Implement weekly study planning
- Join peer study groups for accountability
"""

    def _get_sleep_analysis(self, user_query, context_data):
        """Generate detailed sleep analysis"""
        risk_score = context_data.get('risk_score', 0.5)
        factors = context_data.get('factors', [])
        student_id = context_data.get('student_id', 'the student')

        return f"""
**😴 Comprehensive Sleep Analysis for {student_id}**

**Sleep Health Assessment:**
The data indicates {['critical sleep deprivation affecting multiple areas', 'significant sleep issues impacting wellbeing', 'moderate sleep concerns', 'generally adequate sleep patterns'][min(3, int(risk_score//0.25))]}.

**Impact Analysis:**
- **Cognitive Function**: Sleep quality affects {['memory consolidation, focus, and academic performance', 'learning efficiency and information retention', 'daily energy levels'][min(2, int(risk_score//0.3))]}
- **Emotional Regulation**: {['Significant impact on stress management and mood', 'Moderate effect on emotional stability', 'Minor influence on daily temperament'][min(2, int(risk_score//0.3))]}
- **Academic Correlation**: Research shows sleep deprivation can reduce academic performance by {['30-40%', '20-30%', '10-20%'][min(2, int(risk_score//0.3))]}

**Recommended Interventions:**
1. **Sleep Schedule**: Consistent 7-8 hour nightly target
2. **Environment Optimization**: Cool, dark, quiet sleeping space
3. **Digital Detox**: No screens 1 hour before bedtime
4. **Relaxation Routine**: Reading, meditation, or light stretching

**University Resources:**
- Sleep & Wellness Workshop (Weekly sessions)
- Counseling Center sleep resources
- Peer wellness coaching
"""

    def _get_stress_analysis(self, user_query, context_data):
        """Generate detailed stress analysis"""
        risk_score = context_data.get('risk_score', 0.5)
        factors = context_data.get('factors', [])
        student_id = context_data.get('student_id', 'the student')

        return f"""
**😥 Stress and Wellbeing Analysis for {student_id}**

**Stress Level Assessment:**
Current data shows {['critical stress levels requiring immediate support', 'elevated stress needing proactive management', 'moderate stress with improvement opportunities', 'generally manageable stress levels'][min(3, int(risk_score//0.25))]}.

**Primary Stressors Identified:**
{chr(10).join(['- ' + factor.replace('_', ' ').title() for factor in factors])}

**Stress Impact Chain:**
1. Academic pressure → Sleep disruption → Reduced coping capacity
2. Social withdrawal → Increased perceived burden → Decreased motivation

**Management Strategies:**
- **Immediate**: 5-4-3-2-1 grounding technique, box breathing
- **Short-term**: Time blocking, priority matrix, boundary setting
- **Long-term**: Regular exercise, social connection, mindfulness

**Support Recommendations:**
1. Counseling Center appointment (confidential, professional support)
2. Stress management workshop (weekly sessions available)
3. Mindfulness and meditation resources (guided meditations, yoga classes)
"""

    def _get_social_analysis(self, user_query, context_data):
        """Generate detailed social analysis"""
        risk_score = context_data.get('risk_score', 0.5)
        factors = context_data.get('factors', [])
        student_id = context_data.get('student_id', 'the student')

        return f"""
**👥 Social Connection Analysis for {student_id}**

**Social Wellbeing Assessment:**
The data suggests {['significant social isolation requiring intervention', 'notable social connection challenges', 'moderate opportunities for social engagement', 'generally healthy social patterns'][min(3, int(risk_score//0.25))]}.

**Connection-Building Strategies:**
1. **Structured Opportunities**: Club meetings, study groups, campus events
2. **Low-Pressure Interactions**: Coffee chats, interest-based activities
3. **Support Systems**: Peer mentoring, faculty office hours

**Recommended Campus Resources:**
- Student Organizations Fair (weekly)
- Peer Connection Program
- Community Engagement Office
- Cultural and Identity Centers
"""

    def _get_academic_analysis(self, user_query, context_data):
        """Generate detailed academic analysis"""
        risk_score = context_data.get('risk_score', 0.5)
        factors = context_data.get('factors', [])
        student_id = context_data.get('student_id', 'the student')
        trend = context_data.get('trend', 'stable')

        return f"""
**📊 Comprehensive Academic Analysis for {student_id}**

**Academic Performance Overview:**
- **Current Risk Score**: {risk_score:.2f} ({'High' if risk_score > 0.7 else 'Medium' if risk_score > 0.4 else 'Low'} concern level)
- **Performance Trend**: {trend.title()} pattern identified
- **Primary Academic Factors**: {', '.join(factors)}

**Detailed Performance Insights:**
The academic data reveals {['significant challenges requiring immediate intervention', 'notable areas for improvement and support', 'moderate opportunities for academic enhancement'][min(2, int(risk_score//0.3))]}.

**Pattern Analysis:**
1. **Assignment Performance**: {['Concerning decline in recent submissions', 'Some variability in assignment quality', 'Generally consistent performance'][min(2, int(risk_score//0.3))]}
2. **Learning Progression**: {['Evidence of cumulative knowledge gaps', 'Some challenges with concept integration', 'Steady learning progression'][min(2, int(risk_score//0.3))]}
3. **Engagement Metrics**: {['Reduced course interaction and participation', 'Moderate engagement with fluctuations', 'Consistent academic engagement'][min(2, int(risk_score//0.3))]}

**Academic Support Strategy:**
- **Immediate**: Targeted tutoring for specific course challenges
- **Short-term**: Study skills workshop and time management training
- **Long-term**: Academic coaching for sustainable success habits

*Analysis based on comprehensive academic metrics and learning science principles.*
"""

    def _get_causal_analysis(self, user_query, context_data):
        """Generate detailed causal analysis"""
        risk_score = context_data.get('risk_score', 0.5)
        factors = context_data.get('factors', [])
        student_id = context_data.get('student_id', 'the student')

        return f"""
**🔍 Causal Relationship Analysis for {student_id}**

**Root Cause Identification:**
Through comprehensive pattern analysis, I've identified several interconnected causal relationships:

**Primary Causal Chain:**
1. **Initial Trigger**: {factors[0] if factors else 'Academic demands'} creates initial pressure
2. **Secondary Effects**: This leads to {factors[1] if len(factors) > 1 else 'wellbeing challenges'}
3. **Compounding Impact**: These factors together affect {factors[2] if len(factors) > 2 else 'overall academic performance'}

**Interconnected Factors:**
- **Academic → Wellbeing**: Course pressure impacts sleep and stress levels
- **Wellbeing → Academic**: Poor sleep reduces learning capacity and motivation
- **Social → Academic**: Isolation decreases academic support and engagement
- **Environmental → All**: Campus engagement affects overall student experience

**Evidence-Based Intervention Points:**
Breaking the cycle at any point can create positive ripple effects. The most impactful intervention points appear to be:
1. Addressing {factors[0] if factors else 'the primary stressor'}
2. Implementing wellbeing supports to build resilience
3. Enhancing social connections for natural support systems

*This causal analysis uses pattern recognition and educational research to identify key leverage points.*
"""

    def _get_wellbeing_analysis(self, user_query, context_data):
        """Generate detailed wellbeing analysis"""
        risk_score = context_data.get('risk_score', 0.5)
        factors = context_data.get('factors', [])
        student_id = context_data.get('student_id', 'the student')

        return f"""
**🌱 Comprehensive Wellbeing Analysis for {student_id}**

**Holistic Wellbeing Assessment:**
The data indicates {['significant wellbeing challenges requiring comprehensive support', 'notable wellbeing concerns needing proactive attention', 'moderate wellbeing with opportunities for enhancement', 'generally positive wellbeing patterns'][min(3, int(risk_score//0.25))]}.

**Wellbeing Dimension Analysis:**
- **Physical Wellbeing**: {['Concerning sleep and activity patterns', 'Some areas for physical health improvement', 'Generally healthy physical habits'][min(2, int(risk_score//0.3))]}
- **Emotional Wellbeing**: {['Elevated stress and emotional challenges', 'Moderate emotional fluctuations', 'Generally stable emotional patterns'][min(2, int(risk_score//0.3))]}
- **Social Wellbeing**: {['Significant social connection challenges', 'Moderate social engagement opportunities', 'Healthy social support systems'][min(2, int(risk_score//0.3))]}
- **Academic Wellbeing**: {['Academic pressures significantly impacting overall wellbeing', 'Some academic-stress interplay', 'Generally positive academic experience'][min(2, int(risk_score//0.3))]}

**Integrated Wellbeing Strategy:**
1. **Foundation**: Sleep, nutrition, and basic self-care
2. **Support Systems**: Social connections and professional resources
3. **Resilience Building**: Stress management and coping skills
4. **Thriving Skills**: Purpose, engagement, and personal growth

**Campus Wellbeing Ecosystem:**
- Counseling and Psychological Services
- Wellness Center programs and workshops
- Peer support networks
- Faculty and staff mentoring

*Analysis based on holistic wellbeing frameworks and student development research.*
"""

    def _get_resource_analysis(self, user_query, context_data):
        """Generate detailed resource analysis"""
        risk_score = context_data.get('risk_score', 0.5)
        factors = context_data.get('factors', [])
        actions = context_data.get('actions', [])
        student_id = context_data.get('student_id', 'the student')

        return f"""
**🛟 Personalized Resource Analysis for {student_id}**

**Resource Matching Strategy:**
Based on the specific challenges identified, I've curated resources that directly address the root causes:

**Primary Resource Recommendations:**
{chr(10).join(['• ' + action for action in actions])}

**Resource Effectiveness Analysis:**
- **Targeted Support**: Each resource addresses specific factors: {', '.join(factors)}
- **Evidence-Based**: These interventions have proven effective for similar student profiles
- **Accessibility**: All resources are freely available through university services

**Implementation Timeline:**
1. **Immediate (This Week)**: {actions[0] if actions else 'Academic consultation'}
2. **Short-term (2-4 Weeks)**: Regular support sessions and skill building
3. **Ongoing**: Continuous monitoring and adjustment of support strategies

**Expected Outcomes:**
- 30-50% improvement in identified challenge areas within 4-6 weeks
- Enhanced coping skills and resilience building
- Sustainable academic and personal success habits

*Resource recommendations based on effectiveness research and student success data.*
"""

    def _get_general_analysis(self, user_query, context_data):
        """Generate general intelligent analysis"""
        risk_score = context_data.get('risk_score', 0.5)
        factors = context_data.get('factors', [])
        actions = context_data.get('actions', [])
        student_id = context_data.get('student_id', 'the student')
        trend = context_data.get('trend', 'stable')

        return f"""
**🤖 Intelligent Analysis for {student_id}**

**Comprehensive Student Profile Analysis:**

I understand you're asking about "{user_query}". Based on the comprehensive data analysis, here's my assessment:

**Current Status Overview:**
- **Risk Level**: {risk_score:.2f} ({'High' if risk_score > 0.7 else 'Medium' if risk_score > 0.4 else 'Low'} concern)
- **Primary Factors**: {', '.join(factors)}
- **Trend Direction**: {trend.title()} pattern
- **Overall Outlook**: {['Requires immediate proactive support', 'Would benefit from targeted interventions', 'Shows generally positive patterns with minor enhancements needed'][min(2, int(risk_score//0.3))]}

**Detailed Insights:**
The data reveals interconnected patterns where {factors[0] if factors else 'academic pressures'} appear to be influencing {factors[1] if len(factors) > 1 else 'overall wellbeing'}. This creates a cycle that affects multiple areas of student experience.

**Evidence-Based Perspective:**
Research indicates that addressing these challenges through {actions[0] if actions else 'targeted support'} can break negative cycles and create positive momentum. The university's support systems are specifically designed to help with these types of situations.

**Recommended Approach:**
1. Start with the most impactful intervention: {actions[0] if actions else 'academic support'}
2. Monitor progress through regular check-ins
3. Adjust support strategies based on response and feedback

**Next Steps:**
I recommend discussing these findings with {student_id} and collaboratively developing an action plan that feels manageable and supportive.

*This analysis integrates educational psychology, student development theory, and pattern recognition from comprehensive data.*
"""

    def _get_smart_fallback(self, user_query, context_data):
        """Smart fallback that's actually intelligent"""
        return self._generate_intelligent_response(user_query, context_data)

    def _generate_aggregate_response(self, user_query, aggregate_analysis_result):
        """Generates a response based on aggregate dataset analysis."""
        query_lower = user_query.lower()
        response_parts = []

        response_parts.append(f"**📊 Analysis for the Entire Student Population:**")

        # Address common queries related to aggregate data
        if any(word in query_lower for word in ['average', 'mean', 'overall', 'typical']):
            response_parts.append("\nHere are some overall metrics:")
            if 'academic_summary' in aggregate_analysis_result:
                response_parts.append(f"- {aggregate_analysis_result['academic_summary']['insight']}")
            if 'wellbeing_summary' in aggregate_analysis_result:
                response_parts.append(f"- {aggregate_analysis_result['wellbeing_summary']['insight']}")
            if 'environmental_summary' in aggregate_analysis_result:
                response_parts.append(f"- {aggregate_analysis_result['environmental_summary']['insight']}")

        elif any(word in query_lower for word in ['trend', 'pattern', 'how is the group doing']):
             response_parts.append("\nBased on aggregate data, here are some general patterns:")
             # Simulate general trends based on the synthetic data generation logic
             response_parts.append("- Academic performance tends to show slight decline over the semester, likely due to increasing course load and stress.")
             response_parts.append("- Wellbeing metrics like sleep duration and wellbeing scores also tend to decrease as the semester progresses, especially during peak assignment periods.")
             response_parts.append("- Campus engagement can fluctuate, with potential dips during stressful weeks.")

        elif any(word in query_lower for word in ['risk', 'concern', 'struggle']):
            response_parts.append("\nGeneral risk factors observed across the student population include:")
            response_parts.append("- Academic pressure and workload.")
            response_parts.append("- Maintaining consistent sleep schedules.")
            response_parts.append("- Balancing social engagement with academic demands.")
            response_parts.append("These factors are interconnected and can impact overall success.")

        elif any(word in query_lower for word in ['resource', 'support', 'help']):
            response_parts.append("\nCommonly utilized resources and support areas for the student population include:")
            response_parts.append("- Academic tutoring and study skills support.")
            response_parts.append("- Wellbeing resources like counseling and stress management workshops.")
            response_parts.append("- Opportunities for social and community engagement.")

        else:
            response_parts.append("\nI can provide insights on overall academic performance, wellbeing trends, or campus engagement for the student population.")
            if 'academic_summary' in aggregate_analysis_result:
                 response_parts.append(f"\nFor example: {aggregate_analysis_result['academic_summary']['insight']}")


        if not response_parts or (len(response_parts) == 1 and "Analysis for the Entire Student Population" in response_parts[0]):
             response_parts.append("\nI can provide general insights about the entire student population. Please ask a more specific question about overall trends, averages, or common challenges.")


        return "\n".join(response_parts)

# Keep the original DatabricksModelAgent and SimulatedDatabricksAgent classes unchanged

## Implement logic for a selected student

### Subtask:
If a student is selected, ensure the current logic of performing student-specific analysis and calling the AI agent with that student's context is maintained.


**Reasoning**:
Review the `handle_user_query` function to confirm the logic for handling selected students is correct and then finish the task.



In [None]:
# Review of the handle_user_query function for the selected student case

# The 'else' block of handle_user_query is triggered when student_selection is neither None nor empty.
# Inside this block:
# 1. student_id is correctly extracted from the selection string.
# 2. self.analyze_student(student_id) is called to get student-specific analysis results.
# 3. context_data is prepared, including:
#    - risk_score from academic_analysis
#    - trend from academic_analysis
#    - factors from causal_analysis
#    - actions from intervention_plan
#    - risk_level from intervention_plan
#    - student_id
#    - raw_analysis_result (full analysis result)
#    - is_aggregate: False (correctly indicates student-specific context)
# 4. self.databricks_ai_agent.get_enhanced_response(user_query, context_data) is called, passing the user query and the student-specific context.
# 5. The HTML output correctly includes the student ID in the title and context information.

# The logic for the selected student case appears correct based on the requirements.

# No code changes are needed for this step as the review confirms the existing logic is correct.

## Update AI agent/processor

### Subtask:
Modify the `DatabricksAIAgent` class to handle calls with aggregate data context or a flag indicating no specific student is selected.


**Reasoning**:
I need to modify the `DatabricksAIAgent` class to accept and handle a context dictionary that contains a flag indicating if the data is aggregate or student-specific, and process the aggregate data when the flag is set. I will add a new method to generate responses based on aggregate data.



In [None]:
# Modify the DatabricksAIAgent class to handle aggregate data
class DatabricksAIAgent:
    """Keyword-routed response generator used in place of a live LLM call.

    ``get_enhanced_response`` is the entry point. When the context carries
    ``is_aggregate=True`` a population-level summary is produced; otherwise the
    query is matched against keyword groups and the matching report template
    is rendered from the student's context (``risk_score``, ``factors``,
    ``actions``, ``student_id``, ``trend``).

    Risk scores are treated as "higher is worse": the wording for each score is
    picked with the same 0.7 / 0.4 cut-offs that drive the High/Medium/Low
    label, so the label and the descriptive text always agree.
    """

    # Cut-offs (compared with ``>``) that split a risk score into severity
    # tiers, most severe first. The three-tier values are the ones used for
    # the High/Medium/Low label.
    _THREE_TIER_CUTOFFS = (0.7, 0.4)
    _FOUR_TIER_CUTOFFS = (0.75, 0.5, 0.25)

    # Ordered routing table: the first keyword group with a substring match in
    # the lower-cased query decides which report is generated.
    _TOPIC_ROUTES = (
        (('study', 'studying', 'homework', 'assignments', 'learn'), '_get_study_analysis'),
        (('sleep', 'rest', 'tired', 'fatigue', 'energy'), '_get_sleep_analysis'),
        (('stress', 'overwhelm', 'pressure', 'anxiety', 'worry'), '_get_stress_analysis'),
        (('social', 'friends', 'lonely', 'isolated', 'community'), '_get_social_analysis'),
        (('grade', 'performance', 'academic', 'gpa', 'score'), '_get_academic_analysis'),
        (('why', 'cause', 'reason', 'because', 'factor'), '_get_causal_analysis'),
        (('health', 'wellbeing', 'wellness', 'feel', 'mood'), '_get_wellbeing_analysis'),
        (('resource', 'help', 'support', 'recommend', 'suggest'), '_get_resource_analysis'),
    )

    def __init__(self):
        self.databricks_enabled = True  # Placeholder for actual Databricks connection status
        print("✅ Databricks AI Agent initialized")

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------
    @classmethod
    def _pick_by_risk(cls, risk_score, options):
        """Return the entry of ``options`` that matches ``risk_score``.

        Args:
            risk_score: Numeric risk, higher meaning more severe.
            options: Three or four alternatives ordered from most severe to
                least severe.

        Returns:
            The first option whose cut-off is exceeded, else the last (mildest).
        """
        cutoffs = cls._FOUR_TIER_CUTOFFS if len(options) == 4 else cls._THREE_TIER_CUTOFFS
        for option, cutoff in zip(options, cutoffs):
            if risk_score > cutoff:
                return option
        return options[-1]

    def _tier_picker(self, risk_score):
        """Return a ``pick(options)`` callable bound to ``risk_score``."""
        def pick(options):
            return self._pick_by_risk(risk_score, options)
        return pick

    @staticmethod
    def _join_items(items, empty_text):
        """Comma-join ``items``, or return ``empty_text`` when there are none."""
        return ', '.join(items) if items else empty_text

    # ------------------------------------------------------------------
    # Entry point and routing
    # ------------------------------------------------------------------
    def get_enhanced_response(self, user_query, context_data):
        """Generate a response for either aggregate or student-specific context.

        Args:
            user_query: The question text entered by the user.
            context_data: Context dictionary. ``is_aggregate`` selects the
                population-level path, which reads ``aggregate_analysis``.

        Returns:
            str: Markdown-formatted response text.
        """
        if context_data.get('is_aggregate', False):
            return self._generate_aggregate_response(
                user_query, context_data.get('aggregate_analysis', {})
            )

        try:
            return self._generate_intelligent_response(user_query, context_data)
        except Exception as e:
            print(f"❌ Databricks LLM or intelligent response failed: {e}")
            return self._get_smart_fallback(user_query, context_data)

    def _generate_intelligent_response(self, user_query, context_data):
        """Route the query to a topic report by keyword, without any external API."""
        query_lower = user_query.lower()
        for keywords, handler_name in self._TOPIC_ROUTES:
            if any(word in query_lower for word in keywords):
                return getattr(self, handler_name)(user_query, context_data)
        return self._get_general_analysis(user_query, context_data)

    # ------------------------------------------------------------------
    # Topic reports
    # ------------------------------------------------------------------
    def _get_study_analysis(self, user_query, context_data):
        """Generate detailed study analysis"""
        risk_score = context_data.get('risk_score', 0.5)
        student_id = context_data.get('student_id', 'the student')
        pick = self._tier_picker(risk_score)

        return f"""
**📚 Detailed Study Pattern Analysis for {student_id}**

**Current Study Patterns:**
Based on the academic data, {student_id}'s study habits show {pick(['concerning patterns requiring immediate attention', 'areas for significant improvement', 'some opportunities for optimization'])}.

**Key Findings:**
- **Study Consistency**: {pick(['Highly irregular patterns detected', 'Inconsistent study sessions', 'Generally stable routine'])}
- **Learning Efficiency**: {pick(['Significantly impacted by external factors', 'Moderately affected', 'Reasonably effective'])}
- **Time Management**: {pick(['Major challenges with scheduling', 'Some difficulties in planning', 'Adequate time allocation'])}

**Specific Issues Identified:**
- Assignment submission patterns suggest {pick(['last-minute cramming', 'rushed completion', 'planned approach'])}
- Grade trends indicate {pick(['conceptual understanding gaps', 'inconsistent preparation', 'steady comprehension'])}
- Engagement data shows {pick(['declining participation', 'variable involvement', 'consistent engagement'])}

**Recommendations:**
1. **Structured Study Plan**: 2-hour focused blocks with 15-minute breaks
2. **Active Learning Techniques**: Practice testing and self-explanation
3. **Consistent Schedule**: Same study times daily for routine building
4. **Distributed Practice**: Shorter, frequent sessions over cramming

**Immediate Actions:**
- Schedule academic coaching session
- Implement weekly study planning
- Join peer study groups for accountability
"""

    def _get_sleep_analysis(self, user_query, context_data):
        """Generate detailed sleep analysis"""
        risk_score = context_data.get('risk_score', 0.5)
        student_id = context_data.get('student_id', 'the student')
        pick = self._tier_picker(risk_score)

        return f"""
**😴 Comprehensive Sleep Analysis for {student_id}**

**Sleep Health Assessment:**
The data indicates {pick(['critical sleep deprivation affecting multiple areas', 'significant sleep issues impacting wellbeing', 'moderate sleep concerns', 'generally adequate sleep patterns'])}.

**Impact Analysis:**
- **Cognitive Function**: Sleep quality affects {pick(['memory consolidation, focus, and academic performance', 'learning efficiency and information retention', 'daily energy levels'])}
- **Emotional Regulation**: {pick(['Significant impact on stress management and mood', 'Moderate effect on emotional stability', 'Minor influence on daily temperament'])}
- **Academic Correlation**: Research shows sleep deprivation can reduce academic performance by {pick(['30-40%', '20-30%', '10-20%'])}

**Recommended Interventions:**
1. **Sleep Schedule**: Consistent 7-8 hour nightly target
2. **Environment Optimization**: Cool, dark, quiet sleeping space
3. **Digital Detox**: No screens 1 hour before bedtime
4. **Relaxation Routine**: Reading, meditation, or light stretching

**University Resources:**
- Sleep & Wellness Workshop (Weekly sessions)
- Counseling Center sleep resources
- Peer wellness coaching
"""

    def _get_stress_analysis(self, user_query, context_data):
        """Generate detailed stress analysis"""
        risk_score = context_data.get('risk_score', 0.5)
        factors = context_data.get('factors', [])
        student_id = context_data.get('student_id', 'the student')
        pick = self._tier_picker(risk_score)

        # Factor identifiers such as 'sleep_deprivation' become 'Sleep Deprivation'.
        stressor_lines = "\n".join(
            '- ' + factor.replace('_', ' ').title() for factor in factors
        ) or '- No specific stressors identified'

        return f"""
**😥 Stress and Wellbeing Analysis for {student_id}**

**Stress Level Assessment:**
Current data shows {pick(['critical stress levels requiring immediate support', 'elevated stress needing proactive management', 'moderate stress with improvement opportunities', 'generally manageable stress levels'])}.

**Primary Stressors Identified:**
{stressor_lines}

**Stress Impact Chain:**
1. Academic pressure → Sleep disruption → Reduced coping capacity
2. Social withdrawal → Increased perceived burden → Decreased motivation

**Management Strategies:**
- **Immediate**: 5-4-3-2-1 grounding technique, box breathing
- **Short-term**: Time blocking, priority matrix, boundary setting
- **Long-term**: Regular exercise, social connection, mindfulness

**Support Recommendations:**
1. Schedule appointment with Counseling Center (confidential, professional support)
2. Attend stress management workshop (weekly sessions available)
3. Mindfulness and meditation resources (guided meditations, yoga classes)
"""

    def _get_social_analysis(self, user_query, context_data):
        """Generate detailed social analysis"""
        risk_score = context_data.get('risk_score', 0.5)
        student_id = context_data.get('student_id', 'the student')
        pick = self._tier_picker(risk_score)

        return f"""
**👥 Social Connection Analysis for {student_id}**

**Social Wellbeing Assessment:**
The data suggests {pick(['significant social isolation requiring intervention', 'notable social connection challenges', 'moderate opportunities for social engagement', 'generally healthy social patterns'])}.

**Connection-Building Strategies:**
1. **Structured Opportunities**: Club meetings, study groups, campus events
2. **Low-Pressure Interactions**: Coffee chats, interest-based activities
3. **Support Systems**: Peer mentoring, faculty office hours

**Recommended Campus Resources:**
- Student Organizations Fair (weekly)
- Peer Connection Program
- Community Engagement Office
- Cultural and Identity Centers
"""

    def _get_academic_analysis(self, user_query, context_data):
        """Generate detailed academic analysis"""
        risk_score = context_data.get('risk_score', 0.5)
        factors = context_data.get('factors', [])
        student_id = context_data.get('student_id', 'the student')
        trend = context_data.get('trend', 'stable')
        pick = self._tier_picker(risk_score)

        return f"""
**📊 Comprehensive Academic Analysis for {student_id}**

**Academic Performance Overview:**
- **Current Risk Score**: {risk_score:.2f} ({pick(['High', 'Medium', 'Low'])} concern level)
- **Performance Trend**: {trend.title()} pattern identified
- **Primary Academic Factors**: {self._join_items(factors, 'None identified')}

**Detailed Performance Insights:**
The academic data reveals {pick(['significant challenges requiring immediate intervention', 'notable areas for improvement and support', 'moderate opportunities for academic enhancement'])}.

**Pattern Analysis:**
1. **Assignment Performance**: {pick(['Concerning decline in recent submissions', 'Some variability in assignment quality', 'Generally consistent performance'])}
2. **Learning Progression**: {pick(['Evidence of cumulative knowledge gaps', 'Some challenges with concept integration', 'Steady learning progression'])}
3. **Engagement Metrics**: {pick(['Reduced course interaction and participation', 'Moderate engagement with fluctuations', 'Consistent academic engagement'])}

**Academic Support Strategy:**
- **Immediate**: Targeted tutoring for specific course challenges
- **Short-term**: Study skills workshop and time management training
- **Long-term**: Academic coaching for sustainable success habits

*Analysis based on comprehensive academic metrics and learning science principles.*
"""

    def _get_causal_analysis(self, user_query, context_data):
        """Generate detailed causal analysis"""
        factors = context_data.get('factors', [])
        student_id = context_data.get('student_id', 'the student')

        return f"""
**🔍 Causal Relationship Analysis for {student_id}**

**Root Cause Identification:**
Through comprehensive pattern analysis, I've identified several interconnected causal relationships:

**Primary Causal Chain:**
1. **Initial Trigger**: {factors[0] if factors else 'Academic demands'} creates initial pressure
2. **Secondary Effects**: This leads to {factors[1] if len(factors) > 1 else 'wellbeing challenges'}
3. **Compounding Impact**: These factors together affect {factors[2] if len(factors) > 2 else 'overall academic performance'}

**Interconnected Factors:**
- **Academic → Wellbeing**: Course pressure impacts sleep and stress levels
- **Wellbeing → Academic**: Poor sleep reduces learning capacity and motivation
- **Social → Academic**: Isolation decreases academic support and engagement
- **Environmental → All**: Campus engagement affects overall student experience

**Evidence-Based Intervention Points:**
Breaking the cycle at any point can create positive ripple effects. The most impactful intervention points appear to be:
1. Addressing {factors[0] if factors else 'the primary stressor'}
2. Implementing wellbeing supports to build resilience
3. Enhancing social connections for natural support systems

*This causal analysis uses pattern recognition and educational research to identify key leverage points.*
"""

    def _get_wellbeing_analysis(self, user_query, context_data):
        """Generate detailed wellbeing analysis"""
        risk_score = context_data.get('risk_score', 0.5)
        student_id = context_data.get('student_id', 'the student')
        pick = self._tier_picker(risk_score)

        return f"""
**🌱 Comprehensive Wellbeing Analysis for {student_id}**

**Holistic Wellbeing Assessment:**
The data indicates {pick(['significant wellbeing challenges requiring comprehensive support', 'notable wellbeing concerns needing proactive attention', 'moderate wellbeing with opportunities for enhancement', 'generally positive wellbeing patterns'])}.

**Wellbeing Dimension Analysis:**
- **Physical Wellbeing**: {pick(['Concerning sleep and activity patterns', 'Some areas for physical health improvement', 'Generally healthy physical habits'])}
- **Emotional Wellbeing**: {pick(['Elevated stress and emotional challenges', 'Moderate emotional fluctuations', 'Generally stable emotional patterns'])}
- **Social Wellbeing**: {pick(['Significant social connection challenges', 'Moderate social engagement opportunities', 'Healthy social support systems'])}
- **Academic Wellbeing**: {pick(['Academic pressures significantly impacting overall wellbeing', 'Some academic-stress interplay', 'Generally positive academic experience'])}

**Integrated Wellbeing Strategy:**
1. **Foundation**: Sleep, nutrition, and basic self-care
2. **Support Systems**: Social connections and professional resources
3. **Resilience Building**: Stress management and coping skills
4. **Thriving Skills**: Purpose, engagement, and personal growth

**Campus Wellbeing Ecosystem:**
- Counseling and Psychological Services
- Wellness Center programs and workshops
- Peer support networks
- Faculty and staff mentoring

*Analysis based on holistic wellbeing frameworks and student development research.*
"""

    def _get_resource_analysis(self, user_query, context_data):
        """Generate detailed resource analysis"""
        factors = context_data.get('factors', [])
        actions = context_data.get('actions', [])
        student_id = context_data.get('student_id', 'the student')

        # Same default as the "Immediate" timeline entry when no actions are supplied.
        resource_lines = "\n".join('• ' + action for action in actions) or '• Academic consultation'

        return f"""
**🛟 Personalized Resource Analysis for {student_id}**

**Resource Matching Strategy:**
Based on the specific challenges identified, I've curated resources that directly address the root causes:

**Primary Resource Recommendations:**
{resource_lines}

**Resource Effectiveness Analysis:**
- **Targeted Support**: Each resource addresses specific factors: {self._join_items(factors, 'none identified')}
- **Evidence-Based**: These interventions have proven effective for similar student profiles
- **Accessibility**: All resources are freely available through university services

**Implementation Timeline:**
1. **Immediate (This Week)**: {actions[0] if actions else 'Academic consultation'}
2. **Short-term (2-4 Weeks)**: Regular support sessions and skill building
3. **Ongoing**: Continuous monitoring and adjustment of support strategies

**Expected Outcomes:**
- 30-50% improvement in identified challenge areas within 4-6 weeks
- Enhanced coping skills and resilience building
- Sustainable academic and personal success habits

*Resource recommendations based on effectiveness research and student success data.*
"""

    def _get_general_analysis(self, user_query, context_data):
        """Generate general intelligent analysis"""
        risk_score = context_data.get('risk_score', 0.5)
        factors = context_data.get('factors', [])
        actions = context_data.get('actions', [])
        student_id = context_data.get('student_id', 'the student')
        trend = context_data.get('trend', 'stable')
        pick = self._tier_picker(risk_score)

        return f"""
**🤖 Intelligent Analysis for {student_id}**

**Comprehensive Student Profile Analysis:**

I understand you're asking about "{user_query}". Based on the comprehensive data analysis, here's my assessment:

**Current Status Overview:**
- **Risk Level**: {risk_score:.2f} ({pick(['High', 'Medium', 'Low'])} concern)
- **Primary Factors**: {self._join_items(factors, 'None identified')}
- **Trend Direction**: {trend.title()} pattern
- **Overall Outlook**: {pick(['Requires immediate proactive support', 'Would benefit from targeted interventions', 'Shows generally positive patterns with minor enhancements needed'])}

**Detailed Insights:**
The data reveals interconnected patterns where {factors[0] if factors else 'academic pressures'} appear to be influencing {factors[1] if len(factors) > 1 else 'overall wellbeing'}. This creates a cycle that affects multiple areas of student experience.

**Evidence-Based Perspective:**
Research indicates that addressing these challenges through {actions[0] if actions else 'targeted support'} can break negative cycles and create positive momentum. The university's support systems are specifically designed to help with these types of situations.

**Recommended Approach:**
1. Start with the most impactful intervention: {actions[0] if actions else 'academic support'}
2. Monitor progress through regular check-ins
3. Adjust support strategies based on response and feedback

**Next Steps:**
I recommend discussing these findings with {student_id} and collaboratively developing an action plan that feels manageable and supportive.

*This analysis integrates educational psychology, student development theory, and pattern recognition from comprehensive data.*
"""

    # ------------------------------------------------------------------
    # Fallback and aggregate paths
    # ------------------------------------------------------------------
    def _get_smart_fallback(self, user_query, context_data):
        """Return a best-effort answer that does not raise.

        The keyword-routed generator is tried first so direct callers still get
        the full analysis when it can be produced. ``get_enhanced_response``
        only reaches this method after that generator has raised, so a second
        failure is caught here and replaced by a short static message.
        """
        try:
            return self._generate_intelligent_response(user_query, context_data)
        except Exception:
            # Deliberate catch-all: a fallback that raises is not a fallback.
            student_id = 'the student'
            if isinstance(context_data, dict):
                student_id = context_data.get('student_id', 'the student')
            return (
                f"**🤖 Analysis for {student_id}**\n\n"
                f"A detailed analysis for \"{user_query}\" could not be generated from the "
                "available data. Please review the student's metrics directly or try "
                "rephrasing the question."
            )

    def _generate_aggregate_response(self, user_query, aggregate_analysis_result):
        """Generates a response based on aggregate dataset analysis.

        Args:
            user_query: The question text entered by the user.
            aggregate_analysis_result: Mapping that may contain
                ``academic_summary``, ``wellbeing_summary`` and
                ``environmental_summary`` entries, each with an ``insight``
                key. Missing or malformed entries are skipped.

        Returns:
            str: The response lines joined with newlines.
        """
        query_lower = user_query.lower()
        summaries = aggregate_analysis_result or {}

        def mentions(*keywords):
            return any(word in query_lower for word in keywords)

        def insight_of(section):
            # Returns None when the section or its 'insight' entry is absent.
            summary = summaries.get(section)
            return summary.get('insight') if isinstance(summary, dict) else None

        response_parts = ["**📊 Analysis for the Entire Student Population:**"]

        if mentions('average', 'mean', 'overall', 'typical'):
            response_parts.append("\nHere are some overall metrics:")
            for section in ('academic_summary', 'wellbeing_summary', 'environmental_summary'):
                insight = insight_of(section)
                if insight is not None:
                    response_parts.append(f"- {insight}")

        elif mentions('trend', 'pattern', 'how is the group doing'):
            # Canned statements that mirror the synthetic data generation logic.
            response_parts.extend([
                "\nBased on aggregate data, here are some general patterns:",
                "- Academic performance tends to show slight decline over the semester, likely due to increasing course load and stress.",
                "- Wellbeing metrics like sleep duration and wellbeing scores also tend to decrease as the semester progresses, especially during peak assignment periods.",
                "- Campus engagement can fluctuate, with potential dips during stressful weeks.",
            ])

        elif mentions('risk', 'concern', 'struggle'):
            response_parts.extend([
                "\nGeneral risk factors observed across the student population include:",
                "- Academic pressure and workload.",
                "- Maintaining consistent sleep schedules.",
                "- Balancing social engagement with academic demands.",
                "These factors are interconnected and can impact overall success.",
            ])

        elif mentions('resource', 'support', 'help'):
            response_parts.extend([
                "\nCommonly utilized resources and support areas for the student population include:",
                "- Academic tutoring and study skills support.",
                "- Wellbeing resources like counseling and stress management workshops.",
                "- Opportunities for social and community engagement.",
            ])

        else:
            response_parts.append("\nI can provide insights on overall academic performance, wellbeing trends, or campus engagement for the student population.")
            academic_insight = insight_of('academic_summary')
            if academic_insight is not None:
                response_parts.append(f"\nFor example: {academic_insight}")

        return "\n".join(response_parts)

## Generate appropriate response

### Subtask:
Based on whether a student is selected, formulate the response. For no student selected, the response should summarize insights or trends from the entire dataset. For a selected student, the response should be tailored to that student's data.


**Reasoning**:
Review the `handle_user_query` function and the `DatabricksAIAgent` methods to ensure the correct context and response generation logic is applied based on whether a student is selected, and verify the HTML output formatting clearly indicates the context.



In [None]:
# Review of the logic and HTML formatting in handle_user_query and DatabricksAIAgent

# Review of handle_user_query:
# 1. Check for student_selection is None or empty: This correctly identifies the aggregate case.
# 2. Aggregate case:
#    - Calls self.analyze_aggregate_data() to get aggregate results.
#    - Creates context_data with 'aggregate_analysis' and 'is_aggregate': True. This is correct.
#    - Calls self.databricks_ai_agent.get_enhanced_response(user_query, context_data). This is correct.
#    - HTML output title: "💬 AI Response to: "{user_query}" (Overall Dataset)". This clearly indicates aggregate context.
#    - HTML output context: "Analysis Context: Entire Dataset". This is correct.
# 3. Student-specific case (else block):
#    - Extracts student_id.
#    - Calls self.analyze_student(student_id) to get student-specific results.
#    - Creates context_data with student-specific details and 'is_aggregate': False. This is correct.
#    - Calls self.databricks_ai_agent.get_enhanced_response(user_query, context_data). This is correct.
#    - HTML output title: "💬 AI Response to: "{user_query}" (Student {student_id})". This clearly indicates student-specific context.
#    - HTML output context: "Analysis Context: Student {student_id} | Risk Score: ... | Primary Factors: ...". This is correct and provides relevant student details.

# Review of DatabricksAIAgent:
# 1. get_enhanced_response:
#    - Checks context_data.get('is_aggregate', False). This correctly routes the request.
#    - If is_aggregate is True, calls _generate_aggregate_response with user_query and aggregate_analysis. This is correct.
#    - If is_aggregate is False, calls _generate_intelligent_response with user_query and context_data. This is correct.
#    - Includes fallback logic if response generation fails.

# 2. _generate_aggregate_response(user_query, aggregate_analysis_result):
#    - Takes user_query and aggregate_analysis_result as input.
#    - Parses user_query for keywords (average, trend, risk, resource).
#    - Accesses aggregate_analysis_result to extract relevant insights (academic_summary, wellbeing_summary, environmental_summary).
#    - Formulates a response string based on the query and available aggregate insights.
#    - Includes a fallback message if no specific keywords match.
#    - The logic appears to correctly use the aggregate data to generate a relevant summary.

# 3. _generate_intelligent_response(user_query, context_data) and helper methods:
#    - Takes user_query and student-specific context_data as input.
#    - Parses user_query for keywords (study, sleep, stress, social, academic, why, health, resource).
#    - Calls appropriate helper methods (_get_study_analysis, _get_sleep_analysis, etc.).
#    - Helper methods access student-specific details (risk_score, factors, actions, student_id, trend) from context_data.
#    - Helper methods formulate detailed, tailored responses using the student's specific context.
#    - The logic correctly uses student-specific data to generate tailored responses.

# Conclusion: The review confirms that the logic in handle_user_query and DatabricksAIAgent correctly handles both aggregate and student-specific queries, passes the appropriate context, and generates responses using the relevant data. The HTML formatting also clearly indicates the context of the response.

# No code changes are needed as the review confirms the existing code meets the requirements.

## Refine and test

### Subtask:
Thoroughly test the Gradio application to ensure the agent responds correctly in both scenarios (student selected and no student selected) with relevant and accurate information.


**Reasoning**:
Launch the Gradio application to test the agent's responses in both scenarios (student selected and no student selected) as instructed.



In [None]:
# Launch the Gradio application to manually test both scenarios
# (student selected and no student selected).
# NOTE(review): launch_gradio_app is expected to be defined in an earlier cell — confirm it
# has been run before executing this cell on a fresh kernel.
launch_gradio_app()

# Isolated AI bot

In [None]:
import os
import requests
import numpy as np
import pandas as pd
import json
import gradio as gr
import plotly.graph_objects as go
from datetime import datetime

# Databricks Model Scoring Functions
def create_tf_serving_json(data):
    """Wrap model input in an ``{'inputs': ...}`` envelope.

    Args:
        data: Either a dict whose values expose ``.tolist()`` or a single
            object exposing ``.tolist()``.

    Returns:
        ``{'inputs': payload}`` where ``payload`` is a dict of converted
        values (same keys as ``data``) for dict input, otherwise the result
        of ``data.tolist()``.
    """
    if isinstance(data, dict):
        payload = {}
        for name in data.keys():
            payload[name] = data[name].tolist()
    else:
        payload = data.tolist()
    return {'inputs': payload}

def score_model(dataset):
    """Return a simulated model-serving response for ``dataset``.

    No network request is made. The first ``academic_risk_score`` value of
    ``dataset`` (or 0.5 when the frame is empty or lacks that column) is
    perturbed with Gaussian noise (mean 0, standard deviation 0.1) and
    clamped to the range [0.0, 1.0].

    Args:
        dataset: pandas DataFrame of input features.

    Returns:
        ``{'predictions': [value]}`` holding the single clamped score.
    """
    # NOTE: To call a real Databricks Model Serving endpoint, replace the
    # simulation below with something along these lines:
    #   url = 'YOUR_DATABRICKS_MODEL_SERVING_ENDPOINT_URL'
    #   token = os.environ.get("YOUR_DATABRICKS_TOKEN_SECRET_NAME")  # env var or Colab Secrets
    #   headers = {'Authorization': f'Bearer {token}', 'Content-Type': 'application/json'}
    #   response = requests.request(method='POST', headers=headers, url=url, data=data_json)
    #   if response.status_code != 200:
    #       raise Exception(f'Request failed with status {response.status_code}, {response.text}')
    #   return response.json()
    print("Simulating call to Databricks Model Serving Endpoint...")

    # Seed the simulation from the input feature when it is available.
    has_risk_feature = not dataset.empty and 'academic_risk_score' in dataset.columns
    if has_risk_feature:
        base_risk = dataset['academic_risk_score'].iloc[0]
    else:
        base_risk = 0.5

    noisy_risk = base_risk + np.random.normal(0, 0.1)
    clamped_risk = min(1.0, max(0.0, noisy_risk))

    simulated_result = {'predictions': [clamped_risk]}
    print(f"Simulated model response: {simulated_result}")
    return simulated_result


class DatabricksAIAgent:
    """Use intelligent response generation with Databricks integration"""

    def __init__(self):
        """Set the Databricks-enabled flag and print an initialization notice."""
        self.databricks_enabled = True # Placeholder for actual Databricks connection status
        print("✅ Databricks AI Agent initialized")

    def get_enhanced_response(self, user_query, context_data, is_aggregate=False):
        """Get enhanced response using intelligent pattern matching"""
        try:
            if is_aggregate:
                return self._generate_aggregate_response(user_query, context_data)
            else:
                return self._generate_intelligent_response(user_query, context_data)
        except Exception as e:
            print(f"❌ Databricks LLM failed: {e}")
            return self._get_smart_fallback(user_query, context_data, is_aggregate)

    def _generate_intelligent_response(self, user_query, context_data):
        """Generate intelligent, context-aware responses for individual students"""
        risk_score = context_data.get('risk_score', 0.5)
        factors = context_data.get('factors', [])
        actions = context_data.get('actions', [])
        student_id = context_data.get('student_id', 'the student')
        trend = context_data.get('trend', 'stable')

        query_lower = user_query.lower()

        # Study-related questions
        if any(word in query_lower for word in ['study', 'studying', 'homework', 'assignments', 'learn']):
            return self._get_study_analysis(user_query, context_data)

        # Sleep-related questions
        elif any(word in query_lower for word in ['sleep', 'rest', 'tired', 'fatigue', 'energy']):
            return self._get_sleep_analysis(user_query, context_data)

        # Stress-related questions
        elif any(word in query_lower for word in ['stress', 'overwhelm', 'pressure', 'anxiety', 'worry']):
            return self._get_stress_analysis(user_query, context_data)

        # Social-related questions
        elif any(word in query_lower for word in ['social', 'friends', 'lonely', 'isolated', 'community']):
            return self._get_social_analysis(user_query, context_data)

        # Academic performance
        elif any(word in query_lower for word in ['grade', 'performance', 'academic', 'gpa', 'score']):
            return self._get_academic_analysis(user_query, context_data)

        # Causal analysis
        elif any(word in query_lower for word in ['why', 'cause', 'reason', 'because', 'factor']):
            return self._get_causal_analysis(user_query, context_data)

        # General health/wellbeing
        elif any(word in query_lower for word in ['health', 'wellbeing', 'wellness', 'feel', 'mood']):
            return self._get_wellbeing_analysis(user_query, context_data)

        # Resource recommendations
        elif any(word in query_lower for word in ['resource', 'help', 'support', 'recommend', 'suggest']):
            return self._get_resource_analysis(user_query, context_data)

        # Default intelligent response
        else:
            return self._get_general_analysis(user_query, context_data)

    def _generate_aggregate_response(self, user_query, context_data):
        """Generate intelligent responses for aggregate/whole dataset analysis"""
        overall_risk = context_data.get('overall_risk', 0.5)
        common_factors = context_data.get('common_factors', [])
        trends = context_data.get('trends', {})
        student_count = context_data.get('student_count', 0)

        query_lower = user_query.lower()

        # Overall trends questions
        if any(word in query_lower for word in ['overall', 'general', 'entire', 'whole', 'all students', 'population']):
            return self._get_overall_trends_analysis(user_query, context_data)

        # Common issues questions
        elif any(word in query_lower for word in ['common', 'frequent', 'typical', 'most students', 'majority']):
            return self._get_common_issues_analysis(user_query, context_data)

        # Comparison questions
        elif any(word in query_lower for word in ['compare', 'comparison', 'difference', 'versus', 'vs']):
            return self._get_comparison_analysis(user_query, context_data)

        # Resource allocation questions
        elif any(word in query_lower for word in ['resources', 'allocation', 'prioritize', 'focus', 'investment']):
            return self._get_resource_allocation_analysis(user_query, context_data)

        # Default aggregate response
        else:
            return self._get_aggregate_general_analysis(user_query, context_data)

    def _get_study_analysis(self, user_query, context_data):
        """Generate detailed study analysis for individual student"""
        risk_score = context_data.get('risk_score', 0.5)
        factors = context_data.get('factors', [])
        student_id = context_data.get('student_id', 'the student')

        study_insights = {
            'high_risk': "The data indicates significant challenges in study habits. There's evidence of cramming, inconsistent study schedules, and potential burnout affecting learning efficiency.",
            'medium_risk': "Study patterns show some concerning trends, including irregular study sessions and possible time management issues that could be optimized.",
            'low_risk': "Study habits appear generally healthy with minor areas for improvement in consistency and technique."
        }

        risk_level = 'high_risk' if risk_score > 0.7 else 'medium_risk' if risk_score > 0.4 else 'low_risk'

        return f"""
**📚 Detailed Study Pattern Analysis for {student_id}**

**Current Assessment:**
{study_insights[risk_level]}

**Specific Study Challenges Identified:**
- **Academic Performance Trend**: {context_data.get('trend', 'stable')}
- **Primary Factors Affecting Studies**: {', '.join(factors)}
- **Risk Level Impact**: {risk_score:.2f} ({(risk_score*100):.0f}% concern level)

**Study Pattern Breakdown:**
1. **Consistency**: The data suggests {['highly irregular', 'somewhat irregular', 'relatively consistent'][min(2, int(risk_score//0.3))]} study patterns
2. **Efficiency**: Learning efficiency appears to be {['significantly impacted', 'moderately affected', 'generally effective'][min(2, int(risk_score//0.3))]}
3. **Balance**: Study-life balance shows {['concerning imbalance', 'some imbalance', 'reasonable balance'][min(2, int(risk_score//0.3))]}

**Evidence-Based Recommendations:**
- Implement spaced repetition technique for better retention
- Establish consistent daily study blocks (2-3 hours with breaks)
- Utilize active recall methods instead of passive reading
- Schedule weekly review sessions to reinforce learning

**Immediate Action Steps:**
1. Visit the Academic Success Center for personalized study strategy
2. Download a study planning app to track and schedule sessions
3. Form a study group for accountability and collaborative learning

*This analysis is based on comprehensive academic data patterns and proven educational psychology principles.*
"""

    def _get_overall_trends_analysis(self, user_query, context_data):
        """Generate overall trends analysis for entire dataset"""
        overall_risk = context_data.get('overall_risk', 0.5)
        common_factors = context_data.get('common_factors', [])
        trends = context_data.get('trends', {})
        student_count = context_data.get('student_count', 0)

        return f"""
**📊 Overall Student Population Analysis**

**Population Overview:**
Based on analysis of {student_count} students, the overall academic risk level is {overall_risk:.2f}, indicating {['significant challenges requiring campus-wide attention', 'moderate concerns with opportunities for improvement', 'generally positive patterns with targeted enhancement needs'][min(2, int(overall_risk//0.3))]}.

**Key Population Trends:**
- **Most Common Challenges**: {', '.join(common_factors[:3]) if common_factors else 'No significant patterns identified'}
- **Academic Performance**: {trends.get('academic', 'Stable across most students')}
- **Wellbeing Patterns**: {trends.get('wellbeing', 'Generally positive with some variations')}
- **Sleep Patterns**: {trends.get('sleep', 'Adequate for majority of students')}

**Population-Level Insights:**
1. **Prevalence of Issues**: Approximately {(overall_risk*100):.0f}% of students show concerning patterns
2. **Common Factor Clusters**: Students typically experience {len(common_factors)} primary challenge areas
3. **Intervention Impact**: Targeted support could benefit ~{(overall_risk*student_count):.0f} students

**Strategic Recommendations:**
- Develop campus-wide wellness initiatives addressing {common_factors[0] if common_factors else 'general student needs'}
- Enhance academic support services during peak stress periods
- Implement proactive outreach for at-risk student identification
- Optimize resource allocation based on population needs

*Analysis based on comprehensive dataset of {student_count} students using advanced pattern recognition.*
"""

    def _get_common_issues_analysis(self, user_query, context_data):
        """Generate common issues analysis for entire dataset"""
        common_factors = context_data.get('common_factors', [])
        factor_prevalence = context_data.get('factor_prevalence', {})
        student_count = context_data.get('student_count', 0)

        return f"""
**🔍 Common Issues Analysis - Student Population**

**Most Frequent Challenges Identified:**
{chr(10).join([f"{i+1}. {factor} ({factor_prevalence.get(factor, 0)*100:.0f}% of students)" for i, factor in enumerate(common_factors[:5])])}

**Pattern Analysis:**
The data reveals that students commonly experience interconnected challenges. The most prevalent issue ({common_factors[0] if common_factors else 'academic pressure'}) affects approximately {factor_prevalence.get(common_factors[0], 0)*100:.0f}% of the student population.

**Cluster Analysis:**
- **Academic Cluster**: {', '.join([f for f in common_factors if 'academic' in f.lower()][:2]) or 'General academic pressures'}
- **Wellbeing Cluster**: {', '.join([f for f in common_factors if any(word in f.lower() for word in ['sleep', 'stress', 'wellbeing'])][:2]) or 'General wellbeing concerns'}
- **Social Cluster**: {', '.join([f for f in common_factors if 'social' in f.lower()][:2]) or 'Social engagement patterns'}

**Intervention Prioritization:**
1. **High Impact**: Address {common_factors[0] if common_factors else 'primary challenges'} (affects most students)
2. **Medium Impact**: Target {common_factors[1] if len(common_factors) > 1 else 'secondary factors'} (moderate prevalence)
3. **Preventive**: Monitor {common_factors[2] if len(common_factors) > 2 else 'emerging patterns'} (early intervention opportunity)

*Analysis identifies patterns across {student_count} students to optimize support strategies.*
"""

    # ... (keep all the existing individual analysis methods: _get_sleep_analysis, _get_stress_analysis, etc.)

    def _get_aggregate_general_analysis(self, user_query, context_data):
        """Generate general analysis for aggregate data"""
        overall_risk = context_data.get('overall_risk', 0.5)
        common_factors = context_data.get('common_factors', [])
        student_count = context_data.get('student_count', 0)

        return f"""
**🤖 Population-Level AI Analysis**

**Response to: "{user_query}"**

Based on my comprehensive analysis of the entire student dataset ({student_count} students), here are the key insights:

**Overall Population Health:**
- **Average Risk Level**: {overall_risk:.2f} ({'Elevated concerns' if overall_risk > 0.6 else 'Moderate stability' if overall_risk > 0.4 else 'Generally healthy patterns'})
- **Most Common Challenges**: {', '.join(common_factors[:3]) if common_factors else 'No dominant patterns identified'}
- **Data Coverage**: Analysis includes academic, wellbeing, and environmental factors

**Strategic Insights:**
The data reveals that the student population shows {['significant areas for institutional improvement', 'moderate opportunities for enhanced support', 'generally positive patterns with targeted optimization needs'][min(2, int(overall_risk//0.3))]}.

**Pattern Recognition:**
- **Primary Concern**: {common_factors[0] if common_factors else 'Academic performance consistency'}
- **Secondary Patterns**: {common_factors[1] if len(common_factors) > 1 else 'Wellbeing fluctuations'}
- **Emerging Trends**: {common_factors[2] if len(common_factors) > 2 else 'Stable overall patterns'}

**Institutional Recommendations:**
1. Focus resources on addressing {common_factors[0] if common_factors else 'key challenge areas'}
2. Develop proactive support systems for at-risk identification
3. Implement population-level wellness initiatives

*Analysis generated using advanced pattern recognition across comprehensive student data.*
"""

    def _get_smart_fallback(self, user_query, context_data, is_aggregate=False):
        """Smart fallback that's actually intelligent"""
        if is_aggregate:
            return self._get_aggregate_general_analysis(user_query, context_data)
        else:
            return self._get_general_analysis(user_query, context_data)


class DatabricksModelAgent:
    """Agent that integrates with Databricks model endpoint"""

    def __init__(self):
        """Assign the fixed agent identifier and print an initialization notice."""
        # agent_id is reported in the "agent_id" field of formatted aggregate responses.
        self.agent_id = "Databricks_Model_Agent"
        print("✅ Databricks Model Agent initialized")

    def run_holistic_analysis(self, student_id, data_sources):
        """Run analysis using Databricks model endpoint"""
        try:
            # Prepare data for Databricks model
            features = self._prepare_features(student_id, data_sources)

            # Score model with Databricks endpoint
            model_result = score_model(features)

            return self._format_model_response(model_result, student_id)

        except Exception as e:
            print(f"❌ Databricks model failed: {e}")
            # Fallback to simulated agent
            return self._get_simulated_response(student_id)

    def run_aggregate_analysis(self, all_students_data):
        """Run aggregate analysis on entire dataset"""
        try:
            # Calculate aggregate metrics
            aggregate_result = self._calculate_aggregate_metrics(all_students_data)
            return self._format_aggregate_response(aggregate_result)

        except Exception as e:
            print(f"❌ Aggregate analysis failed: {e}")
            return self._get_simulated_aggregate_response()

    def _prepare_features(self, student_id, data_sources):
        """Prepare features for Databricks model"""
        # Extract relevant features from data sources
        academic_data = data_sources.get('academic', pd.DataFrame())
        wellbeing_data = data_sources.get('wellbeing', pd.DataFrame())
        environmental_data = data_sources.get('environmental', pd.DataFrame())

        # Create feature vector (adjust based on your model's expected input)
        # This is a placeholder - you would extract meaningful features here
        features = {
            'student_id': [student_id],
            'academic_risk_score': [self._calculate_academic_risk(academic_data)], # Example feature
            'wellbeing_score': [self._calculate_wellbeing_score(wellbeing_data)],   # Example feature
            'environmental_factors': [self._assess_environmental_factors(environmental_data)] # Example feature
            # Add more relevant features based on your Databricks model
        }

        return pd.DataFrame(features)

    def _calculate_aggregate_metrics(self, all_students_data):
        """Calculate aggregate metrics for entire dataset"""
        # This is a simplified version - you would implement proper aggregation
        risk_scores = []
        all_factors = []

        for student_id, data in all_students_data.items():
            try:
                analysis = self.run_holistic_analysis(student_id, data)
                risk_scores.append(analysis['academic_analysis']['risk_score'])
                all_factors.extend(analysis['causal_analysis']['causal_factors'])
            except:
                continue

        # Calculate aggregate metrics
        overall_risk = np.mean(risk_scores) if risk_scores else 0.5

        # Find common factors
        from collections import Counter
        factor_counts = Counter(all_factors)
        common_factors = [factor for factor, count in factor_counts.most_common(5)]

        return {
            'overall_risk': overall_risk,
            'common_factors': common_factors,
            'factor_prevalence': {factor: count/len(all_factors) for factor, count in factor_counts.most_common(5)},
            'student_count': len(all_students_data),
            'trends': {
                'academic': 'Stable' if overall_risk < 0.6 else 'Concerning',
                'wellbeing': 'Positive' if overall_risk < 0.5 else 'Needs attention',
                'sleep': 'Adequate' if overall_risk < 0.6 else 'Insufficient'
            }
        }

    def _format_aggregate_response(self, aggregate_result):
        """Format aggregate analysis response"""
        return {
            "agent_id": self.agent_id,
            "analysis_type": "aggregate",
            "analysis_timestamp": datetime.now().isoformat(),
            "population_analysis": {
                "overall_risk_score": aggregate_result['overall_risk'],
                "student_count": aggregate_result['student_count'],
                "common_factors": aggregate_result['common_factors'],
                "factor_prevalence": aggregate_result['factor_prevalence'],
                "population_trends": aggregate_result['trends']
            },
            "strategic_recommendations": {
                "priority_level": "high" if aggregate_result['overall_risk'] > 0.7 else "medium" if aggregate_result['overall_risk'] > 0.4 else "low",
                "recommended_actions": [
                    {
                        "type": "population_health",
                        "description": f"Address {aggregate_result['common_factors'][0] if aggregate_result['common_factors'] else 'key issues'} campus-wide",
                        "confidence": aggregate_result['overall_risk']
                    },
                    {
                        "type": "preventive_care",
                        "description": "Implement proactive support systems",
                        "confidence": 0.8
                    }
                ]
            }
        }

    # ... (keep the rest of the DatabricksModelAgent methods)


class GradioHokieWellApp:
    def __init__(self):
        """Create the agents used by the app, then load the dataset.

        Three agents are instantiated: a simulated agent, the natural-language
        query agent and the model-endpoint agent. The model-endpoint agent is
        also registered as the agent used for structured analysis.
        """
        self.simulated_agent = SimulatedDatabricksAgent()
        self.databricks_ai_agent = DatabricksAIAgent()  # answers natural-language queries

        # Structured analysis goes through the model-endpoint agent first.
        model_agent = DatabricksModelAgent()
        self.databricks_model_agent = model_agent
        self.structured_analysis_agent = model_agent

        self.load_data()

    def load_data(self):
        """Load the synthetic dataset"""
        try:
            # Check if files exist before loading
            if os.path.exists('students.csv') and os.path.exists('academic_data.csv') and \
               os.path.exists('wellbeing_data.csv') and os.path.exists('environmental_data.csv') and \
               os.path.exists('resources.csv'):
                self.students = pd.read_csv('students.csv')
                self.academic = pd.read_csv('academic_data.csv')
                self.wellbeing = pd.read_csv('wellbeing_data.csv')
                self.environmental = pd.read_csv('environmental_data.csv')
                self.resources = pd.read_csv('resources.csv')
                print("✅ Data loaded successfully from CSV files")
            else:
                 print("⚠️ CSV files not found. Creating minimal data.")
                 self.create_minimal_data()

        except Exception as e:
            print(f"❌ Data loading failed: {e}")
            # Create minimal data if any error occurs during loading
            self.create_minimal_data()

    def create_minimal_data(self):
        """Create minimal data if files are missing or loading fails"""
        self.students = pd.DataFrame([
            {'student_id': 'S001', 'name': 'Alex Johnson', 'major': 'Computer Engineering', 'year': 'Sophomore'},
            {'student_id': 'S003', 'name': 'Jordan Smith', 'major': 'Psychology', 'year': 'Freshman'},
            {'student_id': 'S002', 'name': 'Taylor Brown', 'major': 'Business', 'year': 'Junior'}
        ])
        # Add minimal data for academic, wellbeing, and environmental dataframes
        self.academic = pd.DataFrame({
            'student_id': ['S001', 'S001', 'S003', 'S003', 'S002', 'S002'],
            'assignment_id': ['A001', 'A002', 'A003', 'A004', 'A005', 'A006'],
            'course_id': ['CS101', 'CS101', 'PSYC101', 'PSYC101', 'BUS201', 'BUS201'],
            'course_name': ['Intro to CS', 'Intro to CS', 'Intro to Psych', 'Intro to Psych', 'Marketing', 'Marketing'],
            'assignment_name': ['Assignment 1', 'Assignment 2', 'Assignment 1', 'Assignment 2', 'Case Study 1', 'Case Study 2'],
            'due_date': ['2024-01-20', '2024-02-05', '2024-01-20', '2024-02-05', '2024-01-25', '2024-02-10'],
            'submission_date': ['2024-01-20', '2024-02-06', '2024-01-21', '2024-02-08', '2024-01-24', '2024-02-09'],
            'grade': [85, 80, 70, 65, 90, 88],
            'submission_delay_days': [0, 1, 1, 3, -1, -1],
            'difficulty_level': [0.5, 0.5, 0.4, 0.4, 0.6, 0.6]
        })
        self.wellbeing = pd.DataFrame({
            'student_id': ['S001', 'S001', 'S003', 'S003', 'S002', 'S002'],
            'date': ['2024-01-20', '2024-01-21', '2024-01-20', '2024-01-21', '2024-01-20', '2024-01-21'],
            'sleep_duration': [7.5, 7.0, 6.0, 5.5, 8.0, 7.8],
            'step_count': [8000, 8500, 5000, 4500, 10000, 9500],
            'wellbeing_score': [4.0, 3.8, 3.0, 2.8, 4.5, 4.3],
            'week_of_semester': [1, 1, 1, 1, 1, 1],
            'day_type': ['Weekday', 'Weekday', 'Weekday', 'Weekday', 'Weekday', 'Weekday']
        })
        self.environmental = pd.DataFrame({
            'student_id': ['S001', 'S001', 'S003', 'S003', 'S002', 'S002'],
            'date': ['2024-01-20', '2024-01-21', '2024-01-20', '2024-01-21', '2024-01-20', '2024-01-21'],
            'meals_on_campus': [2.0, 1.0, 1.0, 1.0, 3.0, 2.0],
            'library_hours': [1.0, 1.5, 0.5, 0.2, 2.0, 1.8],
            'gym_visit': [1, 0, 0, 0, 1, 1],
            'campus_engagement_score': [0.7, 0.6, 0.5, 0.4, 0.9, 0.8]
        })
        print("✅ Minimal data created.")

    def get_all_students_data(self):
        """Get data for all students for aggregate analysis"""
        all_data = {}
        for student_id in self.students['student_id']:
            all_data[student_id] = self.get_student_data(student_id)
        return all_data

    def get_student_data(self, student_id):
        """Get current data for selected student"""
        return {
            'academic': self.academic[self.academic['student_id'] == student_id] if hasattr(self, 'academic') else pd.DataFrame(),
            'wellbeing': self.wellbeing[self.wellbeing['student_id'] == student_id] if hasattr(self, 'wellbeing') else pd.DataFrame(),
            'environmental': self.environmental[self.environmental['student_id'] == student_id] if hasattr(self, 'environmental') else pd.DataFrame()
        }

    def analyze_student(self, student_id):
        """Run AI analysis for a student using the selected structured analysis agent"""
        student_data = self.get_student_data(student_id)
        return self.structured_analysis_agent.run_holistic_analysis(student_id, student_data)

    def analyze_aggregate(self):
        """Run aggregate analysis on entire dataset"""
        all_students_data = self.get_all_students_data()
        return self.structured_analysis_agent.run_aggregate_analysis(all_students_data)

    def handle_user_query(self, user_query, student_selection):
        """Handle natural language queries with intelligent responses - now with conditional logic"""
        if not user_query.strip():
            return "Please enter a question about the student's analysis."

        # Check if "All Students" is selected or a specific student
        if student_selection == "All Students":
            # Run aggregate analysis for entire dataset
            analysis_result = self.analyze_aggregate()

            # Prepare context for AI agent for aggregate analysis
            population_analysis = analysis_result.get('population_analysis', {})
            strategic_recs = analysis_result.get('strategic_recommendations', {})

            context_data = {
                'overall_risk': population_analysis.get('overall_risk_score', 0.5),
                'common_factors': population_analysis.get('common_factors', []),
                'factor_prevalence': population_analysis.get('factor_prevalence', {}),
                'student_count': population_analysis.get('student_count', 0),
                'trends': population_analysis.get('population_trends', {}),
                'strategic_actions': [action['description'] for action in strategic_recs.get('recommended_actions', [])]
            }

            # Get ENHANCED response from AI agent for aggregate data
            enhanced_response = self.databricks_ai_agent.get_enhanced_response(
                user_query, context_data, is_aggregate=True
            )

            return f"""
            <div style='background: #f0f8ff; padding: 20px; border-radius: 10px; border-left: 5px solid #4169E1; margin: 10px 0;'>
                <h4 style='color: black; margin-top: 0;'>🌐 Population Analysis: "{user_query}"</h4>
                <div style='color: black; line-height: 1.6; font-size: 14px; white-space: pre-line;'>
                    {enhanced_response}
                </div>
                <div style='margin-top: 15px; padding: 10px; background: #e6f3ff; border-radius: 5px;'>
                    <small style='color: #666;'>
                        <strong>Analysis Context:</strong> Entire Student Population | Overall Risk: {population_analysis.get('overall_risk_score', 0.5):.2f} | Student Count: {population_analysis.get('student_count', 0)}
                    </small>
                </div>
            </div>
            """
        else:
            # Specific student selected - use existing individual analysis logic
            student_id = student_selection.split(' - ')[0]
            analysis_result = self.analyze_student(student_id)

            # Prepare context for AI agent for individual analysis
            academic = analysis_result.get('academic_analysis', {})
            causal = analysis_result.get('causal_analysis', {})
            plan = analysis_result.get('intervention_plan', {})

            context_data = {
                'risk_score': academic.get('risk_score', 0.5),
                'trend': academic.get('trend_direction', 'stable'),
                'factors': causal.get('causal_factors', []),
                'actions': [action.get('description', '') for action in plan.get('planned_actions', [])],
                'risk_level': plan.get('risk_level', 'low'),
                'student_id': student_id,
                'raw_analysis_result': analysis_result
            }

            # Get ENHANCED response from AI agent for individual student
            enhanced_response = self.databricks_ai_agent.get_enhanced_response(
                user_query, context_data, is_aggregate=False
            )

            return f"""
            <div style='background: #f8f9fa; padding: 20px; border-radius: 10px; border-left: 5px solid #4CAF50; margin: 10px 0;'>
                <h4 style='color: black; margin-top: 0;'>💬 Individual Analysis: "{user_query}"</h4>
                <div style='color: black; line-height: 1.6; font-size: 14px; white-space: pre-line;'>
                    {enhanced_response}
                </div>
                <div style='margin-top: 15px; padding: 10px; background: #e8f5e8; border-radius: 5px;'>
                    <small style='color: #666;'>
                        <strong>Analysis Context:</strong> Student {student_id} | Risk Score: {academic.get('risk_score', 0.5):.2f} | Primary Factors: {', '.join(causal.get('causal_factors', []))}
                    </small>
                </div>
            </div>
            """

    # ... (keep all the existing methods like create_risk_gauge, create_academic_trend_chart, etc.)

    def create_interface(self):
        """Build the Gradio Blocks UI and wire its event handlers.

        Layout: a narrow left column (scale=1) holding the student/population
        selector, the analyze button, the risk gauge plot and three read-only
        quick-stat textboxes, and a wider right column (scale=2) with four tabs:
        agent Q&A, analysis results, visual analytics and resource recommendations.

        Returns:
            The constructed ``gr.Blocks`` object. It is only built here; launching
            is left to the caller.
        """
        # NOTE(review): the css string passed here only styles `.gradio-container`;
        # no `.black-text` rule is defined in it, so the `elem_classes=["black-text"]`
        # used below presumably relies on styling from elsewhere — TODO confirm.
        with gr.Blocks(theme=gr.themes.Soft(), title="HokieWell Navigator", css=".gradio-container {color: black !important;}") as demo:
            # Page header.
            gr.Markdown(
                """
                # 🎓 HokieWell Navigator
                ### *From Reactive Support to Proactive Thriving*
                **Powered by Databricks AI Agent Framework**
                """
            )

            with gr.Row():
                # ---- Left column: selection controls and headline numbers ----
                with gr.Column(scale=1):
                    # Dropdown entries are "All Students" followed by one
                    # "<student_id> - <name>" label per row of self.students.
                    # run_complete_analysis splits on ' - ' to recover the id.
                    student_choices = ["All Students"] + [f"{row['student_id']} - {row['name']}" for _, row in self.students.iterrows()]

                    student_dropdown = gr.Dropdown(
                        choices=student_choices,
                        label="👤 Select Student or Population",
                        value="All Students",  # Default to All Students
                        elem_classes=["black-text"]
                    )

                    analyze_btn = gr.Button("🚀 Run AI Analysis", variant="primary", elem_classes=["black-text"])
                    risk_gauge = gr.Plot(label="Academic Risk Assessment")

                    # Read-only textboxes filled by the analyze button.
                    gr.Markdown("### 📊 Quick Stats", elem_classes=["black-text"])
                    risk_score = gr.Textbox(label="Risk Score", interactive=False, elem_classes=["black-text"])
                    trend_direction = gr.Textbox(label="Trend Direction", interactive=False, elem_classes=["black-text"])
                    primary_factor = gr.Textbox(label="Primary Factor", interactive=False, elem_classes=["black-text"])

                # ---- Right column: tabbed result panes ----
                with gr.Column(scale=2):
                    # Tab 1: free-text question box and the HTML answer pane.
                    with gr.Tab("🤖 Agent Response"):
                        gr.Markdown("### 💬 Ask Anything About Students")
                        gr.Markdown("""
                        **When 'All Students' is selected:**
                        - "What are the overall trends?"
                        - "What are the most common issues?"
                        - "How should we allocate resources?"

                        **When a specific student is selected:**
                        - "How is he studying?"
                        - "Explain the sleep issues"
                        - "What causes the stress?"
                        """)

                        user_query = gr.Textbox(
                            label="Enter your question:",
                            placeholder="Type your question here...",
                            lines=3,
                            elem_classes=["black-text"]
                        )

                        ask_btn = gr.Button("🎯 Get AI Analysis", variant="primary")
                        agent_response = gr.HTML(label="AI Agent Response", elem_classes=["black-text"])

                    # Tab 2: HTML summary produced by the analyze button.
                    with gr.Tab("📈 Analysis Results"):
                        analysis_output = gr.HTML(label="AI Analysis Results", elem_classes=["black-text"])

                    # Tab 3: two plots shown side by side.
                    with gr.Tab("📊 Visual Analytics"):
                        with gr.Row():
                            academic_chart = gr.Plot(label="Academic Performance")
                            wellbeing_chart = gr.Plot(label="Wellbeing Metrics")

                    # Tab 4: HTML resource recommendations.
                    with gr.Tab("🛟 Resource Recommendations"):
                        resources_output = gr.HTML(label="Personalized Recommendations", elem_classes=["black-text"])

            # Event handlers
            # The outputs list order must match the 8-tuple returned by
            # run_complete_analysis (gauge, score, trend, factor, html, two charts, resources).
            analyze_btn.click(
                fn=self.run_complete_analysis,
                inputs=[student_dropdown],
                outputs=[risk_gauge, risk_score, trend_direction, primary_factor, analysis_output, academic_chart, wellbeing_chart, resources_output]
            )

            # Both the button click and submitting the textbox route the question
            # to the same handler, together with the current dropdown selection.
            ask_btn.click(
                fn=self.handle_user_query,
                inputs=[user_query, student_dropdown],
                outputs=[agent_response]
            )

            user_query.submit(
                fn=self.handle_user_query,
                inputs=[user_query, student_dropdown],
                outputs=[agent_response]
            )

            # Changing the selection refreshes only the two charts; the gauge,
            # quick stats and HTML panes change only via the analyze button.
            student_dropdown.change(
                fn=self.update_student_charts,
                inputs=[student_dropdown],
                outputs=[academic_chart, wellbeing_chart]
            )

            # Initial load
            # Populates the two charts for the dropdown's default value when the page loads.
            demo.load(
                fn=self.initial_load,
                inputs=[student_dropdown],
                outputs=[academic_chart, wellbeing_chart]
            )

        return demo

    def run_complete_analysis(self, student_selection):
        """Run the analysis for the dropdown selection and build every dashboard output.

        Args:
            student_selection: Dropdown value; either the literal "All Students"
                (population view) or a "<student_id> - <name>" label.

        Returns:
            An 8-tuple in the order the analyze button expects:
            (risk_gauge, risk_score, trend_direction, primary_factor,
            analysis_html, academic_chart, wellbeing_chart, resources_html).
            ``risk_score`` is the score formatted to two decimals; ``primary_factor``
            is the first reported factor in title case, or a fallback label when
            no factor is reported.
        """
        if student_selection == "All Students":
            # Population-level (aggregate) analysis.
            analysis_result = self.analyze_aggregate()
            population_analysis = analysis_result.get('population_analysis', {})

            # Risk gauge and headline score share the same value.
            risk_score_val = population_analysis.get('overall_risk_score', 0.5)
            risk_gauge = self.create_risk_gauge(risk_score_val)
            risk_score = f"{risk_score_val:.2f}"

            trend_direction = population_analysis.get('population_trends', {}).get('academic', 'Stable')

            # `.get(key, [None])[0]` raised IndexError when the key was present but
            # the list was empty; `or [None]` covers missing, None and empty alike.
            common_factors = population_analysis.get('common_factors') or [None]
            primary_factor = common_factors[0]
            primary_factor = primary_factor.replace('_', ' ').title() if primary_factor else "Multiple factors"

            # Analysis results HTML for population
            analysis_html = self.format_aggregate_results(analysis_result)

            # Aggregated charts for the population
            academic_chart = self.create_aggregate_academic_chart()
            wellbeing_chart = self.create_aggregate_wellbeing_chart()

            # Resource recommendations for population
            resources_html = self.format_aggregate_resource_recommendations()

            return risk_gauge, risk_score, trend_direction, primary_factor, analysis_html, academic_chart, wellbeing_chart, resources_html
        else:
            # Individual student analysis: the id is the part before ' - '.
            student_id = student_selection.split(' - ')[0]
            analysis_result = self.analyze_student(student_id)

            academic_analysis = analysis_result.get('academic_analysis', {})
            causal_analysis = analysis_result.get('causal_analysis', {})

            # Risk gauge and headline score share the same value.
            risk_score_val = academic_analysis.get('risk_score', 0.5)
            risk_gauge = self.create_risk_gauge(risk_score_val)
            risk_score = f"{risk_score_val:.2f}"

            trend_direction = academic_analysis.get('trend_direction', 'N/A').title()

            # Same empty-list guard as in the population branch.
            causal_factors = causal_analysis.get('causal_factors') or [None]
            primary_factor = causal_factors[0]
            primary_factor = primary_factor.replace('_', ' ').title() if primary_factor else "No significant factors"

            # Analysis results HTML
            analysis_html = self.format_analysis_results(analysis_result)

            # Charts
            academic_chart = self.create_academic_trend_chart(student_id)
            wellbeing_chart = self.create_wellbeing_chart(student_id)

            # Resource recommendations
            resources_html = self.format_resource_recommendations_html(student_id)

            return risk_gauge, risk_score, trend_direction, primary_factor, analysis_html, academic_chart, wellbeing_chart, resources_html

    def format_aggregate_results(self, analysis_result):
        """Format aggregate analysis results for display"""
        population_analysis = analysis_result.get('population_analysis', {})
        strategic_recs = analysis_result.get('strategic_recommendations', {})

        html = f"""
        <div style='background: #f0f8ff; padding: 15px; border-radius: 10px; margin: 10px 0;'>
            <h3 style='color: black; margin-bottom: 10px;'>🌐 Population Analysis</h3>
            <p style='color: black; margin: 5px 0;'><strong style='color: black;'>Overall Risk Score:</strong> <span style='color: black;'>{population_analysis.get('overall_risk_score', 0.0):.2f}</span></p>
            <p style='color: black; margin: 5px 0;'><strong style='color: black;'>Student Count:</strong> <span style='color: black;'>{population_analysis.get('student_count', 0)}</span></p>
            <p style='color: black; margin: 5px 0;'><strong style='color: black;'>Most Common Factors:</strong></p>
            <ul style='color: black;'>
        """
        for factor in population_analysis.get('common_factors', [])[:5]:
            prevalence = population_analysis.get('factor_prevalence', {}).get(factor, 0) * 100
            html += f"<li style='color: black;'>{factor.replace('_', ' ').title()} ({prevalence:.1f}% of students)</li>"
        html += "</ul></div>"

        return html

    def create_aggregate_academic_chart(self):
        """Build the population-level "Student Risk Distribution" bar chart.

        The bar heights are fixed example values, not figures computed from the
        student data.

        Returns:
            A plotly figure with a single bar trace, 300 px tall, legend hidden.
        """
        risk_buckets = ['High Risk', 'Medium Risk', 'Low Risk']
        example_counts = [30, 40, 30]  # Example data

        distribution_bars = go.Bar(
            x=risk_buckets,
            y=example_counts,
            name='Student Distribution'
        )

        chart = go.Figure(data=[distribution_bars])
        chart.update_layout(title="Student Risk Distribution", height=300, showlegend=False)
        return chart

    def create_aggregate_wellbeing_chart(self):
        """Build the population-level "Population Sleep Patterns" pie chart.

        The slice sizes are fixed example values, not figures computed from the
        student data.

        Returns:
            A plotly figure with a single pie trace, 300 px tall.
        """
        sleep_groups = ['Adequate Sleep', 'Insufficient Sleep']
        example_shares = [65, 35]  # Example data

        sleep_pie = go.Pie(
            labels=sleep_groups,
            values=example_shares,
            name='Sleep Patterns'
        )

        chart = go.Figure(data=[sleep_pie])
        chart.update_layout(title="Population Sleep Patterns", height=300)
        return chart

    def format_aggregate_resource_recommendations(self):
        """Format aggregate resource recommendations"""
        html = "<div style='padding: 20px; color: black;'>"
        html += "<h3 style='color: black;'>🏛️ Population-Level Resource Recommendations</h3>"
        html += """
        <div style='background: #e6f3ff; padding: 15px; margin: 10px 0; border-radius: 10px; border-left: 5px solid #4169E1; color: black;'>
            <h4 style='color: black;'>Campus-Wide Wellness Initiatives</h4>
            <p style='color: black;'>Implement programs addressing common student challenges across the population</p>
        </div>
        <div style='background: #e6f3ff; padding: 15px; margin: 10px 0; border-radius: 10px; border-left: 5px solid #4169E1; color: black;'>
            <h4 style='color: black;'>Proactive Support Systems</h4>
            <p style='color: black;'>Develop early intervention systems for at-risk student identification</p>
        </div>
        """
        html += "</div>"
        return html

    # ... (keep all other existing methods)

# Keep the original SimulatedDatabricksAgent and launch function
class SimulatedDatabricksAgent:
    """Offline stand-in for the Databricks agent that returns canned analysis results."""

    def __init__(self):
        """Set the agent id and announce initialization on stdout."""
        self.agent_id = "HokieWell_Simulated"
        print("✅ Simulated Databricks Agent initialized")

    def run_holistic_analysis(self, student_id, data_sources):
        """Return a canned holistic analysis keyed on the student id.

        Args:
            student_id: "S003" selects the high-risk profile, "S001" the
                medium-risk profile; any other value gets the low-risk profile.
            data_sources: Accepted for interface compatibility; not read here.

        Returns:
            A dict with the agent and student ids, an ISO timestamp of the call,
            and the 'academic_analysis', 'wellbeing_assessment', 'causal_analysis',
            'intervention_plan' and 'databricks_features_used' sections, all
            derived from the selected profile's risk score.
        """
        # Ordered (id, (risk score, causal factors, insights)) pairs; built per
        # call so every result carries its own fresh lists.
        known_profiles = (
            ("S003", (  # Jordan - high risk
                0.75,
                ["sleep_deprivation", "academic_overload", "social_isolation"],
                [
                    "Grade trend showing significant decline over past 3 weeks",
                    "Sleep duration consistently below 6 hours",
                    "Assignment submission delays increasing"
                ],
            )),
            ("S001", (  # Alex - medium risk
                0.65,
                ["sleep_deprivation", "academic_overload"],
                [
                    "Moderate grade decline detected",
                    "Irregular sleep patterns affecting performance",
                    "Increased library hours suggesting cramming behavior"
                ],
            )),
        )
        fallback_profile = (  # Others - lower risk
            0.35,
            ["minor_adjustments_needed"],
            [
                "Stable academic performance",
                "Healthy wellbeing patterns detected",
                "Minor optimizations possible"
            ],
        )
        risk_score, factors, insights = next(
            (profile for known_id, profile in known_profiles if student_id == known_id),
            fallback_profile,
        )

        # Trends flip once the score passes 0.6.
        elevated = risk_score > 0.6
        downward_trend = "declining" if elevated else "stable"
        stress_trend = "increasing" if elevated else "stable"

        # Risk bands: above 0.7 is high, above 0.4 is medium, otherwise low.
        if risk_score > 0.7:
            risk_level = "high"
        elif risk_score > 0.4:
            risk_level = "medium"
        else:
            risk_level = "low"

        # Medium and high risk get active interventions; low risk gets check-ins only.
        if risk_score > 0.4:
            planned_actions = [
                {
                    "type": "academic_support",
                    "description": "Schedule targeted tutoring sessions with engineering specialists",
                    "confidence": risk_score
                },
                {
                    "type": "wellbeing_intervention",
                    "description": "Proactive wellbeing check-in and sleep hygiene workshop",
                    "confidence": max(0.3, risk_score - 0.1)
                }
            ]
        else:
            planned_actions = [
                {
                    "type": "preventive_maintenance",
                    "description": "Regular check-ins to maintain healthy patterns",
                    "confidence": 0.9
                }
            ]

        academic_analysis = {
            "risk_score": risk_score,
            "trend_direction": downward_trend,
            "confidence": 0.82,
            "key_insights": insights,
            "model_version": "databricks_dbrx_instruct_simulated"
        }
        wellbeing_assessment = {
            "overall_score": max(0.3, 1 - risk_score + 0.1),
            "dimensions": {
                "sleep_health": {"score": max(0.3, 1 - risk_score), "trend": downward_trend},
                "stress_levels": {"score": risk_score, "trend": stress_trend}
            }
        }
        causal_analysis = {
            "causal_factors": factors,
            # Each factor gets an equal share of the risk score plus a 0.1 offset.
            "effect_sizes": {name: risk_score/len(factors) + 0.1 for name in factors}
        }

        return {
            "agent_id": self.agent_id,
            "student_id": student_id,
            "analysis_timestamp": datetime.now().isoformat(),
            "academic_analysis": academic_analysis,
            "wellbeing_assessment": wellbeing_assessment,
            "causal_analysis": causal_analysis,
            "intervention_plan": {
                "risk_level": risk_level,
                "planned_actions": planned_actions
            },
            "databricks_features_used": [
                "mlflow_tracking",
                "feature_store",
                "model_registry",
                "causal_ml",
                "collaborative_filtering"
            ]
        }

def launch_gradio_app(server_name="0.0.0.0", server_port=7867, share=True, debug=True, show_error=True):
    """Build the HokieWell Gradio interface and launch it.

    The keyword arguments are forwarded unchanged to ``demo.launch``. Their
    defaults reproduce the previously hard-coded values, so calling the function
    with no arguments behaves as before; passing them lets a caller pick a free
    port or turn off sharing/debug without editing this function.

    Args:
        server_name: Host/interface passed to ``launch``.
        server_port: TCP port passed to ``launch``.
        share: Passed to ``launch(share=...)``.
            NOTE(review): per Gradio's documentation this requests a public share
            link — confirm that exposure is intended for student data.
        debug: Passed to ``launch(debug=...)``.
        show_error: Passed to ``launch(show_error=...)``.
    """
    app = GradioHokieWellApp()
    demo = app.create_interface()

    demo.launch(
        server_name=server_name,
        server_port=server_port,
        share=share,
        debug=debug,
        show_error=show_error
    )

# Start the app only when this code runs as the main program (e.g. executed as a
# script or notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    launch_gradio_app()

# Task
Push the current notebook to the git repository at "https://github.com/darpan02-cypher/VTHack25.git".

## Initialize Git in the notebook environment

### Subtask:
Set up Git in the current directory.


In [None]:
!git init
!git add .config/

Reinitialized existing Git repository in /content/.git/


In [None]:
!git help

usage: git [--version] [--help] [-C <path>] [-c <name>=<value>]
           [--exec-path[=<path>]] [--html-path] [--man-path] [--info-path]
           [-p | --paginate | -P | --no-pager] [--no-replace-objects] [--bare]
           [--git-dir=<path>] [--work-tree=<path>] [--namespace=<name>]
           [--super-prefix=<path>] [--config-env=<name>=<envvar>]
           <command> [<args>]

These are common Git commands used in various situations:

start a working area (see also: git help tutorial)
   clone     Clone a repository into a new directory
   init      Create an empty Git repository or reinitialize an existing one

work on the current change (see also: git help everyday)
   add       Add file contents to the index
   mv        Move or rename a file, a directory, or a symlink
   restore   Restore working tree files
   rm        Remove files from the working tree and from the index

examine the history and state (see also: git help revisions)
   bisect    Use binary search to find th

In [None]:
!git commit -m "add"

On branch master
nothing to commit, working tree clean


In [2]:
!git config --global user.email "hshriva1@charlotte.edu"
!git config --global user.name "Himanshi Shrivas"

In [3]:
!git add .


fatal: not a git repository (or any of the parent directories): .git


In [4]:
!git status

fatal: not a git repository (or any of the parent directories): .git


In [5]:
!git remote -v

fatal: not a git repository (or any of the parent directories): .git


In [6]:
!git fetch https://github.com/darpan02-cypher/VTHack25.git

fatal: not a git repository (or any of the parent directories): .git


In [None]:
!git commit -m "add"

On branch master
nothing to commit, working tree clean


In [1]:
!git push -u origin main

fatal: not a git repository (or any of the parent directories): .git


In [None]:
!git log -1 --pretty=%B

add



In [None]:
!git pull https://github.com/darpan02-cypher/VTHack25.git --rebase

From https://github.com/darpan02-cypher/VTHack25
 * branch            HEAD       -> FETCH_HEAD
Current branch master is up to date.


In [None]:
!git push origin master

fatal: could not read Username for 'https://github.com': No such device or address
