In [1]:
import os
import json
import random
import gym
import numpy as np
import requests
from gym import spaces
from stable_baselines3 import PPO
from stable_baselines3.common.callbacks import BaseCallback
from collections import deque
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

# Gemini API Configuration
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "AIzaSyA2rRBLcJx_e5g4d_fVOgG4q2Pf8ewPI70")
GEMINI_API_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateContent"

# Constants
DEFAULT_MODEL_PARAMS = {
    "policy": "MultiInputPolicy",
    "learning_rate": 0.0003,
    "n_steps": 2048,
    "batch_size": 64,
    "n_epochs": 10,
    "gamma": 0.99,
    "gae_lambda": 0.95,
    "clip_range": 0.2,
    "ent_coef": 0.01,
    "verbose": 1
}

class DifficultyAdjuster:
    """Handles dynamic difficulty adjustment based on performance."""
    
    def __init__(self):
        self.performance_window = deque(maxlen=10)  # Tracks last 10 performances
        self.difficulty_levels = ["easy", "medium", "hard"]
        self.current_difficulty_idx = 1  # Start with medium
        
    def update_difficulty(self, is_correct):
        """Adjust difficulty based on recent performance."""
        self.performance_window.append(1 if is_correct else 0)
        
        if len(self.performance_window) == self.performance_window.maxlen:
            success_rate = sum(self.performance_window) / len(self.performance_window)
            
            if success_rate > 0.7:  # Doing well, increase difficulty
                self.current_difficulty_idx = min(self.current_difficulty_idx + 1, len(self.difficulty_levels) - 1)
            elif success_rate < 0.4:  # Struggling, decrease difficulty
                self.current_difficulty_idx = max(self.current_difficulty_idx - 1, 0)
                
        return self.get_current_difficulty()
    
    def get_current_difficulty(self):
        return self.difficulty_levels[self.current_difficulty_idx]

class TrainingCallback(BaseCallback):
    """Custom callback for tracking training progress and adjusting parameters."""
    
    def __init__(self, verbose=0):
        super(TrainingCallback, self).__init__(verbose)
        self.episode_rewards = []
        self.episode_lengths = []
        
    def _on_step(self) -> bool:
        return True
    
    def _on_rollout_end(self) -> None:
        # Track episode rewards and lengths
        if len(self.model.ep_info_buffer) > 0:
            self.episode_rewards.extend([ep_info["r"] for ep_info in self.model.ep_info_buffer if "r" in ep_info])
            self.episode_lengths.extend([ep_info["l"] for ep_info in self.model.ep_info_buffer if "l" in ep_info])
            
    def get_mean_reward(self):
        return np.mean(self.episode_rewards) if self.episode_rewards else 0

def generate_gemini_scenario(emotion, difficulty="medium"):
    """Generates a scenario with choices and correct answer using Gemini API."""
    prompt = f"""
    Generate a therapeutic scenario about emotional management for someone feeling {emotion}.
    Difficulty: {difficulty}.
    Format exactly as this JSON structure:
    {{
        "scenario": "the scenario description",
        "choices": [
            {{"text": "choice 1", "correct": false, "explanation": "why this is wrong"}},
            {{"text": "choice 2", "correct": true, "explanation": "why this is right"}},
            {{"text": "choice 3", "correct": false, "explanation": "why this is wrong"}},
            {{"text": "choice 4", "correct": false, "explanation": "why this is wrong"}}
        ],
        "difficulty": "{difficulty}",
        "emotion": "{emotion}"
    }}
    Important rules:
    1. Make exactly 4 choices
    2. Only one choice should be correct
    3. Provide concrete explanations
    4. Keep scenario realistic and therapeutic
    5. Ensure explanations are psychologically valid
    6. Make the correct choice non-obvious for harder difficulties
    """
    
    headers = {"Content-Type": "application/json"}
    data = {
        "contents": [{
            "parts": [{
                "text": prompt
            }]
        }],
        "generationConfig": {
            "temperature": 0.7 + (0.1 * ["easy", "medium", "hard"].index(difficulty))
        }
    }
    
    try:
        response = requests.post(
            f"{GEMINI_API_URL}?key={GEMINI_API_KEY}",
            headers=headers,
            json=data
        )
        response.raise_for_status()
        generated_text = response.json()["candidates"][0]["content"]["parts"][0]["text"]
        
        # Extract JSON from response
        json_start = generated_text.find('{')
        json_end = generated_text.rfind('}') + 1
        json_str = generated_text[json_start:json_end]
        
        scenario = json.loads(json_str)
        
        # Validate scenario structure
        if not all(k in scenario for k in ["scenario", "choices", "difficulty"]):
            raise ValueError("Invalid scenario format")
        if len(scenario["choices"]) != 4:
            raise ValueError("Scenario must have exactly 4 choices")
            
        return scenario
    except Exception as e:
        print(f"Error generating scenario: {str(e)}")
        # Return fallback scenario
        return {
            "scenario": f"You're feeling {emotion} after a long day. How do you cope?",
            "choices": [
                {"text": "Bottle up your emotions", "correct": False, "explanation": "Suppressing emotions can lead to increased stress"},
                {"text": "Talk to a trusted friend", "correct": True, "explanation": "Social support helps process emotions effectively"},
                {"text": "Yell at someone nearby", "correct": False, "explanation": "This harms relationships and increases tension"},
                {"text": "Ignore the feeling completely", "correct": False, "explanation": "Avoidance prevents emotional processing"}
            ],
            "difficulty": difficulty,
            "emotion": emotion
        }

class EmotionGameEnv(gym.Env):
    """Custom Gym environment for Emotion-Based RL with adaptive difficulty."""
    
    def __init__(self):
        super(EmotionGameEnv, self).__init__()
        
        # Define state and action spaces
        self.emotions = ["sad", "angry", "happy", "stress", "neutral", "fear", "anxious", "overwhelmed"]
        self.difficulty_adjuster = DifficultyAdjuster()
        self.action_space = spaces.Discrete(4)  # 4 choices per scenario
        self.observation_space = spaces.Dict({
            "emotion": spaces.Discrete(len(self.emotions)),
            "difficulty": spaces.Discrete(len(self.difficulty_adjuster.difficulty_levels)),
            "history": spaces.Box(low=0, high=1, shape=(5,), dtype=np.float32)  # Last 5 performances
        })

        self.state = None
        self.current_scenario = None
        self.performance_history = deque(maxlen=5)  # Tracks last 5 performances
        self.stats = {
            "correct": 0,
            "total": 0,
            "difficulty_stats": {d: {"correct": 0, "total": 0} for d in self.difficulty_adjuster.difficulty_levels},
            "emotion_stats": {e: {"correct": 0, "total": 0} for e in self.emotions}
        }

    def reset(self):
        """Resets the environment for a new episode."""
        emotion = random.choice(self.emotions)
        difficulty = self.difficulty_adjuster.get_current_difficulty()
        
        self.state = {
            "emotion": emotion,
            "difficulty": difficulty,
            "history": np.array(list(self.performance_history) + [0] * (5 - len(self.performance_history)), dtype=np.float32)
        }
        
        # Generate new scenario
        self.current_scenario = generate_gemini_scenario(emotion, difficulty)
        
        return self._get_obs()

    def step(self, action):
        """Takes an action and returns (obs, reward, done, info)."""
        if not self.current_scenario:
            raise ValueError("No scenario loaded")
            
        try:
            chosen_choice = self.current_scenario["choices"][action]
            is_correct = chosen_choice["correct"]
            self.performance_history.append(1 if is_correct else 0)
            
            # Update difficulty based on performance
            new_difficulty = self.difficulty_adjuster.update_difficulty(is_correct)
            
            # Calculate base reward
            if is_correct:
                reward = 10
                self.stats["correct"] += 1
                self.stats["difficulty_stats"][self.current_scenario["difficulty"]]["correct"] += 1
                self.stats["emotion_stats"][self.current_scenario["emotion"]]["correct"] += 1
            else:
                reward = -3
                
            self.stats["total"] += 1
            self.stats["difficulty_stats"][self.current_scenario["difficulty"]]["total"] += 1
            self.stats["emotion_stats"][self.current_scenario["emotion"]]["total"] += 1
            
            # Apply difficulty multiplier
            difficulty = self.current_scenario["difficulty"]
            if difficulty == "medium":
                reward *= 1.5
            elif difficulty == "hard":
                reward *= 2
                
            # Add small reward for attempting harder difficulties
            reward += ["easy", "medium", "hard"].index(difficulty) * 0.5
                
        except (IndexError, KeyError):
            reward = -5  # Penalty for invalid action
            is_correct = False
            
        done = True  # One step per episode
        info = {
            "scenario": self.current_scenario["scenario"],
            "choices": [c["text"] for c in self.current_scenario["choices"]],
            "chosen": self.current_scenario["choices"][action]["text"],
            "correct": is_correct,
            "explanation": chosen_choice["explanation"],
            "difficulty": self.current_scenario["difficulty"],
            "emotion": self.current_scenario["emotion"],
            "stats": self.stats,
            "new_difficulty": new_difficulty
        }
        
        return self._get_obs(), reward, done, info
    
    def _get_obs(self):
        """Convert state to observation space format."""
        return {
            "emotion": self.emotions.index(self.state["emotion"]),
            "difficulty": self.difficulty_adjuster.difficulty_levels.index(self.state["difficulty"]),
            "history": self.state["history"]
        }

def train_rl_model(env, total_timesteps=10000, model_params=None):
    """Trains the RL model using Proximal Policy Optimization (PPO)."""
    params = DEFAULT_MODEL_PARAMS.copy()
    if model_params:
        params.update(model_params)
    
    callback = TrainingCallback()
    model = PPO(**params, env=env)
    
    try:
        print("\n🎯 Training PPO model with adaptive difficulty...\n")
        model.learn(total_timesteps=total_timesteps, callback=callback)
        model.save("emotional_rl_agent_adaptive")
        print(f"\n✅ Model training completed (Mean Reward: {callback.get_mean_reward():.2f})")
        print(f"Saved as 'emotional_rl_agent_adaptive'")
        return model
    except KeyboardInterrupt:
        print("\n⏸ Training interrupted. Saving model...")
        model.save("emotional_rl_agent_adaptive_interrupted")
        return model

def evaluate_model(env, model=None, model_path="emotional_rl_agent_adaptive", num_tests=10):
    """Evaluates the model with detailed output and adaptive difficulty."""
    if model is None:
        print("\n📥 Loading trained model...")
        model = PPO.load(model_path, env=env)

    print("\n🔍 Starting Evaluation with Adaptive Difficulty...")
    for i in range(num_tests):
        obs = env.reset()
        action, _states = model.predict(obs)
        obs, reward, done, info = env.step(action)

        # Display results
        print(f"\n{'='*50}")
        print(f"🏷️  Test Case {i+1}/{num_tests}")
        print(f"🎭 Emotion: {info['emotion'].upper()}")
        print(f"📊 Difficulty: {info['difficulty'].upper()} → New: {info['new_difficulty'].upper()}")
        print(f"\n📜 Scenario: {info['scenario']}")
        
        print("\n💡 Choices:")
        for idx, choice in enumerate(info['choices']):
            marker = "✓" if idx == action else " "
            correctness = "(CORRECT)" if info['choices'][idx] == info['chosen'] and info['correct'] else ""
            print(f" {marker} {idx}. {choice} {correctness}")
        
        print(f"\n🤖 AI Chose: {action}. {info['chosen']}")
        print(f"✅ Correct: {'Yes' if info['correct'] else 'No'}")
        print(f"💡 Explanation: {info['explanation']}")
        print(f"🏆 Reward: {reward:.1f}")
        print(f"{'='*50}")

    # Print summary statistics
    stats = info['stats']
    accuracy = (stats["correct"] / stats["total"]) * 100 if stats["total"] > 0 else 0
    print(f"\n📊 Overall Accuracy: {accuracy:.1f}% ({stats['correct']}/{stats['total']})")
    
    print("\n📈 Difficulty Breakdown:")
    for diff in env.difficulty_adjuster.difficulty_levels:
        diff_stats = stats["difficulty_stats"][diff]
        if diff_stats["total"] > 0:
            acc = (diff_stats["correct"] / diff_stats["total"]) * 100
            print(f"  {diff.upper()}: {acc:.1f}% ({diff_stats['correct']}/{diff_stats['total']})")
    
    print("\n🎭 Emotion Performance:")
    for emotion in sorted(env.emotions):
        emo_stats = stats["emotion_stats"][emotion]
        if emo_stats["total"] > 0:
            acc = (emo_stats["correct"] / emo_stats["total"]) * 100
            print(f"  {emotion.upper()}: {acc:.1f}% ({emo_stats['correct']}/{emo_stats['total']})")

def interactive_demo(env, model=None, model_path="emotional_rl_agent_adaptive"):
    """Interactive demo where human can play against the AI."""
    if model is None:
        print("\n📥 Loading trained model...")
        model = PPO.load(model_path, env=env)

    print("\n🎮 Interactive Mode - Compete against the AI!")
    print("You'll see the same scenarios and choices as the AI.")
    print("Try to get more correct answers than the AI!\n")
    
    num_rounds = int(input("Enter number of rounds (default 5): ") or 5)
    human_score = 0
    ai_score = 0

    for round_num in range(1, num_rounds + 1):
        obs = env.reset()
        scenario = env.current_scenario

        print(f"\n{'='*50}")
        print(f"🔔 Round {round_num}/{num_rounds}")
        print(f"🎭 Emotion: {scenario['emotion'].upper()}")
        print(f"📊 Difficulty: {scenario['difficulty'].upper()}")
        print(f"\n📜 Scenario: {scenario['scenario']}")

        print("\n💡 Choices:")
        for idx, choice in enumerate(scenario['choices']):
            print(f" {idx}. {choice['text']}")

        # Human choice
        while True:
            try:
                human_action = int(input("\nYour choice (0-3): "))
                if 0 <= human_action <= 3:
                    break
                print("Please enter a number between 0 and 3")
            except ValueError:
                print("Please enter a valid number")

        # AI choice
        ai_action, _ = model.predict(obs)

        # Evaluate both
        _, human_reward, _, human_info = env.step(human_action)
        _, ai_reward, _, ai_info = env.step(ai_action)

        # Display results
        print(f"\n🤖 AI chose: {ai_action}. {ai_info['chosen']}")
        print(f"🧑 You chose: {human_action}. {human_info['chosen']}")

        print(f"\n💡 Correct answer: {[c['text'] for c in scenario['choices'] if c['correct']][0]}")

        if human_info['correct']:
            human_score += 1
            print("✅ You got it right!")
        else:
            print(f"❌ Your explanation: {human_info['explanation']}")

        if ai_info['correct']:
            ai_score += 1
            print("✅ AI got it right!")
        else:
            print(f"❌ AI explanation: {ai_info['explanation']}")

        print(f"\n📊 Score: You {human_score} - {ai_score} AI")
        print(f"{'='*50}")

    print("\n🏆 Final Results:")
    print(f"You: {human_score} correct answers")
    print(f"AI: {ai_score} correct answers")

    if human_score > ai_score:
        print("\n🎉 You beat the AI! Great job!")
    elif human_score == ai_score:
        print("\n🤝 It's a tie! Good match!")
    else:
        print("\n🤖 The AI won this time. Try again!")
def main():
    print("🚀 Emotion-Based RL & AI Therapy System - Training Only")
    print(f"🔑 Using Gemini API Key: {GEMINI_API_KEY[:5]}...{GEMINI_API_KEY[-5:]}")
    
    # Initialize environment
    env = EmotionGameEnv()

    # Train PPO model using default config
    print("\n📦 Training PPO model (default: 10000 timesteps)...")
    train_rl_model(env, total_timesteps=10000, model_params=DEFAULT_MODEL_PARAMS)

if __name__ == "__main__":
    main()


🚀 Emotion-Based RL & AI Therapy System - Training Only
🔑 Using Gemini API Key: AIzaS...wPI70

📦 Training PPO model (default: 10000 timesteps)...
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.





🎯 Training PPO model with adaptive difficulty...

Error generating scenario: ('Connection aborted.', ConnectionResetError(10054, 'An existing connection was forcibly closed by the remote host', None, 10054, None))

⏸ Training interrupted. Saving model...


In [None]:
def main():
    print("🚀 Emotion-Based RL & AI Therapy System - Training & Evaluation")
    print(f"🔑 Using Gemini API Key: {GEMINI_API_KEY[:5]}...{GEMINI_API_KEY[-5:]}")

    env = EmotionGameEnv()

    print("\n📦 Training PPO model (default: 10000 timesteps)...")
    model = train_rl_model(env, total_timesteps=10000, model_params=DEFAULT_MODEL_PARAMS)

    print("\n🧪 Evaluating Trained Model...\n")
    evaluate_model(env, model=model, num_tests=5)
if __name__ == "__main__":
    main()


🚀 Emotion-Based RL & AI Therapy System - Training & Evaluation
🔑 Using Gemini API Key: AIzaS...wPI70

📦 Training PPO model (default: 10000 timesteps)...
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.

🎯 Training PPO model with adaptive difficulty...





Error generating scenario: ('Connection aborted.', ConnectionAbortedError(10053, 'An established connection was aborted by the software in your host machine', None, 10053, None))
Error generating scenario: HTTPSConnectionPool(host='generativelanguage.googleapis.com', port=443): Max retries exceeded with url: /v1beta/models/gemini-1.5-flash:generateContent?key=AIzaSyA2rRBLcJx_e5g4d_fVOgG4q2Pf8ewPI70 (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x00000181E58729A0>: Failed to resolve 'generativelanguage.googleapis.com' ([Errno 11001] getaddrinfo failed)"))
Error generating scenario: HTTPSConnectionPool(host='generativelanguage.googleapis.com', port=443): Max retries exceeded with url: /v1beta/models/gemini-1.5-flash:generateContent?key=AIzaSyA2rRBLcJx_e5g4d_fVOgG4q2Pf8ewPI70 (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x00000181E5886340>: Failed to resolve 'generativelanguage.googleapis.com' ([Errno 11001] getaddrinfo fail