In [1]:
import copy
import json
import os
import random
import time
from collections import defaultdict
from typing import Dict, List, Tuple, Optional

import gym
import numpy as np
import openai
from dotenv import load_dotenv
from gym import spaces
from stable_baselines3 import PPO
from stable_baselines3.common.callbacks import EvalCallback, StopTrainingOnRewardThreshold
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.monitor import Monitor


In [None]:
# Load OpenAI API Key securely from .env file
def load_api_key() -> None:
    """Read OPENAI_API_KEY from the project's .env file and hand it to openai.

    Raises:
        ValueError: If no non-empty OPENAI_API_KEY is available after the
            .env file has been loaded.
    """
    load_dotenv("../models/key.env")
    key = os.getenv("OPENAI_API_KEY")

    if key:
        openai.api_key = key
        print("✅ OpenAI API Key loaded successfully!")
        return

    # Reached only when the variable is unset or empty.
    raise ValueError("❌ OpenAI API Key not found. Make sure it's in the .env file.")
 

In [None]:
# Enhanced dataset loading with validation
def load_dataset(dataset_path: str = "../datasets/game.json") -> List[Dict]:
    """Load and validate the JSON dataset of emotion-based scenarios.

    The file must hold a JSON object with an "intents" list. Every intent must
    be an object providing the keys "scenario", "responses",
    "correct_response" and "difficulty"; "responses" must be a list whose
    entries are objects with "text" and "reward" fields.

    Args:
        dataset_path: Location of the JSON file. Defaults to the path this
            notebook has always used, so existing zero-argument calls behave
            as before.

    Returns:
        The list stored under the "intents" key.

    Raises:
        FileNotFoundError: If ``dataset_path`` does not exist.
        ValueError: If the file is not valid JSON or fails validation.
    """
    required_keys = {"scenario", "responses", "correct_response", "difficulty"}

    try:
        # JSON is UTF-8 by specification; do not depend on the platform default.
        with open(dataset_path, "r", encoding="utf-8") as file:
            dataset = json.load(file)
    except FileNotFoundError as err:
        raise FileNotFoundError(f"❌ Dataset file '{dataset_path}' not found.") from err
    except json.JSONDecodeError as err:
        raise ValueError(f"❌ Failed to parse JSON file '{dataset_path}'.") from err

    # Without this check a file lacking "intents" would skip validation
    # entirely and then fail with a bare KeyError further down.
    if not isinstance(dataset, dict) or not isinstance(dataset.get("intents"), list):
        raise ValueError("❌ Invalid dataset structure. Expected a JSON object with an 'intents' list.")
    intents = dataset["intents"]

    for intent in intents:
        # The isinstance guard matters: `key in "some string"` is a substring
        # test and would give meaningless results for non-object intents.
        if not isinstance(intent, dict) or not required_keys.issubset(intent):
            raise ValueError(f"❌ Invalid dataset structure. Missing required keys in intent: {intent}")

        responses = intent["responses"]
        if not isinstance(responses, list):
            raise ValueError("❌ 'responses' must be a list of response objects")

        for response in responses:
            if not isinstance(response, dict) or "text" not in response or "reward" not in response:
                raise ValueError("❌ Each response must have 'text' and 'reward' fields")

    print(f"✅ Dataset loaded successfully with {len(intents)} scenarios!")
    return intents


In [None]:
# Stress measurement metrics
class StressMetrics:
    """Accumulates a clamped stress level plus choice and timing statistics."""

    def __init__(self):
        self.correct_choices = 0
        self.incorrect_choices = 0
        self.response_times = []
        self.stress_history = []
        self.stress_level = 0

    @staticmethod
    def _mean_or_zero(values):
        """Arithmetic mean of ``values``; 0 when ``values`` is empty."""
        return sum(values) / len(values) if values else 0

    def update_stress(self, reward: float, response_time: float) -> None:
        """Record one interaction and move the stress level accordingly.

        A negative reward counts as an incorrect choice (+0.2 stress); any
        other reward counts as correct (-0.1 stress). On top of that the
        response time adds ``response_time / 10``, capped at 0.5. The resulting
        level is clamped to the range 0..1 and appended to the history.
        """
        chose_badly = reward < 0
        if chose_badly:
            self.incorrect_choices += 1
        else:
            self.correct_choices += 1

        base_change = 0.2 if chose_badly else -0.1
        slowness_penalty = min(response_time / 10, 0.5)
        delta = base_change + slowness_penalty

        capped_above = min(1, self.stress_level + delta)
        self.stress_level = max(0, capped_above)

        self.response_times.append(response_time)
        self.stress_history.append(self.stress_level)

    def get_stress_score(self) -> float:
        """Return the plain mean of the (up to) five latest stress readings.

        Returns 0 when nothing has been recorded yet.
        """
        latest = self.stress_history[-5:]
        if not latest:
            return 0
        return sum(latest) / len(latest)

    def get_summary(self) -> Dict:
        """Return the current level, averages and choice counts as a dict."""
        summary = {}
        summary["current_stress"] = self.stress_level
        summary["average_stress"] = self._mean_or_zero(self.stress_history)
        summary["correct_choices"] = self.correct_choices
        summary["incorrect_choices"] = self.incorrect_choices
        summary["average_response_time"] = self._mean_or_zero(self.response_times)
        return summary


In [None]:

# Enhanced Emotion-Based RL Environment
class EmotionGameEnv(gym.Env):
    """Custom Gym environment in which an agent answers emotional scenarios.

    Observations are dicts with two discrete entries: "emotion" (an index
    into ``self.emotions``) and "difficulty" (an index into
    ``self.difficulty_levels``). Actions are integers selecting one of the
    current scenario's responses (``Discrete(4)``).

    One scenario is drawn per episode in ``reset()``; every ``step()`` scores
    the chosen response against that scenario, updates the stress metrics and
    appends a record to the interaction history.
    """

    metadata = {'render.modes': ['human']}

    def __init__(self):
        super(EmotionGameEnv, self).__init__()

        # Load emotion dataset and set up bookkeeping
        self.dataset = load_dataset()
        self.stress_metrics = StressMetrics()
        self.interaction_history = []
        self.current_episode = 0

        # Define state and action spaces
        self.emotions = ["sad", "angry", "happy", "stress", "neutral", "fear", "anxious"]
        self.difficulty_levels = ["easy", "medium", "hard"]

        # State includes emotion and difficulty
        self.observation_space = spaces.Dict({
            "emotion": spaces.Discrete(len(self.emotions)),
            "difficulty": spaces.Discrete(len(self.difficulty_levels))
        })

        self.action_space = spaces.Discrete(4)  # 4 choices per scenario
        self.max_episode_length = 10  # Maximum interactions per episode

        self.state = None
        self.current_scenario = None
        self.episode_step = 0
        self.available_scenarios = self._categorize_scenarios()

    def _categorize_scenarios(self) -> Dict[str, List[Dict]]:
        """Group the dataset's scenarios by their "emotion" field.

        Scenarios without an "emotion" key are filed under "neutral".
        Difficulty is not part of the grouping; ``reset()`` filters on it.
        """
        categorized = defaultdict(list)
        for scenario in self.dataset:
            categorized[scenario.get("emotion", "neutral")].append(scenario)
        return categorized

    def reset(self):
        """Start a new episode and return its initial observation.

        A random emotion and difficulty are drawn, then a scenario matching
        both is picked. If none matches, a scenario is drawn from the whole
        dataset instead (the observation still carries the drawn emotion and
        difficulty).
        """
        self.current_episode += 1
        self.episode_step = 0

        # Select random emotion and difficulty
        emotion = random.choice(self.emotions)
        difficulty = random.choice(self.difficulty_levels)

        # Filter scenarios by emotion and difficulty
        possible_scenarios = [
            s for s in self.available_scenarios.get(emotion, [])
            if s.get("difficulty", "easy") == difficulty
        ]

        if not possible_scenarios:
            possible_scenarios = self.dataset  # Fallback to all scenarios

        self.current_scenario = random.choice(possible_scenarios)
        self.state = {
            "emotion": self.emotions.index(emotion),
            "difficulty": self.difficulty_levels.index(difficulty)
        }

        return self.state

    def step(self, action: int) -> Tuple[Dict, float, bool, Dict]:
        """Apply ``action`` to the current scenario.

        Args:
            action: Index of the chosen response. NumPy integers and 0-d
                arrays are accepted and converted to a plain ``int``.

        Returns:
            ``(observation, reward, done, info)``. The reward is the chosen
            response's "reward" scaled by 1.5 on "hard" and 0.8 on "easy";
            an out-of-range action or a malformed scenario yields -1. The
            episode ends after ``max_episode_length`` steps or as soon as a
            reward below -0.5 is received.

        Raises:
            RuntimeError: If called before ``reset()``.
        """
        if self.current_scenario is None or self.state is None:
            raise RuntimeError("step() was called before reset(); call reset() first.")

        start_time = time.time()
        self.episode_step += 1

        # Store and index with a plain int: NumPy values are not JSON
        # serializable, and the history is meant to be dumped to JSON.
        action = int(action)

        try:
            responses = self.current_scenario["responses"]
            # Negative indices would silently wrap to the end of the list,
            # so they are treated as invalid just like too-large ones.
            if not 0 <= action < len(responses):
                reward = -1  # Penalize invalid actions
                response_data = {"text": "Invalid choice", "reward": reward}
            else:
                response_data = responses[action]
                reward = response_data["reward"]

                # Adjust reward based on difficulty
                difficulty = self.difficulty_levels[self.state["difficulty"]]
                if difficulty == "hard":
                    reward *= 1.5
                elif difficulty == "easy":
                    reward *= 0.8
        except (IndexError, KeyError):
            reward = -1  # Default penalty for errors
            response_data = {"text": "Error in scenario", "reward": reward}

        # Calculate response time and update stress metrics
        response_time = time.time() - start_time
        self.stress_metrics.update_stress(reward, response_time)

        # Track interaction history
        self.interaction_history.append({
            "episode": self.current_episode,
            "step": self.episode_step,
            "emotion": self.emotions[self.state["emotion"]],
            "scenario": self.current_scenario["scenario"],
            "action": action,
            "response": response_data["text"],
            "reward": reward,
            "response_time": response_time,
            "stress_level": self.stress_metrics.stress_level
        })

        # Check if episode should end
        done = self.episode_step >= self.max_episode_length
        if reward < -0.5:  # End early on very bad choices
            done = True

        # info describes the step that was just scored, so it is built from
        # the state *before* the transition below.
        info = {
            "scenario": self.current_scenario["scenario"],
            "response": response_data["text"],
            "stress_level": self.stress_metrics.stress_level,
            "correct_response": self.current_scenario.get("correct_response", ""),
            "difficulty": self.difficulty_levels[self.state["difficulty"]]
        }

        # Get next state (same emotion but may change difficulty): the
        # current difficulty is three times as likely as each neighbour.
        current_difficulty = self.state["difficulty"]
        next_state = {
            "emotion": self.state["emotion"],
            "difficulty": random.choice([current_difficulty] * 3 +
                                        [max(0, current_difficulty - 1),
                                         min(len(self.difficulty_levels) - 1, current_difficulty + 1)])
        }

        # Commit the transition. Previously the new observation was returned
        # without being stored, so render() and the reward multiplier kept
        # using the initial difficulty while the agent observed another one.
        self.state = next_state

        return next_state, reward, done, info

    def render(self, mode='human'):
        """Print the current emotion, difficulty, scenario, stress and step."""
        if mode == 'human':
            print(f"\nCurrent Emotion: {self.emotions[self.state['emotion']]}")
            print(f"Difficulty: {self.difficulty_levels[self.state['difficulty']]}")
            print(f"Scenario: {self.current_scenario['scenario']}")
            print(f"Current Stress Level: {self.stress_metrics.stress_level:.2f}")
            print(f"Episode Step: {self.episode_step}/{self.max_episode_length}")

    def get_history(self) -> List[Dict]:
        """Return the list of per-step interaction records (not a copy)."""
        return self.interaction_history

    def get_stress_summary(self) -> Dict:
        """Return the summary dict produced by the stress metrics tracker."""
        return self.stress_metrics.get_summary()


In [None]:

# Enhanced GPT response generator with therapy techniques
def get_therapy_response(emotion: str, scenario: str, stress_level: float, history: List[Dict]) -> str:
    """Ask the chat model for a short therapeutic reply to the current situation.

    Args:
        emotion: Name of the emotion the user is feeling; selects the therapy
            approach named in the prompt (unknown names use general counseling).
        scenario: Text of the situation.
        stress_level: Stress value shown in the prompt as "x.xx/1.0".
        history: Interaction records; only the last three are summarised.
            Each needs the keys "emotion", "scenario", "response", "reward"
            and "stress_level".

    Returns:
        The model's reply, or an apology string containing the error text if
        the API call fails for any reason.
    """
    # One sentence of context per recent interaction (at most three).
    latest_entries = history[-3:] if history else []
    context_lines = []
    for entry in latest_entries:
        context_lines.append(
            f"Previous interaction: When feeling {entry['emotion']} in situation '{entry['scenario']}', "
            f"the response was '{entry['response']}' which resulted in reward {entry['reward']:.1f} "
            f"and stress level {entry['stress_level']:.2f}."
        )
    context = "\n".join(context_lines)

    # Map the emotion onto the technique the prompt should ask for.
    default_approach = "general counseling techniques"
    approach = {
        "stress": "cognitive behavioral therapy techniques",
        "anxious": "grounding exercises and mindfulness",
        "angry": "anger management strategies",
        "sad": "positive reframing and self-compassion exercises",
        "happy": "reinforcement of positive behaviors",
        "fear": "exposure therapy principles",
        "neutral": "general counseling techniques",
    }.get(emotion, default_approach)

    prompt_parts = [
        f"Act as an AI therapist helping a user manage emotions. The user is currently feeling {emotion} ",
        f"(stress level: {stress_level:.2f}/1.0) in this situation: '{scenario}'. ",
        f"Use {approach} to provide a helpful response. Keep it concise (1-2 sentences) and therapeutic. ",
        f"Here's recent context:\n{context}\n\nTherapeutic response:",
    ]
    prompt = "".join(prompt_parts)

    chat_messages = [
        {"role": "system", "content": "You are a compassionate AI therapist trained in CBT, DBT, and mindfulness techniques."},
        {"role": "user", "content": prompt},
    ]

    try:
        completion = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",  # Using more cost-effective model
            messages=chat_messages,
            temperature=0.7,
            max_tokens=150,
        )
        return completion["choices"][0]["message"]["content"]
    except Exception as err:
        # Any failure is turned into a user-facing message.
        return f"I'm having trouble generating a response right now. Please try again later. Error: {err!s}"


In [None]:

# Enhanced RL Model Training with Callbacks
def train_rl_model(env: EmotionGameEnv, total_timesteps: int = 20000) -> PPO:
    """Train a PPO agent on ``env`` with periodic evaluation and early stopping.

    Args:
        env: Environment to train on. It is validated with ``check_env`` first.
        total_timesteps: Number of environment steps to train for.

    Returns:
        The PPO model. After a normal run it has been saved as
        'emotional_rl_agent_enhanced'; if training is interrupted with
        Ctrl-C it is saved as 'emotional_rl_agent_enhanced_interrupted'
        instead.
    """
    # Environment check
    check_env(env)

    # Evaluate on an independent copy of the environment. Passing the training
    # env itself to EvalCallback would let every evaluation reset and step the
    # very instance PPO is collecting rollouts from, and would also mix
    # evaluation steps into its stress metrics and interaction history.
    # Monitor wrapping mirrors what PPO does to the training env
    # (NOTE(review): PPO's automatic Monitor wrapping is SB3 behaviour, not
    # visible in this file - confirm for the installed version).
    eval_env = Monitor(copy.deepcopy(env))

    # Callback for early stopping and model saving
    eval_callback = EvalCallback(
        eval_env,
        callback_on_new_best=StopTrainingOnRewardThreshold(reward_threshold=5.0, verbose=1),
        verbose=1,
        eval_freq=1000,
        best_model_save_path="./best_models/"
    )

    model = PPO(
        "MultiInputPolicy",  # the observation space is a Dict
        env,
        verbose=1,
        learning_rate=0.0003,
        n_steps=2048,
        batch_size=64,
        n_epochs=10,
        gamma=0.99,
        gae_lambda=0.95,
        clip_range=0.2,
        ent_coef=0.01,
        tensorboard_log="./tensorboard_logs/"
    )

    try:
        print("\n🎯 Training PPO model with enhanced settings...\n")
        model.learn(
            total_timesteps=total_timesteps,
            callback=eval_callback,
            tb_log_name="emotion_rl"
        )
        model.save("emotional_rl_agent_enhanced")
        print("✅ Model training completed and saved as 'emotional_rl_agent_enhanced'.")
    except KeyboardInterrupt:
        # Keep whatever was learned so far, under a separate name.
        print("⏸ Training interrupted. Saving model...")
        model.save("emotional_rl_agent_enhanced_interrupted")

    return model

# Comprehensive Evaluation Function
def evaluate_model(env: EmotionGameEnv, model_path: str = "emotional_rl_agent_enhanced", num_tests: int = 5) -> None:
    """Run a saved PPO model for several episodes and print a detailed trace.

    For every step the chosen action, reward, stress level and a generated
    therapeutic comment are printed; after every episode a stress summary
    follows.

    Args:
        env: Environment to evaluate in. Its ``stress_metrics`` attribute is
            replaced with a fresh tracker before and after each episode.
        model_path: Path handed to ``PPO.load``.
        num_tests: Number of episodes to run.

    Returns:
        None. If the model file is missing, a message is printed and the
        function returns without evaluating.
    """
    print("\n📊 Starting comprehensive evaluation...")

    try:
        model = PPO.load(model_path)
    except FileNotFoundError:
        print(f"❌ Model file '{model_path}' not found. Please train the model first.")
        return

    for test_num in range(1, num_tests + 1):
        print(f"\n=== Test Case {test_num}/{num_tests} ===")

        # Start each test case with clean metrics. The same env object may have
        # been stepped before (e.g. during training), and env.reset() does not
        # clear the stress tracker, so the first episode would otherwise
        # report stress carried over from those earlier steps.
        env.stress_metrics = StressMetrics()

        obs = env.reset()
        done = False
        total_reward = 0
        episode_steps = 0

        while not done:
            env.render()
            action, _states = model.predict(obs)
            # Hand the env a plain int so the value it records in its history
            # is JSON serializable and prints as a bare number.
            action = int(action)
            obs, reward, done, info = env.step(action)

            # Get therapeutic response
            emotion = env.emotions[obs["emotion"]]
            gpt_response = get_therapy_response(
                emotion,
                info["scenario"],
                env.stress_metrics.stress_level,
                env.get_history()
            )

            total_reward += reward
            episode_steps += 1

            print(f"\nStep {episode_steps}:")
            print(f"Emotion: {emotion}")
            print(f"Difficulty: {info['difficulty']}")
            print(f"Scenario: {info['scenario']}")
            print(f"Action Taken: {action}")
            print(f"Response: {info['response']}")
            print(f"Reward: {reward:.2f} (Total: {total_reward:.2f})")
            print(f"Stress Level: {env.stress_metrics.stress_level:.2f}")
            print(f"Therapeutic Feedback: {gpt_response}")

        # The loop only exits once the episode is done.
        print("\nEpisode completed!")
        print(f"Total Reward: {total_reward:.2f}")
        print(f"Steps Taken: {episode_steps}")
        print(f"Final Stress Level: {env.stress_metrics.stress_level:.2f}")

        # Show stress metrics summary (ints verbatim, everything else to 2 dp)
        stress_summary = env.stress_metrics.get_summary()
        print("\nStress Metrics Summary:")
        for k, v in stress_summary.items():
            shown = v if isinstance(v, int) else f"{v:.2f}"
            print(f"- {k.replace('_', ' ').title()}: {shown}")

        # Leave the env with fresh metrics once the episode is over.
        env.stress_metrics = StressMetrics()


In [None]:

# Main Execution Function with Enhanced Features
def _json_default(value):
    """Fallback encoder for values the json module cannot serialize itself."""
    # NumPy scalars and arrays expose tolist(), which yields plain Python
    # values; anything else is stored as its string form.
    if hasattr(value, "tolist"):
        return value.tolist()
    return str(value)


def main():
    """Interactive driver: load the key, train or load a model, evaluate, save.

    Prompts on stdin for whether to train, the number of timesteps or the
    model path, the number of test episodes and whether to write the
    interaction history to 'therapy_session_history.json'. Any exception is
    reported on stdout instead of propagating.
    """
    print("🚀 Starting Enhanced Emotion-Based RL Therapy System...\n")

    try:
        # Load OpenAI API Key
        load_api_key()

        # Initialize the environment
        env = EmotionGameEnv()

        # train_rl_model saves under this name when training completes.
        model_path = "emotional_rl_agent_enhanced"

        # Train or load the RL model
        train_new_model = input("Train new model? (y/n): ").strip().lower() == 'y'

        if train_new_model:
            timesteps = int(input("Enter training timesteps (default: 20000): ") or 20000)
            train_rl_model(env, total_timesteps=timesteps)
        else:
            model_path = input("Enter model path (default: emotional_rl_agent_enhanced): ") or model_path
            # Fail fast on an unusable path before asking further questions.
            PPO.load(model_path)

        # Evaluate the model with therapeutic feedback. The chosen path is
        # passed on explicitly; previously evaluation always loaded the
        # default file, ignoring whatever path the user had entered.
        num_tests = int(input("Enter number of test cases (default: 5): ") or 5)
        evaluate_model(env, model_path=model_path, num_tests=num_tests)

        # Save interaction history
        save_history = input("Save interaction history? (y/n): ").strip().lower() == 'y'
        if save_history:
            history = env.get_history()
            # Serialize before opening the file so a serialization error
            # cannot leave a truncated JSON file behind.
            serialized = json.dumps(history, indent=2, default=_json_default)
            with open("therapy_session_history.json", "w") as f:
                f.write(serialized)
            print("✅ Session history saved to 'therapy_session_history.json'")

    except Exception as e:
        print(f"❌ An error occurred: {str(e)}")
    finally:
        print("\n🏁 Session completed. Wishing you emotional well-being!")

# Entry point: start the interactive session when this code is executed
# directly (as a script, or by running this cell in a notebook kernel, where
# __name__ is also "__main__") rather than imported as a module.
if __name__ == "__main__":
    main()