In [8]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import torch
import os
import json
import random
import numpy as np
import gym
from gym import spaces
from stable_baselines3 import PPO
import matplotlib.pyplot as plt
import ipywidgets as widgets
from IPython.display import display, clear_output

# ✅ Load seed dataset
def load_seed_dataset():
    path = "../datasets/game.json"
    try:
        with open(path, "r") as file:
            return json.load(file)["intents"]
    except Exception as e:
        print(f"❌ Dataset load error: {e}")
        return []
      
def generate_followup_scenario_with_flan(prev_scenario, chosen_option, model, tokenizer):
    print("🔁 Generating a follow-up scenario based on the user's choice...\n")

    prompt = (
        f"You are a therapeutic AI that creates emotional decision-making game scenarios.\n"
        f"Here is what happened previously:\n"
        f"Scenario: {prev_scenario['scenario']}\n"
        f"User chose: {chosen_option}\n"
        f"Now, generate a new follow-up scenario as a JSON object with:\n"
        f"- 'scenario': a stressful or emotional situation\n"
        f"- 'emotion': one of ['happy', 'sad', 'stressed', 'angry', 'anxious', 'confused']\n"
        f"- 'difficulty': 'easy', 'medium', or 'hard'\n"
        f"- 'responses': list of 4 responses, each with 'option' and 'reward' (-3 to +3)\n"
        f"- 'best_choice_index': the index of the most appropriate response\n"
        f"Only return the JSON object."
    )

    inputs = tokenizer(prompt, return_tensors="pt", max_length=512, truncation=True)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=512,
            do_sample=True,
            top_k=50,
            top_p=0.95,
            temperature=0.8
        )
    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
    try:
        new_scenario = json.loads(decoded)
        print("✅ Follow-up scenario generated successfully.")
        return new_scenario
    except json.JSONDecodeError:
        print("❌ Failed to parse JSON. Raw output:")
        print(decoded)
        return None

# ✅ Emotion-based Gym Environment
class EmotionGameEnv(gym.Env):
    def __init__(self, combined_data):
        super().__init__()
        self.dataset = combined_data
        self.emotions = sorted(list(set(s["emotion"] for s in self.dataset)))
        print(f"✅ Detected emotions: {self.emotions}")

        self.action_space = spaces.Discrete(4)
        self.observation_space = spaces.Discrete(len(self.emotions))

        self.stress = 0.5
        self.stress_history = []
        self.state = None
        self.current_scenario = None

    def reset(self):
        self.stress = 0.5
        self.stress_history = [self.stress]
        self.current_scenario = random.choice(self.dataset)
        self.state = self.current_scenario["emotion"]
        return self.emotions.index(self.state)

    def step(self, action):
        try:
            reward = self.current_scenario["responses"][action]["reward"]
        except:
            reward = 0

        self.stress = min(1.0, max(0.0, self.stress - reward * 0.05 + random.uniform(-0.02, 0.02)))
        self.stress_history.append(self.stress)

        done = True
        info = {
            "scenario": self.current_scenario["scenario"],
            "stress": self.stress
        }
        return self.emotions.index(self.state), reward, done, info

# ✅ Train RL model
def train_rl_model(env, total_timesteps=5000):
    model = PPO("MlpPolicy", env, verbose=0)
    print("🎯 Training RL model...")
    model.learn(total_timesteps=total_timesteps)
    model.save("emotional_rl_agent")
    print("✅ Model saved as 'emotional_rl_agent'.")
    return model

# ✅ Evaluate and plot stress
def evaluate_model(env, model_path="emotional_rl_agent", num_tests=5):
    model = PPO.load(model_path)
    print("\n📊 Evaluating model...\n")
    for _ in range(num_tests):
        obs = env.reset()
        action, _ = model.predict(obs)
        obs, reward, done, info = env.step(action)

        print(f"\n🎭 Emotion: {env.state}")
        print(f"📜 Scenario: {info['scenario']}")
        print(f"✅ Action Taken: {action}")
        print(f"🏆 Reward: {reward}")
        print(f"📈 Stress Level: {round(info['stress'], 2)}")

    plt.plot(env.stress_history)
    plt.title("📉 Stress Level Over Time")
    plt.xlabel("Step")
    plt.ylabel("Stress")
    plt.grid(True)
    plt.show()

# ✅ Interactive button-based gameplay
def adaptive_interactive_gameplay(initial_scenario, model, tokenizer, rounds=5):
    current_scenario = initial_scenario
    env = EmotionGameEnv([current_scenario])  # Start with single scenario
    output = widgets.Output()

    def play_round(round_index):
        nonlocal current_scenario  # ✅ Declare nonlocal once at the start of the round
        env.dataset = [current_scenario]
        env.reset()

        with output:
            clear_output()
            print(f"\n🎭 Emotion: {current_scenario['emotion']}")
            print(f"📜 Scenario: {current_scenario['scenario']}")

        buttons = []

        def handle_choice(i):
            def on_click(btn):
                nonlocal current_scenario  # ✅ Must be declared before any usage inside this function

                user_choice = current_scenario["responses"][i]["option"]
                _, reward, _, info = env.step(i)

                with output:
                    clear_output()
                    print(f"✅ You chose: {user_choice}")
                    print(f"🏆 Reward: {reward}")
                    print(f"📉 New Stress Level: {round(info['stress'], 2)}")
                    plt.plot(env.stress_history)
                    plt.title("📉 Stress Level")
                    plt.xlabel("Step")
                    plt.ylabel("Stress")
                    plt.grid(True)
                    plt.show()

                if round_index + 1 < rounds:
                    new_scenario = generate_followup_scenario_with_flan(current_scenario, user_choice, model, tokenizer)
                    if new_scenario:
                        current_scenario = new_scenario
                        next_button = widgets.Button(description="➡ Next Scenario")
                        next_button.on_click(lambda b: play_round(round_index + 1))
                        display(next_button)
                    else:
                        print("❌ Failed to generate next scenario.")
                else:
                    print("\n🎉 Game session complete.")
            return on_click

        for i, resp in enumerate(current_scenario["responses"]):
            btn = widgets.Button(description=resp['option'])
            btn.on_click(handle_choice(i))
            buttons.append(btn)

        display(*buttons)
        display(output)

    play_round(0)


# ✅ Main execution
def main():
    print("🚀 Adaptive Emotion RL Game (FLAN-T5)...\n")
    seed_data = load_seed_dataset()
    if not seed_data:
        print("❌ No seed data found.")
        return

    model_name = "google/flan-t5-small"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

    first_scenario = random.choice(seed_data)
    adaptive_interactive_gameplay(first_scenario, model, tokenizer, rounds=5)

main()


🚀 Adaptive Emotion RL Game (FLAN-T5)...

✅ Detected emotions: ['happy']


Button(description='Share the news with close friends and family.', style=ButtonStyle())

Button(description='Treat yourself to something you enjoy.', style=ButtonStyle())

Button(description='Downplay your achievement and not share it with anyone.', style=ButtonStyle())

Button(description='Compare yourself to others and feel like it’s not good enough.', style=ButtonStyle())

Output()

In [None]:
# ✅ Train RL model and save as .zip and .pkl
def train_rl_model(env, total_timesteps=5000):
    model = PPO("MlpPolicy", env, verbose=0)
    print("🎯 Training RL model...")
    model.learn(total_timesteps=total_timesteps)

    # Standard SB3 save
    model.save("emotional_rl_agent")
    print("✅ Model saved as 'emotional_rl_agent.zip'.")

    # Optional: Save as .pkl
    import joblib
    joblib.dump(model, "emotional_rl_agent.pkl")
    print("✅ Model also saved as 'emotional_rl_agent.pkl'.")

    return model
  