In [2]:
import gym
from gym import spaces
import numpy as np

class EmotionSupportEnv(gym.Env):
    """A simple mock environment for an emotion-based recommendation system.

    Observations are discrete emotional states and actions are discrete
    recommendation categories (see ``EMOTIONS`` and ``RECOMMENDATIONS``).
    The dynamics are mocked: neither the next state nor the reward depends
    on the chosen action, and every episode ends after a single step.
    """
    metadata = {'render.modes': ['console']}

    # Labels indexed by the discrete observation / action value.
    EMOTIONS = ('Happy', 'Sad', 'Angry')
    RECOMMENDATIONS = ('Music', 'Movie', 'Book')

    def __init__(self):
        super().__init__()

        # Action space: 0: Music, 1: Movie, 2: Book
        self.action_space = spaces.Discrete(len(self.RECOMMENDATIONS))

        # Observation (state) space: 0: Happy, 1: Sad, 2: Angry
        self.observation_space = spaces.Discrete(len(self.EMOTIONS))

        # Current emotional state; starts at 0 (Happy) until reset() is called.
        self.state = 0

    def step(self, action):
        """Apply a recommendation and return ``(state, reward, done, info)``.

        Args:
            action: Index of the recommendation; must belong to
                ``self.action_space``.

        Returns:
            A tuple of the new emotional state (sampled from the observation
            space), a reward drawn from ``{1, -1}``, ``done`` (always
            ``True``) and an empty ``info`` dict.

        Raises:
            ValueError: If ``action`` is not a member of the action space.
        """
        # Reject out-of-range actions instead of silently scoring them.
        if not self.action_space.contains(action):
            raise ValueError(
                f"Invalid action {action!r}; expected a member of {self.action_space}"
            )

        # Mocked transition: the next emotional state is sampled from the
        # observation space and does not depend on the action.
        self.state = self.observation_space.sample()

        # Mocked reward: 1 for a good recommendation, -1 for a bad one.
        reward = np.random.choice([1, -1])

        done = True  # The episode ends after one step for simplicity.

        info = {}

        return self.state, reward, done, info

    def reset(self):
        """Start a new episode and return the initial emotional state.

        The initial state is sampled from the observation space.
        """
        self.state = self.observation_space.sample()
        return self.state

    def render(self, mode='console'):
        """Print the current emotional state.

        Args:
            mode: Rendering mode; only ``'console'`` is supported.

        Raises:
            NotImplementedError: If ``mode`` is anything other than
                ``'console'``.
        """
        if mode != 'console':
            raise NotImplementedError()

        # Translate the numeric state into its label for display.
        emotion = self.EMOTIONS[self.state]
        print(f"Current emotional state: {emotion}")

# Example usage: run three single-step episodes with random recommendations.
env = EmotionSupportEnv()

obs = env.reset()
env.render()

for _ in range(3):
    action = env.action_space.sample()
    obs, reward, done, info = env.step(action)
    print(f"Action (Recommendation): {action}, Reward: {reward}")
    if done:
        # step() reported the end of the episode; the Gym API requires
        # reset() before step() may be called again.
        obs = env.reset()


Current emotional state: Angry
Action (Recommendation): 2, Reward: -1
Action (Recommendation): 2, Reward: 1
Action (Recommendation): 1, Reward: 1
