# In [None]:
# First, import the required libraries
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt

# Step 1: Define the LSTM model for predictive action by the supervisor
class PredictiveModel(nn.Module):
    """LSTM followed by a linear head that reads the final time step.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the LSTM hidden state.
        output_size: Number of values produced by the linear head.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # batch_first=True: inputs are laid out as (batch, seq, feature).
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Map a batched sequence ``x`` to one output vector per batch item."""
        sequence_out, _final_state = self.lstm(x)
        # Only the LSTM output at the last time step feeds the linear head.
        last_step = sequence_out[:, -1, :]
        return self.fc(last_step)

# Step 2: Define the Supervisor (the worker is simulated by the function in Step 3)
class Supervisor:
    """Suggests worker actions and tracks how actions interact.

    Attributes set in ``__init__``:
        model: ``PredictiveModel`` built with 4 input features, 16 hidden
            units and 2 outputs.
        influence_matrix: 10x10 float array accumulating rewards per
            ``(action, suggested_action)`` pair.
        exploration_rate: Current exploration rate; starts at 1.0 and is
            floored at 0.1 by ``adaptive_exploration``.
    """

    def __init__(self):
        # Define the LSTM model for action prediction
        self.model = PredictiveModel(input_size=4, hidden_size=16, output_size=2)  # Predicting 2 actions (x, y)
        self.influence_matrix = np.zeros((10, 10))  # Interaction effect matrix between actions
        self.exploration_rate = 1.0  # Initial exploration rate

    def predict_action(self, state):
        """Predict the next action for the worker (x, y positions).

        Args:
            state: Either a flat feature vector (treated as a single time
                step) or a ``(seq_len, features)`` sequence of such vectors.

        Returns:
            The model output for a batch of one, as a NumPy array.
        """
        state_tensor = torch.tensor(state, dtype=torch.float32)
        if state_tensor.dim() == 1:
            # The model indexes its LSTM output as (batch, seq, hidden), so a
            # flat feature vector needs an explicit time axis of length 1;
            # without it the forward pass fails on a 2-D input.
            state_tensor = state_tensor.unsqueeze(0)
        state_tensor = state_tensor.unsqueeze(0)  # Add batch dimension
        # Inference only: the result is detached, so skip autograd tracking.
        with torch.no_grad():
            prediction = self.model(state_tensor)
        return prediction.detach().numpy()

    def update_influence_matrix(self, action, suggested_action, reward):
        """Add ``reward`` to one matrix cell, then rescale so the max is 1.

        Args:
            action: Row index into the influence matrix.
            suggested_action: Column index into the influence matrix.
            reward: Value added to the selected cell.
        """
        self.influence_matrix[action, suggested_action] += reward
        peak = np.max(self.influence_matrix)
        # Dividing by a zero peak would turn every entry into NaN for good;
        # only normalize when there is a positive maximum to scale by.
        if peak > 0:
            self.influence_matrix /= peak

    def adaptive_exploration(self, complexity):
        """Decay the exploration rate based on environment complexity.

        The stored rate is multiplied by ``exp(-0.1 * complexity)`` and
        floored at 0.1; the updated rate is stored and returned.
        """
        self.exploration_rate = max(0.1, self.exploration_rate * np.exp(-complexity * 0.1))
        return self.exploration_rate

# Step 3: Simulate Worker Actions
def simulate_worker_action(supervisor, worker_state, complexity):
    """Choose the worker's next action with an explore/exploit rule.

    Args:
        supervisor: Object providing ``predict_action(state)`` and
            ``adaptive_exploration(complexity)``.
        worker_state: State passed through to ``supervisor.predict_action``.
        complexity: Value passed through to
            ``supervisor.adaptive_exploration``.

    Returns:
        Tuple ``(action, exploration_rate)``.
    """
    # The supervisor is queried for a prediction first, then for the rate.
    model_output = supervisor.predict_action(worker_state)
    epsilon = supervisor.adaptive_exploration(complexity)

    # With probability epsilon take a uniformly random action out of 10;
    # otherwise take the index of the largest predicted value.
    should_explore = np.random.rand() < epsilon
    chosen = np.random.choice(10) if should_explore else np.argmax(model_output)

    return chosen, epsilon

# Step 4: Visualization Function
def plot_influence_matrix(supervisor):
    """Show the supervisor's influence matrix as a heat map with a colour bar."""
    matrix = supervisor.influence_matrix
    # 'nearest' interpolation keeps each matrix cell as a solid square.
    plt.imshow(matrix, interpolation='nearest', cmap='hot')
    plt.title("Influence Matrix")
    plt.colorbar()
    plt.show()

# Step 5: Main Loop to Simulate the Interaction
def run_simulation():
    """Drive a 50-step supervisor/worker interaction loop.

    Each step draws a random complexity, asks ``simulate_worker_action`` for
    an action and the current exploration rate, draws a random reward, and
    credits that reward to the ``(action, action)`` cell of the supervisor's
    influence matrix. Progress is printed every tenth step.
    """
    supervisor = Supervisor()

    # Initial state of the worker: [x_position, y_position, x_velocity, y_velocity]
    worker_state = [0.5, 0.5, 1.0, 0.0]

    # Simulate for 50 steps
    for step in range(50):
        # Simulate the worker's action
        complexity = np.random.rand()  # Random complexity in [0, 1) for each step
        action, exploration_rate = simulate_worker_action(supervisor, worker_state, complexity)

        # Simulate the reward for the action
        reward = np.random.rand() * 0.5  # Random reward in [0, 0.5)

        # Update the influence matrix with the action taken by the worker.
        # The same index is passed as both row and column, so only diagonal
        # cells receive reward here.
        supervisor.update_influence_matrix(action, action, reward)  # Using action to update matrix

        # Print progress every 10th step
        if step % 10 == 0:
            print(f"Step {step}: Worker action: {action}, Exploration rate: {exploration_rate:.2f}")

        # Update the worker state: both position entries advance by a fixed 0.1;
        # the velocity entries are carried over unchanged and are not used in
        # the position update.
        worker_state = [worker_state[0] + 0.1, worker_state[1] + 0.1, worker_state[2], worker_state[3]]