In [1]:
from air_hockey_challenge.framework.air_hockey_challenge_wrapper import AirHockeyChallengeWrapper
from air_hockey_challenge.framework.challenge_core import ChallengeCore
from air_hockey_challenge.framework.agent_base import AgentBase
from examples.control.hitting_agent import build_agent, HittingAgent
from examples.control.hitting_agent_wait import HittingAgentWait

from mushroom_rl.utils.dataset import parse_dataset, select_random_samples
from mushroom_rl.policy import GaussianTorchPolicy

import torch
import torch.nn as nn

from tqdm import tqdm

import pickle

import numpy as np

use_cuda = torch.cuda.is_available()
device = 'cuda' if use_cuda else 'cpu'
print(f"Cuda: {use_cuda}")


Cuda: False


In [2]:
import torch
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim

class MushroomRLTrajectoryDataset(Dataset):
    def __init__(self, mdp, agent, n_episodes):
        """
        Initialize the MushroomRLTrajectoryDataset.

        Args:
            mdp (mushroom_rl.environments.Environment): the MDP (Markov Decision Process).
            agent: the agent to evaluate.
        """
        self.core = ChallengeCore(agent, mdp)
                # Initialize empty lists for each data type
        state_list, action_list, reward_list, next_state_list, absorbing_list, last_list = [], [], [], [], [], []

        for i in tqdm(range(n_episodes)):
            # Evaluate one episode at a time
            trajectory = self.core.evaluate(n_episodes=1, render=False)
            state, action, reward, next_state, absorbing, last = parse_dataset(trajectory)

            # Append the data from the current episode
            state_list.append(torch.from_numpy(state).to(device))
            action_list.append(torch.from_numpy(action).to(device))
            reward_list.append(torch.from_numpy(reward).to(device))
            next_state_list.append(torch.from_numpy(next_state).to(device))
            absorbing_list.append(torch.from_numpy(absorbing).to(device))
            last_list.append(torch.from_numpy(last).to(device))

        # Concatenate the data from all episodes
        self.state = torch.cat(state_list, dim=0)
        self.action = torch.cat(action_list, dim=0)
        self.reward = torch.cat(reward_list, dim=0)
        self.next_state = torch.cat(next_state_list, dim=0)
        self.absorbing = torch.cat(absorbing_list, dim=0)
        self.last = torch.cat(last_list, dim=0)

        self.length = self.state.shape[0]

    def __len__(self):
        """
        Return the total number of state-action-reward-next_state tuples in the dataset.

        Returns:
            int: the length of the dataset.
        """
        return self.length

    def __getitem__(self, index):
        """
        Get the state-action-reward-next_state tuple at the specified index.

        Args:
            index (int): the index of the desired tuple.

        Returns:
            dict: a dictionary containing state, action, reward, and next_state.
        """

        return {
            'state': self.state[index],
            'action': self.action[index],
            'reward': self.reward[index],
            'next_state': self.next_state[index],
            'absorbing': self.absorbing[index],
            'last': self.last[index],
        }



In [3]:
class Network(nn.Module):
    def __init__(self, input_shape, output_shape, n_features, **kwargs):
        super(Network, self).__init__()

        self._h1 = nn.Linear(input_shape[0], n_features)
        self._h2 = nn.Linear(n_features, n_features)
        self._h3 = nn.Linear(n_features, output_shape[0])

        nn.init.xavier_uniform_(self._h1.weight,
                                gain=nn.init.calculate_gain('tanh'))
        nn.init.xavier_uniform_(self._h2.weight,
                                gain=nn.init.calculate_gain('tanh'))
        nn.init.xavier_uniform_(self._h3.weight,
                                gain=nn.init.calculate_gain('linear'))

    def forward(self, obs, **kwargs):
        features1 = torch.tanh(self._h1(torch.squeeze(obs, 1).float()))
        features2 = torch.tanh(self._h2(features1))
        return self._h3(features2)


class BCAgent(AgentBase):
    def __init__(self, env_info, policy, **kwargs):
        super().__init__(env_info, **kwargs)
        self.policy = policy

    def reset(self):
        pass

    def draw_action(self, observation):
        return self.policy.draw_action(observation).reshape(2,3)


In [19]:
def train_dagger_agent(learner_policy, expert_policy, mdp, dataset, num_iterations=10, batch_size=64, lr=0.001):
    # Create data loader
    data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    # Set up the optimizer and loss function
    optimizer = optim.Adam(learner_policy.parameters(), lr=lr)

    # Train the initial policy on collected data
    for epoch in range(num_iterations):
        for i, data in enumerate(data_loader):
            optimizer.zero_grad()

            states = data['state'].float()

            # Get expert actions for each state in the batch
            expert_actions = []
            successful_states = []

            for state in states:
                expert_policy.reset()
                state_np = state.cpu().numpy()

                expert_action = expert_policy.draw_action(state_np)

                if not expert_policy.optimization_failed:
                    successful_states.append(state)

                    expert_actions.append(torch.from_numpy(expert_action))

            successful_states = torch.stack(successful_states)
            expert_actions = torch.stack(expert_actions).reshape((-1, 6))
            print(f"successful states: {successful_states}")
            print(f"expert actions: {expert_actions}")
            print(f"expert actions shape: {expert_actions.shape}")
            print(f"successful states shape: {successful_states.shape}")

            # Compute loss
            loss = -policy.log_prob_t(successful_states, expert_actions).mean()

            # Update the learner policy
            loss.backward()
            optimizer.step()

        # Evaluate the current learner policy
        learner_trajectory_dataset = MushroomRLTrajectoryDataset(mdp, BCAgent(mdp.env_info, learner_policy), n_episodes=2)
        data_loader = DataLoader(learner_trajectory_dataset, batch_size=batch_size, shuffle=True)

        # Collect new expert actions for the new states
        for state in learner_trajectory_dataset:
            state_np = state['state'].float().unsqueeze(0).cpu().numpy()
            expert_action = expert_policy.draw_action(state_np).astype(np.float32)
            state['action'] = torch.from_numpy(expert_action).squeeze().tolist()

        # Add the new data to the dataset
        dataset.trajectories.extend(learner_trajectory_dataset.trajectories)
        dataset.length += learner_trajectory_dataset.length

    return learner_policy


# 1. defining BCAgent and expert agent to be trained in dagger

In [5]:
env = "3dof-hit"

mdp = AirHockeyChallengeWrapper(env)
mdp.reset()

# policy can only output 1D actions (6,) ... they need to be recast in (2,3) shape later on
policy = GaussianTorchPolicy(Network, (12,), (6,), std_0=1., n_features=64, use_cuda=use_cuda)

policy = policy.load('dataset/hit_500_policy')
bc_agent = BCAgent(mdp.env_info, policy)
dataset = MushroomRLTrajectoryDataset(mdp, bc_agent, n_episodes=10)


expert_agent = HittingAgentWait(mdp.env_info)


  0%|          | 0/10 [00:00<?, ?it/s]
  0%|          | 0/1 [00:00<?, ?it/s][A
100%|██████████| 1/1 [00:02<00:00,  2.19s/it][A
 10%|█         | 1/10 [00:02<00:19,  2.21s/it][A
  0%|          | 0/1 [00:00<?, ?it/s][A
100%|██████████| 1/1 [00:04<00:00,  4.50s/it][A
 20%|██        | 2/10 [00:06<00:28,  3.56s/it][A
  0%|          | 0/1 [00:00<?, ?it/s][A
100%|██████████| 1/1 [00:00<00:00,  1.05it/s][A
 30%|███       | 3/10 [00:07<00:16,  2.38s/it][A
  0%|          | 0/1 [00:00<?, ?it/s][A
100%|██████████| 1/1 [00:04<00:00,  4.87s/it][A
 40%|████      | 4/10 [00:12<00:20,  3.37s/it][A
  0%|          | 0/1 [00:00<?, ?it/s][A
100%|██████████| 1/1 [00:04<00:00,  4.77s/it][A
 50%|█████     | 5/10 [00:17<00:19,  3.88s/it][A
  0%|          | 0/1 [00:00<?, ?it/s][A
100%|██████████| 1/1 [00:04<00:00,  4.68s/it][A
 60%|██████    | 6/10 [00:22<00:16,  4.16s/it][A
  0%|          | 0/1 [00:00<?, ?it/s][A
100%|██████████| 1/1 [00:04<00:00,  4.65s/it][A
 70%|███████   | 7/10 [00:26<00:12,  

In [20]:
dagger_agent = train_dagger_agent(policy, expert_agent, mdp, dataset, num_iterations=1, batch_size=64, lr=0.001)

successful states: tensor([[ 0.8765,  0.0355,  0.0000,  0.0000,  0.0000,  0.0000, -1.1016,  1.2411,
          1.3769, -0.0406,  1.2615, -6.6864],
        [ 0.9048,  0.3271,  0.0000,  0.0000,  0.0000,  0.0000, -1.0110,  1.0760,
          0.9594,  0.3649, -0.8919,  0.4557],
        [ 1.2056,  0.1764,  0.0000,  0.0000,  0.0000,  0.0000, -1.0060,  1.0279,
          1.3844,  0.7014, -0.6074, -5.7935],
        [ 0.9048,  0.3271,  0.0000,  0.0000,  0.0000,  0.0000, -0.6559,  0.6832,
          1.2903,  0.4457,  0.3626,  1.0613]])
expert actions: tensor([[-1.1045,  1.2578,  1.3055, -0.2475,  0.4063, -0.4574],
        [-1.0064,  1.0650,  0.9664,  0.0952, -0.2052,  0.2534],
        [-1.0003,  1.0243,  1.3239, -0.1181,  0.2116, -0.2241],
        [-0.6545,  0.6937,  1.2947, -0.3136,  0.6982, -0.6215]])
expert actions shape: torch.Size([4, 6])
successful states shape: torch.Size([4, 12])


Exception in thread Thread-14044:
Traceback (most recent call last):
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/threading.py", line 932, in _bootstrap_inner
    self.run()
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/examples/control/hitting_agent_wait.py", line 86, in _plan_trajectory_thread
    _, joint_pos_traj = self.optimizer.optimize_trajectory(ee_traj, joint_pos, joint_vel, q_anchor)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/baseline/baseline_agent/optimizer.py", line 35, in optimize_trajectory
    success, dq_next = self._solve_aqp(des_point[:3], q_cur, dq_anchor)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/baseline/baseline_agent/optimizer.py", line 63, in _solve_aqp
    solver.setup(P=sparse.csc_matrix(P), q=q, A=sparse.csc_matrix(A), l=l, u=u, verbose=False

ERROR in validate_data: Lower bound at index 2 is greater than upper bound: 3.4265e+00 > 1.4364e+00
ERROR in osqp_setup: Problem data validation.
successful states: tensor([[ 1.2927e+00,  2.0504e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00, -7.3053e-01,  5.3752e-01,  7.0488e-01, -5.7626e-01,
          2.6118e+00, -7.3204e+00],
        [ 1.9817e+00, -4.0271e-01,  1.0411e+00, -6.9680e-02,  4.6303e-02,
          2.0197e+00,  1.4227e-01, -8.7709e-01, -2.0052e+00, -2.3426e-01,
         -7.3116e-01, -6.5113e+00],
        [ 1.0602e+00, -2.9419e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00, -1.0048e+00,  1.1381e+00,  1.1063e+00,  2.0726e+00,
         -4.4409e+00,  3.9288e+00],
        [ 1.0602e+00, -2.9419e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00, -1.0539e+00,  1.2420e+00,  1.0135e+00,  1.6063e+00,
         -2.9155e+00,  1.5912e+00],
        [ 1.2207e+00, -1.9818e-01, -1.5026e-04,  2.8333e+00, -1.9533e+00,
         -1.9625e-03, -8.

Exception in thread Thread-14258:
Traceback (most recent call last):
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/threading.py", line 932, in _bootstrap_inner
    self.run()
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/examples/control/hitting_agent_wait.py", line 85, in _plan_trajectory_thread
    ee_traj, hit_idx, q_anchor = self.plan_ee_trajectory(puck_pos, ee_pos)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/examples/control/hitting_agent_wait.py", line 116, in plan_ee_trajectory
    res = np.array([self.bezier_planner.get_point(t_i) for t_i in np.arange(0, self.bezier_planner.t_final + 1e-6,
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/examples/control/hitting_agent_wait.py", line 116, in <listcomp>
    res = np.array([self.bezier_planner.get_point(t_i) for t_i in np.a

successful states: tensor([[ 1.2056,  0.1764,  0.0000,  0.0000,  0.0000,  0.0000, -0.7313,  0.6481,
          0.8347,  3.0138, -8.1607,  8.9815],
        [ 1.2927,  0.2050,  0.0000,  0.0000,  0.0000,  0.0000, -1.0685,  1.1300,
          1.4436,  1.4320, -2.5474, -0.4850],
        [ 0.9048,  0.3271,  0.0000,  0.0000,  0.0000,  0.0000, -0.9755,  1.0082,
          1.2509,  0.0424, -0.7991,  1.4941],
        [ 2.0027, -0.4166,  0.4428, -0.0801,  0.0533,  2.2590,  0.3799, -0.6689,
         -1.5559, -1.4207, -0.8794, -0.6162],
        [ 1.0602, -0.2942,  0.0000,  0.0000,  0.0000,  0.0000,  0.5688, -0.7005,
         -0.8718,  2.5822, -7.3779, 11.4217],
        [ 1.0302,  0.0196,  0.0000,  0.0000,  0.0000,  0.0000, -0.7114,  0.6359,
          0.8402,  1.1990, -2.4023,  2.4565],
        [ 1.2927,  0.2050,  0.0000,  0.0000,  0.0000,  0.0000, -0.1590, -0.0975,
          0.8611,  0.6129,  1.9707, -4.8866],
        [ 1.2927,  0.2050,  0.0000,  0.0000,  0.0000,  0.0000, -0.1733, -0.1260,
          0

Exception in thread Thread-14517:
Traceback (most recent call last):
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/threading.py", line 932, in _bootstrap_inner
    self.run()
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/examples/control/hitting_agent_wait.py", line 86, in _plan_trajectory_thread
    _, joint_pos_traj = self.optimizer.optimize_trajectory(ee_traj, joint_pos, joint_vel, q_anchor)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/baseline/baseline_agent/optimizer.py", line 35, in optimize_trajectory
    success, dq_next = self._solve_aqp(des_point[:3], q_cur, dq_anchor)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/baseline/baseline_agent/optimizer.py", line 63, in _solve_aqp
    solver.setup(P=sparse.csc_matrix(P), q=q, A=sparse.csc_matrix(A), l=l, u=u, verbose=False

ERROR in validate_data: Lower bound at index 2 is greater than upper bound: 3.4807e+00 > 1.9269e+00
ERROR in osqp_setup: Problem data validation.
successful states: tensor([[  1.2886,   0.2511,   0.0000,   0.0000,   0.0000,   0.0000,  -0.4777,
           0.4129,   1.1946,   0.5320,  -0.7642,   1.8169],
        [  0.9048,   0.3271,   0.0000,   0.0000,   0.0000,   0.0000,  -0.8013,
           0.6146,   1.4006,   0.7505,  -1.2609,   0.7921],
        [  1.2927,   0.2050,   0.0000,   0.0000,   0.0000,   0.0000,   0.0720,
           0.1868,   0.7699,   0.7902,  -1.7117,  -3.2668],
        [  1.9269,  -0.3663,   2.7054,  -0.0423,   0.0281,   1.3539,   1.6274,
          -0.3191,  -1.9965,   0.7364,   0.4591,  -4.3406],
        [  1.0602,  -0.2942,   0.0000,   0.0000,   0.0000,   0.0000,  -0.5643,
           0.2897,   1.1592,   1.9457,  -5.0813,   5.5064],
        [  1.0602,  -0.2942,   0.0000,   0.0000,   0.0000,   0.0000,   0.6453,
          -0.9535,  -0.4167,   1.9176,  -7.4612,  15.7078],
 

Exception in thread Thread-14674:
Traceback (most recent call last):
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/threading.py", line 932, in _bootstrap_inner
    self.run()
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/examples/control/hitting_agent_wait.py", line 85, in _plan_trajectory_thread
    ee_traj, hit_idx, q_anchor = self.plan_ee_trajectory(puck_pos, ee_pos)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/examples/control/hitting_agent_wait.py", line 116, in plan_ee_trajectory
    res = np.array([self.bezier_planner.get_point(t_i) for t_i in np.arange(0, self.bezier_planner.t_final + 1e-6,
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/examples/control/hitting_agent_wait.py", line 116, in <listcomp>
    res = np.array([self.bezier_planner.get_point(t_i) for t_i in np.a

successful states: tensor([[ 1.0302e+00,  1.9565e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00, -8.8278e-01,  8.4891e-01,  9.6841e-01,  2.9942e+00,
         -7.8649e+00,  8.0166e+00],
        [ 1.2056e+00,  1.7637e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00, -1.1557e+00,  1.3002e+00,  1.4428e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.3119e+00,  2.6857e-01,  1.1366e-05,  1.0955e+00,  8.2955e-01,
          5.7329e-04, -3.5439e-01,  5.1957e-01,  6.4625e-01, -5.4718e-01,
          2.9703e+00, -5.1194e+00],
        [ 1.2886e+00,  2.5110e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00, -1.1557e+00,  1.3002e+00,  1.4428e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 9.0482e-01,  3.2709e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00, -1.0055e+00,  1.0982e+00,  1.1812e+00,  2.2901e+00,
         -5.5597e+00,  8.9090e+00],
        [ 1.0602e+00, -2.9419e-01,  0.0000e+00,  0.0000e+00, 

Exception in thread Thread-14691:
Traceback (most recent call last):
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/threading.py", line 932, in _bootstrap_inner
    self.run()
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/examples/control/hitting_agent_wait.py", line 86, in _plan_trajectory_thread
    _, joint_pos_traj = self.optimizer.optimize_trajectory(ee_traj, joint_pos, joint_vel, q_anchor)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/baseline/baseline_agent/optimizer.py", line 35, in optimize_trajectory
    success, dq_next = self._solve_aqp(des_point[:3], q_cur, dq_anchor)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/baseline/baseline_agent/optimizer.py", line 63, in _solve_aqp
    solver.setup(P=sparse.csc_matrix(P), q=q, A=sparse.csc_matrix(A), l=l, u=u, verbose=False

ERROR in validate_data: Lower bound at index 2 is greater than upper bound: 8.8230e+00 > -8.2098e-01
ERROR in osqp_setup: Problem data validation.


Exception in thread Thread-14703:
Traceback (most recent call last):
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/threading.py", line 932, in _bootstrap_inner
    self.run()
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/examples/control/hitting_agent_wait.py", line 86, in _plan_trajectory_thread
    _, joint_pos_traj = self.optimizer.optimize_trajectory(ee_traj, joint_pos, joint_vel, q_anchor)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/baseline/baseline_agent/optimizer.py", line 35, in optimize_trajectory
    success, dq_next = self._solve_aqp(des_point[:3], q_cur, dq_anchor)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/baseline/baseline_agent/optimizer.py", line 63, in _solve_aqp
    solver.setup(P=sparse.csc_matrix(P), q=q, A=sparse.csc_matrix(A), l=l, u=u, verbose=False

ERROR in validate_data: Lower bound at index 2 is greater than upper bound: 8.3872e+00 > 1.3840e+00
ERROR in osqp_setup: Problem data validation.
successful states: tensor([[ 0.9048,  0.3271,  0.0000,  0.0000,  0.0000,  0.0000, -1.0147,  1.1284,
          1.0610, -0.5459,  1.6511, -4.4341],
        [ 1.9790, -0.4009,  1.1212, -0.0683,  0.0454,  1.9876,  0.1221, -0.8666,
         -2.1583,  0.2053, -0.1018, -0.6037],
        [ 1.0602, -0.2942,  0.0000,  0.0000,  0.0000,  0.0000, -0.7954,  0.6849,
          1.3942,  1.6128, -3.6740,  1.7211],
        [ 1.9803, -0.4018,  1.0813, -0.0690,  0.0458,  2.0036,  0.1333, -0.8900,
         -2.1054, -0.8398,  0.6464, -6.5007],
        [ 0.9858,  0.0382,  0.0000,  0.0000,  0.0000,  0.0000, -1.1449,  1.2887,
          1.2021,  1.7640, -3.9015,  4.2267],
        [ 0.9048,  0.3271,  0.0000,  0.0000,  0.0000,  0.0000, -0.6712,  0.7460,
          1.2496,  0.0489,  1.1915, -0.4811],
        [ 1.0602, -0.2942,  0.0000,  0.0000,  0.0000,  0.0000, -1.1557,  

Exception in thread Thread-15061:
Traceback (most recent call last):
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/threading.py", line 932, in _bootstrap_inner
    self.run()
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/examples/control/hitting_agent_wait.py", line 85, in _plan_trajectory_thread
    ee_traj, hit_idx, q_anchor = self.plan_ee_trajectory(puck_pos, ee_pos)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/examples/control/hitting_agent_wait.py", line 116, in plan_ee_trajectory
    res = np.array([self.bezier_planner.get_point(t_i) for t_i in np.arange(0, self.bezier_planner.t_final + 1e-6,
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/examples/control/hitting_agent_wait.py", line 116, in <listcomp>
    res = np.array([self.bezier_planner.get_point(t_i) for t_i in np.a

successful states: tensor([[ 0.9048,  0.3271,  0.0000,  0.0000,  0.0000,  0.0000, -1.0949,  1.2220,
          1.3176,  0.2374,  0.1589, -4.2235],
        [ 0.9048,  0.3271,  0.0000,  0.0000,  0.0000,  0.0000, -0.9612,  0.9467,
          1.0347, -0.3603,  0.8531, -2.1508],
        [ 1.2927,  0.2050,  0.0000,  0.0000,  0.0000,  0.0000, -0.0175,  0.3928,
          0.7236,  0.1231,  0.8837, -3.7512],
        [ 1.2886,  0.2511,  0.0000,  0.0000,  0.0000,  0.0000, -0.6526,  0.9059,
          0.4079, -0.0278,  3.1873, -9.8805],
        [ 0.9858,  0.0382,  0.0000,  0.0000,  0.0000,  0.0000, -1.0228,  1.0382,
          1.4536,  1.3479, -2.8657,  2.4700],
        [ 0.9048,  0.3271,  0.0000,  0.0000,  0.0000,  0.0000, -0.7614,  0.7024,
          1.0536,  2.3400, -5.6972, 10.1762]])
expert actions: tensor([[-1.0924,  1.2234,  1.2756,  0.0255, -0.0187,  0.0204],
        [-0.9648,  0.9553,  1.0133,  0.0055,  0.0071,  0.0117],
        [-0.0190,  0.4087,  0.6798, -0.2740,  0.7094, -0.6177],
        [-

Exception in thread Thread-15146:
Traceback (most recent call last):
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/threading.py", line 932, in _bootstrap_inner
    self.run()
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/examples/control/hitting_agent_wait.py", line 85, in _plan_trajectory_thread
    ee_traj, hit_idx, q_anchor = self.plan_ee_trajectory(puck_pos, ee_pos)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/examples/control/hitting_agent_wait.py", line 122, in plan_ee_trajectory
    p = np.hstack([p, np.ones((p.shape[0], 1)) * self.ee_height])
  File "<__array_function__ internals>", line 200, in hstack
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/site-packages/numpy/core/shape_base.py", line 368, in hstack
    return _nx.concatenate(arrs, 0, dtype=dtype, casting=castin

successful states: tensor([[ 1.2056,  0.1764,  0.0000,  0.0000,  0.0000,  0.0000, -1.1335,  1.2757,
          1.3829,  0.8154, -1.4739,  0.6765],
        [ 0.9048,  0.3271,  0.0000,  0.0000,  0.0000,  0.0000, -0.9126,  0.8171,
          1.2274,  0.3020, -0.0340, -0.8233],
        [ 2.2073, -0.3971,  1.8658, -0.1892, -0.1744,  3.7795,  0.0911, -0.8131,
         -1.5732,  3.2578, -4.4670,  0.2921],
        [ 1.2886,  0.2511,  0.0000,  0.0000,  0.0000,  0.0000, -0.4215,  0.5245,
          0.9214,  1.1635, -0.6241, -3.0995],
        [ 1.0602, -0.2942,  0.0000,  0.0000,  0.0000,  0.0000, -0.9782,  1.0993,
          1.0903,  1.7842, -3.6275,  2.5910]])
expert actions: tensor([[-1.1252,  1.2608,  1.3899,  0.0134, -0.0217,  0.0239],
        [-0.9112,  0.8203,  1.2158, -0.1557,  0.3609, -0.3376],
        [ 0.0911, -0.8131, -1.5732,  0.0000,  0.0000,  0.0000],
        [-0.4122,  0.5242,  0.8850, -0.2297,  0.5865, -0.5357],
        [-0.9600,  1.0621,  1.1172,  0.0299, -0.0889,  0.1015]])
expert a

Exception in thread Thread-15354:
Traceback (most recent call last):
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/threading.py", line 932, in _bootstrap_inner
    self.run()
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/examples/control/hitting_agent_wait.py", line 85, in _plan_trajectory_thread
    ee_traj, hit_idx, q_anchor = self.plan_ee_trajectory(puck_pos, ee_pos)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/examples/control/hitting_agent_wait.py", line 116, in plan_ee_trajectory
    res = np.array([self.bezier_planner.get_point(t_i) for t_i in np.arange(0, self.bezier_planner.t_final + 1e-6,
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/examples/control/hitting_agent_wait.py", line 116, in <listcomp>
    res = np.array([self.bezier_planner.get_point(t_i) for t_i in np.a

successful states: tensor([[ 1.2886e+00,  2.5110e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00, -5.2704e-01,  5.2951e-01,  9.8344e-01,  3.4949e+00,
         -9.7918e+00,  1.4569e+01],
        [ 1.2927e+00,  2.0504e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00, -6.4436e-01,  3.4125e-01,  6.0018e-01,  8.7707e-01,
         -2.0199e+00,  1.6985e+00],
        [ 1.2886e+00,  2.5110e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00, -1.1169e+00,  1.2545e+00,  1.3806e+00, -7.2990e-01,
          2.5265e+00, -8.0312e+00],
        [ 1.0602e+00, -2.9419e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00, -1.1433e+00,  1.3066e+00,  1.3401e+00, -2.8058e-01,
          1.7962e+00, -6.0174e+00],
        [ 1.2056e+00,  1.7637e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00, -6.7366e-01,  5.6975e-01,  6.5252e-01, -2.2729e-01,
          1.6368e+00, -4.8799e+00],
        [ 9.0482e-01,  3.2709e-01,  0.0000e+00,  0.0000e+00, 

Exception in thread Thread-15529:
Traceback (most recent call last):
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/threading.py", line 932, in _bootstrap_inner
    self.run()
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/examples/control/hitting_agent_wait.py", line 86, in _plan_trajectory_thread
    _, joint_pos_traj = self.optimizer.optimize_trajectory(ee_traj, joint_pos, joint_vel, q_anchor)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/baseline/baseline_agent/optimizer.py", line 35, in optimize_trajectory
    success, dq_next = self._solve_aqp(des_point[:3], q_cur, dq_anchor)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/baseline/baseline_agent/optimizer.py", line 63, in _solve_aqp
    solver.setup(P=sparse.csc_matrix(P), q=q, A=sparse.csc_matrix(A), l=l, u=u, verbose=False

ERROR in validate_data: Lower bound at index 2 is greater than upper bound: 6.8751e+00 > -1.4147e+00
ERROR in osqp_setup: Problem data validation.
successful states: tensor([[ 1.2056,  0.1764,  0.0000,  0.0000,  0.0000,  0.0000, -1.1497,  1.3028,
          1.3836, -0.0910,  0.7759, -2.6001],
        [ 1.2056,  0.1764,  0.0000,  0.0000,  0.0000,  0.0000, -0.2348,  0.1063,
          0.8643,  1.5129, -3.6474,  4.9330],
        [ 1.9776, -0.4000,  1.1608, -0.0676,  0.0449,  1.9718,  0.1421, -0.8867,
         -2.1312,  1.3481, -1.9018,  3.3233],
        [ 1.2886,  0.2511,  0.0000,  0.0000,  0.0000,  0.0000, -0.4807,  0.4103,
          1.1869,  1.0553, -2.2612,  4.7434],
        [ 1.2056,  0.1764,  0.0000,  0.0000,  0.0000,  0.0000, -0.6008,  0.3775,
          0.8364,  1.1533, -3.6061,  4.1886]])
expert actions: tensor([[-1.1503,  1.3100,  1.3581,  0.0326, -0.0502,  0.0562],
        [-0.2258,  0.0843,  0.9044, -0.5979,  1.3902, -0.8674],
        [ 0.1421, -0.8867, -2.1312,  0.0000,  0.0000, 

Exception in thread Thread-15850:
Traceback (most recent call last):
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/threading.py", line 932, in _bootstrap_inner
    self.run()
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/examples/control/hitting_agent_wait.py", line 85, in _plan_trajectory_thread
    ee_traj, hit_idx, q_anchor = self.plan_ee_trajectory(puck_pos, ee_pos)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/examples/control/hitting_agent_wait.py", line 122, in plan_ee_trajectory
    p = np.hstack([p, np.ones((p.shape[0], 1)) * self.ee_height])
  File "<__array_function__ internals>", line 200, in hstack
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/site-packages/numpy/core/shape_base.py", line 368, in hstack
    return _nx.concatenate(arrs, 0, dtype=dtype, casting=castin

successful states: tensor([[ 1.2927,  0.2050,  0.0000,  0.0000,  0.0000,  0.0000, -0.7513,  0.6272,
          0.4675, -0.2716,  1.9967, -7.5780],
        [ 0.9048,  0.3271,  0.0000,  0.0000,  0.0000,  0.0000, -0.6519,  0.8686,
          0.9191,  1.4669, -2.3878,  1.8894],
        [ 0.9048,  0.3271,  0.0000,  0.0000,  0.0000,  0.0000, -1.0127,  1.1395,
          1.0139,  0.9927, -2.5776,  4.1252],
        [ 2.2035, -0.4005,  1.9411, -0.1873, -0.1726,  3.7494,  0.1522, -0.9015,
         -1.5306,  2.5492, -2.8023, -0.3557],
        [ 1.2886,  0.2511,  0.0000,  0.0000,  0.0000,  0.0000, -0.9185,  0.9813,
          1.3949,  1.4689, -2.3508,  0.8193],
        [ 0.9048,  0.3271,  0.0000,  0.0000,  0.0000,  0.0000, -0.5265,  0.5935,
          1.1717, -0.6469,  2.0340, -3.6767],
        [ 1.2927,  0.2050,  0.0000,  0.0000,  0.0000,  0.0000, -1.0539,  1.1319,
          1.3182,  0.8939, -0.5411, -5.0607],
        [ 1.0302,  0.0196,  0.0000,  0.0000,  0.0000,  0.0000, -0.9389,  0.9482,
          1

Exception in thread Thread-16491:
Traceback (most recent call last):
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/threading.py", line 932, in _bootstrap_inner
    self.run()
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/examples/control/hitting_agent_wait.py", line 85, in _plan_trajectory_thread
    ee_traj, hit_idx, q_anchor = self.plan_ee_trajectory(puck_pos, ee_pos)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/examples/control/hitting_agent_wait.py", line 122, in plan_ee_trajectory
    p = np.hstack([p, np.ones((p.shape[0], 1)) * self.ee_height])
  File "<__array_function__ internals>", line 200, in hstack
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/site-packages/numpy/core/shape_base.py", line 368, in hstack
    return _nx.concatenate(arrs, 0, dtype=dtype, casting=castin

successful states: tensor([[ 0.9048,  0.3271,  0.0000,  0.0000,  0.0000,  0.0000, -0.8614,  0.7334,
          1.2910,  0.5120, -0.3205, -0.9164],
        [ 0.9048,  0.3271,  0.0000,  0.0000,  0.0000,  0.0000, -0.9542,  0.9124,
          1.1021, -0.4083,  0.8845, -1.3925],
        [ 1.9995, -0.4145,  0.5324, -0.0786,  0.0522,  2.2231,  0.3121, -0.6620,
         -1.5694, -1.9580,  0.7780, -1.9423],
        [ 1.2056,  0.1764,  0.0000,  0.0000,  0.0000,  0.0000, -0.2728,  0.2066,
          0.7145,  1.0232, -2.4882,  3.5080]])
expert actions: tensor([[-0.8587,  0.7356,  1.2768, -0.2412,  0.5444, -0.4942],
        [-0.9587,  0.9223,  1.0872, -0.0417,  0.1143, -0.1004],
        [ 0.3121, -0.6620, -1.5694,  0.0000,  0.0000,  0.0000],
        [-0.2667,  0.1921,  0.7419, -0.3988,  0.9844, -0.7088]])
expert actions shape: torch.Size([4, 6])
successful states shape: torch.Size([4, 12])
successful states: tensor([[ 1.2927,  0.2050,  0.0000,  0.0000,  0.0000,  0.0000, -0.9834,  1.0461,
          1.1

Exception in thread Thread-16630:
Traceback (most recent call last):
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/threading.py", line 932, in _bootstrap_inner
    self.run()
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/examples/control/hitting_agent_wait.py", line 85, in _plan_trajectory_thread
    ee_traj, hit_idx, q_anchor = self.plan_ee_trajectory(puck_pos, ee_pos)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/examples/control/hitting_agent_wait.py", line 122, in plan_ee_trajectory
    p = np.hstack([p, np.ones((p.shape[0], 1)) * self.ee_height])
  File "<__array_function__ internals>", line 200, in hstack
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/site-packages/numpy/core/shape_base.py", line 368, in hstack
    return _nx.concatenate(arrs, 0, dtype=dtype, casting=castin

successful states: tensor([[ 1.0602e+00, -2.9419e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00,  6.4362e-01, -9.3698e-01, -3.0605e-01, -9.4824e-01,
          4.2531e+00, -5.7675e+00],
        [ 1.2886e+00,  2.5110e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00, -1.1115e+00,  1.3339e+00,  1.0527e+00,  1.1179e+00,
         -1.3193e+00,  3.0406e-01],
        [ 1.9964e+00, -4.1245e-01,  6.2061e-01, -7.7006e-02,  5.1172e-02,
          2.1879e+00,  2.3398e-01, -6.2297e-01, -1.6427e+00, -8.8438e-01,
         -1.5499e+00,  2.5061e-01],
        [ 9.8584e-01,  3.8181e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00, -1.1339e+00,  1.2626e+00,  1.4555e+00,  3.2963e-01,
         -6.1771e-01, -1.3599e-01],
        [ 1.2927e+00,  2.0504e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00, -6.3976e-01,  2.3636e-01,  1.3230e+00, -3.3564e-01,
          4.4576e-01, -9.9846e-01],
        [ 9.0482e-01,  3.2709e-01,  0.0000e+00,  0.0000e+00, 

Exception in thread Thread-17058:
Traceback (most recent call last):
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/threading.py", line 932, in _bootstrap_inner
    self.run()
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/examples/control/hitting_agent_wait.py", line 85, in _plan_trajectory_thread
    ee_traj, hit_idx, q_anchor = self.plan_ee_trajectory(puck_pos, ee_pos)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/examples/control/hitting_agent_wait.py", line 122, in plan_ee_trajectory
    p = np.hstack([p, np.ones((p.shape[0], 1)) * self.ee_height])
  File "<__array_function__ internals>", line 200, in hstack
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/site-packages/numpy/core/shape_base.py", line 368, in hstack
    return _nx.concatenate(arrs, 0, dtype=dtype, casting=castin

successful states: tensor([[ 1.9979, -0.4135,  0.5767, -0.0778,  0.0517,  2.2054,  0.2788, -0.6548,
         -1.5907, -1.9266,  0.9168, -2.4920],
        [ 1.6782,  0.4287,  0.8315,  0.7770, -0.6016,  8.4450, -0.2848,  0.4639,
          0.4512,  2.0301, -6.5421,  5.9445],
        [ 1.0602, -0.2942,  0.0000,  0.0000,  0.0000,  0.0000, -1.1451,  1.2821,
          1.4502,  0.1966, -0.3076,  0.0409],
        [ 1.0602, -0.2942,  0.0000,  0.0000,  0.0000,  0.0000, -1.1374,  1.3272,
          1.2187,  0.0569,  1.1062, -4.6527],
        [ 0.9048,  0.3271,  0.0000,  0.0000,  0.0000,  0.0000, -0.9696,  0.9734,
          1.3042, -0.5275,  0.7966, -2.0930],
        [ 1.2927,  0.2050,  0.0000,  0.0000,  0.0000,  0.0000, -1.0412,  1.1228,
          1.2422,  0.3685,  0.6668, -6.3240]])
expert actions: tensor([[ 0.2788, -0.6548, -1.5907,  0.0000,  0.0000,  0.0000],
        [-0.2848,  0.4639,  0.4512,  0.0000,  0.0000,  0.0000],
        [-1.1433,  1.2792,  1.4503, -0.0211,  0.0221, -0.0276],
        [-

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s][A[A

100%|██████████| 1/1 [00:04<00:00,  4.41s/it][A[A

 50%|█████     | 1/2 [00:04<00:04,  4.42s/it][A[A

  0%|          | 0/1 [00:00<?, ?it/s][A[A

100%|██████████| 1/1 [00:04<00:00,  4.40s/it][A[A

100%|██████████| 2/2 [00:08<00:00,  4.42s/it][A[A


AttributeError: 'MushroomRLTrajectoryDataset' object has no attribute 'trajectories'

# testing expert agent

In [9]:
data_loader = DataLoader(dataset, batch_size=64, shuffle=True)

for i, data in enumerate(data_loader):
    states = data['state'].float()
    for state in states:
        expert_agent.reset()
        state_np = state.cpu().numpy()
        expert_action = expert_agent.draw_action(state_np)
        print(f"expert_action: {expert_action}")
        print(f"optimization_failed: {expert_agent.optimization_failed}")
    # expert_actions = expert_agent.draw_action(states).float()

expert_action: [[-0.00474991  0.35589615 -0.5072534 ]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[ 0.18190074 -0.4883073   0.8293152 ]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[-0.6237628   0.49058458  0.4572864 ]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[-0.6604817   0.5212704   0.969205  ]
 [-0.15149744  0.37543228 -0.3254726 ]]
optimization_failed: False
expert_action: [[-0.3713106   0.5788531   0.14347985]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[-0.8400942   0.57402956 -0.5160753 ]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[0.18532197 0.13293336 0.03667677]
 [0.         0.         0.        ]]
optimization_failed: True
expert_action: [[-0.10467133 -0.07287078  0.24252151]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[-0.2975505   0.21409406  1.0

Exception in thread Thread-4018:
Traceback (most recent call last):
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/threading.py", line 932, in _bootstrap_inner
    self.run()
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/examples/control/hitting_agent_wait.py", line 85, in _plan_trajectory_thread
    ee_traj, hit_idx, q_anchor = self.plan_ee_trajectory(puck_pos, ee_pos)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/examples/control/hitting_agent_wait.py", line 116, in plan_ee_trajectory
    res = np.array([self.bezier_planner.get_point(t_i) for t_i in np.arange(0, self.bezier_planner.t_final + 1e-6,
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/examples/control/hitting_agent_wait.py", line 116, in <listcomp>
    res = np.array([self.bezier_planner.get_point(t_i) for t_i in np.ar

expert_action: [[ 0.15865289 -0.44082004  0.2523563 ]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[ 0.04151758 -0.13285692  0.57445925]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[-1.259395    0.50566864 -0.5159055 ]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[-0.8010943  0.8151319  0.8561258]
 [ 0.         0.         0.       ]]
optimization_failed: False
expert_action: [[-0.03954968 -0.09363928  0.8600023 ]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[-0.08591827  0.06232821  0.4467567 ]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[ 0.39174122 -0.61539084 -0.31317687]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[ 0.16239703 -0.4859524   0.29144177]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[ 0.2058224  -0.30752552 -0.0

Exception in thread Thread-4255:
Traceback (most recent call last):
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/threading.py", line 932, in _bootstrap_inner
    self.run()
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/examples/control/hitting_agent_wait.py", line 86, in _plan_trajectory_thread
    _, joint_pos_traj = self.optimizer.optimize_trajectory(ee_traj, joint_pos, joint_vel, q_anchor)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/baseline/baseline_agent/optimizer.py", line 35, in optimize_trajectory
    success, dq_next = self._solve_aqp(des_point[:3], q_cur, dq_anchor)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/baseline/baseline_agent/optimizer.py", line 63, in _solve_aqp
    solver.setup(P=sparse.csc_matrix(P), q=q, A=sparse.csc_matrix(A), l=l, u=u, verbose=False,

expert_action: [[-0.44493315  0.50703895  1.028452  ]
 [-0.38755116  0.92173886 -0.75705886]]
optimization_failed: False
expert_action: [[-0.04764035  0.5493459  -0.613633  ]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[-0.2871652   0.17682706  0.992462  ]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[-0.9479595   1.0538996   0.71533483]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[ 0.05045802  0.5396129  -0.77464026]
 [ 0.          0.          0.        ]]
optimization_failed: True
ERROR in validate_data: Lower bound at index 2 is greater than upper bound: 3.4265e+00 > 1.4364e+00
ERROR in osqp_setup: Problem data validation.
expert_action: [[ 0.14226988 -0.87709254 -2.005182  ]
 [ 0.          0.          0.        ]]
optimization_failed: False
expert_action: [[ 0.07306751  0.39050758 -0.10339569]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_actio

Exception in thread Thread-4420:
Traceback (most recent call last):
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/threading.py", line 932, in _bootstrap_inner
    self.run()
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/examples/control/hitting_agent_wait.py", line 85, in _plan_trajectory_thread
    ee_traj, hit_idx, q_anchor = self.plan_ee_trajectory(puck_pos, ee_pos)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/examples/control/hitting_agent_wait.py", line 122, in plan_ee_trajectory
    p = np.hstack([p, np.ones((p.shape[0], 1)) * self.ee_height])
  File "<__array_function__ internals>", line 200, in hstack
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/site-packages/numpy/core/shape_base.py", line 368, in hstack
    return _nx.concatenate(arrs, 0, dtype=dtype, casting=casting

expert_action: [[-0.13134968  0.14803529  0.35780385]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[-0.31430766  0.33941346  0.49475744]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[ 0.2787915  -0.65479153 -1.5906917 ]
 [ 0.          0.          0.        ]]
optimization_failed: False
expert_action: [[-0.10257582  0.12261888  0.1679051 ]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[ 0.14340541  0.02520191 -0.5296495 ]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[0.17991996 0.41836154 0.07481731]
 [0.         0.         0.        ]]
optimization_failed: True
expert_action: [[ 0.13423087  0.13567248 -0.29127008]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[0.30305916 0.3331746  0.21795753]
 [0.         0.         0.        ]]
optimization_failed: True
expert_action: [[ 0.3717156  -0.71710956 -0.0060628

Exception in thread Thread-4642:
Traceback (most recent call last):
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/threading.py", line 932, in _bootstrap_inner
    self.run()
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/examples/control/hitting_agent_wait.py", line 85, in _plan_trajectory_thread
    ee_traj, hit_idx, q_anchor = self.plan_ee_trajectory(puck_pos, ee_pos)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/examples/control/hitting_agent_wait.py", line 122, in plan_ee_trajectory
    p = np.hstack([p, np.ones((p.shape[0], 1)) * self.ee_height])
  File "<__array_function__ internals>", line 200, in hstack
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/site-packages/numpy/core/shape_base.py", line 368, in hstack
    return _nx.concatenate(arrs, 0, dtype=dtype, casting=casting

expert_action: [[-0.9784547   0.9956266   0.9239848 ]
 [ 0.07718238 -0.17397662  0.22046627]]
optimization_failed: False
expert_action: [[ 0.3120535  -0.66199684 -1.5693977 ]
 [ 0.          0.          0.        ]]
optimization_failed: False
expert_action: [[-1.105415    1.2529955   1.2631328 ]
 [ 0.07739013 -0.13654026  0.15534136]]
optimization_failed: False
expert_action: [[-0.11890393 -0.17276096  0.44214526]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[ 0.00083233 -0.07792191 -0.21223128]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[-0.35042456  0.01107498  0.2698251 ]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[ 1.7752268  -0.56540567 -1.5360508 ]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[ 0.15893206 -0.0714314  -0.05808924]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[ 0.15358731 -0.14488

Exception in thread Thread-4805:
Traceback (most recent call last):
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/threading.py", line 932, in _bootstrap_inner
    self.run()
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/examples/control/hitting_agent_wait.py", line 85, in _plan_trajectory_thread
    ee_traj, hit_idx, q_anchor = self.plan_ee_trajectory(puck_pos, ee_pos)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/examples/control/hitting_agent_wait.py", line 122, in plan_ee_trajectory
    p = np.hstack([p, np.ones((p.shape[0], 1)) * self.ee_height])
  File "<__array_function__ internals>", line 200, in hstack
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/site-packages/numpy/core/shape_base.py", line 368, in hstack
    return _nx.concatenate(arrs, 0, dtype=dtype, casting=casting

expert_action: [[-0.6927767   0.5486653   0.8923408 ]
 [-0.14772324  0.39243892 -0.35721573]]
optimization_failed: False
expert_action: [[ 0.0910794  -0.81312615 -1.5732309 ]
 [ 0.          0.          0.        ]]
optimization_failed: False
expert_action: [[-0.59219825  0.40939984  1.0822121 ]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[ 0.01608523 -0.1277473   0.18838719]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[ 0.3542259  -0.48182753  0.1569758 ]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[-0.06404203 -0.26213816  0.5596188 ]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[ 1.7847697 -0.6091575 -1.6767838]
 [ 0.         0.         0.       ]]
optimization_failed: True
expert_action: [[-1.3170174   0.48797444 -0.74587977]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[ 0.50905555 -0.50744426 -0.

Exception in thread Thread-4878:
Traceback (most recent call last):
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/threading.py", line 932, in _bootstrap_inner
    self.run()
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/examples/control/hitting_agent_wait.py", line 86, in _plan_trajectory_thread
    _, joint_pos_traj = self.optimizer.optimize_trajectory(ee_traj, joint_pos, joint_vel, q_anchor)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/baseline/baseline_agent/optimizer.py", line 35, in optimize_trajectory
    success, dq_next = self._solve_aqp(des_point[:3], q_cur, dq_anchor)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/baseline/baseline_agent/optimizer.py", line 63, in _solve_aqp
    solver.setup(P=sparse.csc_matrix(P), q=q, A=sparse.csc_matrix(A), l=l, u=u, verbose=False,

expert_action: [[-0.04332216  0.18131313  0.07897364]
 [ 0.          0.          0.        ]]
optimization_failed: True
ERROR in validate_data: Lower bound at index 2 is greater than upper bound: 6.8751e+00 > -1.4147e+00
ERROR in osqp_setup: Problem data validation.
expert_action: [[ 0.14209    -0.88668084 -2.131177  ]
 [ 0.          0.          0.        ]]
optimization_failed: False
expert_action: [[-0.45871595 -0.88799983 -1.7673175 ]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[-0.01195845 -0.3721832  -0.43732822]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[ 0.7493014 -0.3371949 -0.9292635]
 [ 0.         0.         0.       ]]
optimization_failed: True
expert_action: [[-0.23046404  0.03752451  1.2083248 ]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[-1.2191987  0.8036189 -0.5747787]
 [ 0.         0.         0.       ]]
optimization_failed: True
expert_action: [[ 0.1768

Exception in thread Thread-5049:
Traceback (most recent call last):
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/threading.py", line 932, in _bootstrap_inner
    self.run()
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/examples/control/hitting_agent_wait.py", line 85, in _plan_trajectory_thread
    ee_traj, hit_idx, q_anchor = self.plan_ee_trajectory(puck_pos, ee_pos)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/examples/control/hitting_agent_wait.py", line 116, in plan_ee_trajectory
    res = np.array([self.bezier_planner.get_point(t_i) for t_i in np.arange(0, self.bezier_planner.t_final + 1e-6,
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/examples/control/hitting_agent_wait.py", line 116, in <listcomp>
    res = np.array([self.bezier_planner.get_point(t_i) for t_i in np.ar

expert_action: [[-0.84270144  1.0157673   0.31597206]
 [ 0.16658016 -0.62464446  0.85502446]]
optimization_failed: False
expert_action: [[ 0.5330235 -0.5483017 -0.5162173]
 [ 0.         0.         0.       ]]
optimization_failed: True
expert_action: [[ 0.34703645 -0.66932356 -1.5550904 ]
 [ 0.          0.          0.        ]]
optimization_failed: False
expert_action: [[ 0.36935675 -0.27815923 -0.52416986]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[-0.11055823 -0.04139642  0.60617507]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[-0.62781996  0.48809043  0.92630696]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[-0.15275536  0.13519959  0.10403015]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[ 0.20421211 -0.07062083 -0.17004035]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[-0.52553016  0.84249043 -0.

Exception in thread Thread-5109:
Traceback (most recent call last):
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/threading.py", line 932, in _bootstrap_inner
    self.run()
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/examples/control/hitting_agent_wait.py", line 85, in _plan_trajectory_thread
    ee_traj, hit_idx, q_anchor = self.plan_ee_trajectory(puck_pos, ee_pos)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/examples/control/hitting_agent_wait.py", line 122, in plan_ee_trajectory
    p = np.hstack([p, np.ones((p.shape[0], 1)) * self.ee_height])
  File "<__array_function__ internals>", line 200, in hstack
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/site-packages/numpy/core/shape_base.py", line 368, in hstack
    return _nx.concatenate(arrs, 0, dtype=dtype, casting=casting

expert_action: [[-0.11928453 -0.06941381 -0.3681254 ]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[-0.12844163 -0.13211305  0.12725143]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[ 0.24471438 -0.00381829 -0.24484073]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[-0.25235665  0.1083178   1.0727016 ]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[-0.28477576  0.46391872  0.45118263]
 [ 0.          0.          0.        ]]
optimization_failed: False
expert_action: [[ 0.74057436 -1.1645209  -1.4158075 ]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[ 1.354802   -0.22379322 -0.74183625]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[-1.0391736   1.1791614   1.0626118 ]
 [ 0.16153133 -0.34604296  0.4325942 ]]
optimization_failed: False
expert_action: [[-0.43210918  0.255088

Exception in thread Thread-5263:
Traceback (most recent call last):
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/threading.py", line 932, in _bootstrap_inner
    self.run()
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/examples/control/hitting_agent_wait.py", line 86, in _plan_trajectory_thread
    _, joint_pos_traj = self.optimizer.optimize_trajectory(ee_traj, joint_pos, joint_vel, q_anchor)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/baseline/baseline_agent/optimizer.py", line 35, in optimize_trajectory
    success, dq_next = self._solve_aqp(des_point[:3], q_cur, dq_anchor)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/baseline/baseline_agent/optimizer.py", line 63, in _solve_aqp
    solver.setup(P=sparse.csc_matrix(P), q=q, A=sparse.csc_matrix(A), l=l, u=u, verbose=False,

expert_action: [[-0.16915008  0.07848694  0.35732728]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[-0.60295105  0.46893346  0.9942728 ]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[ 0.15538576 -0.7599411  -1.6628653 ]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[ 0.1883965   0.12696654 -0.49579388]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[ 0.22751787  0.07642453 -0.41387066]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[ 0.07693227 -0.16904156 -0.03725892]
 [ 0.          0.          0.        ]]
optimization_failed: True
ERROR in validate_data: Lower bound at index 2 is greater than upper bound: 8.3872e+00 > 1.3840e+00
ERROR in osqp_setup: Problem data validation.
expert_action: [[ 0.13330626 -0.88995194 -2.1054447 ]
 [ 0.          0.          0.        ]]
optimization_failed: False
expert_action

Exception in thread Thread-5385:
Traceback (most recent call last):
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/threading.py", line 932, in _bootstrap_inner
    self.run()
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/examples/control/hitting_agent_wait.py", line 85, in _plan_trajectory_thread
    ee_traj, hit_idx, q_anchor = self.plan_ee_trajectory(puck_pos, ee_pos)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/examples/control/hitting_agent_wait.py", line 116, in plan_ee_trajectory
    res = np.array([self.bezier_planner.get_point(t_i) for t_i in np.arange(0, self.bezier_planner.t_final + 1e-6,
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/examples/control/hitting_agent_wait.py", line 116, in <listcomp>
    res = np.array([self.bezier_planner.get_point(t_i) for t_i in np.ar

expert_action: [[ 0.06704289 -0.13969721  0.48282743]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[ 0.3798768  -0.66892296 -1.5559297 ]
 [ 0.          0.          0.        ]]
optimization_failed: False
expert_action: [[-0.604116    0.39575136  0.71246624]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[-0.35171327  0.30248484  0.66966814]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[-0.07373026 -0.13590662  0.73043483]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[-0.45274404  0.20624374  1.0749692 ]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[-0.360843    0.45277607  0.7096073 ]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[ 0.16660345  0.13666813 -0.2151777 ]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[ 0.29943982 -0.3567969

Exception in thread Thread-5743:
Traceback (most recent call last):
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/threading.py", line 932, in _bootstrap_inner
    self.run()
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/examples/control/hitting_agent_wait.py", line 86, in _plan_trajectory_thread
    _, joint_pos_traj = self.optimizer.optimize_trajectory(ee_traj, joint_pos, joint_vel, q_anchor)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/baseline/baseline_agent/optimizer.py", line 35, in optimize_trajectory
    success, dq_next = self._solve_aqp(des_point[:3], q_cur, dq_anchor)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/baseline/baseline_agent/optimizer.py", line 63, in _solve_aqp
    solver.setup(P=sparse.csc_matrix(P), q=q, A=sparse.csc_matrix(A), l=l, u=u, verbose=False,

expert_action: [[-0.37591225 -0.01864379  0.53414375]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[0.11916947 0.16905515 0.23280324]
 [0.         0.         0.        ]]
optimization_failed: True
ERROR in validate_data: Lower bound at index 2 is greater than upper bound: 3.4807e+00 > 1.9269e+00
ERROR in osqp_setup: Problem data validation.
expert_action: [[ 1.6273971 -0.3190639 -1.9964571]
 [ 0.         0.         0.       ]]
optimization_failed: False
expert_action: [[ 0.4827374  -0.18701337 -0.3501824 ]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[ 0.02579224 -0.20619932 -0.30417946]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[ 0.38692918 -0.38909867 -0.33378315]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[-1.0623136  1.1467687  1.2957568]
 [ 0.         0.         0.       ]]
optimization_failed: True
expert_action: [[ 0.79123205  0

Exception in thread Thread-5806:
Traceback (most recent call last):
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/threading.py", line 932, in _bootstrap_inner
    self.run()
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/examples/control/hitting_agent_wait.py", line 85, in _plan_trajectory_thread
    ee_traj, hit_idx, q_anchor = self.plan_ee_trajectory(puck_pos, ee_pos)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/examples/control/hitting_agent_wait.py", line 122, in plan_ee_trajectory
    p = np.hstack([p, np.ones((p.shape[0], 1)) * self.ee_height])
  File "<__array_function__ internals>", line 200, in hstack
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/site-packages/numpy/core/shape_base.py", line 368, in hstack
    return _nx.concatenate(arrs, 0, dtype=dtype, casting=casting

expert_action: [[-1.0914063   1.1830581   1.4001399 ]
 [-0.03026531  0.0507515  -0.05821444]]
optimization_failed: False
expert_action: [[ 0.23398411 -0.6229735  -1.6426678 ]
 [ 0.          0.          0.        ]]
optimization_failed: False
expert_action: [[ 0.4965134 -0.3560663 -0.7279221]
 [ 0.         0.         0.       ]]
optimization_failed: True
expert_action: [[ 0.4659239  -0.40618727 -0.67332494]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[-0.7804661   0.78868514  0.7790773 ]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[-0.6151435   0.49441126  0.6751603 ]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[-0.96479034  0.9552556   1.0132654 ]
 [ 0.00552326  0.00711024  0.01173569]]
optimization_failed: False
expert_action: [[-0.00877555  0.3481046   0.5387198 ]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[-0.30290172 -1.3305142  -0

Exception in thread Thread-5826:
Traceback (most recent call last):
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/threading.py", line 932, in _bootstrap_inner
    self.run()
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/examples/control/hitting_agent_wait.py", line 85, in _plan_trajectory_thread
    ee_traj, hit_idx, q_anchor = self.plan_ee_trajectory(puck_pos, ee_pos)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/examples/control/hitting_agent_wait.py", line 116, in plan_ee_trajectory
    res = np.array([self.bezier_planner.get_point(t_i) for t_i in np.arange(0, self.bezier_planner.t_final + 1e-6,
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/examples/control/hitting_agent_wait.py", line 116, in <listcomp>
    res = np.array([self.bezier_planner.get_point(t_i) for t_i in np.ar

expert_action: [[ 0.09876818 -0.01010689  0.4236384 ]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[-0.00231881 -0.6152347   0.549082  ]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[-0.3952087  -0.00613405  0.26545808]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[ 0.23174815  0.03998184 -0.3308175 ]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[ 0.02625399 -0.07825845  0.7908446 ]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[ 0.18882649 -0.90680295 -1.6277595 ]
 [ 0.          0.          0.        ]]
optimization_failed: False
expert_action: [[-0.01459786 -0.26563686 -0.46353242]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[-0.49783498  0.29945016  0.89871913]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[-1.0018064   1.1116755

Exception in thread Thread-6044:
Traceback (most recent call last):
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/threading.py", line 932, in _bootstrap_inner
    self.run()
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/examples/control/hitting_agent_wait.py", line 85, in _plan_trajectory_thread
    ee_traj, hit_idx, q_anchor = self.plan_ee_trajectory(puck_pos, ee_pos)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/examples/control/hitting_agent_wait.py", line 122, in plan_ee_trajectory
    p = np.hstack([p, np.ones((p.shape[0], 1)) * self.ee_height])
  File "<__array_function__ internals>", line 200, in hstack
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/site-packages/numpy/core/shape_base.py", line 368, in hstack
    return _nx.concatenate(arrs, 0, dtype=dtype, casting=casting

expert_action: [[-1.1181271   1.2651008   1.285372  ]
 [ 0.03979941 -0.0671889   0.07435513]]
optimization_failed: False
expert_action: [[ 0.15221299 -0.9014953  -1.530576  ]
 [ 0.          0.          0.        ]]
optimization_failed: False
expert_action: [[-1.4358     0.4306963 -0.3983826]
 [ 0.         0.         0.       ]]
optimization_failed: True
expert_action: [[-0.02076995 -0.5209251   0.06268455]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[ 0.5375551  -0.6034368   0.03614739]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[ 0.72403544 -0.48639703 -0.34786612]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[-1.1221437   1.2906765   1.3009112 ]
 [ 0.01678667 -0.02582353  0.0264991 ]]
optimization_failed: False
expert_action: [[-0.40601897  0.60681045  0.08583099]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[-0.54156834  0.39741042  0

Exception in thread Thread-6081:
Traceback (most recent call last):
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/threading.py", line 932, in _bootstrap_inner
    self.run()
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/examples/control/hitting_agent_wait.py", line 86, in _plan_trajectory_thread
    _, joint_pos_traj = self.optimizer.optimize_trajectory(ee_traj, joint_pos, joint_vel, q_anchor)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/baseline/baseline_agent/optimizer.py", line 35, in optimize_trajectory
    success, dq_next = self._solve_aqp(des_point[:3], q_cur, dq_anchor)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/baseline/baseline_agent/optimizer.py", line 63, in _solve_aqp
    solver.setup(P=sparse.csc_matrix(P), q=q, A=sparse.csc_matrix(A), l=l, u=u, verbose=False,

expert_action: [[-0.75672007  0.6270686   1.2844229 ]
 [-0.35209808  0.74951625 -0.6568871 ]]
optimization_failed: False
expert_action: [[ 0.13669492 -0.07882933  0.22655794]
 [ 0.          0.          0.        ]]
optimization_failed: True
ERROR in validate_data: Lower bound at index 2 is greater than upper bound: 8.8230e+00 > -8.2098e-01
ERROR in osqp_setup: Problem data validation.
expert_action: [[ 0.12213133 -0.8665892  -2.1582603 ]
 [ 0.          0.          0.        ]]
optimization_failed: False
expert_action: [[-0.06129466  0.21133845 -0.2849607 ]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[ 0.83839035 -0.62171817 -1.1291972 ]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[-0.3143282   0.20523943  0.9006417 ]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[-0.11462268 -0.26904494 -0.629176  ]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_acti

Exception in thread Thread-7011:
Traceback (most recent call last):
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/threading.py", line 932, in _bootstrap_inner
    self.run()
  File "/opt/anaconda3/envs/air_hockey_challenge/lib/python3.8/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/examples/control/hitting_agent_wait.py", line 85, in _plan_trajectory_thread
    ee_traj, hit_idx, q_anchor = self.plan_ee_trajectory(puck_pos, ee_pos)
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/examples/control/hitting_agent_wait.py", line 116, in plan_ee_trajectory
    res = np.array([self.bezier_planner.get_point(t_i) for t_i in np.arange(0, self.bezier_planner.t_final + 1e-6,
  File "/Users/annetteader/PycharmProjects/air_hockey_challenge/examples/control/hitting_agent_wait.py", line 116, in <listcomp>
    res = np.array([self.bezier_planner.get_point(t_i) for t_i in np.ar

expert_action: [[-0.7499143   0.7784129   0.61364454]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[-0.72624916  0.3337437  -0.3272969 ]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[-0.8658305   0.81734294  1.1154131 ]
 [ 0.          0.          0.        ]]
optimization_failed: False
expert_action: [[ 0.18688375  0.07923381 -0.5553619 ]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[-0.6656602   0.56562847  0.6301968 ]
 [-0.03941377  0.11725168 -0.09641077]]
optimization_failed: False
expert_action: [[-0.29255924  0.187323    0.45349982]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[ 0.14387763 -0.01725383 -0.15805583]
 [ 0.          0.          0.        ]]
optimization_failed: True
expert_action: [[-1.0143994  0.5898604 -0.3815038]
 [ 0.         0.         0.       ]]
optimization_failed: True
expert_action: [[-0.02091914  0.31819472  0.