In [14]:
from math import radians, sqrt, sin, cos, log, inf
import random
import torch as torch
import numpy as np
from collections import deque




# Part a

In [15]:
def compute_values(goal, starting_values):

    x_goal, y_goal, z_goal = goal

    t_1_int, d_2_int, d_3_int, t_4_int, t_5_int = starting_values
    t_1, d_2, d_3, t_4, t_5 = starting_values

    # Constant
    t_6 = radians(40)
    d_6 = .2

    #Arbitrary
    d_1 = 3
    threshold = 0.001

    x = (cos(t_1) * cos(t_4) * sin(t_5) * d_6) - (sin(t_1) * cos(t_5) * d_6) - (sin(t_1) * d_3)
    y = (sin(t_1) * cos(t_4) * sin(t_5) * d_6) + (cos(t_1) * cos(t_5) * d_6) + (cos(t_1) * d_3)
    z = (sin(t_4) * sin(t_5) * d_6) + d_1 + d_2

    def dist(x1, x2, y1, y2, z1, z2):
        return sqrt((x2-x1)**2 + (y2-y1)**2 + (z2-z1)**2)

    change = 0
    n = 1
    while(n > 0):
        
        factor = dist(x, x_goal, y, y_goal, z, z_goal) * 2
        t_1_diff = (random.random()-0.5) * factor
        t_4_diff = (random.random()-0.5) * factor
        t_5_diff = (random.random()-0.5) * factor
        d_2_diff = (random.random()-0.5) * factor
        d_3_diff = (random.random()-0.5) * factor

        t_1_temp = t_1 - t_1_diff
        t_4_temp = t_4 - t_4_diff
        t_5_temp = t_5 - t_5_diff
        d_2_temp = d_2 - d_2_diff
        d_3_temp = d_3 - d_3_diff

        change += abs(t_1_diff) + abs(t_4_diff) + abs(t_5_diff) + abs(d_2_diff) + abs(d_3_diff)

        x_tmp = (cos(t_1_temp) * cos(t_4_temp) * sin(t_5_temp) * d_6) - (sin(t_1_temp) * cos(t_5_temp) * d_6) - (sin(t_1_temp) * d_3_temp)
        y_tmp = (sin(t_1_temp) * cos(t_4_temp) * sin(t_5_temp) * d_6) + (cos(t_1_temp) * cos(t_5_temp) * d_6) + (cos(t_1_temp) * d_3_temp)
        z_tmp = (sin(t_4_temp) * sin(t_5_temp) * d_6) + d_1 + d_2_temp

        if abs(x_tmp - x_goal) < threshold and abs(y_tmp - y_goal) < threshold and abs(z_tmp - z_goal) < threshold:
            # print("success")
            # print(f"Final XYZ {x_tmp:.4}, {y_tmp:.4}, {z_tmp:.4},")
            # print(f"Final thetas {t_1_temp=:.4} {t_4_temp=:.4} {t_5_temp=:.4} {d_2_temp=} {d_3_temp=}")
            # print("num iter {}".format(n))
            # print('Change: ', change)
            return (x_tmp, y_tmp, z_tmp), (t_1_temp, d_2_temp, d_3_temp, t_4_temp, t_5_temp), change, n
        
        if dist(x_goal, x, y_goal, y, z_goal, z) < dist(x_goal, x_tmp, y_goal, y_tmp, z_goal, z_tmp):
            pass
        else:
            t_1 = t_1_temp 
            t_4 = t_4_temp
            t_5 = t_5_temp
            d_2 = d_2_temp
            d_3 = d_3_temp

            x = x_tmp
            y = y_tmp
            z = z_tmp

        n += 1

In [16]:
goal = (1.2, 0.8, 0.5)
starting_values = (radians(-90), 0.5, 1, radians(-90), radians(90))
position, values, change, iterations = compute_values(goal, starting_values)
x, y, z = position
t_1, d_2, d_3, t_4, t_5 = values

print("success")
print(f"Final XYZ: {x:.4}, {y:.4}, {z:.4},")
print(f"Final values: {t_1=:.4} {t_4=:.4} {t_5=:.4} {d_2=} {d_3=}")
print("Iterations: {}".format(iterations))
print('Change: ', change)

success
Final XYZ: 1.2, 0.7999, 0.4994,
Final values: t_1=-1.009 t_4=-4.502 t_5=2.016 d_2=-2.677064776479278 d_3=1.527912654593763
Iterations: 98
Change:  77.44061701195574


# Part b

This is what we came up with after trying for too long to solve it using a neural network RL approach.

In [17]:
min_change = inf
saved = None

goal = (1.2, 0.8, 0.5)
starting_values = (radians(0), 0.2, .3, radians(-90), radians(90))

for iteration in range(1, 100000):
    position, values, change, iterations = compute_values(goal, starting_values)
    if change < min_change:
        min_change = change
        saved = (position, values, change, iterations)


position, values, change, iteration = saved
x, y, z = position
t_1, d_2, d_3, t_4, t_5 = values

print("success")
print(f"Final XYZ: {x:.4}, {y:.4}, {z:.4},")
print(f"Final values: {t_1 = :.4} {t_4 = :.4} {t_5 = :.4} {d_2 = :.4} {d_3 = :.4}")
print("Iterations: {}".format(iterations))
print('Change: ', change)

success
Final XYZ: 1.2, 0.8, 0.5004,
Final values: t_1 = -0.896 t_4 = -0.674 t_5 = 0.9246 d_2 = -2.4 d_3 = 1.316
Iterations: 63
Change:  8.111665934888014


We tried to train a neural network to do this but it had some serious issues. I think we approached the problem wrong. Below is our initial attempt at 3b.

In [18]:
class Memory(object):
    def __init__(self, max_size=100):
        self.memory = deque(maxlen=max_size)

    def push(self, element):
        self.memory.append(element)

    def get_batch(self, batch_size=4):
        if batch_size > len(self.memory):
            batch_size = len(self.memory)
        return random.sample(self.memory, batch_size)

    def __repr__(self):
        return f"Current elements in memory: {len(self.memory)}"

    def __len__(self):
        return len(self.memory)

In [19]:
class Environment:

    def __init__(self):
        self.x_goal = 1.2
        self.y_goal = 0.8
        self.z_goal = 0.5
        # Constant
        self.d_6 = .2
        self.d_1 = 3
        self.threshold = 0.01

        self.positions = (radians(0),
                    0.2,
                    .3,
                    radians(-90),
                    radians(90))
        self.x = inf
        self.y = inf
        self.z = inf

    def calculatePosition(self, positional_info):
        t_1, d_2, d_3, t_4, t_5 = positional_info
        
        
        x = (cos(t_1) * cos(t_4) * sin(t_5) * self.d_6) - (sin(t_1) * cos(t_5) * self.d_6) - (sin(t_1) * d_3)
        y = (sin(t_1) * cos(t_4) * sin(t_5) * self.d_6) + (cos(t_1) * cos(t_5) * self.d_6) + (cos(t_1) * d_3)
        z = (sin(t_4) * sin(t_5) * self.d_6) + self.d_1 + d_2

        return (x, y, z)

    def step(self, positional_info, steps):
        # self.positions = positional_info
        x_new, y_new, z_new = self.calculatePosition(positional_info)
        x_current, y_current, z_current = self.calculatePosition(self.positions)

        new_distance_to_end = self.dist(x_new, self.x_goal, y_new, self.y_goal, z_new, self.z_goal)
        current_distance_to_end = self.dist(x_current, self.x_goal, y_current, self.y_goal, z_current, self.z_goal)

        if new_distance_to_end < self.threshold:
            self.positions = positional_info
            done = True
            reward = 500 - steps if steps < 500 else 0
        elif new_distance_to_end < current_distance_to_end:
            self.positions = positional_info
            done = False
            reward = 0
        else:
            done = False
            reward = 0
            
        return (done, np.array(self.positions), reward)

    def dist(self, x1, x2, y1, y2, z1, z2):
        return sqrt((x2-x1)**2 + (y2-y1)**2 + (z2-z1)**2)

    def reset(self):
        self.positions = (radians(0),
                    0.2,
                    .3,
                    radians(-90),
                    radians(90))

        return (False, np.array(self.positions), 0)

    def getPositions(self):
        return self.positions

    def getDistanceFromGoal(self):
        x, y, z = self.calculatePosition(self.positions)
        return self.dist(x, self.x_goal, y, self.y_goal, z, self.z_goal)

In [20]:
class NN(torch.nn.Module):
    def __init__(self):
        super(NN, self).__init__()
        self.layer1 = torch.nn.Linear(5, 5)
        self.layer2 = torch.nn.Linear(5, 10)
    
    def forward(self, input):
        layer1_output = self.layer1(input)

        layer1_relu = torch.nn.functional.relu(layer1_output)

        layer2_output = self.layer2(layer1_relu)

        return layer2_output

In [21]:
class Agent(object):
    def __init__(self):
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        self.model = NN().to(self.device)
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=3e-3)
        self.loss_function = torch.nn.MSELoss()
        
        self.decay = 0.995
        self.randomness = 1.0
        self.min_randomness = 0.01
        self.move_step_size = 1.0


    def act(self, state):
        state = torch.from_numpy(state).to(self.device)
        probs = self.model(state.float()).cpu()
        
        if np.random.rand() <= self.randomness:
            action = np.random.randint(low=0, high=probs.size(dim=0))
        else:
            action = torch.argmax(probs).item()

        if action > 4:
            state[action - 5] -= self.move_step_size
        else:
            state[action] += self.move_step_size

        return action, state.numpy()

    def update(self, memories):
        states, next_states, actions, rewards = self.unpack_batch(memories)

        old_targets = self.old_targets(states, actions)
        new_targets = self.new_targets(states, next_states, rewards, actions)

        loss = torch.nn.functional.smooth_l1_loss(old_targets, new_targets)

        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        return loss.item()

    def unpack_batch(self, batch):
        states, next_states, actions, rewards = zip(*batch)

        states = torch.tensor(states).float().to(self.device)
        next_states = torch.tensor(next_states).float().to(self.device)

        actions = torch.tensor(actions).unsqueeze(1).long().to(self.device)
        rewards = torch.tensor(rewards).unsqueeze(1).float().to(self.device)

        return states, next_states, actions, rewards

    def old_targets(self, states, actions):
        return self.model(states).gather(1, actions)

    def new_targets(self, states, next_states, rewards, actions):
        return rewards + torch.amax(self.model(next_states), dim=1, keepdim=True)

    def update_randomness(self):
        self.randomness *= self.decay
        self.randomness = max(self.randomness, self.min_randomness)
    
    def update_move_step(self, distance):
        self.move_step_size = distance / 2


In [22]:
agent = Agent() 
env = Environment()
memory = Memory(max_size=30000)

max_iteration = 1000
logging_iteration = 50
learning = []
losses = []
distances = []

for iteration in range(1, max_iteration - 1):
    steps = 0
    done = False
    
    done, state, reward = env.reset()

    while not done:

        action, position = agent.act(state)
        done, next_state, reward = env.step(position, steps)
        memory.push(element=(state, next_state, action, reward))

        agent.update_move_step(env.getDistanceFromGoal())

        state = next_state
        steps += 1

        if done:
            distances.append(env.getDistanceFromGoal())


    for _ in range(100):
        memory_batch = memory.get_batch(batch_size=400)
        loss = agent.update(memory_batch)
    losses.append(loss)
    agent.update_randomness()
    learning.append(steps)

    
    if iteration % logging_iteration == 0:
        print(f"  Iteration: {iteration}")
        print(f"  Average Final Distance: {np.mean(distances[-logging_iteration:]):.4f}")
        print(f"  Average Steps: {np.mean(learning[-logging_iteration:]):.4f}")
        print(f"  Average Loss: {np.mean(losses[-logging_iteration:]):.4f}")
        print(f"  Agent Randomness: {agent.randomness:.3f}")
        print()



  states = torch.tensor(states).float().to(self.device)


  Iteration: 50
  Average Final Distance: 0.0076
  Average Steps: 99.1200
  Average Loss: 3.8565
  Agent Randomness: 0.778

  Iteration: 100
  Average Final Distance: 0.0077
  Average Steps: 124.3800
  Average Loss: 4.1295
  Agent Randomness: 0.606

  Iteration: 150
  Average Final Distance: 0.0075
  Average Steps: 159.5600
  Average Loss: 3.1726
  Agent Randomness: 0.471



KeyboardInterrupt: 