In [1]:
import torch
import torch.nn.functional as F
import numpy as np
from torch.nn import Parameter

from torch.optim import Adam
from torch.optim import RMSprop

from treeQN.treeqn_traj_simplest import TreeQN
import random

import pandas as pd

In [2]:
def get_start(size):
    goal_point = (size//2, size//2)
    start_point = -1
    while True:
        start_point = (random.randint(0,size), random.randint(0,size))
        if goal_point != start_point:
            break
    return start_point, goal_point

In [3]:
def hard_policy(state,goal_point):
    goal_x, goal_y = goal_point
    x,y = state
    x_right = goal_x > x # if goal is right
    x_left = goal_x < x # if goal is left
    y_up = goal_y > y # if goal is above
    y_down = goal_y < y # if goal is below
    possible_next_states = []
    if x_right:
        possible_next_states.append((x+1,y))
    if x_left:
        possible_next_states.append((x-1,y))
    if y_up:
        possible_next_states.append((x,y+1))
    if y_down:
        possible_next_states.append((x,y-1))
    if len(possible_next_states) == 0:
        return -1
    return random.choice(possible_next_states)



In [4]:
def point_to_tensor(point,goal,size,noise=0):
    x,y = point
    x_goal, y_goal = goal
    tensor = torch.zeros(size,size)
    scale = 1
    tensor[x][y] = 1 * scale
    # tensor[x+1][y] = 1 * scale
    # tensor[x][y+1] = 1 * scale
    # tensor[x+1][y+1] = 1
    tensor[x_goal][y_goal] = -1 * scale
    # tensor[x_goal+1][y_goal] = -1 * scale
    # tensor[x_goal][y_goal+1] = -1 * scale
    # tensor[x_goal+1][y_goal+1] = -1 * scale
    #add gaussian noise
    tensor = tensor + torch.randn(tensor.size()) * noise
    
    return tensor

In [5]:
def get_trajectory(size,start_point = None, goal_point = None):
    trajectory = []
    if start_point is None:
        start, goal = get_start(size)
    else:
        start, goal = start_point, goal_point
    trajectory.append(start)
    while start != goal:
        start = hard_policy(start,goal)
        trajectory.append(start)
    if len(trajectory) != 6:
        return get_trajectory(size)
    trajectory = trajectory[:5] #dont turn into goal
    return [point_to_tensor(p,goal,size).unsqueeze(0).unsqueeze(0) for p in trajectory]

In [6]:
def max_starting_points(size):
    start_points = []
    for i in range(10000):
        start_points.append(get_start(size)[0])
    start_points = set(start_points)
    goal_point = (size//2, size//2)
    return start_points, goal_point

In [7]:
size = 20
s, goal_point = max_starting_points(size)
start_points = list(s)
train_start_points = start_points[:len(start_points)//2]
test_start_points = start_points[len(start_points)//2:]

train_data = [get_trajectory(size,start_point,goal_point) for start_point in train_start_points]
valid_data = [get_trajectory(size,start_point,goal_point) for start_point in test_start_points]

<h1> Model Here

In [8]:
train_data[0][0].shape

torch.Size([1, 1, 20, 20])

In [25]:
input_shape = train_data[0][0].shape# minimum size #train_data[0][0].shape
num_actions = 1
tree_depth = 4
embedding_dim = 64
td_lambda = 1
gamma = 0.9    #0.99
decode_dropout = 0.5
t1 = False #True is Einsum. False +dx 
model = TreeQN(input_shape=input_shape, num_actions=num_actions, tree_depth=tree_depth, embedding_dim=embedding_dim, td_lambda=td_lambda,gamma=gamma,decode_dropout=decode_dropout,t1=False)
optimizer = Adam(model.parameters(), lr=1e-4)
#optimizer = RMSprop(model.parameters(), lr=1e-4,alpha =0.99, eps = 1e-5) | loss from treeqn paper
# Collect all encoder and decoder parameters

Einsum Transiton


In [26]:
#model.decoder(model.encoder(train_data[0][0])).shape

In [27]:
def validate(model, valid_data):
    model.eval()  # Set the model to evaluation mode
    total_loss = 0
    with torch.no_grad():  # Disable gradient calculation
        for t in valid_data:
            decoded_values, all_policies = model(t[0])
            decode_loss = F.mse_loss(decoded_values[0], t[0], reduction='sum')

            first_policy = all_policies[0]
            second_policy = all_policies[1].view(num_actions, -1)
            third_policy = all_policies[2].view(num_actions, num_actions, -1)
            fourth_policy = all_policies[3].view(num_actions, num_actions, num_actions, -1)

            second_layer_probs = first_policy * second_policy
            third_layer_probs = second_layer_probs * third_policy
            fourth_layer_probs = third_layer_probs * fourth_policy

            first = torch.flatten(first_policy).view(num_actions, 1, 1, 1)
            second = torch.flatten(second_layer_probs).view(num_actions**2, 1, 1, 1)
            third = torch.flatten(third_layer_probs).view(num_actions**3, 1, 1, 1)
            fourth = torch.flatten(fourth_layer_probs).view(num_actions**4, 1, 1, 1)

            first_loss = (F.mse_loss(decoded_values[1], t[1], reduction='none') * first).sum()
            second_loss = (F.mse_loss(decoded_values[2], t[2], reduction='none') * second).sum()
            third_loss = (F.mse_loss(decoded_values[3], t[3], reduction='none') * third).sum()
            fourth_loss = (F.mse_loss(decoded_values[4], t[4], reduction='none') * fourth).sum()

            l2w, l3w, l4w = 1, 1, 1
            total_loss += first_loss + second_loss * l2w + third_loss * l3w + fourth_loss * l4w + decode_loss

    return total_loss / len(valid_data)

In [28]:
def store_gradients(model):
    gradients = []
    for name, param in model.named_parameters():
        if param.grad is not None:
            gradients.append([name, param.grad.norm().item()])
    return gradients

In [29]:

# # Regularization strength (lambda)
# lambda_reg = 0 #1e-5

# all_gradients = []
# raw_losses = []
# for epoch in range(10000):
#     model.train()
#     total_loss = 0
#     for t in random.sample(train_data, len(train_data)):
#         for sample in t:
#             optimizer.zero_grad() 
#             encoding = model.encoder(sample)
#             decoding = model.decoder(encoding)
            
#             # Compute reconstruction loss (MSE)
#             reconstruction_loss = F.mse_loss(decoding, sample)
            
#             # Compute L2 regularization (sum of squared weights)
#             l2_reg = 0
#             for param in model.parameters():
#                 l2_reg += torch.sum(param ** 2)
            
#             # Combine losses
#             loss = reconstruction_loss + lambda_reg * l2_reg
            
#             loss.backward()
#             optimizer.step()
#             total_loss += loss.item()
        
#         all_gradients.append(store_gradients(model))
#         raw_losses.append(total_loss)
    
#     print(f'Epoch {epoch}, Loss: {total_loss}')
# #max loss is 0.53 for embed dim 64 

In [30]:
# #freeze encoder and decoder
# for param in model.encoder.parameters():
#     param.requires_grad = False
# for param in model.decoder.parameters():
#     param.requires_grad = False

In [31]:
# #test_autoencode ability
# with torch.no_grad():
#     test_sample = random.choice(valid_data)
#     encoding = model.encoder(test_sample[0])
#     decoding = model.decoder(encoding)
#     print('Original:', test_sample[0].numpy())
#     print('Reconstructed:', np.round(decoding.numpy()))

In [32]:
all_gradients = []
raw_losses = []
model.train()  
s1 = num_actions
s2 = num_actions**2
s3 = num_actions**3
s4 = num_actions**4

for epoch in range(300):  # epochs
# Set the model to training mode
    avg_loss = 0
    temp_loss = 0
    temp_raw_loss = 0
    sample_count = 0

    avg_decode_loss, avg_first_loss, avg_second_loss, avg_third_loss, avg_fourth_loss = 0, 0, 0, 0, 0

    avg_raw_loss = 0
    avg_valid_loss =0
    raw_gradients = []
    for t in random.sample(train_data, len(train_data)):  # sample through all data in random order each epoch
        # Get reconstruction loss to help ground abstract state
        decoded_values, all_policies = model(t[0])
        decode_loss = F.mse_loss(decoded_values[0], t[0], reduction='sum')

        # Get transition probabilities for each state
        first_policy = all_policies[0]
        second_policy = all_policies[1].view(num_actions, -1)
        third_policy = all_policies[2].view(num_actions, num_actions, -1)
        fourth_policy = all_policies[3].view(num_actions, num_actions, num_actions, -1)

        print(first_policy.shape)
        print(second_policy.shape)
        print(third_policy.shape)
        print(fourth_policy.shape)

        print('og')
        print(all_policies[0].shape)
        print(all_policies[1].shape)
        print(all_policies[2].shape)
        print(all_policies[3].shape)

        # These should all add to 1 (in testing there seems to be some small rounding error)
        second_layer_probs = first_policy * second_policy
        third_layer_probs = second_layer_probs * third_policy
        fourth_layer_probs = third_layer_probs * fourth_policy

        raise ValueError("testing")

        # Flatten transition probabilities to then weigh with loss of each predicted state at each layer
        first = torch.flatten(first_policy).view(num_actions, 1, 1, 1)
        second = torch.flatten(second_layer_probs).view(s2, 1, 1, 1)
        third = torch.flatten(third_layer_probs).view(s3, 1, 1, 1)
        fourth = torch.flatten(fourth_layer_probs).view(s4, 1, 1, 1)

        first_loss = (F.mse_loss(decoded_values[1], t[1], reduction='none') * first).sum()
        second_loss = (F.mse_loss(decoded_values[2], t[2], reduction='none') * second).sum()
        third_loss = (F.mse_loss(decoded_values[3], t[3], reduction='none') * third).sum()
        fourth_loss = (F.mse_loss(decoded_values[4], t[4], reduction='none') * fourth).sum()




        # For experimenting with different weights on different layers
        raw_loss = (first_loss + second_loss + third_loss + fourth_loss).detach().item()
        raw_losses.append(raw_loss)
        l2w, l3w, l4w = 1, 1, 1
        total_loss = first_loss + second_loss * l2w + third_loss * l3w + fourth_loss * l4w + decode_loss

        # break if total loss is nan
        if torch.isnan(total_loss):
            raise ValueError("NAN LOSS")


        avg_decode_loss += decode_loss.item()
        avg_first_loss += first_loss.item()
        avg_second_loss += second_loss.item()
        avg_third_loss += third_loss.item()
        avg_fourth_loss += fourth_loss.item()

        temp_loss += total_loss
        temp_raw_loss += raw_loss
        sample_count += 1
        avg_valid_loss += 0 #validate(model, valid_data)

        optimizer.zero_grad()
        temp_loss.backward()
        # Monitor gradients before clipping and stepping
        all_gradients.append(store_gradients(model))

        # Uncomment if you want to use gradient clipping
        #torch.nn.utils.clip_grad_norm_(model.parameters(), 1)

        optimizer.step()
        avg_loss += temp_loss.item()
        avg_raw_loss += temp_raw_loss
        temp_loss = 0
        temp_raw_loss = 0

    # Freeze encoder and decoder after first epoch
    # if epoch == 1:
    #     for param in model.encoder.parameters():
    #         param.requires_grad = False
    #     for param in model.decoder.parameters():
    #         param.requires_grad = False
    if epoch % 10 == 0: 
        #print just validation
        print(f"Epoch {epoch + 1}, Validation Loss: {validate(model, valid_data)}")

    avg_decode_loss = avg_decode_loss / len(train_data)
    avg_first_loss = avg_first_loss / len(train_data)
    avg_second_loss = avg_second_loss / len(train_data)
    avg_third_loss = avg_third_loss / len(train_data)
    avg_fourth_loss = avg_fourth_loss / len(train_data)


    avg_train_loss = avg_loss / len(train_data)
    avg_train_raw_loss = avg_raw_loss / len(train_data)
    avg_valid_loss = avg_valid_loss / len(valid_data)
    # Perform validation
    print(f"Epoch {epoch + 1}, Total Loss: {avg_train_loss}, DLoss: {avg_decode_loss}, A1: {avg_first_loss}, A2: {avg_second_loss}, A3: {avg_third_loss}, A4: {avg_fourth_loss}")

#Max loss with 4 actions, 1.38 after 130 epochs
#Max loss with 1 action, 1.5. Bascially the same...

torch.Size([1, 1])
torch.Size([1, 1])
torch.Size([1, 1, 1])
torch.Size([1, 1, 1, 1])
og
torch.Size([1, 1])
torch.Size([1, 1])
torch.Size([1, 1])
torch.Size([1, 1])


ValueError: testing

Epoch 581, Validation Loss: 0.8256955742835999 Old T function
Epoch 581, Train Loss: 0.7633827402764424, Train Raw Loss: 0.7448219670363787, Validation Loss: 0.0
Epoch 582, Train Loss: 0.7654743137176742, Train Raw Loss: 0.7468309859618206, Validation Loss: 0.0
Epoch 583, Train Loss: 0.7629341331564568, Train Raw Loss: 0.7446876400827684, Validation Loss: 0.0
Epoch 584, Train Loss: 0.7623813437213275, Train Raw Loss: 0.7441016497005793, Validation Loss: 0.0
Epoch 585, Train Loss: 0.759624786996706, Train Raw Loss: 0.7415352151136506, Validation Loss: 0.0
Epoch 586, Train Loss: 0.7635865574025295, Train Raw Loss: 0.7449935508108783, Validation Loss: 0.0
Epoch 587, Train Loss: 0.7689634615301408, Train Raw Loss: 0.750305650933561, Validation Loss: 0.0
Epoch 588, Train Loss: 0.7626074822196229

In [None]:
test_state = train_data[34][0]
encoded_test = model.encoder(test_state)
transitioned_test = model.tree_transition(encoded_test)
print(transitioned_test.shape)
decoded_transition = model.decoder(transitioned_test)
print(decoded_transition.shape)
print(test_state.argmax())
print('original state')
print(test_state.squeeze(0).squeeze(0)[4:-4, 4:-4])
for i in range(4):
    print('action taken:',i)
    print(torch.round(decoded_transition[i].squeeze(0).squeeze(0)[4:-4, 4:-4]))

torch.Size([1, 1, 64])
torch.Size([1, 1, 20, 20])
tensor(167)
original state
tensor([[ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0., -1.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.]])
action taken: 0
tensor([[ 0.,  0., -0.,  0., -0., -0., -0., -0.,  0.,  0.,  0.,  0

IndexError: index 1 is out of bounds for dimension 0 with size 1

In [None]:
def decode_action(start_state, action_index, tf_tensor):
    with torch.no_grad():
        #encode start_state:
        z = model.encoder(start_state)
        transitions = (torch.einsum("ij,jab->iba", z, tf_tensor))
        decoded_states = model.decoder(transitions)
        print(decoded_states.shape)
        return torch.round(model.decoder(transitions[action_index]))


In [None]:
#4.46, 5.3 at 2000
for name, num in all_gradients[-1]:
    print(name +':', round(num, 2))

transition_fun: 1.94
goal: 0.0
goal_beta: 0.0
decoder.fc.weight: 2.96
decoder.fc.bias: 0.08
decoder.deconv1.weight: 6.28
decoder.deconv1.bias: 0.22
decoder.deconv2.weight: 7.74
decoder.deconv2.bias: 0.82
decoder.final_conv.weight: 7.27
decoder.final_conv.bias: 0.0
encoder.cnn_encoder.conv1.weight: 1.24
encoder.cnn_encoder.conv1.bias: 2.8
encoder.cnn_encoder.bn1.weight: 0.26
encoder.cnn_encoder.bn1.bias: 0.12
encoder.cnn_encoder.conv2.weight: 2.37
encoder.cnn_encoder.conv2.bias: 0.32
encoder.cnn_encoder.bn2.weight: 0.19
encoder.cnn_encoder.bn2.bias: 0.11
encoder.cnn_encoder.conv3.weight: 1.55
encoder.cnn_encoder.conv3.bias: 0.12
encoder.cnn_encoder.bn3.weight: 0.13
encoder.cnn_encoder.bn3.bias: 0.04
encoder.cnn_encoder.residual_conv.weight: 0.36
encoder.cnn_encoder.residual_conv.bias: 0.05
encoder.linear.weight: 4.45
encoder.linear.bias: 0.04


In [None]:
def get_best_path(decoded, all_policies):
    best_first_action = all_policies[0].argmax().item()
    best_second_action = all_policies[1].view(num_actions,-1)[best_first_action].argmax().item() 
    best_third_action = all_policies[2].view(num_actions,num_actions,-1)[best_first_action][best_second_action].argmax().item()
    best_fourth_action = all_policies[3].view(num_actions,num_actions,num_actions,-1)[best_first_action][best_second_action][best_third_action].argmax().item()
    print(best_first_action,best_second_action,best_third_action,best_fourth_action)
    first = decoded[0]
    second = decoded[1][best_first_action].unsqueeze(0)
    third = decoded[2][best_first_action**2 + best_second_action].unsqueeze(0)
    fourth = decoded[3][best_first_action**3 + best_second_action**2 + best_third_action].unsqueeze(0)
    fifth = decoded[4][best_first_action**4 + best_second_action**3 + best_third_action**2 + best_fourth_action].unsqueeze(0)
    return [first,second,third,fourth,fifth]

In [None]:
def get_best_path_actions(decoded, all_policies):
    best_first_action = all_policies[0].argmax().item()
    best_second_action = all_policies[1].view(num_actions,-1)[best_first_action].argmax().item() 
    best_third_action = all_policies[2].view(num_actions,num_actions,-1)[best_first_action][best_second_action].argmax().item()
    best_fourth_action = all_policies[3].view(num_actions,num_actions,num_actions,-1)[best_first_action][best_second_action][best_third_action].argmax().item()
    return best_first_action,best_second_action,best_third_action,best_fourth_action

In [None]:
a_data = []
for i in valid_data:
    d,q = model(i[0])
    a_data.append(get_best_path_actions(d,q))

In [None]:
pd.DataFrame(a_data).var()

0    0.0
1    0.0
2    0.0
3    0.0
dtype: float64

In [None]:
#try backward in succession?
a_data = []
for i in valid_data:
    a,b = model(i[0])
    get_best_path(a,b)


0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0


In [None]:
test_trans = torch.rand(num_actions,embedding_dim)
enc_state = model.encoder(valid_data[0][0])
print(test_trans.shape)
print(enc_state.shape)

torch.Size([1, 64])
torch.Size([1, 64])


In [None]:
t1 = (test_trans + enc_state)
t1 = t1.view(num_actions,embedding_dim,-1).shape
t1

torch.Size([1, 64, 1])

In [None]:
valid_data[0][2].argmin()

tensor(210)

In [None]:
d, q = model(train_data[0][0])

In [None]:
def viewer(tensor):
    tensor = tensor.squeeze(0).squeeze(0)
    max_val_index = tensor.argmax().item()
    min_val_index = tensor.argmin().item()
    max_val = tensor.max().item()
    min_val = tensor.min().item()

    new_tensor = torch.zeros_like(tensor).flatten()
    new_tensor[max_val_index] = 1
    new_tensor[min_val_index] = -1
    new_tensor = new_tensor.view(tensor.shape)

    zoomed_tensor = new_tensor[5:-5, 5:-5]
    return torch.round(zoomed_tensor)

def viewer_2(tensor): 
    tensor = tensor.squeeze(0).squeeze(0) 
    zoomed_tensor = tensor[5:-5, 5:-5]
    return torch.round(zoomed_tensor)

def print_movie(tensor_list):
    for tensor,name in zip(tensor_list,["Start","Action 1","Action 2","Action 3","Action 4"]):
        print(name)
        print(viewer(tensor))
        print("\n")

print_movie(get_best_path(d,q))

0 0 0 0
Start
tensor([[ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0., -1.,  0.,  0.,  0.,  0.],
        [ 0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.]])


Action 1
tensor([[ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.,  0.,  0.,  0.],
        [ 0., 

In [None]:
#View Action Weights (This hasn't been informative yet)
dec, all_policies = model(train_data[0][0]) 
# dot = make_dot((dec[0],dec[1],dec[2],dec[3],dec[4],all_policies[0],all_policies[1],all_policies[2],all_policies[3]),params=dict(model.named_parameters()))
# dot.render('model', format='png')
print(f"Action Weight Sums { torch.round(model.transition_fun.data,decimals=3).sum(dim=0).sum(dim=0)}")  #might be summing the wrong way, or just not interesting

Action Weight Sums tensor([ 0.5190,  0.5720,  0.1650,  0.8650,  0.7450,  0.3820,  0.2240,  0.3700,
         0.2120,  0.7790,  0.7160,  0.8930,  0.3270,  0.2910,  0.4630,  0.3670,
         0.5340,  0.4240,  0.9550,  0.2990,  0.3370,  0.0580,  0.1530,  0.0390,
         0.9360,  0.7010,  0.9120,  0.8130,  0.3640,  0.5070,  0.6280,  0.6390,
        -0.0670,  0.4870,  0.9580,  1.0000,  0.7500,  0.2280,  0.6930,  0.4580,
         0.1240,  0.1720,  0.3240,  0.5160,  0.2940,  0.0600,  0.9600,  0.9100,
         0.6570,  0.4000,  0.7830,  0.1180,  0.5930,  0.5110,  0.5780,  1.0540,
         0.8860,  0.4110,  0.7130,  0.5170,  0.3490,  0.5970,  0.2290,  0.6070])


In [None]:
best_first_action = all_policies[0].argmax()
best_second_action = all_policies[1].view(num_actions,-1)[best_first_action].argmax() 
best_third_action = all_policies[2].view(num_actions,num_actions,-1)[best_first_action][best_second_action].argmax()
best_fourth_action = all_policies[3].view(num_actions,num_actions,num_actions,-1)[best_first_action][best_second_action][best_third_action].argmax() 
# print(torch.round(all_q[0],decimals=3).detach(), f"Argmax {all_q[0].argmax().item()}")
# print(torch.round(all_q[1],decimals=3).view(4,-1).detach(),f"Argmax {all_q[1].view(4,-1)[1].argmax().item()}")
# print(torch.round(all_q[2],decimals=3).view(4,4,-1)[0].detach(),f"Argmax {all_q[2].view(4,4,-1)[1][0].argmax().item()}")
# print(torch.round(all_q[3],decimals=3).view(4,4,4,-1)[0][0].detach(),f"Argmax {all_q[3].view(4,4,4,-1)[1][1][0].argmax().item()}")
print(f"Best Actions: {best_first_action.item()} {best_second_action.item()} {best_third_action.item()} {best_fourth_action.item()}")

Best Actions: 0 0 0 0


In [None]:
#View Action Weights (This hasn't been informative yet)
for i in range(500):
    dec, all_policies = model(train_data[i][0]) 

    best_first_action = all_policies[0].argmax()
    best_second_action = all_policies[1].view(4,-1)[best_first_action].argmax() 
    best_third_action = all_policies[2].view(4,4,-1)[best_first_action][best_second_action].argmax()
    best_fourth_action = all_policies[3].view(4,4,4,-1)[best_first_action][best_second_action][best_third_action].argmax() 

    print(f"Best Actions: {best_first_action.item()} {best_second_action.item()} {best_third_action.item()} {best_fourth_action.item()}")

RuntimeError: shape '[4, -1]' is invalid for input of size 1

In [None]:
import json

# Define the input and output file paths
input_file = r'/home/mike/Desktop/TreeQN/BIB/TreeQN_Approach/treeQN/final_losses2.txt'
output_file = 'output.json'

# Initialize a list to store each block of data
data = []

# Initialize a temporary dictionary to store each set of values
current_dict = {}

# Read the text file line by line
with open(input_file, 'r') as file:
    for line in file:
        line = line.strip()
        if not line:  # If the line is empty, it's a new block of data
            if current_dict:
                data.append(current_dict)
                current_dict = {}
        else:
            # Check if the line contains a colon
            if ':' in line:
                key, value = line.split(':', 1)
                key = key.strip()
                value = value.strip()

                # Convert numeric values to float or int
                try:
                    value = float(value) if '.' in value else int(value)
                except ValueError:
                    pass
                
                # Add the key-value pair to the current dictionary
                current_dict[key] = value

    # Append the last block of data
    if current_dict:
        data.append(current_dict)

# Write the list of dictionaries to a JSON file
with open(output_file, 'w') as json_file:
    json.dump(data, json_file, indent=4)

print("Conversion complete! JSON saved to", output_file)


Conversion complete! JSON saved to output.json


In [None]:
import pandas as pd
import json

# Load JSON data (replace 'your_data.json' with your actual JSON file path)
with open(r'/home/mike/Desktop/TreeQN/BIB/TreeQN_Approach/Notebooks/image_gridworld/tree_kfold_result.json', 'r') as file:
    data = json.load(file)

# Convert the JSON data to a DataFrame
df = pd.DataFrame(data)

In [None]:
df['Learning Rate'] = df['Learning Rate'].astype(float)

In [None]:
df[df['Learning Rate'] == 1e-5]['Final Validation Loss'].value_counts(bins=2)

(12.901, 13.257]    41
(12.544, 12.901]    14
Name: count, dtype: int64

In [None]:
df= df.drop(columns=['Gradient Clipping','Epochs'])

In [None]:
df = df[df['Discount Factor'] == 1]

In [None]:
df = df.drop(columns = 'Discount Factor')

In [None]:
df.sort_values(by="Final Validation Loss").head(15).reset_index(drop=True).to_csv("Top_15.csv")