### Imports and HTML-content

In [1]:
%run helper.py

In [2]:
from IPython.display import display, HTML
# Placeholder string; it contains only blank lines in this export.
# NOTE(review): presumably meant to hold HTML for `display(HTML(...))` — confirm.
point_to_dist_angle = """

"""

In [3]:
from scipy.stats import multivariate_normal

def calc_likelihoods_for_distributions_and_points(matrix_dist, matrix_points):
    """Evaluate every Gaussian in ``matrix_dist`` at every point in ``matrix_points``.

    Args:
        matrix_dist: Array indexed on axis 0 by distribution. For each
            distribution, entry 0 is used as the mean and the remaining
            entries as the covariance matrix.
        matrix_points: Points at which the densities are evaluated, one per row.

    Returns:
        The transposed stack of per-distribution densities. With several
        points this is indexed ``[point, distribution]``.
    """
    # Use the arguments rather than same-named notebook globals, so the result
    # depends only on what the caller passes in.
    means = matrix_dist[:, 0]
    covariances = matrix_dist[:, 1:]
    likelihoods = np.array([
        multivariate_normal(mean=mean, cov=cov).pdf(matrix_points)
        for mean, cov in zip(means, covariances)
    ])

    return likelihoods.T

### Code

### Build Graph

In [5]:
@save_params
def create_graph_data(csv_file_name, iteration):
    """Build a graph ``Data`` object for one iteration of one tracking CSV.

    Every box returned by ``get_classnames_boxes_from_csv`` becomes a node with
    the feature layout
    ``(mu1, mu2, sig00, sig01, sig10, sig11, class-label, x-val, y-val, width,
    height, dataset_number, iteration, id)``.

    Args:
        csv_file_name: Path to the CSV file. The text before the first ``.`` of
            the last ``/``-separated component must parse as an int; it is
            stored as ``dataset_number``.
        iteration: Iteration passed on to ``get_classnames_boxes_from_csv`` and
            stored in every node feature row.

    Returns:
        A ``Data`` object whose ``x`` is a float tensor of node features. For
        anything but exactly one node, ``edge_index`` holds the transposed
        node pairs from ``cartesian_product_for_nodes`` and ``edge_attr`` the
        stacked ``(distance, angle)`` values from ``dist_angle_from_matrix``.
    """
    classes_dict = {
        'goomba': 0,
        'mario': 1,
        'cloud': 2,
        'ground': 3,
        'bush': 4,
        'box': 5,
        'pipe': 6
    }

    class_names, boxes = get_classnames_boxes_from_csv(csv_file_name, iteration)
    num_nodes = len(boxes)
    edge_connections = cartesian_product_for_nodes(range(num_nodes))
    normal_dist = [0, 0, 1, 0, 0, 1]  # mu1, mu2, sig00, sig01, sig10, sig11
    dataset_number = int(csv_file_name.split('/')[-1].split('.')[0])

    # Node features are built identically for the single-node and the general
    # case, so they are assembled once here.
    node_features = []
    for i, box in enumerate(boxes):
        width, height = abs(box['left'] - box['right']), abs(box['top'] - box['bottom'])
        # node_feature: (normal-distribution, class-label, x-val, y-val, width, height, dataset_number, iteration, id)
        node_features.append((*normal_dist, classes_dict[class_names[i]], box['center_x'],
                              box['center_y'], width, height, dataset_number, iteration, i))

    # Always hand a float tensor to `Data`, so single-node graphs can be sliced
    # like every other graph (e.g. `graph.x[:, 6]`).
    node_features = torch.tensor(node_features, dtype=torch.float)

    if num_nodes == 1:
        # A single node has no pairwise distances or angles to compute.
        # NOTE(review): `edge_index` is 1-D here, whereas PyG documents the
        # shape [2, num_edges]; confirm what downstream code expects.
        return Data(
            x=node_features,
            edge_index=torch.tensor([0]),
            edge_attr=torch.tensor([0])
        )

    # Box centres, one row per node; built in one go instead of growing the
    # array with `np.vstack` on every iteration.
    matrix = np.array(
        [[box['center_x'], box['center_y']] for box in boxes], dtype=float
    ).reshape(-1, 2)

    dists, angles = dist_angle_from_matrix(matrix, edge_connections)

    edge_connections = torch.tensor(edge_connections)
    edges_features = torch.tensor(np.stack((dists, angles), axis=-1))

    data = Data(
        x=node_features,
        edge_index=edge_connections.t().contiguous(),
        edge_attr=edges_features
    )

    return data

In [6]:
# Collect one graph per (recording, iteration) pair from the four CSV recordings.
dataset = []
for i in range(4):
    csv_file_name = f"/workspaces/jupyterlite/content/pytroch-geometric/mario-tracking-data/{i:04d}.csv"
    df = pd.read_csv(csv_file_name)
    # The `iteration` value of the last row serves as the exclusive upper bound
    # of the loop below, which starts at 1.
    num_iterations = int(df['iteration'].iloc[-1])

    for iteration in range(1, num_iterations):
        graph_data = create_graph_data(csv_file_name, iteration)
        dataset.append(graph_data)

# First 80% of the graphs (in insertion order) for training, the rest for testing.
split_index = int(0.8 * len(dataset))
train_dataset, test_dataset = dataset[:split_index], dataset[split_index:]

train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=1)

  return np.linalg.norm(val_edge_pairs, axis=1), np.rad2deg(np.arctan(val_edge_pairs[:, 1] / val_edge_pairs[:, 0]))
  return np.linalg.norm(val_edge_pairs, axis=1), np.rad2deg(np.arctan(val_edge_pairs[:, 1] / val_edge_pairs[:, 0]))


In [7]:
# Parse the text before the first '.' of the last '/'-separated path component as an int.
int(csv_file_name.split('/')[-1].split('.')[0])

3

#### Message Passing

In [8]:
class SimpleGAT(torch.nn.Module):
    """Single-layer graph attention model built on one ``GATConv``.

    Args:
        in_channels: Input feature size passed to ``GATConv``.
        hidden_channels: Currently unused; kept so existing call sites stay valid.
        out_channels: Output feature size passed to ``GATConv``.
        heads: Number of attention heads passed to ``GATConv``.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, heads=1):
        super().__init__()
        # Honour the `heads` argument instead of always using a single head.
        self.conv1 = GATConv(in_channels, out_channels, heads=heads, add_self_loops=False)

    def forward(self, x, edge_index, target_node):
        """Apply the attention layer and return its output.

        ``target_node`` is accepted but not used yet.
        """
        # Return the layer output instead of discarding it.
        return self.conv1(x, edge_index)

#### Loss

The loss is the sum of squared errors (SSE) of the likelihoods that the points of frame i+1 belong to the distributions created from the graph of frame i.


In [9]:
def classes_points_distributions_ids_from_graph(graph):
    """Split ``graph.x`` into its per-node column groups.

    Returns a 4-tuple in this order: column 6, columns 0-5, columns 7-8 and the
    last column of ``graph.x``. In the graphs built in this notebook these are
    the class labels, the distribution parameters, the centre points and the
    node ids.
    """
    features = graph.x
    class_labels = features[:, 6]
    distribution_params = features[:, :6]
    center_points = features[:, 7:9]
    node_ids = features[:, -1]
    return class_labels, distribution_params, center_points, node_ids

In [10]:
def calculate_optimial_matching_for_distributions_and_points(graph, csv_file_name, iteration, threshhold=0):
    """Unfinished stub for the distribution-to-point matching step.

    Only the class-name-to-id table is set up so far; no argument is used and
    the function returns ``None``.
    """
    classes_dict = {
        'goomba': 0,
        'mario': 1,
        'cloud': 2,
        'ground': 3,
        'bush': 4,
        'box': 5,
        'pipe': 6,
    }
    

In [11]:
def calculate_sse_for_distributions_and_points():
    """Unfinished stub for the SSE loss; performs no work and returns ``None``."""
    return None

In [12]:
# Lookup table from class name to integer class id.
classes_dict = {
    'goomba': 0,
    'mario': 1,
    'cloud': 2,
    'ground': 3,
    'bush': 4,
    'box': 5,
    'pipe': 6,
}

# Class names (index 0) and box dicts (index 1) for iteration 50 of the current CSV file.
csv_data = get_classnames_boxes_from_csv(csv_file_name, 50)
class_ids = np.array([classes_dict[class_name] for class_name in csv_data[0]])

# Gather the (center_x, center_y) rows first and stack them in one call;
# the empty (0, 2) seed keeps the shape and dtype when there are no boxes.
center_rows = []
for box_values in csv_data[1]:
    box_coordinates = np.array([box_values['center_x'], box_values['center_y']])
    center_rows.append(box_coordinates)
boxes_coordinates = np.vstack([np.empty((0, 2)), *center_rows])

class_ids, boxes_coordinates

(array([1, 5]),
 array([[170.95 ,  89.285],
        [151.65 , 254.77 ]]))

In [47]:
def calc_point_indices_to_distributions(distributions, points, treshhold_likelihood = 0):
    """Assign at most one point to each distribution by minimum-cost matching.

    Args:
        distributions: Gaussian parameters in the layout expected by
            ``calc_likelihoods_for_distributions_and_points``; axis 0 indexes
            the distributions.
        points: Candidate points, one per row.
        treshhold_likelihood: Likelihoods below this value count as
            non-matches. With the default of 0 nothing is rejected.

    Returns:
        Integer array with one entry per distribution: the row index into
        ``points`` of the matched point, or -1 if the distribution received no
        point or its match fell below ``treshhold_likelihood``.
    """
    if points.shape[0] == 1 and distributions.shape[0] == 1:
        likelihood_entries = calc_likelihoods_for_distributions_and_points(distributions, points)

        if likelihood_entries < treshhold_likelihood:
            return np.array([-1])
        return np.array([0])

    # Rows are points, columns are distributions. `atleast_2d` restores the
    # row axis when the likelihood helper returns a 1-D result (single point).
    likelihoods = np.atleast_2d(
        calc_likelihoods_for_distributions_and_points(distributions, points)
    )
    n_rows, n_cols = likelihoods.shape

    # Rank of every point within each distribution's column (0 = least likely).
    ranks = np.argsort(np.argsort(likelihoods, axis=0), axis=0)

    # Per distribution keep only its `n_cols` most likely points; a point is a
    # candidate as soon as one distribution keeps it.
    is_candidate = (n_rows - ranks) <= n_cols
    points_to_consider = np.flatnonzero(is_candidate.any(axis=1))

    # Fancy indexing copies, so the punishment below leaves `likelihoods` intact.
    likelihood_entries = likelihoods[points_to_consider]
    likelihood_indicies_to_filter = likelihood_entries < treshhold_likelihood
    # punish points that should be filtered out
    likelihood_entries[likelihood_indicies_to_filter] = 1e-50

    # Share of each point in its column's summed log-likelihood. A likelier
    # point has a smaller share and therefore a lower (log) cost.
    # NOTE(review): a likelihood of exactly 0 or >= 1 yields -inf/NaN or a
    # non-positive ratio here, which `linear_sum_assignment` rejects — confirm
    # inputs stay in (0, 1).
    log_likelihoods = np.log(likelihood_entries)
    inv_percentage_of_likelihood = log_likelihoods / np.sum(log_likelihoods, axis=0)

    cost_matrix = np.log(inv_percentage_of_likelihood)
    row_ind, col_ind = linear_sum_assignment(cost_matrix)

    # One slot per distribution, so unmatched distributions stay at -1 even
    # when they come after the last matched one.
    points_to_distributions = np.full(n_cols, -1, dtype=row_ind.dtype)
    # Translate rows of the filtered matrix back to indices into `points`.
    points_to_distributions[col_ind] = points_to_consider[row_ind]

    # if punished points still come up ahead, set the association to -1
    rejected = likelihood_indicies_to_filter[row_ind, col_ind]
    points_to_distributions[col_ind[rejected]] = -1

    return points_to_distributions

In [13]:
# Two graphs at adjacent positions of the training split.
# NOTE(review): presumably consecutive frames of one recording — confirm.
graph_a = train_dataset[700]
graph_b = train_dataset[701]

In [14]:
# Split the node feature matrix of each graph into its four column groups.
node_attributes_a = classes_points_distributions_ids_from_graph(graph_a)
node_attributes_b = classes_points_distributions_ids_from_graph(graph_b)
# Display the tuple for graph a.
node_attributes_a

(tensor([5., 1.]),
 tensor([[0., 0., 1., 0., 0., 1.],
         [0., 0., 1., 0., 0., 1.]]),
 tensor([[151.5850, 201.2750],
         [176.7600, 119.2650]]),
 tensor([0., 1.]))

In [15]:
def likelihood_of_distributions_from_center_of_distributions_to_points(distributions, center_distributions, points):
    """Match points to distributions after shifting the points by the given centres.

    Args:
        distributions: Distribution parameters passed on unchanged to
            ``calc_point_indices_to_distributions``.
        center_distributions: Values subtracted from ``points``; must be
            broadcastable against ``points``.
        points: Points to be shifted and matched.

    Returns:
        Whatever ``calc_point_indices_to_distributions`` returns for the
        shifted points.
    """
    # Subtract the `center_distributions` parameter; the singular name used
    # before was never defined and raised a NameError.
    normalized_points = points - center_distributions
    return calc_point_indices_to_distributions(distributions, normalized_points)

In [16]:
# Toy example of boolean-mask row selection: one label per row of `matrix`.
labels = torch.tensor([1, 2, 1, 1, 3])
matrix = np.array([
    [1, 1],
    [2, 3],
    [4, 1],
    [1, 4],
    [5, 3]
])

# Keep only the rows whose label equals 1.
matrix[labels == 1]

array([[1, 1],
       [4, 1],
       [1, 4]])

In [17]:
# Distinct class labels of graph b; this value is computed but not shown,
# because only the last expression of a cell is displayed.
classes_points_distributions_ids_from_graph(graph_b)[0].unique()
# Distinct values of the toy `labels` tensor (the displayed output).
labels.unique()

tensor([1, 2, 3])

In [18]:
# Tensor with zero rows and two columns.
# NOTE(review): presumably an accumulator for matched node pairs — confirm.
matching_nodes = torch.empty((0, 2))

In [19]:
# for label in node_attributes_a[0].unique():
label = 5
# Boolean masks selecting the nodes of class `label`: graph a supplies the
# distributions, graph b supplies the points.
dist_indicies = (node_attributes_a[0] == label)
point_indicies = (node_attributes_b[0] == label)
distributions = node_attributes_a[1][dist_indicies]
distributions = np.array(distributions.reshape(distributions.shape[0], -1, 2)) # to have the correct format
# Shift every mean by the centre of its own node in graph a. The rows must be
# selected with the graph-a mask; the graph-b mask can pick other rows or have
# a different length.
distributions[:, 0, :] += np.array(node_attributes_a[2][dist_indicies])
points = node_attributes_b[2][point_indicies]

if points.shape[0] > 0: calc_point_indices_to_distributions(distributions, points)
# else: 

In [20]:
# Two hand-written 2-D Gaussians; per distribution the first row is the mean
# and the following two rows are the covariance matrix.
distributions = np.array([
    [[3, 0], [1, 0], [0, 1]],
    [[2, 0], [1, 0.5], [0.5, 1]]
])

# Two random 2-D points drawn uniformly from [-10, 10).
points = np.random.uniform(-10, 10, (2, 2))

# distributions[:, 0, :] += points
# distributions

# calc_likelihoods_for_distributions_and_points(distributions, points)
# Run the matching on the toy data and display the result.
calc_point_indices_to_distributions(distributions, points)

array([1, 0])

In [21]:
# Step-by-step copy of the multi-point path of `calc_point_indices_to_distributions`
# (without the threshold handling), run on the notebook-level `distributions` / `points`.
likelihoods = calc_likelihoods_for_distributions_and_points(distributions, points)
# Per column: ascending order of the rows, then the rank of every row in that order.
sorted_indices = np.argsort(likelihoods, axis=0)
ranks = np.zeros_like(likelihoods, dtype=int)
n_rows, n_cols = likelihoods.shape
ranks[sorted_indices, np.arange(n_cols)] = np.tile(np.arange(n_rows), (n_cols, 1)).T

# 1 where a row is among the `n_cols` highest-ranked rows of its column.
# `mask_binary` and `cumsum_array` are not used again in this cell.
mask_binary = np.array((n_rows - ranks) <= n_cols, dtype=int)
cumsum_array = np.cumsum(mask_binary, axis=0)

# `s` and `extended_likelihood_entries` are not used again in this cell.
s = min(n_rows, n_cols)
extended_likelihood_entries = np.zeros((s+1, n_cols + 1))

# Boolean version of the same top-`n_cols` test.
mask  = np.array((n_rows - ranks) <= n_cols)

# Rows that pass the test for at least one column.
points_to_consider = np.where(np.any(mask, axis=1))[0]
filtered_points = points[points_to_consider]

likelihood_entries = calc_likelihoods_for_distributions_and_points(distributions, filtered_points)

# Each log-likelihood divided by the sum of its column's log-likelihoods.
percentage_of_likelihood = np.log(likelihood_entries)/ np.sum(np.log(likelihood_entries), axis=0)

# Minimum-cost assignment on the log of those ratios.
cost_matrix = np.log(percentage_of_likelihood)
row_ind, col_ind = linear_sum_assignment(cost_matrix)

# Result array initialised to -1, then filled at the assigned column indices with the row indices.
max_index = max(col_ind) + 1
points_to_distributions = np.zeros(max_index, dtype=row_ind.dtype) - 1

points_to_distributions[col_ind] = row_ind


In [43]:
# Experiment with a likelihood threshold on the filtered points.
treshhold_likelihood = 0.001
likelihood_entries = calc_likelihoods_for_distributions_and_points(distributions, filtered_points)
# Replace every likelihood below the threshold with a tiny constant.
likelihood_entries[likelihood_entries < treshhold_likelihood] = 1e-10
# Work with the complement (1 - likelihood) from here on.
likelihood_entries = 1 - likelihood_entries

# Each log value divided by the sum of its column's log values.
percentage_of_likelihood = np.log(likelihood_entries)/ np.sum(np.log(likelihood_entries), axis=0)

In [41]:
# Display the current value of `likelihood_entries`.
likelihood_entries

array([[1.00000000e-10, 1.00000000e-10],
       [4.83943970e-02, 4.30353254e-03]])

In [44]:
# Recompute the column-wise log ratio for display only; the result is not stored.
np.log(likelihood_entries)/ np.sum(np.log(likelihood_entries), axis=0)

array([[2.01594172e-09, 2.31866899e-08],
       [9.99999998e-01, 9.99999977e-01]])

In [27]:
# Set every entry below 0.1 to 0, in place.
percentage_of_likelihood[percentage_of_likelihood < 0.1] = 0

In [45]:
# Display the current value of `percentage_of_likelihood`.
percentage_of_likelihood

array([[2.01594172e-09, 2.31866899e-08],
       [9.99999998e-01, 9.99999977e-01]])

In [None]:
# classes_dict = {
#     'goomba': 0,
#     'mario': 1,
#     'cloud': 2,
#     'ground': 3,
#     'bush': 4,
#     'box': 5,
#     'pipe': 6
# }

# class_names, boxes = get_classnames_boxes_from_csv(csv_file_name, iteration)
# num_nodes = len(boxes)
# edge_connections = cartesian_product_for_nodes(range(num_nodes))
# node_features = []
# matrix = np.empty((0, 2))
# normal_dist = [0, 0, 1, 0, 1, 0] # mu1, mu2, sig00, sig01, sig10, sig11

# if num_nodes == 1:
#     box = boxes[0]
#     width, height = abs(box['left'] - box['right']), abs(box['top'] - box['bottom'])
#     node_features.append((*normal_dist, classes_dict[class_names[0]], width, height))
    
# for i, box in enumerate(boxes):
#     new_row = np.array([[box['center_x'], box['center_y']]])
#     matrix = np.vstack((matrix, new_row))

#     width, height = abs(box['left'] - box['right']), abs(box['top'] - box['bottom'])
#     node_features.append((*normal_dist, classes_dict[class_names[i]], width, height))

# dists, angles = dist_angle_from_matrix(matrix, edge_connections)

# node_features = torch.tensor(node_features, dtype=torch.float)
# edge_connections = torch.tensor(edge_connections)
# edges_features =  torch.tensor(np.stack((dists, angles), axis=-1))