## Group Spring Simulation

In [2]:
import numpy as np
from sklearn.svm import SVC

# Group Spring Simulation

In [3]:
!ls data/simulation/spring_simulation/

[34m__pycache__[m[m
datainfo_static_5.npy
edges_all_test_group_static_5.npy
edges_all_valid_group_static_5.npy
edges_sampled_all_test_group_static_5.npy
edges_sampled_all_train_group_static_5.npy
edges_sampled_all_valid_group_static_5.npy
ga_test_group_static_5.npy
ga_train_group_static_5.npy
ga_valid_group_static_5.npy
generate_dataset.py
generate_dataset_group.py
gr_test_group_static_5.npy
gr_train_group_static_5.npy
gr_valid_group_static_5.npy
loc_all_test_group_static_5.npy
loc_all_valid_group_static_5.npy
loc_sampled_all_test_group_static_5.npy
loc_sampled_all_train_group_static_20.npy
loc_sampled_all_train_group_static_5.npy
loc_sampled_all_valid_group_static_5.npy
sampled_indices_all_test_group_static_5.npy
sampled_indices_all_train_group_static_5.npy
sampled_indices_all_valid_group_static_5.npy
spring_sim.py
spring_sim_group.py
test_data_loader_static_5.pth
train_data_loader_static_5.pth
valid_data_loader_static_5.pth
vel_all_test_group_static_5.

In [4]:
# Load Data

data_folder = "data/simulation/spring_simulation/"
suffix = "_static_5"


def _load_split(split):
    """Load the raw arrays of one dataset split from ``data_folder``.

    Parameters
    ----------
    split : str
        Split tag embedded in the file names ("train", "valid" or "test").

    Returns
    -------
    tuple of np.ndarray
        ``(loc, vel, edges)`` exactly as stored on disk.
    """
    loc = np.load(data_folder + 'loc_sampled_all_' + split + '_group' + suffix + '.npy')
    vel = np.load(data_folder + 'vel_sampled_all_' + split + '_group' + suffix + '.npy')
    edges = np.load(data_folder + 'gr_' + split + '_group' + suffix + '.npy')
    return loc, vel, edges


def _prepare_split(loc, vel, edges, num_atoms):
    """Bring one split into the layout used by the rest of the notebook.

    Parameters
    ----------
    loc, vel : np.ndarray
        4-D arrays whose last axis indexes atoms.
    edges : np.ndarray
        Relation labels; reshaped to one row of ``num_atoms ** 2`` entries
        per simulation.
    num_atoms : int
        Number of atoms per simulation.

    Returns
    -------
    tuple of np.ndarray
        ``(loc, vel, feat, edges)`` where ``feat`` stacks ``loc`` and ``vel``
        along the last axis.
    """
    # Reshape to: [num_sims, num_atoms, num_timesteps, num_dims]
    loc = np.transpose(loc, [0, 3, 1, 2])
    vel = np.transpose(vel, [0, 3, 1, 2])
    feat = np.concatenate([loc, vel], axis=3)
    edges = np.reshape(edges, [-1, num_atoms ** 2])
    # NOTE(review): presumably the stored labels are in {-1, +1} and this maps
    # them to {0, 1} -- confirm against generate_dataset_group.py.
    edges = np.array((edges + 1) / 2, dtype=np.int64)
    return loc, vel, feat, edges


loc_train, vel_train, edges_train = _load_split('train')
loc_valid, vel_valid, edges_valid = _load_split('valid')
loc_test, vel_test, edges_test = _load_split('test')

# The atom count is taken from the training split and applied to all splits.
num_atoms = loc_train.shape[3]

loc_train, vel_train, feat_train, edges_train = _prepare_split(
    loc_train, vel_train, edges_train, num_atoms)
loc_valid, vel_valid, feat_valid, edges_valid = _prepare_split(
    loc_valid, vel_valid, edges_valid, num_atoms)
loc_test, vel_test, feat_test, edges_test = _prepare_split(
    loc_test, vel_test, edges_test, num_atoms)


# Exclude self edges
off_diag_idx = np.ravel_multi_index(
    np.where(np.ones((num_atoms, num_atoms)) - np.eye(num_atoms)),
    [num_atoms, num_atoms])
edges_train = edges_train[:, off_diag_idx]
edges_valid = edges_valid[:, off_diag_idx]
edges_test = edges_test[:, off_diag_idx]

In [5]:
#Create edge-node relation

# Axis 1 of feat_train is the atom axis after the transpose in the load cell.
num_nodes = feat_train.shape[1]


def encode_onehot(labels):
    """One-hot encode a 1-D sequence of class labels.

    Columns follow the sorted order of the distinct labels, so the encoding
    is deterministic and does not depend on ``set`` iteration order.

    Parameters
    ----------
    labels : array-like
        1-D sequence of sortable labels (e.g. integers or strings).

    Returns
    -------
    np.ndarray
        ``int32`` array of shape ``(len(labels), n_classes)`` with exactly
        one 1 per row.
    """
    # `class_idx[i]` is the position of labels[i] within the sorted classes.
    classes, class_idx = np.unique(np.asarray(labels), return_inverse=True)
    # Build the identity once and pick rows, instead of once per class.
    identity = np.identity(len(classes), dtype=np.int32)
    return identity[class_idx.reshape(-1)]



def create_edgeNode_relation(num_nodes, self_loops=False):
    """Build one-hot receiver and sender matrices for a fully connected graph.

    Parameters
    ----------
    num_nodes : int
        Number of nodes in the graph.
    self_loops : bool, optional
        When False (default) the diagonal (node-to-itself) pairs are dropped.

    Returns
    -------
    tuple of np.ndarray
        ``(rel_rec, rel_send)`` as float32 arrays; row ``k`` one-hot encodes
        the row index (``rel_rec``) and the column index (``rel_send``) of
        the k-th non-zero entry of the adjacency mask.
    """
    adjacency = np.ones([num_nodes, num_nodes])
    if not self_loops:
        adjacency = adjacency - np.eye(num_nodes)

    # np.where yields (row indices, column indices) of the kept pairs.
    receiver_idx, sender_idx = np.where(adjacency)
    rel_rec = np.array(encode_onehot(receiver_idx), dtype=np.float32)
    rel_send = np.array(encode_onehot(sender_idx), dtype=np.float32)

    return rel_rec, rel_send

# Relation matrices without self edges; reused by the feature-engineering
# and evaluation cells below.
rel_rec, rel_send = create_edgeNode_relation(num_nodes, self_loops=False)

In [6]:
feat_train.shape

(600, 5, 49, 4)

In [7]:
edges_train.shape

(600, 20)

## Feature Engineering on Training Data

In [8]:
# Merge the (timestep, feature) axes so the relation matrices can be applied
# with a single matmul per split.
feat_train_re = feat_train.reshape(feat_train.shape[:2] + (-1,))
feat_train_re.shape

(600, 5, 196)

In [9]:
# Select, for every edge, the flattened features of its sender / receiver node.
senders_train = rel_send @ feat_train_re
receivers_train = rel_rec @ feat_train_re
# Split the flattened axis back into (timesteps, features).
senders_train = senders_train.reshape(senders_train.shape[:2] + feat_train.shape[2:])
receivers_train = receivers_train.reshape(receivers_train.shape[:2] + feat_train.shape[2:])

In [10]:
#Edge shape: [batch_size, n_edges, n_timesteps, n_features]
senders_train.shape

(600, 20, 49, 4)

In [11]:
#Extract velocities and locations
# The feature axis is [loc (2), vel (2)], matching the concatenation order
# used when feat_train was built.
location_senders_train = senders_train[..., :2]
location_receivers_train = receivers_train[..., :2]
velocity_senders_train = senders_train[..., 2:]
velocity_receivers_train = receivers_train[..., 2:]

In [12]:
#Compute Normalized distance histogram
# Euclidean sender-receiver distance for every edge at every timestep.
distance_train = location_senders_train-location_receivers_train
distance_train = np.sqrt((distance_train**2).sum(-1))
# One row per (simulation, edge) pair, one column per timestep.
distance_train = distance_train.reshape((-1, distance_train.shape[-1]))
distance_train_max = distance_train.max()
distance_train_min = distance_train.min()
# Fixed-width (4) bins over the training range; the same min/max are reused
# for the test split. np.histogram ignores samples outside [bins[0], bins[-1]].
bins = np.arange(distance_train_min, distance_train_max, 4)

hist_dist_train = []
for d in distance_train:
    # Bin once, then normalise; a row with no in-range sample becomes all
    # zeros instead of NaN (0/0), which the classifier could not consume.
    counts = np.histogram(d, bins=bins)[0]
    total = counts.sum()
    hist_d = counts / total if total > 0 else np.zeros(len(counts))
    hist_dist_train.append(hist_d)

hist_dist_train = np.array(hist_dist_train)

In [13]:
hist_dist_train

array([[0.24489796, 0.24489796, 0.34693878, 0.16326531],
       [0.53061224, 0.46938776, 0.        , 0.        ],
       [0.26530612, 0.24489796, 0.30612245, 0.18367347],
       ...,
       [1.        , 0.        , 0.        , 0.        ],
       [1.        , 0.        , 0.        , 0.        ],
       [0.26530612, 0.20408163, 0.2244898 , 0.30612245]])

In [14]:
#Compute normalized histogram of speed difference
# Speed = Euclidean norm of the velocity vector at each timestep.
speed_receivers_train = np.sqrt((velocity_receivers_train**2).sum(-1))
speed_senders_train = np.sqrt((velocity_senders_train**2).sum(-1))

diff_speed_train = np.abs(speed_receivers_train-speed_senders_train)
# One row per (simulation, edge) pair, one column per timestep.
diff_speed_train = diff_speed_train.reshape((-1, diff_speed_train.shape[-1]))

diff_speed_train_max = diff_speed_train.max()
diff_speed_train_min = diff_speed_train.min()
# Fixed-width (0.2) bins over the training range; the same min/max are reused
# for the test split. np.histogram ignores samples outside [bins[0], bins[-1]].
bins = np.arange(diff_speed_train_min, diff_speed_train_max, 0.2)

hist_diff_speed_train = []
for d in diff_speed_train:
    # Bin once, then normalise; a row with no in-range sample becomes all
    # zeros instead of NaN (0/0).
    counts = np.histogram(d, bins=bins)[0]
    total = counts.sum()
    hist_d = counts / total if total > 0 else np.zeros(len(counts))
    hist_diff_speed_train.append(hist_d)

hist_diff_speed_train = np.array(hist_diff_speed_train)

In [15]:
hist_diff_speed_train

array([[0.93877551, 0.06122449, 0.        , 0.        , 0.        ],
       [1.        , 0.        , 0.        , 0.        , 0.        ],
       [0.83673469, 0.16326531, 0.        , 0.        , 0.        ],
       ...,
       [0.81632653, 0.18367347, 0.        , 0.        , 0.        ],
       [0.30612245, 0.55102041, 0.14285714, 0.        , 0.        ],
       [0.79591837, 0.20408163, 0.        , 0.        , 0.        ]])

In [16]:
#Compute normalized histogram of absolute difference in direction
# NOTE(review): arctan(vy / vx) only spans (-pi/2, pi/2), so opposite headings
# get the same angle, and vx == 0 divides by zero. np.arctan2(vy, vx) avoids
# both, but must be changed for the train and test features together.
tans_receivers = velocity_receivers_train[:,:,:,1]/velocity_receivers_train[:,:,:,0]
directions_receivers = np.arctan(tans_receivers)

tans_senders = velocity_senders_train[:,:,:,1]/velocity_senders_train[:,:,:,0]
directions_senders = np.arctan(tans_senders)

diff_directions = np.abs(directions_senders-directions_receivers)

#create histogram
# Four bins of width pi/4 covering [0, pi].
bins = np.arange(0, np.pi+0.25*np.pi, 0.25*np.pi)
hist_diff_dire_train = []

# One row per (simulation, edge) pair, one column per timestep.
diff_directions = diff_directions.reshape((-1, diff_directions.shape[-1]))

for d in diff_directions:
    # Bin once, then normalise; a row with no countable sample becomes all
    # zeros instead of NaN (0/0).
    counts = np.histogram(d, bins=bins)[0]
    total = counts.sum()
    hist_d = counts / total if total > 0 else np.zeros(len(counts))
    hist_diff_dire_train.append(hist_d)

hist_diff_dire_train = np.array(hist_diff_dire_train)

In [17]:
hist_diff_dire_train

array([[0.3877551 , 0.26530612, 0.34693878, 0.        ],
       [0.85714286, 0.14285714, 0.        , 0.        ],
       [0.28571429, 0.40816327, 0.30612245, 0.        ],
       ...,
       [0.75510204, 0.14285714, 0.10204082, 0.        ],
       [0.34693878, 0.55102041, 0.10204082, 0.        ],
       [0.34693878, 0.44897959, 0.20408163, 0.        ]])

In [18]:
#Normalized histogram of absolute difference in velocity direction
#and relative position

# NOTE(review): both angles use arctan(y / x), which only spans (-pi/2, pi/2)
# and divides by zero when x == 0. np.arctan2 would keep the quadrant, but
# must be changed for the train and test features together.
diff_locations_train = location_senders_train-location_receivers_train
diff_locations_train_tans = diff_locations_train[:,:,:,1]/diff_locations_train[:,:,:,0]
relative_positions = np.arctan(diff_locations_train_tans)

diff_velocities_train = velocity_senders_train-velocity_receivers_train
diff_velocities_train_tans = diff_velocities_train[:,:,:,1]/diff_velocities_train[:,:,:,0]
velocity_dires = np.arctan(diff_velocities_train_tans)

diff_vel_loc = np.abs(relative_positions-velocity_dires)

#create histogram
# Four bins of width pi/4 covering [0, pi].
bins = np.arange(0, np.pi+0.25*np.pi,0.25*np.pi)
hist_diff_vel_loc_train = []
# One row per (simulation, edge) pair, one column per timestep.
diff_vel_loc = diff_vel_loc.reshape((-1, diff_vel_loc.shape[-1]))

for d in diff_vel_loc:
    # Bin once, then normalise; a row with no countable sample becomes all
    # zeros instead of NaN (0/0).
    counts = np.histogram(d, bins=bins)[0]
    total = counts.sum()
    hist_d = counts / total if total > 0 else np.zeros(len(counts))
    hist_diff_vel_loc_train.append(hist_d)

hist_diff_vel_loc_train = np.array(hist_diff_vel_loc_train)

In [19]:
hist_diff_vel_loc_train

array([[0.95918367, 0.02040816, 0.02040816, 0.        ],
       [0.97959184, 0.02040816, 0.        , 0.        ],
       [1.        , 0.        , 0.        , 0.        ],
       ...,
       [0.57142857, 0.36734694, 0.06122449, 0.        ],
       [0.        , 0.81632653, 0.18367347, 0.        ],
       [1.        , 0.        , 0.        , 0.        ]])

In [20]:
#Concatenate features

# Column order: distance, speed difference, direction difference,
# velocity-vs-position direction difference.
hist_feat_train = np.concatenate(
    (
        hist_dist_train,
        hist_diff_speed_train,
        hist_diff_dire_train,
        hist_diff_vel_loc_train,
    ),
    axis=1,
)

In [21]:
hist_feat_train.shape

(12000, 17)

In [22]:
#convert labels
# One label per (simulation, edge) row; class 0 is recoded to -1.
labels_train = edges_train.reshape(-1)
labels_train = np.where(labels_train == 0, -1, labels_train)

In [25]:
labels_train.shape

(12000,)

## Feature Engineering on Test Data

In [26]:
# Merge the (timestep, feature) axes so the relation matrices can be applied
# with a single matmul per split.
feat_test_re = feat_test.reshape(feat_test.shape[:2] + (-1,))
feat_test_re.shape

(200, 5, 196)

In [27]:
# Select, for every edge, the flattened features of its sender / receiver node.
senders_test = rel_send @ feat_test_re
receivers_test = rel_rec @ feat_test_re
# Split the flattened axis back into (timesteps, features).
senders_test = senders_test.reshape(senders_test.shape[:2] + feat_test.shape[2:])
receivers_test = receivers_test.reshape(receivers_test.shape[:2] + feat_test.shape[2:])

In [28]:
# The feature axis is [loc (2), vel (2)], matching the concatenation order
# used when feat_test was built.
location_senders_test = senders_test[..., :2]
location_receivers_test = receivers_test[..., :2]
velocity_senders_test = senders_test[..., 2:]
velocity_receivers_test = receivers_test[..., 2:]

In [29]:
#Compute Normalized distance histogram
# Euclidean sender-receiver distance for every edge at every timestep.
distance_test = location_senders_test-location_receivers_test
distance_test = np.sqrt((distance_test**2).sum(-1))
# One row per (simulation, edge) pair, one column per timestep.
distance_test = distance_test.reshape((-1, distance_test.shape[-1]))

#use training information
# Bin edges come from the training range so train and test features share
# the same columns; test samples outside that range are not counted.
bins = np.arange(distance_train_min, distance_train_max, 4)

hist_dist_test = []
for d in distance_test:
    # Bin once, then normalise; a row lying entirely outside the training
    # range becomes all zeros instead of NaN (0/0), which clf.predict rejects.
    counts = np.histogram(d, bins=bins)[0]
    total = counts.sum()
    hist_d = counts / total if total > 0 else np.zeros(len(counts))
    hist_dist_test.append(hist_d)

hist_dist_test = np.array(hist_dist_test)

In [30]:
#Compute normalized histogram of speed difference
# Speed = Euclidean norm of the velocity vector at each timestep.
speed_receivers_test = np.sqrt((velocity_receivers_test**2).sum(-1))
speed_senders_test = np.sqrt((velocity_senders_test**2).sum(-1))

diff_speed_test = np.abs(speed_receivers_test-speed_senders_test)
# One row per (simulation, edge) pair, one column per timestep.
diff_speed_test = diff_speed_test.reshape((-1, diff_speed_test.shape[-1]))


# Bin edges come from the training range so train and test features share
# the same columns; test samples outside that range are not counted.
bins = np.arange(diff_speed_train_min, diff_speed_train_max, 0.2)

hist_diff_speed_test = []
for d in diff_speed_test:
    # Bin once, then normalise; a row lying entirely outside the training
    # range becomes all zeros instead of NaN (0/0).
    counts = np.histogram(d, bins=bins)[0]
    total = counts.sum()
    hist_d = counts / total if total > 0 else np.zeros(len(counts))
    hist_diff_speed_test.append(hist_d)

hist_diff_speed_test = np.array(hist_diff_speed_test)

In [31]:
hist_diff_speed_test

array([[0.40816327, 0.59183673, 0.        , 0.        , 0.        ],
       [1.        , 0.        , 0.        , 0.        , 0.        ],
       [1.        , 0.        , 0.        , 0.        , 0.        ],
       ...,
       [1.        , 0.        , 0.        , 0.        , 0.        ],
       [1.        , 0.        , 0.        , 0.        , 0.        ],
       [1.        , 0.        , 0.        , 0.        , 0.        ]])

In [32]:
#Compute normalized histogram of absolute difference in direction
# NOTE(review): arctan(vy / vx) only spans (-pi/2, pi/2), so opposite headings
# get the same angle, and vx == 0 divides by zero. np.arctan2(vy, vx) avoids
# both, but must be changed for the train and test features together.
tans_receivers = velocity_receivers_test[:,:,:,1]/velocity_receivers_test[:,:,:,0]
directions_receivers = np.arctan(tans_receivers)

tans_senders = velocity_senders_test[:,:,:,1]/velocity_senders_test[:,:,:,0]
directions_senders = np.arctan(tans_senders)

diff_directions = np.abs(directions_senders-directions_receivers)

#create histogram
# Four bins of width pi/4 covering [0, pi].
bins = np.arange(0, np.pi+0.25*np.pi, 0.25*np.pi)
hist_diff_dire_test = []

# One row per (simulation, edge) pair, one column per timestep.
diff_directions = diff_directions.reshape((-1, diff_directions.shape[-1]))

for d in diff_directions:
    # Bin once, then normalise; a row with no countable sample becomes all
    # zeros instead of NaN (0/0).
    counts = np.histogram(d, bins=bins)[0]
    total = counts.sum()
    hist_d = counts / total if total > 0 else np.zeros(len(counts))
    hist_diff_dire_test.append(hist_d)

hist_diff_dire_test = np.array(hist_diff_dire_test)

In [33]:
hist_diff_dire_test

array([[0.36734694, 0.63265306, 0.        , 0.        ],
       [0.        , 0.57142857, 0.42857143, 0.        ],
       [0.63265306, 0.36734694, 0.        , 0.        ],
       ...,
       [0.        , 0.        , 0.6122449 , 0.3877551 ],
       [0.        , 0.16326531, 0.79591837, 0.04081633],
       [0.        , 0.        , 0.6122449 , 0.3877551 ]])

In [34]:
#Normalized histogram of absolute difference in velocity direction
#and relative position

# NOTE(review): both angles use arctan(y / x), which only spans (-pi/2, pi/2)
# and divides by zero when x == 0. np.arctan2 would keep the quadrant, but
# must be changed for the train and test features together.
diff_locations_test = location_senders_test-location_receivers_test
diff_locations_test_tans = diff_locations_test[:,:,:,1]/diff_locations_test[:,:,:,0]
relative_positions = np.arctan(diff_locations_test_tans)

diff_velocities_test = velocity_senders_test-velocity_receivers_test
diff_velocities_test_tans = diff_velocities_test[:,:,:,1]/diff_velocities_test[:,:,:,0]
velocity_dires = np.arctan(diff_velocities_test_tans)

diff_vel_loc = np.abs(relative_positions-velocity_dires)

#create histogram
# Four bins of width pi/4 covering [0, pi].
bins = np.arange(0, np.pi+0.25*np.pi,0.25*np.pi)
hist_diff_vel_loc_test = []
# One row per (simulation, edge) pair, one column per timestep.
diff_vel_loc = diff_vel_loc.reshape((-1, diff_vel_loc.shape[-1]))

for d in diff_vel_loc:
    # Bin once, then normalise; a row with no countable sample becomes all
    # zeros instead of NaN (0/0).
    counts = np.histogram(d, bins=bins)[0]
    total = counts.sum()
    hist_d = counts / total if total > 0 else np.zeros(len(counts))
    hist_diff_vel_loc_test.append(hist_d)

hist_diff_vel_loc_test = np.array(hist_diff_vel_loc_test)

In [35]:
hist_diff_vel_loc_test.shape

(4000, 4)

In [36]:
#Concatenate test features

# Same column order as the training features: distance, speed difference,
# direction difference, velocity-vs-position direction difference.
hist_feat_test = np.concatenate(
    (
        hist_dist_test,
        hist_diff_speed_test,
        hist_diff_dire_test,
        hist_diff_vel_loc_test,
    ),
    axis=1,
)

In [37]:
hist_feat_test.shape

(4000, 17)

In [38]:
#convert labels
# One label per (simulation, edge) row; class 0 is recoded to -1.
labels_test = edges_test.reshape(-1)
labels_test = np.where(labels_test == 0, -1, labels_test)

In [40]:
labels_test.shape

(4000,)

## Train and Test Model

In [42]:
#Training model
# fit() returns the estimator itself, so construction and fitting are chained;
# the trailing expression keeps the estimator repr as the cell output.
clf = SVC(gamma="auto").fit(hist_feat_train, labels_train)
clf

SVC(gamma='auto')

In [61]:
# Predict one label per test row, then regroup the rows by simulation.
predicted_edges = clf.predict(hist_feat_test).reshape((-1, num_atoms*(num_atoms-1)))
# Recode the -1 class back to 0 so it acts as "no edge" in the products below.
predicted_edges = np.where(predicted_edges == -1, 0, predicted_edges)

#build diagonal embeddings, shape: [batch_size, n_edges, n_edges]
predicted_edges_diag = np.array([np.diag(edge_row) for edge_row in predicted_edges])

# rel_send^T @ diag(edges) @ rel_rec scatters each edge value into the
# (sender, receiver) cell of a node-by-node matrix.
predicted_relations = rel_send.T @ (predicted_edges_diag @ rel_rec)






In [70]:
#create relation labels

# One diagonal matrix per test simulation, shape: [batch_size, n_edges, n_edges]
label_edges_diag = np.array([np.diag(edge_row) for edge_row in edges_test])

# rel_send^T @ diag(edges) @ rel_rec scatters each edge label into the
# (sender, receiver) cell of a node-by-node matrix.
label_relations = rel_send.T @ (label_edges_diag @ rel_rec)

In [73]:
#Evaluate model with Group-Mitre

from sknetwork.topology import get_connected_components
from utils import *

In [74]:
# Per-simulation Group-Mitre scores on the test split.
recall_test, precision_test, F1_test = [], [], []

for i, label in enumerate(label_relations):
    pred = predicted_relations[i]

    # A relation matrix with no edges is replaced by np.arange(num_atoms),
    # i.e. a distinct group id per atom, instead of calling
    # get_connected_components on it.
    label_con = get_connected_components(label) if label.sum() != 0 else np.arange(num_atoms)
    pred_con = get_connected_components(pred) if pred.sum() != 0 else np.arange(num_atoms)

    recall, precision, F1 = compute_groupMitre_labels(label_con, pred_con)

    recall_test.append(recall)
    precision_test.append(precision)
    F1_test.append(F1)

print("Average Recall: ", np.mean(recall_test))
print("Average Precision: ", np.mean(precision_test))
print("Average F1: ", np.mean(F1_test))

Average Recall:  0.9116666666666666
Average Precision:  0.8569166666666667
Average F1:  0.876547619047619
