In [None]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, LSTM, Dense, TimeDistributed, Reshape, MaxPooling2D
from tensorflow.keras.layers import UpSampling2D, Conv2DTranspose, Bidirectional, Flatten,Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import BatchNormalization, RepeatVector
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Dense
# from keras.layers import dot
# from keras.layers import concatenate
import numpy as np
from tensorflow.keras.models import load_model
import tensorflow as tf
from tqdm import tqdm
# from keras.layers.normalization import BatchNormalization

def create_model(input_shape=(1, 76), num_classes=2):
    """Build and compile a stacked bidirectional-LSTM softmax classifier.

    Args:
        input_shape: Shape of one input sample as passed to the first layer's
            ``input_shape`` argument. Defaults to ``(1, 76)``, the value that
            was previously hard-coded.
        num_classes: Number of units in the final dense/softmax layer.
            Defaults to ``2``, the value that was previously hard-coded.

    Returns:
        The compiled ``Sequential`` model. ``model.summary()`` is called as a
        side effect before returning.
    """
    model = Sequential()
    model.add(Bidirectional(LSTM(256, return_sequences=True), input_shape=input_shape))
    model.add(BatchNormalization())
    # Explicitly named so the layer can be retrieved with get_layer("feature").
    model.add(Bidirectional(LSTM(128), name="feature"))
    model.add(Dense(num_classes))
    model.add(Activation('softmax'))
    # categorical_crossentropy expects one-hot targets, not integer class ids.
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    model.summary()
    return model

In [None]:
# Read the pre-split feature and label arrays from the local data/ directory.
X_train, X_test = np.load("data/X_train.npy"), np.load("data/X_test.npy")
y_train, y_test = np.load("data/y_train.npy"), np.load("data/y_test.npy")

In [None]:
# Mount Google Drive into the runtime at /content/drive (only available inside Google Colab).
# NOTE(review): the data/*.npy arrays are read from relative paths in the cell
# before this one, i.e. before the drive is mounted — confirm the intended order.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Collapse per-sample label rows to integer class ids (index of the largest entry).
# assumes the stored labels are one-hot with shape (n_samples, n_classes) — TODO confirm
# The ndim guard keeps this cell idempotent: once the labels are 1-D, running
# argmax(axis=1) again would raise an axis-out-of-bounds error.
if y_train.ndim > 1:
    y_train = np.argmax(y_train, axis=1)
if y_test.ndim > 1:
    y_test = np.argmax(y_test, axis=1)

In [None]:
# Shape of the training features once every length-1 axis is dropped.
X_train.squeeze().shape

In [None]:
import json

# Each mapping is a JSON object. Its keys are later split on "-" into two
# endpoints (see get_edges) and its values are used to index rows of the
# feature/label arrays (see combine).
with open("X_train.json", 'r') as f:
    mapping_X_train = json.load(f)
# mapping_X_test is needed further down (combine(...) on the test split and the
# train/test key lists), so it has to be loaded here; leaving it commented out
# makes those cells fail with a NameError on a fresh kernel.
with open("X_test.json", 'r') as f:
    mapping_X_test = json.load(f)
# mapping_Y_train = np.load("mapping/IP_label_train.npy")
# mapping_Y_test = np.load("mapping/IP_test_test.npy")

# mapping_Y_train = np.argmax(mapping_Y_train, axis=1)
# mapping_Y_test = np.argmax(mapping_Y_test, axis=1)

In [None]:
# Histogram of group sizes: stats[k] is the number of mapping entries whose
# value holds exactly k items.
stats = {}
for ip, flow in tqdm(mapping_X_train.items()):
    num_flow = len(flow)
    stats[num_flow] = stats.get(num_flow, 0) + 1

In [None]:
# Distinct group sizes observed in the training mapping (dict insertion order).
num_flows = list(stats)

In [None]:
# Sort the distinct group sizes in place, ascending.
num_flows.sort()

In [None]:
# Smallest group size present in the training mapping.
min(num_flows)

In [None]:
# Number of mapping entries that hold exactly one item.
# Raises KeyError if no entry has size 1.
stats[1]

In [None]:
# Restore a previously saved Keras model from checkpoint.hdf5.
# NOTE(review): whether this checkpoint matches create_model() cannot be told
# from this notebook — confirm which architecture was saved.
lstm_model = load_model("checkpoint.hdf5")
# lstm_model.load_weights('checkpoint.hdf5')
# loss, acc = model.evaluate(X_test, y_test, verbose=2)

In [None]:
# Sub-model that maps the loaded model's input to the output of its
# "flatten_3" layer.
# NOTE(review): the predict() calls that would use feat_extractor are commented
# out further down, so it is currently unused by the visible cells.
feature_layer = lstm_model.get_layer("flatten_3")
feat_extractor = Model(inputs=lstm_model.input, outputs=feature_layer.output)

In [None]:
def combine(x, y, mapping):
    """Average per-row features and labels into one row per mapping entry.

    Args:
        x: Array of features; rows are selected along axis 0.
        y: Array of labels; rows are selected along axis 0.
        mapping: Dict whose values are index lists into ``x`` / ``y``. One
            output row is produced per entry, in the dict's iteration order.
            The keys are not used here.

    Returns:
        Tuple ``(X, Y)``. ``X`` holds the mean feature row of every entry with
        all length-1 axes after the first removed; ``Y`` holds the mean label
        of every entry.

        NOTE(review): ``Y`` is a mean, so an entry whose rows carry different
        labels yields a fractional value. The later one-hot encoding compares
        labels for equality with ``np.arange(2)`` and would produce an all-zero
        row for such an entry — confirm every entry is label-pure.

    Raises:
        ValueError: If ``mapping`` is empty.
    """
    X = []
    Y = []
    for item in tqdm(mapping.values()):
        X.append(np.mean(x[item], axis=0))
        Y.append(np.mean(y[item], axis=0))
    if not X:
        raise ValueError("combine() needs at least one mapping entry")
    X = np.stack(X, axis=0)
    Y = np.stack(Y, axis=0)

    # Drop singleton axes but always keep axis 0 (one row per entry). A plain
    # np.squeeze(X) would also drop axis 0 when the mapping has a single entry.
    squeezed_shape = X.shape[:1] + tuple(dim for dim in X.shape[1:] if dim != 1)
    return X.reshape(squeezed_shape), Y

In [None]:
# The feature-extraction step (feat_extractor.predict) is disabled; the raw
# arrays are used as features instead.
# NOTE(review): confirm that the per-node feature width of these arrays matches
# the input_node_shape the GCN is built with.
# feature_train = feat_extractor.predict(X_train)
# feature_test = feat_extractor.predict(X_test)

feature_train = X_train
feature_test = X_test

In [None]:
# Aggregate features and labels to one row per mapping key for each split.
# Requires mapping_X_train / mapping_X_test from the JSON-loading cell.
X_train_ip, Y_train_ip = combine(feature_train, y_train, mapping_X_train)
X_test_ip, Y_test_ip = combine(feature_test, y_test, mapping_X_test)

In [None]:
# Mapping keys in iteration order; position i is the key of row i produced by
# combine() for the same mapping.
list_X_train_ip = list(mapping_X_train)
list_X_test_ip = list(mapping_X_test)

In [None]:
# Print every key that occurs in both the train and the test mapping.
# A set gives constant-time membership tests; scanning the training list for
# every test key was quadratic.
_train_ip_set = set(list_X_train_ip)
for i in list_X_test_ip:
    if i in _train_ip_set:
        print(i)

In [None]:
def get_edges(list_ip, Y_ip):
    """Build directed edges between nodes that share an endpoint.

    Node ``i`` corresponds to ``list_ip[i]``, a string of the form ``"a-b"``.
    An edge ``[i, j]`` is emitted when node ``j`` has ``a`` or ``b`` as one of
    its own endpoints and ``Y_ip[i] != Y_ip[j]``.

    Endpoints are compared for exact equality. The previous substring test
    (``ip1 in list_ip[j]``) also matched unrelated keys, e.g. ``"10.0.0.1"``
    inside ``"10.0.0.11-..."``.

    NOTE(review): edges connect nodes whose labels *differ*, so the graph
    structure depends on the labels — confirm this is intended.

    Args:
        list_ip: Sequence of ``"a-b"`` strings, one per node.
        Y_ip: Per-node labels, indexable by node position.

    Returns:
        Integer array of shape ``(num_edges, 2)`` ordered by ``i`` then ``j``;
        shape ``(0, 2)`` when there are no edges.

    Raises:
        ValueError: If a key does not split into exactly two parts on ``"-"``.
    """
    endpoints = []
    for key in list_ip:
        ip1, ip2 = key.split("-")
        endpoints.append((ip1, ip2))

    # Index: endpoint -> nodes having that endpoint, so each node only visits
    # its real neighbours instead of scanning every other node.
    nodes_by_ip = {}
    for node, pair in enumerate(endpoints):
        for ip in set(pair):
            nodes_by_ip.setdefault(ip, []).append(node)

    edge_index = []
    for i in tqdm(range(len(list_ip))):
        ip1, ip2 = endpoints[i]
        # Sorted so edges keep the original ascending-j order.
        neighbours = sorted(set(nodes_by_ip[ip1]) | set(nodes_by_ip[ip2]))
        for j in neighbours:
            if Y_ip[i] != Y_ip[j]:
                edge_index.append([i, j])

    # reshape keeps a well-formed (0, 2) array when no edge was found.
    return np.asarray(edge_index, dtype=np.int64).reshape(-1, 2)

In [None]:
# Build the edge lists for both splits from the keys and aggregated labels.
# NOTE(review): the next cell rebinds both names to arrays loaded from .npy
# files, so this computation is discarded unless that cell is skipped.
edge_indices_train = get_edges(list_X_train_ip, Y_train_ip)
edge_indices_test = get_edges(list_X_test_ip, Y_test_ip)

In [None]:
# Load edge lists from disk. This replaces any edge_indices_train /
# edge_indices_test computed earlier in the session.
# NOTE(review): presumably these files are saved get_edges() output — verify
# they were generated from the same mappings and labels.
edge_indices_train = np.load("edge_index.npy")
edge_indices_test = np.load("edge_index_test.npy")

In [None]:
from kgcnn.data.cora.cora_lu import cora_graph
from kgcnn.literature.GCN import make_gcn
from kgcnn.literature.GNNExplain import GNNExplainer, GNNInterface
from kgcnn.utils.adj import precompute_adjacency_scaled, sort_edge_indices, make_adjacency_from_edge_indices, make_adjacency_undirected_logical_or, convert_scaled_adjacency_to_list
from kgcnn.utils.data import ragged_tensor_from_nested_numpy
from kgcnn.utils.learning import lr_lin_reduction

In [None]:
def _scaled_edge_list(raw_edge_indices):
    """Sort edges, build the undirected scaled adjacency and return it as an edge list.

    Returns ``(sorted_edges, scaled_adjacency, edge_index, edge_weight)`` where
    ``edge_weight`` has a trailing axis of length 1 added.
    """
    sorted_edges = sort_edge_indices(raw_edge_indices)
    adjacency = make_adjacency_from_edge_indices(sorted_edges)
    adjacency = precompute_adjacency_scaled(make_adjacency_undirected_logical_or(adjacency))
    edge_index, edge_weight = convert_scaled_adjacency_to_list(adjacency)
    return sorted_edges, adjacency, edge_index, np.expand_dims(edge_weight, axis=-1)


def _one_hot_two_class(labels):
    """One-hot encode labels by equality with the class ids 0 and 1 (float32)."""
    return np.array(np.expand_dims(labels, axis=-1) == np.arange(2), dtype=np.float32)


# Same preprocessing for both splits.
# NOTE(review): edge_indices_train / edge_indices_test are read here as raw
# arrays and rebound further down to the output of
# ragged_tensor_from_nested_numpy, so re-running this cell requires re-running
# the cell that defines the raw edge arrays first.
(edge_index_sorted_train, adj_matrix_train,
 edge_index_train, edge_weight_train) = _scaled_edge_list(edge_indices_train)
(edge_index_sorted_test, adj_matrix_test,
 edge_index_test, edge_weight_test) = _scaled_edge_list(edge_indices_test)

y_train_ip = _one_hot_two_class(Y_train_ip)
y_test_ip = _one_hot_two_class(Y_test_ip)

# Each split is wrapped as a one-element list, i.e. a single graph per split.
nodes_train = ragged_tensor_from_nested_numpy([X_train_ip])
edges_train = ragged_tensor_from_nested_numpy([edge_weight_train])
edge_indices_train = ragged_tensor_from_nested_numpy([edge_index_train])

nodes_test = ragged_tensor_from_nested_numpy([X_test_ip])
edges_test = ragged_tensor_from_nested_numpy([edge_weight_test])
edge_indices_test = ragged_tensor_from_nested_numpy([edge_index_test])

In [None]:
# Model inputs are (nodes, edge weights, edge indices) triples; the targets get
# a leading axis of length 1 so they line up with the single graph per split.
xtrain = (nodes_train, edges_train, edge_indices_train)
xtest = (nodes_test, edges_test, edge_indices_test)

ytrain = y_train_ip[np.newaxis, ...]
ytest = y_test_ip[np.newaxis, ...]

In [None]:
# inds = np.arange(len(labels))
# ind_train, ind_val = train_test_split(inds, test_size=0.10, random_state=0)
# val_mask = np.zeros_like(inds)
# train_mask = np.zeros_like(inds)
# val_mask[ind_val] = 1
# train_mask[ind_train] = 1
# val_mask = np.expand_dims(val_mask, axis=0)  # One graph in batch
# train_mask = np.expand_dims(train_mask, axis=0)  # One graph in batch


In [None]:
# Node-level GCN classifier with a two-unit softmax head.
# The node feature width is taken from the aggregated training features rather
# than a hard-coded 1032, so the model always matches the arrays it is fed
# (raw features or extracted ones).
model = make_gcn(
    input_node_shape=[None, X_train_ip.shape[-1]],
    input_edge_shape=[None, 1],
    # Output
    output_embedd={"output_mode": 'node'},
    output_mlp={"use_bias": [True, False], "units": [ 16, 2], "activation": [ 'gelu', 'softmax']},
    # model specs
    depth=2,
    gcn_args={"units": 2, "use_bias": True, "activation": "gelu", "has_unconnected": False}
)

In [None]:
# Training hyper-parameters.
learning_rate_start = 1e-3
learning_rate_stop = 1e-4  # only referenced by the disabled LR scheduler below
epo = 40                   # number of training epochs
epomin = 260               # only referenced by the disabled LR scheduler below
epostep = 10               # run validation every `epostep` epochs

# Compile model with optimizer and loss
# `learning_rate` is the supported keyword; the `lr` alias is deprecated and
# rejected by newer Keras releases.
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate_start)
# cbks = tf.keras.callbacks.LearningRateScheduler(lr_lin_reduction(learning_rate_start, learning_rate_stop, epomin, epo))
model.compile(loss='categorical_crossentropy',
              optimizer=optimizer,
              weighted_metrics=['accuracy'])
# summary() prints the table itself and returns None; wrapping it in print()
# only added a stray "None" line.
model.summary()

# Training loop
trainlossall = []
testlossall = []
# start = time.process_time()
# NOTE(review): each split is a single graph (leading axis of length 1), so
# batch_size presumably has no effect here — confirm.
hist = model.fit(xtrain, ytrain,
                 epochs=epo,
                 batch_size=32,
#                  callbacks=[cbks],
                 validation_freq=epostep,
                 validation_data=(xtest, ytest),
                 verbose=2
                 )