# Pipeline

In [1]:
# Imports

import torch

import networkx as nx

import numpy as np
import pandas as pd
import scipy.io
 
import matplotlib.pyplot as plt
import seaborn as sns

from tqdm.notebook import tqdm

import pickle

from tensorly.decomposition import tucker, constrained_parafac

from sklearn.cluster import KMeans, MiniBatchKMeans
from sklearn.metrics import f1_score, classification_report, roc_auc_score
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import MinMaxScaler

from scipy.sparse import csr_matrix, issparse

from SSLH_inference import *
from SSLH_utils import *

from tensorly.contrib.sparse import decomposition
import sparse

### Load Network & Ego Nets

In [2]:
def load_network(path):
    """Load a graph with anomaly labels from a ``.mat`` file and build its 1-hop egonets.

    Parameters
    ----------
    path : str
        Path handed to ``scipy.io.loadmat``. The file must provide a
        ``"Network"`` sparse adjacency matrix and a ``"Label"`` array
        (expected to hold at least one label per node).

    Returns
    -------
    G : networkx graph
        Graph built from ``data["Network"]``; every node gets an
        ``'Anomaly'`` attribute taken from the flattened labels.
    ego_gs : list
        Radius-1 ego graphs, kept only when they contain at least two nodes.
    roots : list
        ``'Anomaly'`` value of every node, in node-index order.
    labels : list
        Flattened ``data["Label"]`` values.

    Raises
    ------
    Exception
        Whatever ``scipy.io.loadmat`` raised for an unreadable ``path``
        (re-raised after printing ``'Invalid data path'``).
    """
    try:
        data = scipy.io.loadmat(path)
    except Exception:
        # The original swallowed the error here and then crashed on the next
        # line with an unrelated UnboundLocalError for `data`; surface the
        # real cause instead.
        print('Invalid data path')
        raise

    G = nx.from_scipy_sparse_array(data["Network"])
    # nx.set_node_attributes(G, bc_data["Attributes"], 'Attributes')
    print(str(G))

    # convert list of lists to list
    labels = [j for i in data["Label"] for j in i]

    # Add labels to each node (nodes are indexed 0..n-1 by from_scipy_sparse_array)
    for i in range(G.number_of_nodes()):
        G.nodes[i]['Anomaly'] = labels[i]

    is_undirected = not nx.is_directed(G)

    # G = max((G.subgraph(c) for c in nx.connected_components(G)), key=len)
    # G = nx.convert_node_labels_to_integers(G)

    ego_gs, roots = [], []

    # if 0-degree node(s), remove label(s) from consideration
    if len(labels) != G.number_of_nodes():
        labels = list(nx.get_node_attributes(G, 'Anomaly').values())

    for i in tqdm(range(G.number_of_nodes())):
        # NOTE(review): `roots` gets one entry per node, while `ego_gs` skips
        # egonets with fewer than two nodes, so the two lists can differ in
        # length when isolated nodes exist -- confirm callers expect this.
        roots.append(G.nodes[i]['Anomaly'])
        G_ego = nx.ego_graph(G, i, radius=1, undirected=is_undirected)
        if G_ego.number_of_nodes() >= 2:
            ego_gs.append(G_ego)

    return G, ego_gs, roots, labels

In [4]:
# Either build the network and its egonets from a dataset file and cache the
# result with pickle, or restore a previously cached result.
load = input('Load previous loaded network? (y/n): ')
# `[:1]` instead of `[0]` so an empty answer does not raise IndexError;
# as before, anything that does not start with "n" means "load from cache".
if load[:1].lower() == 'n':
    path = input('Enter file name to save as: ')

    result = load_network(input('Enter dataset/network path: '))

    # The context manager closes the file even if pickling fails.
    with open(path, 'wb') as saved_model:
        pickle.dump(result, saved_model)
else:
    # NOTE: pickle.load can execute arbitrary code -- only open trusted files.
    with open(input('Enter file path: '), 'rb') as f:
        result = pickle.load(f)

# Unpack in BOTH branches: the original only defined these names when loading
# from a cache, so later cells failed with NameError after a fresh build.
G, ego_gs, roots, labels = result
roots = [int(r) for r in roots]

Graph with 5196 nodes and 172897 edges


  0%|          | 0/5196 [00:00<?, ?it/s]

In [4]:
# Report how many ego networks are held in `ego_gs`.
print(f'Using {len(ego_gs)} egonets')

Using 5196 egonets


### Sparse Tensor Construction

In [5]:
# N: number of nodes in the full graph G.
N = G.number_of_nodes()

In [6]:
# Build, for every egonet:
#   * `indices`   -- [egonet_idx, row, col] for each non-zero adjacency entry,
#   * `values`    -- (number of edges, Frobenius norm of the adjacency matrix),
#   * `padded_gs` -- the N x N adjacency matrix scaled to unit Frobenius norm.
values, indices = [], []
padded_gs = []

undirected = not nx.is_directed(G)

for i, g in enumerate(tqdm(ego_gs)):
    ego_adj_list = dict(g.adjacency())

    # NOTE(review): one dense N x N float matrix is kept per egonet, so memory
    # grows as len(ego_gs) * N**2 -- consider a sparse representation.
    result = np.zeros((N, N))
    for node, neighbors in ego_adj_list.items():
        for neighbor in neighbors:
            result[node][neighbor] = 1
            if undirected:
                result[neighbor][node] = 1
            # Record each coordinate exactly once. An undirected adjacency
            # already lists both (u, v) and (v, u), so also appending the
            # reversed index (as before) duplicated every entry, and for a
            # directed graph it added entries that `result` does not contain.
            indices.append([i, node, neighbor])

    norm = np.linalg.norm(result, ord='fro')
    values.append((g.number_of_edges(), norm))
    # Guard against an edgeless egonet (norm == 0) to avoid inf/nan matrices.
    padded_gs.append(result * (1 / norm if norm else 0.0))

  0%|          | 0/5196 [00:00<?, ?it/s]

In [None]:
# Assemble the (egonet, node, node) sparse tensor from the collected indices.
#
# `values` holds one (edge_count, frobenius_norm) pair per egonet, not one
# value per coordinate, so it cannot be passed to COO as-is (COO needs a 1-D
# `data` array with one entry per coordinate). Each entry is therefore set to
# 1 / norm of its egonet, which makes slice k of the tensor equal to
# padded_gs[k].
# NOTE(review): confirm normalised entries are intended; use
# np.ones(coords.shape[1]) instead for a binary tensor.
coords = np.unique(
    np.asarray(indices, dtype=np.int64).reshape(-1, 3), axis=0
).T  # shape (3, nnz); duplicate coordinates removed so they are not summed
# values = torch.ones(len(indices))
slice_norms = np.asarray(values, dtype=float).reshape(-1, 2)[:, 1]
ten_values = 1.0 / slice_norms[coords[0]]

# Pass the shape explicitly; otherwise it is inferred from the largest index
# present and trailing empty rows/columns would be dropped.
cube = sparse.COO(coords, data=ten_values, shape=(len(ego_gs), N, N))

In [None]:
# Persist the sparse tensor; the context manager closes the file even if
# pickling raises.
with open('bc_sparse_tensor.sav', 'wb') as saved_model:
    pickle.dump(cube, saved_model)

### Tensor Decomposition + Reconstruction Error

In [9]:
# Decomposition ranks to evaluate, read as whitespace-separated integers.
ranks = list(map(int, input('Enter ranks, space separated: ').split()))

In [11]:
def _ask_yes_no(prompt, retry_prompt=None):
    """Ask until the answer starts with 'y' or 'n' (any case); return 'y' or 'n'.

    ``retry_prompt`` is shown on re-asks (defaults to ``prompt``). Empty
    answers are rejected instead of raising IndexError.
    """
    answer = input(prompt)
    while answer[:1].lower() not in ('y', 'n'):
        print('Invalid Input!')
        answer = input(prompt if retry_prompt is None else retry_prompt)
    return answer[:1].lower()


# For every requested rank: obtain per-egonet reconstruction errors (computed
# from a tensor decomposition or loaded from disk), min-max scale them and
# score them against the anomaly labels with ROC AUC.
scores = []
for rank in ranks:
    print(f'\nUSING RANK {rank}\n')
    load = _ask_yes_no('Load Reconstruction Errors? (y/n): ')
    # not loading previously calculated reconstruction errors
    if load == 'n':

        # checking for valid input
        load = _ask_yes_no('\nLoad Previous Decomposition? (y/n): ',
                           retry_prompt='Load Previous Decomposition? (y/n): ')
        decomp = input('Select Tucker (1) or CP (2) Decomposition: ')
        while decomp not in ('1', '2'):
            print('Invalid Input!')
            decomp = input('Select Tucker (1) or CP (2) Decomposition: ')

        if load == 'n':
            path = input('Enter file name to save factors as: ')
            # NOTE(review): init='random' without a fixed random_state makes
            # the factors differ between runs.
            if decomp == '1':
                print('Tucker Decomposition...')
                _, factors = decomposition.tucker(cube, rank=rank, init='random')
            else:
                print('Parafac Decomposition...')
                _, factors = decomposition.parafac(cube, rank=rank, init='random')
            with open(path, 'wb') as saved_model:
                pickle.dump(factors, saved_model)
            print(f"Factors Saved to {path}\n")
        else:
            # NOTE: pickle.load can execute arbitrary code -- trusted files only.
            with open(input('Enter file path: '), 'rb') as f:
                factors = pickle.load(f)
            print()

        A, B, C = factors
        if decomp == '1':
            A, B, C = np.array(A), np.array(B), np.array(C)
            # Tucker branch projects with the transposed factors.
            left, right = A.T, B.T
        else:
            A, B, C = A.todense(), B.todense(), C.todense()
            # The pseudo-inverses do not depend on the egonet, so compute them
            # once here instead of once per loop iteration.
            left, right = np.linalg.pinv(A), np.linalg.pinv(B)

        errors = []
        print("Calculating Reconstruction Errors...")
        for gs in tqdm(padded_gs):
            # Project the egonet onto the factor matrices and back, then
            # measure the Frobenius distance to the original.
            gs_p = A @ ((left @ gs) @ B) @ right
            d = np.linalg.norm(gs - gs_p, ord='fro')
            errors.append(d)

        errors = np.array(errors).reshape(-1, 1)

        path = input('Enter file name to save reconstruction errors: ')
        with open(path, 'wb') as saved_model:
            pickle.dump(errors, saved_model)
        print()

    # loading previously calculated reconstruction errors
    else:
        # NOTE: pickle.load can execute arbitrary code -- trusted files only.
        with open(input('Enter file path: '), 'rb') as f:
            errors = pickle.load(f)
        print()

    scale = MinMaxScaler()
    embeddings = scale.fit_transform(np.array(errors))

    # Each entry is (model name, rank, AUC). `labels` must have one entry per
    # reconstruction error for roc_auc_score to accept it.
    scores.append(('No Model', rank, roc_auc_score(labels, embeddings)))


USING RANK 10


Calculating Reconstruction Errors...


  0%|          | 0/5196 [00:00<?, ?it/s]




In [None]:
# `scores` entries are (model name, rank, AUC) triples; unpacking only two
# names (as before) raised ValueError.
for name, rank, auc in scores:
    print(f'Model: {name}, Rank: {rank}, AUC score: {auc}')