In [None]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler
import time
from tqdm import tqdm
from scipy.stats import truncnorm

import torch
import torch.nn as nn
from torch import Tensor
from torch_sparse import SparseTensor, matmul, SparseStorage
# from torch_sparse.tensor import SparseTensor
from torch_geometric.nn.dense.linear import Linear
from torch_geometric.nn.conv import MessagePassing
from torch_geometric.typing import OptTensor, PairTensor
from torch_geometric.data import Data
from torch.utils.tensorboard import SummaryWriter
from torch_scatter import scatter

from sklearn.metrics import (
    accuracy_score,
    f1_score,
    precision_score,
    recall_score,
    roc_curve,
    precision_recall_curve,
    auc,
)
from typing import Tuple, Union, Dict, Optional

In [2]:
# import necessary functions and classes from GraphBEAN.
# You can clone their repo here: https://github.com/grab/GraphBEAN/tree/master

from models.data import BipartiteData
from models.net import GraphBEAN
from models.sampler import EdgePredictionSampler
from models.loss import reconstruction_loss
from models.score import compute_anomaly_score, edge_prediction_metric

from anomaly_insert import (
    choose,
    dense_block,
    inject_dense_block_anomaly,
    inject_dense_block_and_feature_anomaly,
    inject_feature_anomaly,
    inject_random_block_anomaly,
    outside_confidence_interval,
    scaled_gaussian_noise
)

# Import Data

In [None]:
df_in = pd.read_csv('./dataset/Train_Inpatientdata.csv')

In [None]:
print(df_in.shape)
df_in.head()

(40474, 30)


Unnamed: 0,BeneID,ClaimID,ClaimStartDt,ClaimEndDt,Provider,InscClaimAmtReimbursed,AttendingPhysician,OperatingPhysician,OtherPhysician,AdmissionDt,...,ClmDiagnosisCode_7,ClmDiagnosisCode_8,ClmDiagnosisCode_9,ClmDiagnosisCode_10,ClmProcedureCode_1,ClmProcedureCode_2,ClmProcedureCode_3,ClmProcedureCode_4,ClmProcedureCode_5,ClmProcedureCode_6
0,BENE11001,CLM46614,2009-04-12,2009-04-18,PRV55912,26000,PHY390922,,,2009-04-12,...,2724.0,19889.0,5849.0,,,,,,,
1,BENE11001,CLM66048,2009-08-31,2009-09-02,PRV55907,5000,PHY318495,PHY318495,,2009-08-31,...,,,,,7092.0,,,,,
2,BENE11001,CLM68358,2009-09-17,2009-09-20,PRV56046,5000,PHY372395,,PHY324689,2009-09-17,...,,,,,,,,,,
3,BENE11011,CLM38412,2009-02-14,2009-02-22,PRV52405,5000,PHY369659,PHY392961,PHY349768,2009-02-14,...,25062.0,40390.0,4019.0,,331.0,,,,,
4,BENE11014,CLM63689,2009-08-13,2009-08-30,PRV56614,10000,PHY379376,PHY398258,,2009-08-13,...,5119.0,29620.0,20300.0,,3893.0,,,,,


In [None]:
df_train = pd.read_csv("./dataset/Train.csv")

In [None]:
print(df_train.shape)
df_train.head()

(5410, 2)


Unnamed: 0,Provider,PotentialFraud
0,PRV51001,No
1,PRV51003,Yes
2,PRV51004,No
3,PRV51005,Yes
4,PRV51007,No


In [None]:
df = pd.merge(
    df_in,
    df_train,
    on="Provider",
    how="left"
)

In [None]:
print(df.shape)
df.head()

(40474, 31)


Unnamed: 0,BeneID,ClaimID,ClaimStartDt,ClaimEndDt,Provider,InscClaimAmtReimbursed,AttendingPhysician,OperatingPhysician,OtherPhysician,AdmissionDt,...,ClmDiagnosisCode_8,ClmDiagnosisCode_9,ClmDiagnosisCode_10,ClmProcedureCode_1,ClmProcedureCode_2,ClmProcedureCode_3,ClmProcedureCode_4,ClmProcedureCode_5,ClmProcedureCode_6,PotentialFraud
0,BENE11001,CLM46614,2009-04-12,2009-04-18,PRV55912,26000,PHY390922,,,2009-04-12,...,19889.0,5849.0,,,,,,,,Yes
1,BENE11001,CLM66048,2009-08-31,2009-09-02,PRV55907,5000,PHY318495,PHY318495,,2009-08-31,...,,,,7092.0,,,,,,No
2,BENE11001,CLM68358,2009-09-17,2009-09-20,PRV56046,5000,PHY372395,,PHY324689,2009-09-17,...,,,,,,,,,,No
3,BENE11011,CLM38412,2009-02-14,2009-02-22,PRV52405,5000,PHY369659,PHY392961,PHY349768,2009-02-14,...,40390.0,4019.0,,331.0,,,,,,No
4,BENE11014,CLM63689,2009-08-13,2009-08-30,PRV56614,10000,PHY379376,PHY398258,,2009-08-13,...,29620.0,20300.0,,3893.0,,,,,,No


In [None]:
# Replace string with 1's and 0's for PotentialFraud column
df.loc[df["PotentialFraud"] == "Yes", "PotentialFraud"] = 1
df.loc[df["PotentialFraud"] == "No", "PotentialFraud"] = 0

In [None]:
df["PotentialFraud"].value_counts(dropna=False, normalize=True)

1    0.578198
0    0.421802
Name: PotentialFraud, dtype: float64

# Prepare Data

## Edge

In [None]:
df_edge = df.groupby(["BeneID", "AttendingPhysician"]).agg(
    count_claims=("ClaimID", "nunique"),
    count_providers=("Provider", "nunique"),
    total_reimbursed=("InscClaimAmtReimbursed", "sum"),
    avg_reimbursed=("InscClaimAmtReimbursed", "mean"),
    pct_fraud=("PotentialFraud", lambda x: round(x.mean() * 100))
).reset_index()

In [None]:
print(df_edge.shape)
df_edge.head()

(38559, 7)


Unnamed: 0,BeneID,AttendingPhysician,count_claims,count_providers,total_reimbursed,avg_reimbursed,pct_fraud
0,BENE100002,PHY424317,1,1,12000,12000.0,100
1,BENE100004,PHY319940,1,1,3000,3000.0,0
2,BENE100006,PHY325217,1,1,17000,17000.0,100
3,BENE100007,PHY415056,1,1,4000,4000.0,100
4,BENE100010,PHY403299,1,1,8000,8000.0,0


## Beneficiary

In [None]:
df_bene = df.groupby("BeneID").agg(
    count_claims=("ClaimID", "nunique"),
    count_physicians=("AttendingPhysician", "nunique"),
    count_providers=("Provider", "nunique"),
    total_reimbursed=("InscClaimAmtReimbursed", "sum"),
    avg_reimbursed=("InscClaimAmtReimbursed", "mean"),
    pct_fraud=("PotentialFraud", lambda x: round(x.mean() * 100))
).reset_index()

In [None]:
print(df_bene.shape)
df_bene.head()

(31289, 7)


Unnamed: 0,BeneID,count_claims,count_physicians,count_providers,total_reimbursed,avg_reimbursed,pct_fraud
0,BENE100002,1,1,1,12000,12000.0,100
1,BENE100004,1,1,1,3000,3000.0,0
2,BENE100006,1,1,1,17000,17000.0,100
3,BENE100007,1,1,1,4000,4000.0,100
4,BENE100010,2,2,1,12000,6000.0,0


## Physician

In [None]:
df_physician = df.groupby("AttendingPhysician").agg(
    count_claims=("ClaimID", "nunique"),
    count_beneficiaries=("BeneID", "nunique"),
    count_providers=("Provider", "nunique"),
    total_reimbursed=("InscClaimAmtReimbursed", "sum"),
    avg_reimbursed=("InscClaimAmtReimbursed", "mean"),
    pct_fraud=("PotentialFraud", lambda x: round(x.mean() * 100))
).reset_index()

In [None]:
print(df_physician.shape)
df_physician.head()

(11604, 7)


Unnamed: 0,AttendingPhysician,count_claims,count_beneficiaries,count_providers,total_reimbursed,avg_reimbursed,pct_fraud
0,PHY311002,1,1,1,3000,3000.0,100
1,PHY311023,1,1,1,6000,6000.0,0
2,PHY311028,1,1,1,11000,11000.0,100
3,PHY311035,1,1,1,9000,9000.0,0
4,PHY311056,1,1,1,57000,57000.0,0


# Create Graph

In [None]:
df_bene["bid"] = df_bene.index
df_physician["pid"] = df_physician.index

In [None]:
df_bene_id = df_bene[["BeneID", "bid"]]
df_physician_id = df_physician[["AttendingPhysician", "pid"]]

In [None]:
df_edge_2 = df_edge.merge(
    df_bene_id,
    on="BeneID"
).merge(df_physician_id, on="AttendingPhysician")
df_edge_2 = df_edge_2.sort_values(["bid","pid"])

In [None]:
bid = torch.tensor(df_edge_2["bid"].to_numpy())
pid = torch.tensor(df_edge_2["pid"].to_numpy())

In [None]:
adj = SparseTensor(row=bid, col=pid)

In [None]:
def standardize(features: np.ndarray) -> np.ndarray:
    scaler = StandardScaler()
    results = scaler.fit_transform(features)
    return results

In [None]:
df_edge_2.head()

Unnamed: 0,BeneID,AttendingPhysician,count_claims,count_providers,total_reimbursed,avg_reimbursed,pct_fraud,bid,pid
0,BENE100002,PHY424317,1,1,12000,12000.0,100,0,10661
46,BENE100004,PHY319940,1,1,3000,3000.0,0,1,788
49,BENE100006,PHY325217,1,1,17000,17000.0,100,2,1317
66,BENE100007,PHY415056,1,1,4000,4000.0,100,3,9801
77,BENE100010,PHY403299,1,1,8000,8000.0,0,4,8627


In [None]:
# Encode attributes of edge as tensors
edge_attr = torch.tensor(
    standardize(
        df_edge_2.iloc[:, 2: -2].to_numpy()
    )
).float()

In [None]:
# Encode attributes of beneficiaries as tensors
bene_attr = torch.tensor(
    standardize(
        df_bene.iloc[:, 1: -1].to_numpy()
    )
).float()

In [None]:
# Encode attributes of physicians as tensors
physician_attr = torch.tensor(
    standardize(
        df_physician.iloc[:, 1: -1].to_numpy()
    )
).float()

In [None]:
data = BiPartiteData(adj, xu=bene_attr, xv=physician_attr, xe=edge_attr)

In [None]:
data

BiPartiteData(adj=[31289, 11604, nnz=38559], xu=[31289, 6], xv=[11604, 6], xe=[38559, 5])

### Inject anomaly

If name includes "anomaly", call inject_random_block_anomaly() to add random anomalies.

The function adds anomalies such as variations in block structure,features, node/edge features and random variations in number of nodes.

#### Inject_random_block_anomaly

In [None]:
block_anomalies = ["full_dense_block", "partial_full_dense_block"]
feature_anomalies = ["outside_ci", "scaled_gaussian", "none"]
node_edge_feat_anomalies = ["node_only", "edge_only", "node_edge"]

block_anomalies_weight = [0.2, 0.8]
feature_anomalies_weight = [0.5, 0.4, 0.1]
node_edge_feat_anomalies_weight = [0.1, 0.3, 0.6]

In [None]:
num_graph = 5
num_group = 20
num_nodes_range=(1, 20)
num_nodes_range2=(1, 6)

In [None]:
data_new = BipartiteData(data.adj, xu=data.xu, xv=data.xv, xe=data.xe)

In [None]:
data_new

BipartiteData(adj=[31289, 11604, nnz=38559], xu=[31289, 6], xv=[11604, 6], xe=[38559, 5])

In [None]:
# code copied from "inject_random_block_anomaly()" in GraphBEAN's anomaly_insert


for itg in range(num_group):
    print(f"it {itg}: ", end="")

    # prints 3 random floats between 0 to 1 with 4d.p.
    rnd = torch.rand(3)
    # Using first random float, choose between block_anomalies choices with probability = weight
    block_an = choose(rnd[0], block_anomalies, block_anomalies_weight)
    # choose feature anomalies using second random float, choices for feature anomalies and weights
    feature_an = choose(rnd[1], feature_anomalies, feature_anomalies_weight)
    # choose node_edge_feature anomalies using third random float, choices and weights
    node_edge_an = choose(rnd[2], node_edge_feat_anomalies, node_edge_feat_anomalies_weight)

    # lr = min in range, rr = max in range, mr = median in range
    lr, rr, mr = (
        num_nodes_range[0],
        num_nodes_range[1],
        num_nodes_range[0] + num_nodes_range[1] / 2,
    )
    if num_nodes_range2 is not None:
        # generate random integers in range
        nn1 = int(
            np.minimum(
                # select minimum between output from previous function and max_range + 1
                np.maximum(
                    # select max between low_range and range tensor + median_range
                    lr,
                    # a tensor filled with random number from normal distribution
                    # .item() grabs item inside tensor
                    (torch.randn(1).item() * np.sqrt(mr)) + mr
                ),
                rr + 1
            )
        )

        # get node ranges from num_nodes_range2
        lr2, rr2, mr2 = (
            num_nodes_range2[0],
            num_nodes_range2[1],
            num_nodes_range2[0] + num_nodes_range2[1] / 2,
        )
        # generate random integers in range from num_nodes_range2
        nn2 = int(
            np.minimum(
                np.maximum(
                    lr2,
                    (torch.randn(1).item() * np.sqrt(mr2)) + mr
                ),
                rr2 + 1
            )
        )
        num_nodes = (nn1, nn2)

        # setup kwargs
        connected_prop = 1.0
        if block_an == "partial_full_dense_block":
            # generate random prob between 0.2 and 1.0
            connected_prop = np.minimum(
                np.maximum(
                    0.2,
                    (torch.randn(1).item() / 4) + 0.5
                ),
                1.0
            )
        # generate random prob between 0.1 to 0.9
        prop_feat = np.minimum(
            np.maximum(
                0.1,
                (torch.randn(1).item() / 8) + 0.3
            ),
            0.9
        )
        # generate random float between 2.0 to 3 + random float from std normal dist
        std_cutoff = np.maximum(
            2.0,
            torch.randn(1).item() + 3.0
        )
        scale = np.maximum(
            2.0,
            torch.randn(1).item() + 3.0
        )

        # inject anomaly
        node_feature_anomaly = None
        if block_an != "none" and feature_an != "none":
            node_feature_anomaly = False if node_edge_an == "edge_only" else True
            edge_feature_anomaly = False if node_edge_an == "node_only" else True

            if feature_an == "outside_ci":
                data_new = inject_dense_block_and_feature_anomaly(
                    data_new,
                    node_feature_anomaly,
                    edge_feature_anomaly,
                    num_group=1,
                    num_nodes=num_nodes,
                    connected_prop=connected_prop,
                    feature_anomaly_type="outside_ci",
                    prop_feat=prop_feat,
                    std_cutoff=std_cutoff
                )
            elif feature_an == "scaled_gaussian":
                data_new = inject_dense_block_and_feature_anomaly(
                    data_new,
                    node_feature_anomaly,
                    edge_feature_anomaly,
                    num_group=1,
                    num_nodes=num_nodes,
                    connected_prop=connected_prop,
                    feature_anomaly_type="scaled_gaussian",
                    scale=scale,
                )
        elif block_an != "none" and feature_an == "none":
            data_new = inject_dense_block_anomaly(
                data_new,
                num_group=1,
                num_nodes=num_nodes,
                connected_prop=connected_prop
            )

        elif block_an == "none" and feature_an != "none":
            node_anomaly = False if node_edge_an == "edge_only" else True
            edge_anomaly = False if node_edge_an == "node_only" else True

            if feature_an == "outside_ci":
                data_new = inject_feature_anomaly(
                    data_new,
                    node_anomaly,
                    edge_anomaly,
                    feature_anomaly_type="outside_ci",
                    prop_feat=prop_feat,
                    std_cutoff=std_cutoff,
                )

            elif feature_an == "scaled_gaussian":
                data_new = inject_feature_anomaly(
                    data_new,
                    node_anomaly,
                    edge_anomaly,
                    feature_anomaly_type="scaled_gaussian",
                    scale=scale,
                )

        print(
            f"affected: yu = {data_new.yu.sum()}, yv = {data_new.yv.sum()}, ye = (data_new.ye.sum()) ",
            end="",
        )
        print(
            f"[{block_an}: {connected_prop:.2f}, {feature_an}, {num_nodes}, {node_feature_anomaly}]"
        )


it 0: affected: yu = 49, yv = 34, ye = (data_new.ye.sum()) [partial_full_dense_block: 0.61, outside_ci, (13, 7), True]
it 1: affected: yu = 61, yv = 41, ye = (data_new.ye.sum()) [partial_full_dense_block: 0.49, outside_ci, (12, 7), True]
it 2: affected: yu = 75, yv = 48, ye = (data_new.ye.sum()) [partial_full_dense_block: 0.95, outside_ci, (14, 7), False]
it 3: affected: yu = 90, yv = 55, ye = (data_new.ye.sum()) [partial_full_dense_block: 0.61, scaled_gaussian, (15, 7), False]
it 4: affected: yu = 103, yv = 62, ye = (data_new.ye.sum()) [partial_full_dense_block: 0.38, outside_ci, (13, 7), False]
it 5: affected: yu = 111, yv = 69, ye = (data_new.ye.sum()) [partial_full_dense_block: 0.47, scaled_gaussian, (8, 7), True]
it 6: affected: yu = 120, yv = 76, ye = (data_new.ye.sum()) [partial_full_dense_block: 0.37, scaled_gaussian, (9, 7), True]
it 7: affected: yu = 130, yv = 83, ye = (data_new.ye.sum()) [partial_full_dense_block: 0.65, outside_ci, (10, 7), False]
it 8: affected: yu = 142, y

# Modeling

In [None]:
u_ch = data_new.xu.shape[1]
v_ch = data_new.xv.shape[1]
e_ch = data_new.xe.shape[1]

print(f"Data dimensions: U = {data_new.xu.shape}; V = {data_new.xv.shape}; E = {data_new.xe.shape}")

Data dimensions: U = torch.Size([31289, 6]); V = torch.Size([11604, 6]); E = torch.Size([39604, 5])


In [None]:
"""
Dynamically choose computing device depending on availability of GPUs.
Device repesents the device where the code will run.
"""
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

### GraphBEAN

## Define variables

In [None]:
in_channels = (u_ch, v_ch, e_ch)
hidden_channels = make_tuple(32)
latent_channels = make_tuple(32, 2)
out_channels = make_tuple(32)
edge_pred_latent = 32
n_layers_encoder = 2
n_layers_decoder = 2
n_layers_mlp = 2
dropout_prob = 0.0
lr = 0.01
scheduler_milestone = []
gamma = 0.2

node_self_loop = False
normalize = True
bias=True
input_has_edge_channel = len(in_channels) == 3
output_has_edge_channel = len(out_channels) == 3

## Train Models

In [None]:
print(f"Data dimensions: \n\tU nodes = {data_new.xu.shape}; \n\tV nodes = {data_new.xv.shape}; \n\tE edge = {data_new.xe.shape}")

Data dimensions: 
	U nodes = torch.Size([31289, 6]); 
	V nodes = torch.Size([11604, 6]); 
	E edge = torch.Size([39604, 5])


In [None]:
model = GraphBEAN(
    in_channels=in_channels,
    hidden_channels=hidden_channels,
    latent_channels=latent_channels,
    edge_pred_latent=edge_pred_latent,
    n_layers_encoder=n_layers_encoder,
    n_layers_decoder=n_layers_decoder,
    n_layers_mlp=n_layers_mlp,
    dropout_prob=dropout_prob
)

model

GraphBEAN(
  (encoder_convs): ModuleList(
    (0): BEANConv((6, 6, 5), (32, 32, 32))
    (1): BEANConv((32, 32, 32), (32, 32))
  )
  (decoder_convs): ModuleList(
    (0): BEANConv((32, 32), (32, 32, 32))
    (1): BEANConv((32, 32, 32), (6, 6, 5))
  )
  (u_mlp_layers): ModuleList(
    (0): Linear(32, 32, bias=True)
    (1): Linear(32, 32, bias=True)
  )
  (v_mlp_layers): ModuleList(
    (0): Linear(32, 32, bias=True)
    (1): Linear(32, 32, bias=True)
  )
)

In [None]:
optimizer = torch.optim.Adam(params=model.parameters(), lr=lr)

In [None]:
scheduler = torch.optim.lr_scheduler.MultiStepLR(
    optimizer, milestones=scheduler_milestone, gamma=gamma
)

In [None]:
xu, xv = data_new.xu.to(device), data_new.xv.to(device)
xe, adj = data_new.xe.to(device), data_new.adj.to(device)

## Train Function

In [None]:
model.train()

GraphBEAN(
  (encoder_convs): ModuleList(
    (0): BEANConv((6, 6, 5), (32, 32, 32))
    (1): BEANConv((32, 32, 32), (32, 32))
  )
  (decoder_convs): ModuleList(
    (0): BEANConv((32, 32), (32, 32, 32))
    (1): BEANConv((32, 32, 32), (6, 6, 5))
  )
  (u_mlp_layers): ModuleList(
    (0): Linear(32, 32, bias=True)
    (1): Linear(32, 32, bias=True)
  )
  (v_mlp_layers): ModuleList(
    (0): Linear(32, 32, bias=True)
    (1): Linear(32, 32, bias=True)
  )
)

### Sampler

EdgePredictionSampler class is used to generate samples for edge prediction tasks in a graph for negative sampling. It creates a set of negative samples by randomly selecting edges that don't exist in input adjacency matrix.

The purpose is to **generate negative samples** for edge prediction tasks, e.g. those used in GCN where positive samples are existing edges and negative samples are randomly sampled non-existing edges. **The generated samples are used as negative examples during training (to handle class imbalance that can lead to biases, slow convergence and high computational costs).**

#### Sprand method

In [None]:
# Get size of adj (adjacency matrix)
nu, nv = adj.sparse_sizes()

# random samples to generate = multiple of # of positive samples in adjacency matrix
n_random = 2 * adj.nnz()

row = torch.randint(nu, (n_random,))
col = torch.randint(nv, (n_random,))

In [None]:
storage = SparseStorage(row=row, col=col, sparse_sizes=(nu, nv))
storage = storage.coalesce(reduce="max")

In [None]:
# Generate negative examples using sparse random values and sets values to -1
rnd_samples = SparseTensor.from_storage(storage)
rnd_samples = rnd_samples.fill_value(-1)
rnd_samples = rnd_samples.to(adj.device())

In [None]:
rnd_samples

SparseTensor(row=tensor([    0,     2,     2,  ..., 31288, 31288, 31288]),
             col=tensor([ 2975,  6152,  8638,  ...,   903,  5551, 10455]),
             val=tensor([-1, -1, -1,  ..., -1, -1, -1]),
             size=(31289, 11604), nnz=79202, density=0.02%)

In [None]:
# fill adjacency matrix with value of 2 as positive samples (assumed value for positive samples)
pos_samples = adj.fill_value(2)

In [None]:
pos_samples

SparseTensor(row=tensor([    0,     1,     2,  ..., 31286, 31287, 31288]),
             col=tensor([10661,   788,  1317,  ..., 10271,  9699,  9519]),
             val=tensor([2, 2, 2,  ..., 2, 2, 2]),
             size=(31289, 11604), nnz=39604, density=0.01%)

#### Spadd Method

This function is used to combine random and positive samples by concatenating their row, column and value tensors to create a new sparse tensor.

Then, the values of the combined samples are set to the minimum of {current values, 1}.

So, resulting tensor contains negative samples with -1 values and positive samples with 1 values

In [None]:
assert rnd_samples.sparse_sizes() == pos_samples.sparse_sizes()

In [None]:
m, n = rnd_samples.sparse_sizes()
print(m, n)

31289 11604


In [None]:
# Concatenate sequence of tensors
row = torch.cat([rnd_samples.storage.row(), pos_samples.storage.row()], dim=-1)
col = torch.cat([rnd_samples.storage.col(), pos_samples.storage.col()], dim=-1)
value = torch.cat([rnd_samples.storage.value(), pos_samples.storage.value()], dim=-1)

print(row, col, value)

tensor([    0,     2,     2,  ..., 31286, 31287, 31288]) tensor([ 2975,  6152,  8638,  ..., 10271,  9699,  9519]) tensor([-1, -1, -1,  ...,  2,  2,  2])


In [None]:
storage = SparseStorage(row=row, col=col, value=value, sparse_sizes=(m, n))
storage = storage.coalesce(reduce="add")

In [None]:
samples = SparseTensor.from_storage(storage)

In [None]:
samples

SparseTensor(row=tensor([    0,     0,     1,  ..., 31288, 31288, 31288]),
             col=tensor([ 2975, 10661,   788,  ...,  5551,  9519, 10455]),
             val=tensor([-1,  2,  2,  ..., -1,  2, -1]),
             size=(31289, 11604), nnz=118796, density=0.03%)

#### Set value

In [None]:
samples.storage.value()

tensor([-1,  2,  2,  ..., -1,  2, -1])

In [None]:
torch.ones_like(samples.storage.value())

tensor([1, 1, 1,  ..., 1, 1, 1])

In [None]:
torch.minimum(samples.storage.value(), torch.ones_like(samples.storage.value()))

tensor([-1,  1,  1,  ..., -1,  1, -1])

In [None]:
edge_pred_samples = samples.set_value_(
    # Take the min of tensor values from samples or 1
    torch.minimum(samples.storage.value(), torch.ones_like(samples.storage.value())),
    layout="coo"
)

In [None]:
# Is edge_pred_samples just a sampled/ randomized version of adj?
edge_pred_samples

SparseTensor(row=tensor([    0,     0,     1,  ..., 31288, 31288, 31288]),
             col=tensor([ 2975, 10661,   788,  ...,  5551,  9519, 10455]),
             val=tensor([-1,  1,  1,  ..., -1,  1, -1]),
             size=(31289, 11604), nnz=118796, density=0.03%)

In [None]:
adj

SparseTensor(row=tensor([    0,     1,     2,  ..., 31286, 31287, 31288]),
             col=tensor([10661,   788,  1317,  ..., 10271,  9699,  9519]),
             size=(31289, 11604), nnz=39604, density=0.01%)

### Optimizer

In [None]:
# Set gradients of all optimized tensors in optimizer to 0
optimzer.zero_grad()

### Model Output

In [None]:
out = model(xu, xv, xe, adj, edge_pred_samples)

out

{'xu': tensor([[-1.1299, -0.5371,  1.3822, -0.0695, -0.8118, -0.2574],
         [ 0.3705,  0.2595, -0.6070,  0.7119,  0.0336, -0.0526],
         [-0.4676,  0.0101,  0.5994,  0.1599,  0.0699, -0.2593],
         ...,
         [ 0.3586,  0.0052,  0.3125,  0.4646,  0.3878, -0.2834],
         [-0.4865, -0.0712,  1.5861, -0.1915,  0.6385,  0.5243],
         [-1.1713, -1.2177,  1.6912, -0.6179, -0.4070, -0.8524]],
        grad_fn=<NativeBatchNormBackward0>),
 'xv': tensor([[-0.0701,  0.6556,  0.3961,  0.0722, -1.1396,  0.9028],
         [ 0.5342, -0.3572,  0.3545,  0.4283, -0.4329,  0.7224],
         [ 0.4972,  0.0519,  0.4546,  0.5205, -0.8659,  0.6476],
         ...,
         [ 0.2775, -0.0249,  0.1434,  0.0439,  0.8636,  0.5784],
         [ 0.0469,  0.4745,  0.3947,  0.2023, -1.0996,  0.8779],
         [ 0.6132, -0.3767,  0.7363,  0.9775,  0.9059,  0.0291]],
        grad_fn=<NativeBatchNormBackward0>),
 'xe': tensor([[ 1.0431,  0.4862,  0.7719, -0.9827, -0.3857],
         [ 0.6909, -0.6962

### Loss

In [None]:
xe_loss_weight = 1
structure_loss_weight = 1

In [None]:
# Feature MSE
xu_loss = nn.functional.mse_loss(xu, out["xu"])
xv_loss = nn.functional.mse_loss(xv, out["xv"])
xe_loss = nn.functional.mse_loss(xe, out["xe"])

feature_loss = xu_loss + xv_loss + xe_loss_weight * xe_loss

print(feature_loss)

tensor(6.1822, grad_fn=<AddBackward0>)


In [None]:
# Structure Loss
edge_gt = (edge_pred_samples.storage.value() > 0).float()
structure_loss = nn.functional.binary_cross_entropy(out["eprob"], edge_gt)

print(structure_loss)

tensor(0.7000, grad_fn=<BinaryCrossEntropyBackward0>)


In [None]:
loss = feature_loss + structure_loss_weight * structure_loss

print(loss)

tensor(6.8822, grad_fn=<AddBackward0>)


In [None]:
loss_component = {
    "xu": xu_loss,
    "xv": xv_loss,
    "xe": xe_loss,
    "e": structure_loss,
    "total": loss,
}

print(loss_component)

{'xu': tensor(1.9850, grad_fn=<MseLossBackward0>), 'xv': tensor(2.0911, grad_fn=<MseLossBackward0>), 'xe': tensor(2.1060, grad_fn=<MseLossBackward0>), 'e': tensor(0.7000, grad_fn=<BinaryCrossEntropyBackward0>), 'total': tensor(6.8822, grad_fn=<AddBackward0>)}


### Back Propagation

In [None]:
# Initiate backpropagation in PyTorch to compute gradients with respect to model parameters
loss.backward()

### Update Model Parameters

In [None]:
"""
Call optimzer's step to perform a parameter update to improve model performance.
Optimizer (e.g. stochastic gradient descent, Adam etc.) uses computed gradients to adjust model parameters
in the direction that minimizes loss.
"""
optimzer.step()

In [None]:
"""
Adjust learning rate during training according to the predefined schedule.
Learning rate schedulers are useful for improving training stability and convergence.
"""
scheduler.step()



### Edge_prediction_metric

In [None]:
edge_pred = (out["eprob"] >= 0.5).int().cpu().numpy()

print(edge_pred)

[1 1 0 ... 1 1 1]


In [None]:
edge_gt = (edge_pred_samples.storage.value() > 0).int().cpu().numpy()

print(edge_gt)

[0 1 1 ... 0 1 0]


In [None]:
acc = accuracy_score(edge_gt, edge_pred)
prec = precision_score(edge_gt, edge_pred)
rec = recall_score(edge_gt, edge_pred)
f1 = f1_score(edge_gt, edge_pred)

In [None]:
result = {
    "acc": acc,
    "prec": prec,
    "rec": rec,
    "f1": f1
}

print(result)

{'acc': 0.5030977473989023, 'prec': 0.3364649627908543, 'rec': 0.5045954954045045, 'f1': 0.4037253277843996}


# Evaluate Model

In [None]:
yu = data_new.yu
yv = data_new.yv
ye = data_new.ye

print(f"yu: {yu.shape}, yv: {yv.shape}, ye: {ye.shape}")

yu: torch.Size([31289]), yv: torch.Size([11604]), ye: torch.Size([39604])


In [None]:
# Disable gradient descent
with torch.no_grad():
    out = model(xu, xv, xe, adj, edge_pred_samples)

    # Calculate loss
    xu_loss = nn.functional.mse_loss(xu, out["xu"])
    xv_loss = nn.functional.mse_loss(xv, out["xv"])
    xe_loss = nn.functional.mse_loss(xe, out["xe"])

    feature_loss = xu_loss + xv_loss + xe_loss_weight * xe_loss

    edge_gt = (edge_pred_samples.storage.value() > 0).float()
    structure_loss = nn.functional.binary_cross_entropy(out["eprob"], edge_gt)

    loss = feature_loss + structure_loss_weight * structure_loss

    loss_component = {
        "xu": xu_loss,
        "xv": xv_loss,
        "xe": xe_loss,
        "e": structure_loss,
        "total": loss,
    }

    # edge prediciton metric
    edge_pred = (out["eprob"] >= 0.5).int().cpu().numpy()
    edge_gt = (edge_pred_samples.storage.value() > 0).int().cpu().numpy()

    acc = accuracy_score(edge_gt, edge_pred)
    prec = precision_score(edge_gt, edge_pred)
    rec = recall_score(edge_gt, edge_pred)
    f1 = f1_score(edge_gt, edge_pred)

    epred_metric = {
        "acc": acc,
        "prec": prec,
        "rec": rec,
        "f1": f1
    }

    anomaly_score = compute_anomaly_score(
        xu,
        xv,
        xe,
        adj,
        edge_pred_samples,
        out,
        xe_loss_weight,
        structure_loss_weight,
    )
    eval_metrics = compute_evaluation_metrics(
        anomaly_score, yu, yv, ye, agg="max"
    )

In [None]:
print(
    f"Eval, loss: {loss:.4f}, ",
    f"u auc-roc: {eval_metrics['u_roc_auc']:.4f}, v auc-roc: {eval_metrics['v_roc_auc']:.4f}, e auc-roc: {eval_metrics['e_roc_auc']:.4f},"
    f"u auc-pr: {eval_metrics['u_pr_auc']:.4f}, v auc-pr: {eval_metrics['v_pr_auc']:.4f}, e auc-pr: {eval_metrics['e_pr_auc']:.4f}"
)

Eval, loss: 6.8822,  u auc-roc: 0.9499, v auc-roc: 0.9681, e auc-roc: 0.9331,u auc-pr: 0.0829, v auc-pr: 0.1910, e auc-pr: 0.2238


In [None]:
model_stored = {
    "loss": loss,
    "loss_component": loss_component,
    "epred_metric": epred_metric,
    "eval_metrics": eval_metrics,
#     "loss_hist": loss_hist,
#     "loss_component_hist": loss_component_hist,
#     "epred_metric_hist": epred_metric_hist,
    "state_dict": model.state_dict(),
    "optimizer_state_dict": optimizer.state_dict(),
}

model_stored

{'loss': tensor(6.8822),
 'loss_component': {'xu': tensor(1.9850),
  'xv': tensor(2.0911),
  'xe': tensor(2.1060),
  'e': tensor(0.7000),
  'total': tensor(6.8822)},
 'epred_metric': {'acc': 0.5030977473989023,
  'prec': 0.3364649627908543,
  'rec': 0.5045954954045045,
  'f1': 0.4037253277843996},
 'eval_metrics': {'u_roc_curve': (array([0.00000000e+00, 3.21957502e-05, 2.57566001e-04, ...,
          9.99871217e-01, 9.99967804e-01, 1.00000000e+00]),
   array([0., 0., 0., ..., 1., 1., 1.]),
   array([68.040634 , 67.040634 , 21.329073 , ...,  1.2298568,  1.2289169,
           1.2007246], dtype=float32)),
  'u_pr_curve': (array([0.02964785, 0.02952221, 0.02952985, ..., 0.        , 0.        ,
          1.        ]),
   array([1.        , 0.99563319, 0.99563319, ..., 0.        , 0.        ,
          0.        ]),
   array([ 3.1530578,  3.1530614,  3.1540833, ..., 25.04745  , 66.25704  ,
          67.040634 ], dtype=float32)),
  'u_roc_auc': 0.949856595348628,
  'u_pr_auc': 0.08292987170270

In [None]:
anomaly_score

{'xu_error': tensor([0.9949, 0.9385, 0.7005,  ..., 0.6952, 1.0288, 1.1901]),
 'xv_error': tensor([0.5136, 0.7549, 0.6990,  ..., 0.6257, 0.5189, 0.7509]),
 'xe_error': tensor([1.0124, 0.8771, 0.8788,  ..., 0.6671, 0.5064, 0.8901]),
 'edge_ce': tensor([0.4791, 0.8085, 0.7113,  ..., 0.5795, 0.6261, 0.4625]),
 'e_score': tensor([1.4916, 1.6857, 1.5901,  ..., 1.2466, 1.1326, 1.3526]),
 'u_score_edge_max': tensor([2.4865, 2.6242, 2.2906,  ..., 1.9417, 2.1614, 2.5426]),
 'u_score_edge_mean': tensor([2.4865, 2.6242, 2.2906,  ..., 1.9417, 2.1614, 2.5426]),
 'u_score_edge_sum': tensor([2.4865, 2.6242, 2.2906,  ..., 1.9417, 2.1614, 2.5426]),
 'v_score_edge_max': tensor([2.1359, 2.3901, 2.2753,  ..., 2.1165, 2.1122, 2.5027]),
 'v_score_edge_mean': tensor([2.1359, 2.3901, 2.2753,  ..., 2.1165, 2.1122, 2.5027]),
 'v_score_edge_sum': tensor([2.1359, 2.3901, 2.2753,  ..., 2.1165, 2.1122, 2.5027])}