In [None]:
from time import time
import logging
import os
import os.path as osp
import numpy as np
import time

import torch
import torch.nn.functional as F
from torch_geometric.datasets import TUDataset
from torch_geometric.data import DataLoader
from torch_geometric.utils import degree
from torch.autograd import Variable

import random
from torch.optim.lr_scheduler import StepLR


from utils import stat_graph, split_class_graphs, align_graphs
from utils import two_graphons_mixup, universal_svd
from graphon_estimator import universal_svd
from models import GIN,GCN
from tensorboardX import SummaryWriter

import argparse
# Directory handed to the TensorBoard writer below; it is the empty string here.
logdir=''
# Root logger, configured to let everything from DEBUG upwards through.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
# NOTE(review): this formatter is created but never attached to a handler in the visible code.
formatter = logging.Formatter('%(asctime)s - %(levelname)s: - %(message)s', datefmt='%Y-%m-%d')
# Use the GPU when CUDA is available, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
tensorboard_writer = SummaryWriter(log_dir=logdir)



def prepare_dataset_x(dataset):
    """Attach degree-based node features to graphs that have none.

    If the first graph already carries ``x`` the dataset is returned
    untouched. Otherwise every graph gets ``num_nodes`` set to the largest
    node index in ``edge_index`` plus one, and ``x`` set to either

    * a one-hot encoding of each node's degree, when the largest degree in
      the dataset is below 2000, or
    * a single standardised degree column (mean/std taken over all nodes
      of all graphs) otherwise.

    Args:
        dataset: Indexable, iterable collection of graph objects exposing
            ``x`` and ``edge_index``. Graphs are modified in place.

    Returns:
        The same ``dataset`` object.
    """
    if dataset[0].x is not None:
        return dataset

    # One degree vector per graph, computed once and reused when building x.
    per_graph_degrees = []
    max_degree = 0
    for data in dataset:
        num_nodes = int(torch.max(data.edge_index)) + 1
        data.num_nodes = num_nodes
        # Passing num_nodes keeps one row per node even when the highest
        # node index only ever appears as an edge target.
        deg = degree(data.edge_index[0], num_nodes=num_nodes, dtype=torch.long)
        per_graph_degrees.append(deg)
        max_degree = max(max_degree, deg.max().item())

    if max_degree < 2000:
        for data, deg in zip(dataset, per_graph_degrees):
            data.x = F.one_hot(deg, num_classes=max_degree + 1).to(torch.float)
    else:
        all_degrees = torch.cat(per_graph_degrees, dim=0).to(torch.float)
        mean, std = all_degrees.mean().item(), all_degrees.std().item()
        for data, deg in zip(dataset, per_graph_degrees):
            data.x = ((deg - mean) / std).view(-1, 1)
    return dataset



def prepare_dataset_onehot_y(dataset):
    """Replace every graph's integer label with a one-hot float vector.

    The number of classes is the count of distinct labels found in
    ``dataset``. Each ``y`` is overwritten in place with the first row of
    its one-hot encoding, and the same ``dataset`` object is returned.
    """
    distinct_labels = {int(item.y) for item in dataset}
    class_count = len(distinct_labels)

    for item in dataset:
        encoded = F.one_hot(item.y, num_classes=class_count)
        item.y = encoded.to(torch.float)[0]
    return dataset


def mixup_cross_entropy_loss(input, target, size_average=True):
    r"""Cross entropy against soft (probability-vector) targets.

    Origin: https://github.com/moskomule/mixup.pytorch

    PyTorch's built-in cross entropy expects class indices as targets, so
    a separate loss is needed when the target is a distribution. With q
    the target and p the predicted distribution:

        loss(p, q) = -\sum_i q_i \log p_i

    ``input`` is multiplied by ``target`` as-is, so it must already hold
    log-probabilities (log p), not raw logits or probabilities.

    Args:
        input: Tensor of log-probabilities.
        target: Tensor of target probabilities, same size as ``input``.
        size_average: When true, divide the summed loss by the size of the
            first dimension of ``input``.

    Returns:
        Scalar tensor holding the loss.

    Raises:
        AssertionError: if the sizes differ or either argument is not a tensor.
    """
    assert input.size() == target.size()
    # torch.is_tensor replaces the deprecated autograd.Variable check.
    assert torch.is_tensor(input) and torch.is_tensor(target)
    loss = -torch.sum(input * target)
    return loss / input.size()[0] if size_average else loss




def train(model, train_loader):
    """Run one optimisation pass over ``train_loader``.

    Uses the module-level ``optimizer``, ``device`` and ``num_classes``.
    Labels are reshaped to ``(-1, num_classes)`` and scored with
    ``mixup_cross_entropy_loss``.

    Returns:
        ``(model, loss)`` where ``loss`` is the per-graph average of the
        batch losses.
    """
    model.train()
    weighted_loss_sum = 0
    graphs_seen = 0
    for batch in train_loader:
        batch = batch.to(device)
        optimizer.zero_grad()
        predictions = model(batch.x, batch.edge_index, batch.batch)
        soft_targets = batch.y.view(-1, num_classes)
        batch_loss = mixup_cross_entropy_loss(predictions, soft_targets)
        batch_loss.backward()
        optimizer.step()
        # Weight each batch loss by its graph count so the final figure is a per-graph mean.
        graphs_seen += batch.num_graphs
        weighted_loss_sum += batch_loss.item() * batch.num_graphs
    return model, weighted_loss_sum / graphs_seen


def test(model, loader):
    """Evaluate ``model`` on ``loader``.

    Uses the module-level ``device`` and ``num_classes``. Labels are
    reshaped to ``(-1, num_classes)``; the true class is the argmax of
    that row and the prediction is the argmax of the model output.

    Returns:
        ``(acc, loss)``: fraction of correctly classified graphs and the
        per-graph average of ``mixup_cross_entropy_loss``.
    """
    model.eval()
    correct = 0
    total = 0
    loss = 0
    # Evaluation needs no gradients; skipping autograd avoids building graphs.
    with torch.no_grad():
        for data in loader:
            data = data.to(device)
            output = model(data.x, data.edge_index, data.batch)
            pred = output.max(dim=1)[1]
            y = data.y.view(-1, num_classes)
            loss += mixup_cross_entropy_loss(output, y).item() * data.num_graphs
            y = y.max(dim=1)[1]
            correct += pred.eq(y).sum().item()
            total += data.num_graphs
    acc = correct / total
    loss = loss / total
    return acc, loss

# Load IMDB-BINARY and copy it into a plain Python list so it can be shuffled and sliced.
originaldataset=TUDataset(root="/data",name='IMDB-BINARY')
dataset=list(originaldataset)
# NOTE(review): Python's `random` is not seeded anywhere in the visible code, so this order differs per run.
random.shuffle(dataset)
# Flatten every label tensor to 1-D.
for graph in dataset:
        graph.y = graph.y.view(-1)

#dataset = prepare_dataset_onehot_y(dataset)
# Add degree-based node features when the graphs carry none.
dataset = prepare_dataset_x( dataset )
# Slice boundaries for an 80% / 10% / 10% train / validation / test split.
train_nums = int(len(dataset) * 0.8)
train_val_nums = int(len(dataset) * 0.9)

train_dataset = dataset[:train_nums]
val_dataset = dataset[train_nums:train_val_nums]
test_dataset = dataset[train_val_nums:]
batch_size=32
learning_rate=0.01
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size)
test_loader = DataLoader(test_dataset, batch_size=batch_size)
# Width of the node-feature matrix built above.
num_features = dataset[0].x.shape[1]
# NOTE(review): with the one-hot conversion above commented out, this is the length of the
# flattened label tensor (presumably 1), not the number of classes - confirm.
num_classes = dataset[0].y.shape[0] 

print("Num features",num_features)
print("num_classes",num_classes)
# model = GCN(num_features=num_features, num_classes=num_classes, num_hidden=64)
    


# optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=5e-4)
# scheduler = StepLR(optimizer, step_size=100, gamma=0.5)




In [None]:
# Sanity check: number of graphs in the working list.
print(len(dataset))

In [None]:

# NOTE(review): this seeds torch only; random.shuffle below uses Python's own RNG, which is not seeded here.
torch.manual_seed(12345)
random.shuffle(dataset)

# Fixed split: the first 800 graphs train, the rest test. This overwrites the earlier split and loaders.
train_dataset = dataset[:800]
test_dataset = dataset[len(train_dataset):]

print(f'Number of training graphs: {len(train_dataset)}')
print(f'Number of test graphs: {len(test_dataset)}')

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Print the size and node features of every training batch for inspection.
for step, data in enumerate(train_loader):
    print(f'Step {step + 1}:')
    print('=======')
    print(f'Number of graphs in the current batch: {data.num_graphs}')
    print(data.x)
    print()

In [None]:
import torch
from torch.nn import Linear
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, global_mean_pool

class GCNEncoder(torch.nn.Module):
    """Graph encoder: three GCN layers, mean pooling, batch norm and dropout.

    ``forward`` returns the pooled, normalised representation produced
    from the node features, edge index and batch assignment it is given.
    """

    def __init__(self, inputdim,hidden_channels):
        super().__init__()
        # Re-seed so the layers created below start from the same weights on every construction.
        torch.manual_seed(12345)
        width = hidden_channels
        self.conv1 = GCNConv(inputdim, width)
        self.conv2 = GCNConv(width, width)
        self.conv3 = GCNConv(width, width)
        self.bn = torch.nn.BatchNorm1d(width)
        # Registered but not called in forward(), which applies F.dropout directly.
        self.dropout = torch.nn.Dropout(0.5)
        self.leaky_relu = torch.nn.LeakyReLU(0.2)

    def forward(self, x, edge_index, batch):
        # The first two layers are each followed by LeakyReLU; the third has no activation.
        for conv in (self.conv1, self.conv2):
            x = self.leaky_relu(conv(x, edge_index))
        node_embeddings = self.conv3(x, edge_index)

        graph_embeddings = self.bn(global_mean_pool(node_embeddings, batch))
        return F.dropout(graph_embeddings, p=0.5, training=self.training)

class LinearClassifier(torch.nn.Module):
    """Single linear layer mapping an embedding to per-class logits."""

    def __init__(self, input_dim, num_classes):
        super().__init__()
        self.linear = Linear(input_dim, num_classes)

    def forward(self, x):
        logits = self.linear(x)
        return logits
class CombinedModel(torch.nn.Module):
    """GCN encoder followed by a linear classification head.

    ``forward`` returns a pair ``(embeddings, logits)``, so callers must
    unpack both values.
    """

    def __init__(self,inputdim, hidden_channels, num_classes):
        super().__init__()
        self.encoder = GCNEncoder(inputdim,hidden_channels)
        self.classifier = LinearClassifier(input_dim=hidden_channels, num_classes=num_classes)

    def forward(self, x, edge_index, batch):
        # Encoder output is returned alongside the logits computed from it.
        graph_repr = self.encoder(x, edge_index, batch)
        return graph_repr, self.classifier(graph_repr)
# Build the encoder + classifier; the input width is the node-feature width computed for the dataset.
inputdim=num_features
model=CombinedModel(inputdim, hidden_channels=64,num_classes=2)


In [None]:
from tqdm import tqdm
# inputdim=num_features
# model=CombinedModel(inputdim, hidden_channels=64,num_classes=2)
# Adam over every parameter of the combined model.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
# Cross entropy on raw logits and integer class labels.
criterion = torch.nn.CrossEntropyLoss()

# Add a learning rate scheduler
# (multiplies the learning rate by 0.5 after every 100 scheduler steps)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=100, gamma=0.5)

def train():
    """Train the module-level ``model`` for ``num_epochs`` epochs.

    Relies on the module-level ``model``, ``train_loader``, ``optimizer``,
    ``criterion``, ``scheduler`` and ``num_epochs``. After every epoch the
    scheduler is stepped and the current learning rate is printed together
    with the loss of the last batch.
    """
    model.train()

    # Loop over the same epoch count that the progress line reports.
    for epoch in tqdm(range(num_epochs)):
        for data in train_loader:  # Iterate in batches over the training dataset.
            optimizer.zero_grad()  # Clear gradients left from the previous step.
            _, out = model(data.x, data.edge_index, data.batch)  # Forward pass; the embeddings are not needed here.
            loss = criterion(out, data.y)  # Compute the loss.
            loss.backward()  # Derive gradients.
            optimizer.step()  # Update parameters based on gradients.

        # Update the learning rate scheduler
        scheduler.step()

        # Print the current learning rate every epoch (optional)
        print(f"Epoch {epoch + 1}/{num_epochs}, Learning Rate: {scheduler.get_last_lr()[0]}",loss)

# Set the number of epochs
# Module-level epoch count; train() reads it as a global.
num_epochs = 300

# Call the training loop
train()



In [None]:
def test(model,dataset):
    """Return the fraction of graphs in ``dataset`` that ``model`` classifies correctly.

    Graphs are evaluated one at a time, so ``dataset`` is a sequence of
    single graphs rather than a batching loader. The true label and the
    prediction are printed for every graph. Uses the module-level ``device``.

    Args:
        model: Model whose forward pass returns ``(embeddings, logits)``.
        dataset: Sized iterable of single-graph data objects.

    Returns:
        Number of correct predictions divided by ``len(dataset)``.
    """
    model.eval()
    # Move the model once rather than on every iteration.
    model.to(device)
    acc=0
    with torch.no_grad():
        for data in dataset:
            # Keep the returned object so this works whether .to() moves in place or copies.
            data = data.to(device)
            _, out = model(data.x, data.edge_index, data.batch)
            pred = out.argmax(dim=1)
            print(data.y,pred)
            if(pred==data.y):
                acc+=1
    return acc/len(dataset)

In [None]:
# Accuracy of the trained model on the held-out graphs, evaluated one graph at a time.
acc=test(model,test_dataset)
print(acc)

In [None]:
 # Persist only the weights (state_dict), not the model object itself.
 save_path='/model/imdbbinary.pth'
 torch.save(model.state_dict(), save_path)

In [None]:
# Checkpoint path; the same path the save cell writes to.
load_path = '/model/imdbbinary.pth'



# Initialize the model architecture
# NOTE(review): no model is constructed here; `model` must already exist from an earlier cell.

# Load the saved model weights
model.load_state_dict(torch.load(load_path))
# Switch to inference mode (dropout off, batch norm uses running statistics).
model.eval()

In [None]:
import torch
import numpy as np
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

def plot_confusion_matrix(model, dataset, class_dict):
    """
    Evaluate the model on the provided dataset, compute the confusion matrix,
    and plot it with class names.

    The matrix is always computed over the labels ``0 .. len(class_dict) - 1``,
    so its rows and columns line up with the class names even when a class
    never occurs in ``dataset`` or in the predictions.

    Parameters:
    - model: Trained GNN model whose forward pass returns (embeddings, logits)
    - dataset: List of data objects
    - class_dict: Dictionary mapping class labels to class names, e.g., {0: 'Class A', 1: 'Class B'}.
      Keys must be the consecutive integers starting at 0.
    """

    # Step 1: Evaluate the model and get predictions and true labels
    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for data in dataset:
            print("data.y of newdataset is",data.y)
            _, out = model(data.x, data.edge_index, data.batch)
            pred = out.argmax(dim=1)
            all_preds.append(pred.cpu().numpy())
            all_labels.append(data.y.cpu().numpy())

    all_preds = np.concatenate(all_preds, axis=0)
    all_labels = np.concatenate(all_labels, axis=0)

    # Step 2: Compute the confusion matrix
    # Fixing the label set keeps the matrix shape equal to the number of class names.
    label_ids = list(range(len(class_dict)))
    conf_matrix = confusion_matrix(all_labels, all_preds, labels=label_ids)

    # Step 3: Plot the confusion matrix
    class_names = [class_dict[i] for i in label_ids]

    plt.figure(figsize=(8, 6))
    sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues",
                xticklabels=class_names, yticklabels=class_names)
    plt.xlabel("Predicted")
    plt.ylabel("True")
    plt.title("Confusion Matrix")
    plt.show()

# Example usage:
# Assuming the class labels are {0: 'Mutagenic', 1: 'Non-Mutagenic'}
#class_dict = {0: 'Mutagenic', 1: 'Non-Mutagenic'}

# Example dataset (assuming it's a list of data objects)
# dataset = [...]

# Call the function with the model, dataset (as a list), and class dictionary
#plot_confusion_matrix(model, dataset, class_dict)


In [None]:
# Display names for the two class indices.
# NOTE(review): the index-to-name mapping is taken on trust here - confirm against the dataset description.
imdb_class_dict={0:'Action',1:'Romance'}
# Not used in this cell.
reddit_class_dict={0:'Question-Answer',1:'Discussion'}
# Evaluated on the whole `dataset` list, which includes the graphs used for training.
plot_confusion_matrix(model,dataset,class_dict=imdb_class_dict)

In [None]:
# Relabel every original graph with the model's own prediction and collect the
# graph embeddings per predicted class.
model.eval()
dataset2=list(originaldataset)
# Count of graphs whose prediction matches the original label.
classifieraccuracy=0
random.shuffle(dataset2)
# for graph in dataset2:
#      graph.y = graph.y.view(-1)

# dataset = prepare_dataset_onehot_y(dataset2)
dataset2 = prepare_dataset_x( dataset2 )
num_features = dataset2[0].x.shape[1]
# NOTE(review): this is the length of the label tensor, not the number of classes - confirm intent.
num_classes = dataset2[0].y.shape[0]
print(num_features)
print(num_classes)
#explain_loader= DataLoader(dataset2[:30], batch_size=1, shuffle=True)
newdataset=[]
# Embeddings of graphs predicted as class 0 / class 1 respectively.
latentdata1=[]
latentdata2=[]
model.to('cpu')
# NOTE(review): the forward passes below run with autograd enabled, so the stored embeddings keep their graphs.
for data in dataset2: 
    #data=data.to(device)
    emb,output = model(data.x, data.edge_index,data.batch)
    #print("Output is",output)
    #print(output)
    #output=output.to("cpu")

    pred = output.argmax(dim=1)
    if(pred==data.y):
        classifieraccuracy+=1
    #print(pred)
    # Overwrite the ground-truth label with the predicted class.
    if (pred==0):
        data.y=torch.zeros_like(data.y)
        latentdata1.append(emb)
        #newdataset.append(data)
        
    if (pred==1):
        data.y=torch.ones_like(data.y)
        latentdata2.append(emb)
   
    newdataset.append(data)
print(len(latentdata2))

    #print("pred is",pred)
    #y = data.y.view(-1, num_classes)

In [None]:
# Spot-check the label of one relabelled graph.
testdata=newdataset[9]
print(testdata.y)

In [None]:
# From here on `dataset` refers to the graphs labelled with the model's predictions.
dataset=newdataset
# Group the graphs by label; iterated below as (label, graphs) pairs.
classgraphs=split_class_graphs(dataset)

avg_num_nodes, avg_num_edges, avg_density, median_num_nodes, median_num_edges, median_density = stat_graph(dataset)
# The median node count is passed to align_graphs as N.
resolution = int(median_num_nodes)
#print("resolution is",resolution)
# One (label, graphon) entry per class.
graphons=[]
for label,graphs in classgraphs:
    #print("Label is",label)
    #print("graph is",graphs[0])
    align_graphs_list, normalized_node_degrees, max_num, min_num = align_graphs(
                    graphs, padding=True, N=resolution)
    #print("Aligned adj",align_graphs_list[8].shape,align_graphs_list[56].shape)
    graphon = universal_svd(align_graphs_list, threshold=0.2)
    #print("Graphon is ",graphon.shape)

    graphons.append((label, graphon))
#two_graphons = random.sample(graphons, 2)
print(graphons)
# Fixed pair: the first two class graphons, in the order split_class_graphs returned them.
two_graphons= [graphons[0] , graphons[1]]
print(graphons[0][0], graphons[1][0])
# NOTE(review): the meaning of `la` (mixing weight) is defined in utils.two_graphons_mixup, not visible here.
new_graph = two_graphons_mixup(two_graphons, la=1.0, num_sample=1,show=True)



# ng=two_graphons_mixup(two_graphons,la=1.0,num_sample=1)
# print(new_graph)
# print(ng)


IMDB BINARY

In [None]:

from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
from torch_geometric.nn import GCNConv,GINConv
from torch.distributions import Bernoulli,Categorical
import matplotlib.cm as cmxplt
#print(graphons[1][1])
# Not used in this cell; the imshow calls below rely on automatic colour scaling.
maxval=0.028
# Figure 1: first class graphon as a heat map, with its label printed.
plt.figure(1)
plt.axis('off')
print(graphons[0][0])
plt.imshow(graphons[0][1], cmap="inferno")
# Figure 2: second class graphon.
plt.figure(2)
print(graphons[1][0])
plt.axis('off')
plt.imshow(graphons[1][1], cmap="inferno")


In [None]:

from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
from torch_geometric.nn import GCNConv,GINConv
from torch.distributions import Bernoulli,Categorical
import matplotlib.cm as cmxplt
#print(graphons[1][1])
# Upper end of the shared colour scale, so that both heat maps are directly comparable.
maxval=0.028
# Figure 1: first class graphon, with its label printed.
plt.figure(1)
plt.axis('off')
print(graphons[0][0])
plt.imshow(graphons[0][1], cmap="inferno", vmin=0.00001,vmax=maxval)
# Figure 2: second class graphon on the same colour scale.
plt.figure(2)
print(graphons[1][0])
plt.axis('off')
plt.imshow(graphons[1][1], cmap="inferno", vmin=0.00001,vmax=maxval)

In [None]:
#groundtruthset=list(originaldataset)
from torch_geometric.utils import to_networkx
import networkx as nx
# Draw the first graph with label 1 found in a shuffled copy of the original dataset.
groundtruthset=list(originaldataset)
random.shuffle(groundtruthset)
#print(groundtruthset)
for i in range(len(groundtruthset)):
    data=groundtruthset[i]
    # NOTE(review): the variable is named class0graph although the condition selects label 1.
    if(data.y==1):
        class0graph=to_networkx(data,to_undirected=True)
        # Drop self-loops before drawing.
        class0graph.remove_edges_from(nx.selfloop_edges(class0graph))
        plt.figure(1)
        nx.draw_networkx(class0graph, node_size=15, node_color='lightblue',with_labels=False)
        break
        

In [None]:
# Collects one density value per run of the explanation cell that appends to it.
Density=[]

In [None]:
#new_graph=[]
import time
import networkx as nx
from torch_geometric.utils import to_networkx
# Sample 10 graphs from the graphon pair at la=0.0, keep the one the model scores
# highest for class 1, and draw it.
new_graph = two_graphons_mixup(two_graphons, la=0.0, num_sample=10)
print("Label of new graph is",new_graph[0].y)
dataset3=list(originaldataset)

# Build features for original + sampled graphs together, then keep only the sampled ones.
newlist=list(originaldataset)+new_graph
# print(len(newlist))
# print(newlist[-1].x)
newlist=prepare_dataset_x(newlist)
explainergraph=newlist[len(dataset3):]
count=0
# print(explainergraph[0].x)
#print(new_graph[0].x,new_graph[0].y,new_graph[0].edge_index)
#max=0
# Highest class-1 probability seen so far.
maxprob=0
start_time=time.time()
for data in explainergraph:
    _,targetoutput=model(data.x,data.edge_index,None)
    #print(targetoutput)
    soft=torch.nn.Softmax(dim=1)
    problitites=soft(targetoutput)
    #print(problitites)
    targetpred = targetoutput.max(dim=1)[1]
    
    #print("Probabilities are", problitites[0][0])
    if (maxprob<problitites[0][1]):
        maxprob= problitites[0][1]
        #print(problitites)
        bestdata=data
        bestprob=maxprob

    

        print("Label of explainer graph is",targetpred)


# NOTE(review): bestdata/bestprob are only bound if some graph had class-1 probability above 0.
examplegraph=to_networkx(bestdata,to_undirected=True)
examplegraph.remove_edges_from(nx.selfloop_edges(examplegraph))
numnodes=examplegraph.number_of_nodes()
numedges=examplegraph.number_of_edges()
# Density as computed here: edges divided by the squared node count.
density=numedges/(numnodes*numnodes)
Density.append(density)
print("density is",numedges/(numnodes*numnodes))
print("best probability is",bestprob)
# Remove connected components with fewer than 7 nodes before drawing.
for component in list(nx.connected_components(examplegraph)):
    if len(component)<7:
        for node in component:
            examplegraph.remove_node(node)
# NOTE(review): `pos` is computed but not passed to draw_networkx below.
pos = nx.spring_layout(examplegraph, scale=20.0)
plt.figure(25)
nx.draw_networkx(examplegraph, node_size=15, node_color='lightblue',with_labels=False)
endtime=time.time()
executiontime=endtime-start_time
print("executiontime is",executiontime)
#newlist=[]




In [None]:
# Mean and standard deviation of the densities collected so far.
meandensity=np.mean(Density)
stddensity=np.std(Density)
print(meandensity,stddensity)

In [None]:
# Filled by the boundary-search cell with the embedding of each selected boundary graph.
boundaryembeddings=[]

In [None]:
# Sweep la from 0.4 in steps of 0.05. For each value, sample 500 graphs and keep the one
# whose class-1 probability is closest to 0.5.
# NOTE(review): la is advanced by repeated float addition, so the `la<=0.6` test may
# exclude the intended last value - confirm.
la= 0.4
accuracybound=[]
stdbound=[]
lalist=[]

while(la<=0.6):
    # NOTE(review): `ratio` is not used in this cell.
    ratio= la/(1-la)

    
    boundary_graph = two_graphons_mixup(two_graphons, la=la, num_sample=500)
    # Build features together with the original graphs, then keep only the sampled ones.
    boundary_graph=list(originaldataset)+boundary_graph
    boundary_graph=prepare_dataset_x(boundary_graph)
    boundary_graph=boundary_graph[len(originaldataset):]
    #print(len(boundary_graph))
    #boundary_graph= assign_same_features_to_data(boundary_graph,Alignedfeatures[0])
    #print("Label of new graph is",torch.argmax(new_graph[1].y,dim=-1))
    label=torch.argmax(boundary_graph[1].y,dim=-1)
    print(label)
    #from torch_geometric.utils import to_networkx
    
    boundaryaccuracy=[]
    # NOTE(review): every pass scans the same boundary_graph list with the same model;
    # whether the 100 repetitions can differ depends on the model's train/eval mode - confirm.
    for numexp in range(100):
        # Smallest |P(class 1) - 0.5| seen so far.
        min1=1
        for data in boundary_graph:
            num_nodes = int( torch.max(data.edge_index) ) + 1
            #new_graph= assign_same_features_to_data(new_graph,Alignedfeatures[0])
            #data.x= torch.ones(num_nodes,1)
            embedding,out=model(data.x,data.edge_index,data.batch)
            soft=torch.nn.Softmax(dim=1)
            problities=soft(out)
            #print(problities)
            
            
            if( abs(problities[0][1]-0.5)<min1):
                #print("if")
                min1= abs(problities[0][1]-0.5)
                boundaryprobs = problities[0][1]
                latentboundary=embedding
                bestdata=data
        #plotmutag2(bestdata)
        #print(boundaryprobs)
        boundaryaccuracy.append(boundaryprobs)
        boundaryembeddings.append(latentboundary)
    # Mean and standard deviation of the selected probabilities for this la.
    boundaryaccuracy=torch.stack(boundaryaccuracy)
    accuracybound.append(boundaryaccuracy.mean(dim=0))
    stdbound.append(boundaryaccuracy.std(dim=0))
    lalist.append(la)
    la=la+0.05
    

    
        
        
  
    #print("Label of new graph is",new_graph[1].y)



# ng=two_graphons_mixup(two_graphons,la=1.0,num_sample=1)

In [None]:
# Mean selected boundary probability for each la value of the sweep.
print(accuracybound)


In [None]:
# Computing boundary metrics
import torch
import torch.nn.functional as F

def boundary_margin(embeddings_c1, embeddings_c2):
    """
    Smallest distance between position-matched embedding pairs.

    Args:
    - embeddings_c1 (sequence of torch.Tensor): per-graph embeddings of one class,
      concatenated along dim 0.
    - embeddings_c2 (sequence of torch.Tensor): per-graph embeddings of the boundary
      graphs, concatenated along dim 0.

    After concatenation both stacks must be compatible for subtraction. Row i of the
    first stack is compared with row i of the second, not with every row.

    Returns:
    - margin (float): The minimum row-wise Euclidean distance.
    """
    class_rows = torch.cat(embeddings_c1, dim=0)
    boundary_rows = torch.cat(embeddings_c2, dim=0)
    row_gaps = torch.norm(class_rows - boundary_rows, dim=1)
    return torch.min(row_gaps).item()

def boundary_thickness(embeddings_c1, embeddings_c1_c2, model, c1, c2, gamma=0.75, num_points=100):
    """
    Average boundary thickness over matched embedding pairs.

    For each pair, ``num_points`` evenly spaced points on the straight segment from
    the class embedding to the boundary embedding are passed through ``model``
    (a classifier returning logits). The share of the segment on which
    ``P(c1) - P(c2)`` stays below ``gamma`` is integrated with the trapezoidal
    rule and multiplied by the segment length.

    Returns:
    - float: mean of the per-pair thickness values.
    """
    per_pair = []

    for start, end in zip(embeddings_c1, embeddings_c1_c2):
        steps = torch.linspace(0, 1, num_points)
        weights = steps.unsqueeze(1)
        # Points interpolated between the two embeddings, one row per step.
        path = (1 - weights) * start + weights * end

        class_probs = F.softmax(model(path), dim=1)
        gap = class_probs[:, c1] - class_probs[:, c2]

        # 1.0 where the probability gap is still under gamma, 0.0 elsewhere.
        below_gamma = (gamma > gap).float()
        covered = torch.trapz(below_gamma, steps)

        segment_length = (start - end).norm()
        per_pair.append((segment_length * covered.mean()).item())

    return sum(per_pair) / len(per_pair)

# def boundary_complexity(embeddings, D):
#     """
#     Compute the boundary complexity.
    
#     Args:
#     - embeddings (torch.Tensor): Embeddings of the boundary graphs with shape (num_graphs, embedding_dim).
#     - D (int): Dimensionality of the embeddings.
    
#     Returns:
#     - complexity (float): The boundary complexity.
#     """
#     # Compute the covariance matrix of the embeddings
#     embeddings=torch.cat(embeddings,dim=0)
#     covariance_matrix = torch.cov(embeddings.T)
    
#     # Compute the eigenvalues of the covariance matrix
#     eigenvalues = torch.linalg.eigvalsh(covariance_matrix)
#     print(eigenvalues)
    
#     # Normalize the eigenvalues
#     eigenvalues_normalized = eigenvalues / eigenvalues.sum()
#     print(eigenvalues_normalized)
    
#     # Compute the entropy of the normalized eigenvalues
#     entropy = -torch.sum(eigenvalues_normalized * torch.log(eigenvalues_normalized + 1e-7))
#     print(entropy)
    
#     # Normalize the entropy by dividing it by log(D)
#     complexity = entropy / torch.log(torch.tensor(D, dtype=torch.float32))
    
#     return complexity.item()
def boundary_complexity(embeddings, D, epsilon=1e-7):
    """
    Normalised spectral entropy of the embedding covariance.

    Args:
    - embeddings (sequence of torch.Tensor): per-graph embeddings, concatenated
      along dim 0 into a (num_graphs, embedding_dim) matrix.
    - D (int): Value whose logarithm normalises the entropy (the embedding
      dimensionality).
    - epsilon (float): Added to the covariance diagonal and inside the logarithm,
      and used as the lower clamp for the eigenvalues.

    Returns:
    - complexity (float): Entropy of the normalised eigenvalues divided by log(D).
    """
    stacked = torch.cat(embeddings, dim=0)

    # Covariance across embedding dimensions, regularised on the diagonal.
    cov = torch.cov(stacked.T)
    cov = cov + epsilon * torch.eye(cov.size(0))

    # Eigenvalues of the symmetric matrix, floored at epsilon to absorb numerical noise.
    spectrum = torch.clamp(torch.linalg.eigvalsh(cov), min=epsilon)
    weights = spectrum / spectrum.sum()

    spectral_entropy = -torch.sum(weights * torch.log(weights + epsilon))
    log_dim = torch.log(torch.tensor(D, dtype=torch.float32))
    return (spectral_entropy / log_dim).item()

In [None]:
# Boundary metrics are computed against the embeddings of graphs predicted as class 1.
latentclass=latentdata2
# Truncate the class embeddings to the number of boundary embeddings so the two lists pair up.
margin=boundary_margin(latentclass[:len(boundaryembeddings)],boundaryembeddings)
print("margin is",margin)
# Only the linear head is needed: boundary_thickness feeds it interpolated embeddings.
classifier=model.classifier

thickness=boundary_thickness(latentclass[:len(boundaryembeddings)] ,boundaryembeddings,classifier,0,1)
print(thickness)
print("thickness is",thickness)
# 64 matches the hidden_channels value the model was built with.
complexity=boundary_complexity(boundaryembeddings,64)
print("complexity is",complexity)

In [None]:
# Sweep la from 0.3 in steps of 0.05. For each value run 10 experiments, each sampling
# 10 graphs and keeping the candidate whose class-0 probability is closest to 0.5;
# then draw the last selected graph and record mean/std of the selected probabilities.
la= 0.3
accuracybound=[]
stdbound=[]
lalist=[]
# Figure number for the example graph drawn at each la value.
count=100
# Softmax over the class dimension; stateless, so one instance serves every graph.
soft=torch.nn.Softmax(dim=1)
# NOTE(review): la is advanced by repeated float addition, so the `la<=0.7` test may
# exclude the intended last value - confirm.
while(la<=0.7):
    ratio= la/(1-la)

    boundaryaccuracy=[]
    for numexp in range(10):
        # Smallest |P(class 0) - 0.5| seen so far (named so it does not shadow the builtin min).
        min_gap=1
        boundary_graph = two_graphons_mixup(two_graphons, la=la, num_sample=10)
        # NOTE(review): unlike the other sampling cells, the original graphs are not sliced
        # off after feature construction, so they are candidates too - confirm this is intended.
        boundary_graph=list(originaldataset)+boundary_graph
        boundary_graph=prepare_dataset_x(boundary_graph)
        for data in boundary_graph:
            # The model returns (embeddings, logits); only the logits are scored.
            _,out=model(data.x,data.edge_index,None)
            problities=soft(out)
            if( abs(problities[0][0]-0.5)<min_gap):
                min_gap= abs(problities[0][0]-0.5)
                boundaryprobs = problities[0][0]

                bestdata=data
        print("boundaryaccuracy being appended is",boundaryprobs )
        boundaryaccuracy.append(boundaryprobs)
    # Draw the graph selected in the last experiment for this la.
    examplegraph=to_networkx(bestdata,to_undirected=True)
    examplegraph.remove_edges_from(nx.selfloop_edges(examplegraph))
    plt.figure(count)
    nx.draw_networkx(examplegraph, node_size=20, node_color='lightblue',with_labels=False)
    boundaryaccuracy=torch.stack(boundaryaccuracy)
    accuracybound.append(boundaryaccuracy.mean(dim=0))
    print("Mean is",boundaryaccuracy.mean(dim=0))
    print("Std is",boundaryaccuracy.std(dim=0))
    stdbound.append(boundaryaccuracy.std(dim=0))
    lalist.append(la)
    la=la+0.05
    count=count+1
    

    
        
        
  
    #print("Label of new graph is",new_graph[1].y)



# ng=two_graphons_mixup(two_graphons,la=1.0,num_sample=1)

In [None]:
# print(accuracybound)
# print(stdbound)
#stdbound2=[torch.zeros_like(stdbound[i]) for i in range(len(stdbound))]
# NOTE(review): plot_mean_with_error is defined in a later cell of this file; that cell must be run first.
plot_mean_with_error(accuracybound,stdbound,lalist)

In [None]:
#new_graph=[]
import networkx as nx
from torch_geometric.utils import to_networkx
# Sample 100 graphs at la=0.5 and draw the one whose class-1 probability is closest to 0.5.
new_graph = two_graphons_mixup(two_graphons, la=0.5, num_sample=100)
print("Label of new graph is",new_graph[0].y)
dataset3=list(originaldataset)

# Build features for original + sampled graphs together, then keep only the sampled ones.
newlist=list(originaldataset)+new_graph
newlist=prepare_dataset_x(newlist)
explainergraph=newlist[len(dataset3):]
count=0
# Class-1 probability of the best candidate so far; 0 puts the initial gap at 0.5.
maxprob=0
# Softmax over the class dimension; stateless, so one instance serves every graph.
soft=torch.nn.Softmax(dim=1)
for data in explainergraph:
    # The model returns (embeddings, logits); only the logits are scored.
    _,targetoutput=model(data.x,data.edge_index,None)
    problitites=soft(targetoutput)

    if (abs(maxprob -  0.5)>abs(problitites[0][1] - 0.5)):
        maxprob= problitites[0][1]
        bestdata=data
        print(problitites)

    targetpred = targetoutput.max(dim=1)[1]
examplegraph=to_networkx(bestdata,to_undirected=True)
plt.figure(1)
nx.draw_networkx(examplegraph, node_size=20, node_color='lightblue',with_labels=False)
#newlist=[]




In [None]:
import torch

def convert_to_one_hot(data_list, num_classes=2):
    """
    Rewrite each object's ``y`` labels as one-hot float rows, in place.

    Args:
        data_list (list): Objects exposing a ``y`` tensor of class indices.
        num_classes (int): Width of the one-hot rows. Default is 2.

    Returns:
        list: The same ``data_list``; every ``y`` now has shape
        ``(len(y), num_classes)``.
    """
    for graph in data_list:
        # Column index to switch on in each row.
        class_index = graph.y.view(-1, 1).long()
        encoded = torch.zeros((len(graph.y), num_classes)).scatter_(1, class_index, 1)
        graph.y = encoded.float()

    return data_list


In [None]:
#explain_loader= DataLoader(dataset2[:30], batch_size=1, shuffle=True)
#newdataset=dataset

# Sweep the graphon resolution over 10 consecutive values. For each value, re-estimate
# the class graphons, sample graphs from each end of the mixup, and record the best
# class probability the model assigns to them.
classgraphs=split_class_graphs(newdataset)
avg_num_nodes, avg_num_edges, avg_density, median_num_nodes, median_num_edges, median_density = stat_graph(newdataset)
resolution = int(median_num_nodes)-10 # This parameter controls the number of nodes in the generated explanations
mean_accuracy1=[]
std_accuracy1=[]
mean_accuracy2=[]
std_accuracy2=[]
ExplanationNodes=[]
# Softmax over the class dimension; stateless, so one instance serves every graph.
soft=torch.nn.Softmax(dim=1)
# Number of original graphs prepended for feature construction and sliced off afterwards.
num_original=len(originaldataset)

for i in range(10):

    stddataset=list(originaldataset)
    graphons=[]
    for label,graphs in classgraphs:
        align_graphs_list, normalized_node_degrees, max_num, min_num = align_graphs(
                        graphs, padding=True, N=resolution)
        graphon = universal_svd(align_graphs_list, threshold=0.2)

        graphons.append((label, graphon))
    two_graphons= [graphons[0] , graphons[1]]
    explainer_graph1 = two_graphons_mixup(two_graphons, la=0.0, num_sample=250)
    explainer_graph2 = two_graphons_mixup(two_graphons,la=1.0, num_sample=5)
    explainer_graph1=convert_to_one_hot(explainer_graph1)
    explainer_graph2=convert_to_one_hot(explainer_graph2)

    # Class index each sample set is scored against, read from its first graph's label.
    label1=torch.argmax(explainer_graph1[0].y,dim=-1)

    label2=torch.argmax(explainer_graph2[0].y,dim=-1)

    # Build features together with the original graphs, then keep only the sampled ones.
    explainer_graph1=list(originaldataset)+explainer_graph1
    explainer_graph2=list(originaldataset)+explainer_graph2
    explainer_graph1=prepare_dataset_x(explainer_graph1)
    explainer_graph2=prepare_dataset_x(explainer_graph2)

    explainer_graph1=explainer_graph1[num_original:]
    explainer_graph2=explainer_graph2[num_original:]

    accuracy1=[]
    accuracy2=[]

    # NOTE(review): every pass scores the same sampled graphs with the same model; whether
    # the 10 repetitions can differ depends on the model's train/eval mode - confirm.
    for numexplanations in range(10):
        max1=0
        max2=0
        for data in explainer_graph1:
            # The model returns (embeddings, logits); only the logits are scored.
            _,out=model(data.x,data.edge_index,None)

            problities=soft(out)
            print("Probabilities are",problities)
            if(max1<problities[0][label1]):
                max1= problities[0][label1]

        for data in explainer_graph2:
            _,out=model(data.x,data.edge_index,None)
            problities=soft(out)
            if (max2<problities[0][label2]):
                max2= problities[0][label2]
        accuracy1.append(max1)
        accuracy2.append(max2)
    accuracy1=torch.stack(accuracy1)
    accuracy2=torch.stack(accuracy2)
    mean1=accuracy1.mean(dim=0)
    mean2=accuracy2.mean(dim=0)
    std1=accuracy1.std(dim=0)
    std2=accuracy2.std(dim=0)
    mean_accuracy1.append(mean1)
    mean_accuracy2.append(mean2)
    std_accuracy1.append(std1)
    std_accuracy2.append(std2)
    ExplanationNodes.append(resolution)
    resolution= resolution+1
                
            
  
            

In [None]:
# Results of the resolution sweep: target class of the first sample set, per-resolution
# means for both sample sets, and the first set's standard deviations.
print(label1)
print(mean_accuracy1)
print(mean_accuracy2)
print(std_accuracy1)

In [None]:
# Per-resolution standard deviations for the second sample set.
print(std_accuracy2)

In [None]:
# Best class score against resolution for the first sample set.
# NOTE(review): plot_mean_with_error is defined in a later cell of this file; that cell must be run first.
plot_mean_with_error(mean_accuracy1,std_accuracy1,ExplanationNodes)
print(label1)

In [None]:
# Best class score against resolution for the second sample set.
plot_mean_with_error(mean_accuracy2,std_accuracy2,ExplanationNodes)

In [None]:
import matplotlib.pyplot as plt
import numpy as np

def plot_mean_with_error(mean, std, threshold,title=None, ax=None,
                         xlabel='Lambda', ylabel='Mean Class Score'):
    """
    Plot mean with error bars.

    Parameters:
        mean (array_like): Mean values (numbers or scalar tensors); plotted on the y-axis.
        std (array_like): Standard deviations, same length as ``mean``; drawn as error bars.
        threshold (array_like): x-axis values, same length as ``mean``.
        title (str, optional): Axes title.
        ax (matplotlib.axes.Axes, optional): Axes object to plot on. If not provided, a new figure will be created.
        xlabel (str): x-axis label. Defaults to 'Lambda'; override when the
            x values are something else, e.g. a node count.
        ylabel (str): y-axis label.
    """
    # Convert through torch first so that lists of scalar tensors are accepted,
    # then flatten everything to 1-D numpy arrays.
    mean = np.array(torch.tensor(mean, dtype=torch.float32)).flatten()
    std = np.array(torch.tensor(std, dtype=torch.float32)).flatten()
    threshold = np.array(threshold).flatten()
    print("mean, std , threshold", mean,std,threshold)

    # Plotting
    if ax is None:
        fig, ax = plt.subplots()
    ax.errorbar(threshold, mean, yerr=std, fmt='-')  # '-' for line

    # Adding labels and title
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.set_title(title)

    # ax.legend(loc='lower right',fontsize='small')  # Show legend
    # ax.grid(True)  # Add grid
# # Create a figure outside the function
# fig, ax = plt.subplots()
# plot_mean_with_error(Mean1,Std1,Threshold,label='class1',numsample=1,ax=ax)
# plot_mean_with_error(Mean2,Std2,Threshold,label='class1',numsample=2,ax=ax)
# plt.show()


In [None]:
# Number of graphs in the untouched original dataset.
print(len(list(originaldataset)))