In [None]:
import numpy as np
from scipy.stats import wasserstein_distance
from itertools import combinations

import matplotlib.pyplot as plt
import torch

In [None]:
import numpy as np
from scipy.stats import wasserstein_distance
from itertools import combinations

import matplotlib.pyplot as plt
import torch
from torch import nn

In [None]:
from sims import *

In [None]:
# Architectures whose saved layer representations are compared in this notebook.
models = ["GCN", "SAGE", "GAT", "CGCN"]

# Dataset to analyse; uncomment one of the alternatives to switch.
dataset_name = 'Amazon'
#dataset_name='Flickr'
#dataset_name='Cora'
#dataset_name='Pubmed'
#dataset_name='Citeseer'

# Root folder of the saved representations. The trailing slash matters:
# later cells build file paths by plain string concatenation onto `path`.
path = f'model_data/{dataset_name}/'

In [None]:
import os

# Latest saved epoch for every "<model>/<task>_<run_id>_" prefix.
# Checkpoints are expected to be named "<task>_<run_id>_<epoch>.npz" inside
# path/<model>/; anything that does not match that pattern is ignored instead
# of aborting the scan with a ValueError.
max_epochs = {}
for model_name in models:
    for file_name in os.listdir(path + model_name):
        stem, _, ext = file_name.rpartition(".")
        if ext != "npz":
            continue
        parts = stem.split("_")
        if len(parts) != 3 or not parts[2].isdigit():
            # Stray .npz file (wrong number of fields or non-numeric epoch).
            continue
        # Distinct local names so the scan does not overwrite the notebook-level
        # `task` variable that other cells read.
        task_name, run_id, epoch_str = parts
        epoch = int(epoch_str)
        identifier = f"{model_name}/{task_name}_{run_id}_"

        # Keep only the largest epoch seen for this model/task/run combination.
        if epoch > max_epochs.get(identifier, -1):
            max_epochs[identifier] = epoch

In [None]:
# Display the latest epoch found for each "<model>/<task>_<run_id>_" prefix.
max_epochs

In [None]:
# Run identifiers and task name exactly as they appear in the checkpoint file
# names ("<task>_<run_id>_<epoch>.npz"); the comparison cells below use them to
# build the lookup keys into `max_epochs`.
run_ids = ["1","2"]
task = "NC"

# Same architecture list as in the configuration cell near the top of the notebook.
models = ['GCN','SAGE','GAT','CGCN']

## Sanity Tests

In [None]:
# Ask PyTorch to release cached GPU memory it is not currently using before
# running the comparisons below.
torch.cuda.empty_cache()

In [None]:
# Sanity test: for every architecture, compare the final-epoch layer outputs of
# two independently trained runs, layer against layer, with CKA, NSA and RTD.
# heatmap_holder[model_name][metric] is a (layers x layers) matrix whose rows
# index the layers of run_ids[0] and whose columns index those of run_ids[1].
heatmap_holder = {}

for model_name in models:
    print("Currently running for: ",model_name)
    heatmap_holder[model_name]={}
    identifier1 = f'{model_name}/{task}_{run_ids[0]}_'
    identifier2 = f'{model_name}/{task}_{run_ids[1]}_'

    fv_path1 = path+identifier1+str(max_epochs[identifier1])
    fv_path2 = path+identifier2+str(max_epochs[identifier2])
    print(fv_path1)
    print(fv_path2)
    # np.load keeps the .npz archive open; copy the arrays out and close it.
    with np.load(fv_path1+".npz") as A_data:
        A = dict(A_data)
    with np.load(fv_path2+".npz") as B_data:
        B = dict(B_data)
    l1 = len(A)
    l2 = len(B)
    if l1 != l2 or l1 == 0:
        # Skip only this model; the remaining models are still processed.
        print(f"Skipping {model_name}: cannot compare {l1} layers against {l2} layers")
        continue
    conv_layers = list(A.keys())
    # Both runs are indexed with the same row subset. The sample sizes are
    # clamped to the number of rows so sampling without replacement cannot fail.
    n_samples = A[conv_layers[0]].shape[0]
    nsa_sample_indices = np.random.choice(n_samples, min(4000, n_samples), replace=False)
    rtd_sample_indices = np.random.choice(n_samples, min(400, n_samples), replace=False)
    cka_heatmap = np.empty((l1,l2))
    rtd_heatmap = np.empty((l1,l2))
    nsa_heatmap = np.empty((l1,l2))
    for i,layer1 in enumerate(conv_layers):
        for j,layer2 in enumerate(conv_layers):
            print("Grid ID: ",i,j)
            X = A[layer1]
            # The layer names of the first run are used to index both archives.
            Y = B[layer2]
            print("Calculating CKA")
            # CKA uses every row; NSA and RTD use the random subsets drawn above.
            cka_heatmap[i,j]=cka(gram_linear(X),gram_linear(Y))
            print("Calculating NSA")
            nsa_heatmap[i,j]=nsa_criterion(torch.tensor(X[nsa_sample_indices],dtype=float), torch.tensor(Y[nsa_sample_indices],dtype=float))
            print(nsa_heatmap[i,j])
            print("Calculating RTD")
            rtd_heatmap[i,j]=rtd(torch.tensor(X[rtd_sample_indices]),torch.tensor(Y[rtd_sample_indices]), n_runs=5)
            print(rtd_heatmap[i,j])
    heatmap_holder[model_name]['CKA']=cka_heatmap
    # NOTE(review): the other test sections store this metric under 'NSA';
    # the key is kept as-is because it ends up in the saved file names.
    heatmap_holder[model_name]['LID_NSA']=nsa_heatmap
    heatmap_holder[model_name]['RTD']=rtd_heatmap

In [None]:
import pickle

# Persist the sanity-test matrices so the plotting cell can be re-run without
# recomputing the metrics.
save_path = f'sanity_test_{dataset_name}_{task}_heatmap_holder.pkl'
with open(save_path, mode='wb') as pkl_out:
    pickle.dump(heatmap_holder, pkl_out)

In [None]:
import pickle

# Reload the sanity-test matrices written by the save cell.
# pickle.load can execute arbitrary code: only open files this notebook produced.
load_path = f'sanity_test_{dataset_name}_{task}_heatmap_holder.pkl'
with open(load_path, mode='rb') as pkl_in:
    heatmap_holder = pickle.load(pkl_in)

#heatmap_holder

In [None]:
import os
import seaborn as sns
import matplotlib.pyplot as plt

# Folder that receives one PNG per (model, metric) pair.
output_directory = f'heatmaps/{dataset_name}/sanity_tests/{task}/'
# Pure-Python replacement for `!mkdir -p`: works outside IPython, on every
# platform, and with directory names that contain spaces.
os.makedirs(output_directory, exist_ok=True)

# The seaborn theme only needs to be applied once, not per figure.
sns.set()

# heatmap_holder maps model name -> metric name -> 2-D matrix.
for model_name, metrics in heatmap_holder.items():
    for metric_name, heatmap_data in metrics.items():
        fig = plt.figure(figsize=(10, 8))  # Adjust the figure size as needed
        display_name = metric_name
        if metric_name == 'CKA':
            # Plot 1 - CKA and label it CKA'.
            heatmap_data = 1 - heatmap_data
            display_name = "CKA'"
        annot_kwargs = {"fontsize": 30}  # Only takes effect if annot is enabled
        ax = sns.heatmap(heatmap_data, annot=False, annot_kws=annot_kwargs, cbar=False)

        plt.title(f'{model_name} - {display_name} - {task} Sanity Test Heatmap', fontsize=23)
        plt.xlabel(f'{model_name} layers', fontsize=30)
        plt.ylabel(f'{model_name} layers', fontsize=30)
        plt.xticks(fontsize=45)
        plt.yticks(fontsize=45)

        # Save the figure and close it so figures do not accumulate in memory.
        output_filename = f'{model_name}_{display_name}_heatmap.png'
        output_path = os.path.join(output_directory, output_filename)
        plt.savefig(output_path, bbox_inches='tight')
        plt.close(fig)

        print(f'Saved: {output_path}')

print('All heatmaps saved successfully.')

## Cross Architecture Tests

In [None]:
# Cross-architecture test: for every unordered pair of architectures, compare
# the final-epoch layer outputs of run_ids[0], layer against layer.
# heatmap_holder["<modelA>_<modelB>"][metric] is a (layers x layers) matrix
# with modelA on the rows and modelB on the columns.
heatmap_holder = {}
n = len(models)

# Visit each pair once: z starts at y+1, so only the strict upper triangle of
# the model-by-model grid is covered.
for y in range(n):
    for z in range(y+1, n):

        identifier1 = f'{models[y]}/{task}_{run_ids[0]}_'
        identifier2 = f'{models[z]}/{task}_{run_ids[0]}_'

        fv_path1 = path+identifier1+str(max_epochs[identifier1])
        fv_path2 = path+identifier2+str(max_epochs[identifier2])
        # np.load keeps the .npz archive open; copy the arrays out and close it.
        with np.load(fv_path1+".npz") as A_data:
            A = dict(A_data)
        with np.load(fv_path2+".npz") as B_data:
            B = dict(B_data)
        model_variant = models[y]+"_"+models[z]
        print(fv_path1)
        print(fv_path2)
        print("Currently running for: ",model_variant)
        heatmap_holder[model_variant]={}
        l1 = len(A)
        l2 = len(B)
        if l1 != l2 or l1 == 0:
            # Skip only this pair; the remaining pairs are still processed.
            print(f"Skipping {model_variant}: cannot compare {l1} layers against {l2} layers")
            continue
        conv_layers = list(A.keys())
        # Both models are indexed with the same row subset. The sample sizes
        # are clamped to the number of rows so sampling without replacement
        # cannot fail.
        n_samples = A[conv_layers[0]].shape[0]
        nsa_sample_indices = np.random.choice(n_samples, min(4000, n_samples), replace=False)
        rtd_sample_indices = np.random.choice(n_samples, min(400, n_samples), replace=False)
        cka_heatmap = np.empty((l1,l2))
        rtd_heatmap = np.empty((l1,l2))
        nsa_heatmap = np.empty((l1,l2))
        for i,layer1 in enumerate(conv_layers):
            for j,layer2 in enumerate(conv_layers):
                print("Grid ID: ",i,j)
                X = A[layer1]
                # NOTE(review): the first model's layer names index both
                # archives, so both are assumed to share key names - confirm.
                Y = B[layer2]
                print("Calculating CKA")
                # CKA uses every row; NSA and RTD use the random subsets.
                cka_heatmap[i,j]=cka(gram_linear(X),gram_linear(Y))
                print("Calculating NSA")
                nsa_heatmap[i,j]=nsa_criterion(torch.tensor(X[nsa_sample_indices],dtype=float), torch.tensor(Y[nsa_sample_indices],dtype=float))
                print("Calculating RTD")
                rtd_heatmap[i,j]=rtd(X[rtd_sample_indices],Y[rtd_sample_indices])
        heatmap_holder[model_variant]['CKA']=cka_heatmap
        heatmap_holder[model_variant]['NSA']=nsa_heatmap
        heatmap_holder[model_variant]['RTD']=rtd_heatmap

In [None]:
import pickle

# Persist the cross-architecture matrices so plotting can be re-run without
# recomputing the metrics.
save_path = f'cross_arch_test_{dataset_name}_{task}_heatmap_holder.pkl'
with open(save_path, mode='wb') as pkl_out:
    pickle.dump(heatmap_holder, pkl_out)

In [None]:
# Reload the cross-architecture matrices written by the save cell.
# pickle.load can execute arbitrary code: only open files this notebook produced.
load_path = f'cross_arch_test_{dataset_name}_{task}_heatmap_holder.pkl'
with open(load_path, mode='rb') as pkl_in:
    heatmap_holder = pickle.load(pkl_in)

# Show the loaded matrices as the cell output.
heatmap_holder

In [None]:
import os
import seaborn as sns
import matplotlib.pyplot as plt

# Folder that receives one PNG per (model pair, metric).
output_directory = f'heatmaps/{dataset_name}/cross_arch_tests/{task}/'
# Pure-Python replacement for `!mkdir -p`: works outside IPython, on every
# platform, and with directory names that contain spaces.
os.makedirs(output_directory, exist_ok=True)

# The seaborn theme only needs to be applied once, not per figure.
sns.set()

# heatmap_holder maps "<model1>_<model2>" -> metric name -> 2-D matrix.
for model_name, metrics in heatmap_holder.items():
    for metric_name, heatmap_data in metrics.items():
        fig = plt.figure(figsize=(10, 8))  # Adjust the figure size as needed
        display_name = metric_name
        if metric_name == 'CKA':
            # Plot 1 - CKA and label it CKA'.
            heatmap_data = 1 - heatmap_data
            display_name = "CKA'"
        annot_kwargs = {"fontsize": 30}  # Font size of the per-cell annotations

        ax = sns.heatmap(heatmap_data, annot=True, annot_kws=annot_kwargs, cbar=False)
        # The holder key is "<model1>_<model2>".
        model1, model2 = model_name.split('_')
        plt.title(f'{model1} vs {model2} - {task} - {display_name} Heatmap', fontsize=23)
        # Rows of the matrix belong to model1, columns to model2.
        plt.xlabel(f'{model2} layers', fontsize=30)
        plt.ylabel(f'{model1} layers', fontsize=30)
        plt.xticks(fontsize=25)
        plt.yticks(fontsize=25)

        # Save the figure and close it so figures do not accumulate in memory.
        output_filename = f'{model_name}_{display_name}_heatmap.png'
        output_path = os.path.join(output_directory, output_filename)
        plt.savefig(output_path, bbox_inches='tight')
        plt.close(fig)

        print(f'Saved: {output_path}')

print('All heatmaps saved successfully.')

## Cross Downstream Task Tests

In [None]:
# Cross-task test: for every architecture, compare the final-epoch layer
# outputs of the tasks[0] run against those of the tasks[1] run (both with
# run_ids[0]), layer against layer.
# heatmap_holder[model][metric] is a (layers x layers) matrix with tasks[0] on
# the rows and tasks[1] on the columns.
heatmap_holder = {}
tasks = ["NC","LP"]

for model in models:
    identifier1 = f'{model}/{tasks[0]}_{run_ids[0]}_'
    identifier2 = f'{model}/{tasks[1]}_{run_ids[0]}_'

    fv_path1 = path+identifier1+str(max_epochs[identifier1])
    fv_path2 = path+identifier2+str(max_epochs[identifier2])
    # np.load keeps the .npz archive open; copy the arrays out and close it.
    with np.load(fv_path1+".npz") as A_data:
        A = dict(A_data)
    with np.load(fv_path2+".npz") as B_data:
        B = dict(B_data)
    print(fv_path1)
    print(fv_path2)
    print("Currently running for: ",model)
    heatmap_holder[model]={}
    l1 = len(A)
    l2 = len(B)
    if l1 != l2 or l1 == 0:
        # Skip only this model; the remaining models are still processed.
        print(f"Skipping {model}: cannot compare {l1} layers against {l2} layers")
        continue
    conv_layers = list(A.keys())
    # Both tasks are indexed with the same row subset. The sample sizes are
    # clamped to the number of rows so sampling without replacement cannot fail.
    n_samples = A[conv_layers[0]].shape[0]
    nsa_sample_indices = np.random.choice(n_samples, min(4000, n_samples), replace=False)
    rtd_sample_indices = np.random.choice(n_samples, min(250, n_samples), replace=False)
    cka_heatmap = np.empty((l1,l2))
    rtd_heatmap = np.empty((l1,l2))
    nsa_heatmap = np.empty((l1,l2))
    for i,layer1 in enumerate(conv_layers):
        for j,layer2 in enumerate(conv_layers):
            print("Grid ID: ",i,j)
            X = A[layer1]
            # The layer names of the tasks[0] archive index both archives.
            Y = B[layer2]
            print("Calculating CKA")
            # CKA uses every row; NSA and RTD use the random subsets.
            cka_heatmap[i,j]=cka(gram_linear(X),gram_linear(Y))
            print("Calculating NSA")
            nsa_heatmap[i,j]=nsa_criterion(torch.tensor(X[nsa_sample_indices],dtype=float), torch.tensor(Y[nsa_sample_indices],dtype=float))
            print("Calculating RTD")
            rtd_heatmap[i,j]=rtd(X[rtd_sample_indices],Y[rtd_sample_indices])
    heatmap_holder[model]['CKA']=cka_heatmap
    heatmap_holder[model]['NSA']=nsa_heatmap
    heatmap_holder[model]['RTD']=rtd_heatmap

In [None]:
import pickle

# Persist the cross-task matrices so plotting can be re-run without
# recomputing the metrics.
save_path = f'cross_task_test_{dataset_name}_heatmap_holder.pkl'
with open(save_path, mode='wb') as pkl_out:
    pickle.dump(heatmap_holder, pkl_out)

In [None]:
# Reload the cross-task matrices written by the save cell.
# pickle.load can execute arbitrary code: only open files this notebook produced.
load_path = f'cross_task_test_{dataset_name}_heatmap_holder.pkl'
with open(load_path, mode='rb') as pkl_in:
    heatmap_holder = pickle.load(pkl_in)

#heatmap_holder

In [None]:
import os
import seaborn as sns
import matplotlib.pyplot as plt

# Task shown on the y axis (first entry) and on the x axis (second entry).
task_list = ["NC","LP"]
# Folder that receives one PNG per (model, metric) pair.
output_directory = f'heatmaps/{dataset_name}/cross_task_tests/'
# Pure-Python replacement for `!mkdir -p`: works outside IPython, on every
# platform, and with directory names that contain spaces.
os.makedirs(output_directory, exist_ok=True)

# The seaborn theme only needs to be applied once, not per figure.
sns.set()

# heatmap_holder maps model name -> metric name -> 2-D matrix.
for model_name, metrics in heatmap_holder.items():
    for metric_name, heatmap_data in metrics.items():
        fig = plt.figure(figsize=(10, 8))  # Adjust the figure size as needed
        display_name = metric_name
        if metric_name == 'CKA':
            # Plot 1 - CKA and label it CKA'.
            heatmap_data = 1 - heatmap_data
            display_name = "CKA'"
        annot_kwargs = {"fontsize": 30}  # Font size of the per-cell annotations

        ax = sns.heatmap(heatmap_data, annot=True, annot_kws=annot_kwargs, cbar=False)
        plt.title(f'{model_name} - {display_name} Cross Task Test Heatmap', fontsize=23)
        # Columns are labelled with the second task, rows with the first.
        plt.xlabel(f'{task_list[1]} Layers', fontsize=30)
        plt.ylabel(f'{task_list[0]} Layers', fontsize=30)
        plt.xticks(fontsize=30)
        plt.yticks(fontsize=30)

        # Save the figure and close it so figures do not accumulate in memory.
        output_filename = f'{model_name}_{display_name}_heatmap.png'
        output_path = os.path.join(output_directory, output_filename)
        plt.savefig(output_path, bbox_inches='tight')
        plt.close(fig)

        print(f'Saved: {output_path}')

print('All heatmaps saved successfully.')

## Convergence Tests

In [None]:
# Convergence test: for every architecture, compare each layer of the
# final-epoch checkpoint of run_ids[0] with the same layer at every 5th epoch
# of that run, using NSA.
# heatmap_holder[model]['NSA'] is a (layers x checkpoints) matrix; column i
# corresponds to epoch 5*(i+1). Cells that could not be computed stay NaN.
heatmap_holder = {}

for model in models:
    identifier = f'{model}/{task}_{run_ids[0]}_'
    final_epoch = max_epochs[identifier]
    heatmap_holder[model]={}

    fv_path1 = path+identifier+str(final_epoch)
    print(fv_path1)
    # np.load keeps the .npz archive open; copy the arrays out and close it.
    with np.load(fv_path1+".npz") as A_data:
        A = dict(A_data)
    conv_layers = list(A.keys())
    print("Currently running for: ",model)
    if not conv_layers:
        print(f"Skipping {model}: {fv_path1}.npz contains no layers")
        continue

    # A checkpoint is expected to exist for every 5th epoch.
    epochs = range(5, final_epoch+1, 5)
    # Size the matrix from the data instead of assuming four layers, and start
    # from NaN so skipped cells never show uninitialised memory.
    heatmap = np.full((len(conv_layers), len(epochs)), np.nan)
    print(heatmap.shape)

    # The same row subset is used for every epoch. It is clamped to the number
    # of rows so sampling without replacement works on small datasets.
    n_samples = A[conv_layers[0]].shape[0]
    nsa_sample_indices = np.random.choice(n_samples, min(4000, n_samples), replace=False)

    filled_any = False
    for i,epoch in enumerate(epochs):
        fv_path2 = path+identifier+str(epoch)
        print(fv_path2, epoch)

        with np.load(fv_path2+".npz") as B_data:
            B = dict(B_data)
        if len(B) != len(A):
            # Leave this column as NaN and keep going with the later epochs.
            print(f"Skipping epoch {epoch}: {len(B)} layers, expected {len(A)}")
            continue
        for j,layer in enumerate(conv_layers):
            print("Grid ID: ",j,i)
            X = A[layer]
            Y = B[layer]
            print("Calculating NSA")
            heatmap[j,i]=nsa_criterion(torch.tensor(X[nsa_sample_indices],dtype=float), torch.tensor(Y[nsa_sample_indices],dtype=float))
        filled_any = True

    # Only publish the matrix if at least one checkpoint was compared.
    if filled_any:
        heatmap_holder[model]['NSA']=heatmap

In [None]:
import os
import seaborn as sns
import matplotlib.pyplot as plt

# Folder that receives one PNG per (model, metric) pair.
output_directory = f'heatmaps/{dataset_name}/convergence_tests/{task}/'
# Pure-Python replacement for `!mkdir -p`: works outside IPython, on every
# platform, and with directory names that contain spaces.
os.makedirs(output_directory, exist_ok=True)

# The seaborn theme only needs to be applied once, not per figure.
sns.set()

# heatmap_holder maps model name -> metric name -> (layers x checkpoints) matrix.
for model_name, metrics in heatmap_holder.items():
    for metric_name, heatmap_data in metrics.items():
        fig = plt.figure(figsize=(20, 8))  # Adjust the figure size as needed
        display_name = metric_name
        if metric_name == 'CKA':
            # Plot 1 - CKA and label it CKA'.
            heatmap_data = 1 - heatmap_data
            display_name = "CKA'"
        annot_kwargs = {"fontsize": 14}  # Only takes effect if annot is enabled
        # The per-task branches both drew the heatmap without annotations
        # (annot=False and the default are equivalent), so one call suffices.
        ax = sns.heatmap(heatmap_data, annot=False, annot_kws=annot_kwargs, cbar=False)
        #plt.title(f'{model_name} - {display_name} - {task} Convergence Test Heatmap', fontsize=23)
        plt.xlabel(f'Epochs', fontsize=30)
        plt.ylabel(f'Layers of {model_name}', fontsize=30)

        # Column i holds the checkpoint saved at epoch 5*(i+1). Label every
        # second column at its cell centre (i + 0.5). Positions are computed
        # from the matrix width rather than read back from seaborn, which may
        # thin its automatic ticks and so break the position/label pairing.
        tick_cols = np.arange(0, heatmap_data.shape[1], 2)
        plt.xticks(tick_cols + 0.5, 5 * (tick_cols + 1))
        plt.xticks(fontsize=30)
        plt.yticks(fontsize=30)

        # Save the figure and close it so figures do not accumulate in memory.
        output_filename = f'{model_name}_{display_name}_heatmap.png'
        output_path = os.path.join(output_directory, output_filename)
        plt.savefig(output_path, bbox_inches='tight')
        plt.close(fig)

        print(f'Saved: {output_path}')

print('All heatmaps saved successfully.')