In [1]:
import os
import pickle as pkl

import numpy as np
import torch
from tqdm import tqdm

from convolutional_model import *
from utils import *
# Kept after the star imports on purpose so that `plt` cannot be shadowed by them.
from matplotlib import pyplot as plt

In [2]:
# Default to the CPU and switch to the GPU only when torch reports a usable CUDA device.
device = 'cpu'
if torch.cuda.is_available():
    device = 'cuda'
device

'cuda'

In [3]:
# Train (or reload from cache) one convolutional model for every combination of
# dataset, target matrix (kernel vs. distance) and training mode (supervised vs.
# unsupervised), then write three diagnostic plots per run to ../plots:
#   * train / test loss per epoch,
#   * test-sample R^2 per epoch,
#   * predicted vs. true kernel values for a random subsample of test pairs.
# A run counts as cached when its results pickle exists; in that case the saved model
# is loaded instead of being retrained.
#
# SECURITY NOTE: pickle.load / torch.load execute arbitrary code embedded in the file.
# Only point this cell at files you produced yourself.

N_SCATTER_PAIRS = 10_000  # upper bound on the number of test pairs drawn in the scatter plot
SCATTER_SEED = 0          # fixed seed so the scatter subsample is identical on every re-run
LOSS_YLIM_EPOCH = 100     # history position whose loss sets the top of the loss plot
R2_YLIM_EPOCH = 999       # history position whose R^2 sets the bottom of the R^2 plot

cmap = plt.cm.magma  # define the colormap
cmaplist = [cmap(i) for i in range(cmap.N)]  # extract all colors from the colormap
rho = "$\\delta$"  # symbol used in plot titles when the target is the distance matrix

# Create the output directories up front so a missing folder cannot throw away a
# finished training run at save time.
for out_dir in ('results', 'models', '../plots'):
    os.makedirs(out_dir, exist_ok=True)


def _history_value(history, index):
    """Return history[index], or the last entry when the history is shorter than that."""
    return history[min(index, len(history) - 1)]


def _start_figure(title, xlabel, ylabel):
    """Open a 12x8 figure with font size 13 and the given title / axis labels."""
    # rcParams must be updated before the axes exist, otherwise tick labels keep the old size.
    plt.rcParams.update({'font.size': 13})
    fig, ax = plt.subplots(figsize=(12, 8))
    ax.set_title(title)
    ax.set_ylabel(ylabel)
    ax.set_xlabel(xlabel)
    return fig, ax


def _save_figure(fig, path):
    """Write the figure as a 300-dpi PNG and close it so figures do not pile up in memory."""
    fig.savefig(path, format='png', dpi=300)
    # plt.show()
    plt.close(fig)


for dataset in ['8_10000_25','16_10000_15']:
    for distance, iterated in [(False,False), (True,False)]:
        for supervised in [True, False]:
            # dataset_string is assumed to be a pure name builder, so it is evaluated once per run.
            run_name = dataset_string(dataset,distance,iterated)
            results_path = f'results/convolutional_training_results_{run_name}{"" if supervised else "_unsupervised"}.pkl'
            model_path = f'models/model_{run_name}{"" if supervised else "_unsupervised"}.pt'
            mercer_path = f'../data/mercer_{run_name}.pkl'
            plot_suffix = f'{run_name}_{"" if supervised else "un"}supervised'
            title_suffix = f'convolutional model, {"" if supervised else "un"}supervised, for {rho if distance else "$K$"} on {DATASET_MAP[dataset]}'
            print(results_path)

            if not os.path.isfile(results_path):
                print('loading data')
                with open(f'../data/graphs_{dataset}.pkl','rb') as f:
                    graphs = pkl.load(f)
                with open(mercer_path,'rb') as f:
                    mercer_pos,mercer_neg = pkl.load(f)
                with open(f'../data/{"distance" if distance else "kernel"}_matrix_{dataset}.pkl','rb') as f:
                    kernel_matrix = pkl.load(f)

                print('preparing data')
                # Regression targets: positive and negative Mercer features side by side.
                # squeeze(1) assumes the stored arrays carry a singleton axis 1 -- TODO confirm.
                targets = np.concatenate([mercer_pos,mercer_neg],axis=-1)
                targets = torch.tensor(targets.squeeze(1),dtype=torch.float32).to(device)
                print(targets.shape)
                # One dense Laplacian per graph, stacked along a new leading axis.
                laplacians = [laplacian_from_adjacency(np.asarray(graph.todense())) for graph in graphs]
                data = torch.tensor(np.stack(laplacians,axis=0),dtype=torch.float32).to(device)
                kernel_matrix = torch.tensor(kernel_matrix,dtype=torch.float32).to(device)

                print('initializing model')
                model = ConvolutionalModel(graph_size=graphs[0].shape[0],embedding_size=targets.shape[1], device=device)
                print('model parameters:',sum(p.numel() for p in model.parameters()))
                # The fit functions receive the raw tensor together with the model's prepared version.
                data = [data, model.prepare_data(data)]
                if supervised:
                    results, test_data, test_kernel = fit_supervised(model,data,targets,kernel_matrix,mercer_pos.shape[-1],return_test=True)
                else:
                    results, test_data, test_kernel = fit_unsupervised(model,data,kernel_matrix,return_test=True)

                with open(results_path,'wb') as f:
                    pkl.dump((results,test_data,test_kernel),f)
                torch.save(model,model_path)
            else:
                with open(mercer_path,'rb') as f:
                    mercer_pos,mercer_neg = pkl.load(f)
                with open(results_path,'rb') as f:
                    results, test_data, test_kernel = pkl.load(f)
                # map_location lets a checkpoint written on a GPU machine be reloaded on a
                # CPU-only one (and vice versa).
                # NOTE(review): recent PyTorch releases default torch.load to weights_only=True,
                # which rejects fully pickled modules; pass weights_only=False there if needed.
                model = torch.load(model_path, map_location=device)
                print('model parameters:',sum(p.numel() for p in model.parameters()))

            # results is a sequence of (epoch, loss, test_loss, r_squared) records.
            epoch, loss, test_loss, r_squared = zip(*results)

            # --- loss per epoch ---------------------------------------------------------
            fig, ax = _start_figure(f'Loss per epoch - {title_suffix}', f'epoch', 'loss')
            ax.plot(epoch,loss,label='loss', lw=3, color=cmaplist[64])
            ax.plot(epoch,test_loss,label='test loss',lw=3,color=cmaplist[196])
            fig.tight_layout()
            ax.legend()
            # Cut off the initial loss spike: the top is tied to an early entry of the history,
            # clamped so that runs shorter than LOSS_YLIM_EPOCH + 1 entries do not raise.
            ax.set_ylim(bottom=.5*min(min(loss),min(test_loss)),top=1.25*_history_value(loss, LOSS_YLIM_EPOCH))
            _save_figure(fig, f'../plots/conv_loss_{plot_suffix}.png')

            # --- test R^2 per epoch -----------------------------------------------------
            fig, ax = _start_figure(f'Test sample $R^2$ per epoch - {title_suffix}', f'epoch', '$R^2$')
            ax.plot(epoch,r_squared,label='$R^2$',lw=3, color=cmaplist[128])
            fig.tight_layout()
            ax.legend()
            # Zoom in on the converged region; clamped for histories shorter than R2_YLIM_EPOCH + 1.
            ax.set_ylim(bottom=max(0,_history_value(r_squared, R2_YLIM_EPOCH)-.1),top=1.)
            _save_figure(fig, f'../plots/conv_r_squared_{plot_suffix}.png')

            # --- predicted vs. true kernel values on the test sample --------------------
            # Pure inference: no autograd graph is needed for the scatter plot.
            with torch.no_grad():
                predictions = model(*test_data).to('cpu')
            # Supervised runs split the embedding where the Mercer targets do; unsupervised
            # runs split it in half.
            n_positive = mercer_pos.shape[-1] if supervised else predictions.shape[-1]//2
            print(f'{n_positive=}')
            predictions_pos, predictions_neg = predictions[:,:n_positive], predictions[:,n_positive:]
            predicted_kernel = predictions_pos @ predictions_pos.T - predictions_neg @ predictions_neg.T

            # Strict upper triangle = each unordered pair of distinct test graphs exactly once.
            indices = torch.triu_indices(*predicted_kernel.shape, offset=1)
            n_pairs = indices.shape[1]
            # Never request more pairs than exist (sampling is without replacement), and use a
            # local seeded generator so the plot is reproducible without touching global RNG state.
            rng = np.random.default_rng(SCATTER_SEED)
            subsample = rng.choice(n_pairs, size=min(N_SCATTER_PAIRS, n_pairs), replace=False)
            indices = indices[:,subsample]
            predicted_kernel_vals = predicted_kernel[indices[0],indices[1]]
            kernel_vals = test_kernel.to('cpu')[indices[0],indices[1]]

            true_vals = kernel_vals.detach().numpy()
            predicted_vals = predicted_kernel_vals.detach().numpy()
            diagonal_end = true_vals.max()

            fig, ax = _start_figure(
                f'Predictions vs. true kernel values for the test sample - {title_suffix}',
                f'true kernel value',
                'predicted',
            )
            ax.scatter(true_vals,predicted_vals,s=2,alpha=.2,color=cmaplist[192])
            # Identity line: a perfect model would put every point on it.
            ax.plot([0,diagonal_end],[0,diagonal_end],color=cmaplist[0])
            fig.tight_layout()
            _save_figure(fig, f'../plots/conv_scatterplot_{plot_suffix}.png')

            print('min test loss:', min(test_loss))
            print('max R^2:', max(r_squared))
            
            
            

results/convolutional_training_results_8_10000_25.pkl
loading data
preparing data
torch.Size([10000, 256])
initializing model
model parameters: 797312
kernel approximation: 0.9877597093582153


100%|██████████| 10000/10000 [1:01:22<00:00,  2.72it/s, loss=0.00568, test_loss=0.00746, test_R^2=0.896]


n_positive=160
min test loss: 0.0071996827609837055
max R^2: 0.9024989008903503
results/convolutional_training_results_8_10000_25_unsupervised.pkl
loading data
preparing data
torch.Size([10000, 256])
initializing model
model parameters: 797312


100%|██████████| 10000/10000 [1:38:59<00:00,  1.68it/s, loss=0.115, test_loss=0.149, test_R^2=0.892]


n_positive=128
min test loss: 0.14497779309749603
max R^2: 0.8946293592453003
results/convolutional_training_results_8_10000_25_distance.pkl
loading data
preparing data
torch.Size([10000, 256])
initializing model
model parameters: 797312
kernel approximation: 0.9563431143760681


100%|██████████| 10000/10000 [59:08<00:00,  2.82it/s, loss=0.00332, test_loss=0.00449, test_R^2=0.671] 


n_positive=63
min test loss: 0.004292813595384359
max R^2: 0.6930884122848511
results/convolutional_training_results_8_10000_25_distance_unsupervised.pkl
loading data
preparing data
torch.Size([10000, 256])
initializing model
model parameters: 797312


100%|██████████| 10000/10000 [1:34:55<00:00,  1.76it/s, loss=0.0292, test_loss=0.0492, test_R^2=0.599]


n_positive=128
min test loss: 0.04160373657941818
max R^2: 0.6629785299301147
results/convolutional_training_results_16_10000_15.pkl
loading data
preparing data
torch.Size([10000, 256])
initializing model
model parameters: 805888
kernel approximation: 0.8816515207290649


100%|██████████| 10000/10000 [54:17<00:00,  3.07it/s, loss=0.0148, test_loss=0.0197, test_R^2=0.785]


n_positive=213
min test loss: 0.017969965934753418
max R^2: 0.7903039455413818
results/convolutional_training_results_16_10000_15_unsupervised.pkl
loading data
preparing data
torch.Size([10000, 256])
initializing model
model parameters: 805888


100%|██████████| 10000/10000 [1:29:40<00:00,  1.86it/s, loss=0.523, test_loss=0.579, test_R^2=0.799]


n_positive=128
min test loss: 0.5775573253631592
max R^2: 0.7991635799407959
results/convolutional_training_results_16_10000_15_distance.pkl
loading data
preparing data
torch.Size([10000, 256])
initializing model
model parameters: 805888
kernel approximation: 0.7516463994979858


100%|██████████| 10000/10000 [51:45<00:00,  3.22it/s, loss=0.00522, test_loss=0.00709, test_R^2=0.541]


n_positive=24
min test loss: 0.006339981686323881
max R^2: 0.5636906623840332
results/convolutional_training_results_16_10000_15_distance_unsupervised.pkl
loading data
preparing data
torch.Size([10000, 256])
initializing model
model parameters: 805888


100%|██████████| 10000/10000 [1:29:32<00:00,  1.86it/s, loss=0.063, test_loss=0.0756, test_R^2=0.53] 


n_positive=128
min test loss: 0.0732349306344986
max R^2: 0.5466824769973755
