In [1]:
import numpy as np
import scipy.linalg
import pickle as pkl
import matplotlib.pyplot as plt

from utils import *
import os

In [2]:
def analyse_spectrum(dataset,distance=False,iterated=False, sample_sizes = (128,), x_scale_log=True):
    """Plot eigenvalue magnitudes of leading kernel sub-matrices and save the figure.

    For every entry of ``sample_sizes`` the leading ``sample_size x sample_size``
    block of the loaded kernel matrix is scaled by one over its size, its
    eigenvalues are computed with ``scipy.linalg.eigh``, and their absolute
    values are scattered in descending order against their 1-based rank.
    All series share one figure, which is written to
    ``../plots/eigenvalue_distribution_<dataset_string(...)>.png`` and then closed.

    Parameters
    ----------
    dataset
        Dataset identifier; forwarded to ``load_kernel_matrix`` and
        ``dataset_string`` and used as key into ``DATASET_MAP``.
    distance : bool
        Forwarded to ``load_kernel_matrix``/``dataset_string``; also selects
        the ``$\\delta$`` title instead of the ``$K$`` title.
    iterated : bool
        Forwarded to ``load_kernel_matrix``/``dataset_string``; adds the
        ``^{(2)}`` superscript to the ``$K$`` title.
    sample_sizes : sequence of int
        Numbers of landmark points to analyse (must support ``len``).
    x_scale_log : bool
        Use a logarithmic x-axis when true.

    Notes
    -----
    * ``load_kernel_matrix``, ``DATASET_MAP`` and ``dataset_string`` are not
      defined in this notebook; presumably they come from ``utils`` via the
      wildcard import -- TODO confirm.
    * The kernel is assumed to be a square, symmetric 2-D NumPy array
      (required by the slicing and by ``eigh``) -- TODO confirm.
    * ``plt.rcParams['font.size']`` is changed globally and stays changed
      after the call.
    """
    kernel = load_kernel_matrix(dataset,distance,iterated)

    # Update the font size *before* the figure exists so that the axes and
    # every text element are created under the same setting.
    plt.rcParams.update({'font.size': 14})

    fig, ax = plt.subplots(figsize=(12,8))
    ax.set_yscale('log')
    if x_scale_log:
        ax.set_xscale('log')
    ax.set_ylim(10**-8,10**1)

    if distance:
        ax.set_title(f'Eigenvalue distribution by number of landmark points, for $\\delta$ on {DATASET_MAP[dataset]}')
    else:
        ax.set_title(f'Eigenvalue distribution by number of landmark points, for $K{"^{(2)}" if iterated else ""}$ on {DATASET_MAP[dataset]}')

    ax.set_ylabel('absolute value of eigenvalues (log-scale)')
    ax.set_xlabel(f'eigenvalue index{" (log-scale)" if x_scale_log else ""}')

    cmap = plt.cm.magma  # colours are taken from the lower 80% of this colormap

    for i,sample_size in enumerate(sample_sizes):
        print(f'{sample_size=}')
        subkernel = kernel[:sample_size,:sample_size]
        # Slicing clamps silently when sample_size exceeds the kernel size, so
        # normalise by (and label with) the number of rows actually used.
        n_landmarks = subkernel.shape[0]

        # Only the eigenvalues are plotted; skipping the eigenvectors avoids
        # needless work on the large sub-matrices.
        eigenvals = scipy.linalg.eigh(1/n_landmarks * subkernel, eigvals_only=True)
        magnitudes = np.sort(np.abs(eigenvals))[::-1]  # descending |lambda|

        # Plot exactly one point per eigenvalue (no zero padding, which would
        # only add invisible points on the logarithmic y-axis).
        ranks = np.arange(1, magnitudes.size + 1)

        colour = cmap(int(.8*(i+1)/len(sample_sizes)*(cmap.N-1)))
        ax.scatter(ranks,magnitudes,label=f'{n_landmarks}',s=10,color=colour)

    ticks = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192]
    ax.set_xticks(ticks)
    ax.set_xticklabels(ticks)
    ax.minorticks_off()
    ax.legend(title='# landmarks')
    fig.tight_layout()

    out_path = f'../plots/eigenvalue_distribution_{dataset_string(dataset,distance,iterated)}.png'
    # Create the target directory up front so the (expensive) run does not
    # fail at the very last step because '../plots' is missing.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    fig.savefig(out_path,format='png',dpi=300)
    # plt.show()
    plt.close(fig)
    

In [3]:
# Render one eigenvalue-distribution figure per dataset and per setting of the
# `distance` flag (`iterated` stays False in both settings).
for dataset in ('8_10000_25', '16_10000_15'):
    for distance, iterated in ((False, False), (True, False)):
        print(dataset)
        analyse_spectrum(
            dataset,
            distance=distance,
            iterated=iterated,
            sample_sizes=[
                128, 181, 256, 362, 512, 724, 1024,
                1448, 2048, 2896, 4096, 5793, 8192,
            ],
            x_scale_log=True,
        )
        print()

8_10000_25
sample_size=128
sample_size=181
sample_size=256
sample_size=362
sample_size=512
sample_size=724
sample_size=1024
sample_size=1448
sample_size=2048
sample_size=2896
sample_size=4096
sample_size=5793
sample_size=8192

8_10000_25
sample_size=128
sample_size=181
sample_size=256
sample_size=362
sample_size=512
sample_size=724
sample_size=1024
sample_size=1448
sample_size=2048
sample_size=2896
sample_size=4096
sample_size=5793
sample_size=8192

16_10000_15
sample_size=128
sample_size=181
sample_size=256
sample_size=362
sample_size=512
sample_size=724
sample_size=1024
sample_size=1448
sample_size=2048
sample_size=2896
sample_size=4096
sample_size=5793
sample_size=8192
