In [1]:
import torch
from datasets import SyntheticData

import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F

from torchvision import datasets, transforms
import torchvision

import numpy as np

# Shared synthetic-data generator for the whole notebook: later cells draw their
# train/test sets from it via data.generate_synthetic_data_non_separable(...).
data=SyntheticData()


def k(x, xprime):
    """Two-layer ReLU neural tangent kernel between two 1-D vectors.

    With ``v = ||x|| * ||xprime||`` and ``u`` the (slightly shrunk) cosine of the
    angle between the inputs, the value returned is

        v * ( (u * (pi - arccos(u)) + sqrt(1 - u**2)) / (2*pi)
              + u * (pi - arccos(u)) / (2*pi) )

    i.e. the arc-cosine closed form ``kappa_1(u) + u * kappa_0(u)`` normalised
    by ``2*pi``. The previous version multiplied the ``sqrt(1 - u**2)`` term by
    ``u`` because of a misplaced parenthesis, which made the kernel vanish for
    orthogonal inputs instead of returning ``v / (2*pi)``.

    Parameters
    ----------
    x, xprime : torch.Tensor
        1-D tensors of equal length (``torch.dot`` requires this).

    Returns
    -------
    torch.Tensor
        0-dim tensor holding the kernel value. Computed under
        ``torch.no_grad()``, so it carries no autograd history.

    Notes
    -----
    A zero-norm input makes ``v`` zero and the cosine ``0/0``, so the result is
    NaN in that case.
    """
    with torch.no_grad():
        v = torch.linalg.norm(x) * torch.linalg.norm(xprime)
        # Shrink the cosine by a hair so arccos and sqrt stay finite when the
        # inputs are (anti-)parallel and rounding would push |u| past 1.
        u = .99999 * torch.dot(x, xprime) / v
        # u * (pi - arccos(u)) appears in both summands of the closed form.
        angle_term = u * (torch.pi - torch.arccos(u))
        # The sqrt term is added to angle_term, not multiplied by u.
        return v * ((angle_term + torch.sqrt(1 - u ** 2)) / (2 * np.pi)
                    + angle_term / (2 * np.pi))

def ntk_kernel(x,z):
    """Gram matrix of the pairwise kernel ``k`` between rows of ``x`` and ``z``.

    Entry ``(i, j)`` of the result is ``k(x[i], z[j])``, evaluated one pair at a
    time. Both arguments must be 2-D (the shape unpacking fails otherwise). The
    output buffer is created with ``torch.empty`` defaults, independent of the
    dtype or device of the inputs.
    """
    n_rows, _ = x.shape
    n_cols, _ = z.shape
    gram = torch.empty((n_rows, n_cols))
    # Walk the rows directly instead of indexing; each row is x[i] / z[j].
    for i, x_row in enumerate(x):
        for j, z_row in enumerate(z):
            gram[i, j] = k(x_row, z_row)
    return gram



In [2]:
def kappa(u,v):
    """Element-wise two-layer ReLU NTK from cosines ``u`` and norm products ``v``.

    Computes

        v * ( (u' * (pi - arccos(u')) + sqrt(1 - u'**2)) / (2*pi)
              + u' * (pi - arccos(u')) / (2*pi) ),      u' = 0.99999 * u

    which is the arc-cosine closed form ``kappa_1(u') + u' * kappa_0(u')``
    normalised by ``2*pi``. The previous version multiplied the
    ``sqrt(1 - u'**2)`` term by ``u'`` because of a misplaced parenthesis, so
    the result was 0 wherever the cosine was 0.

    Parameters
    ----------
    u : torch.Tensor
        Cosine similarities; values are expected in ``[-1, 1]``.
    v : torch.Tensor
        Products of the corresponding norms, broadcastable against ``u``.

    Returns
    -------
    torch.Tensor
        Kernel values with the broadcast shape of ``u`` and ``v``.
    """
    # Shrink the cosines slightly so arccos/sqrt stay finite at |u| == 1.
    u=.99999*u
    # u * (pi - arccos(u)) is shared by both summands of the closed form.
    angle_term = u * (torch.pi - torch.arccos(u))
    # The sqrt term is added to angle_term, not multiplied by u.
    return v * ((angle_term + torch.sqrt(1 - u ** 2)) / (2 * np.pi)
                +  angle_term /  (2 * np.pi))

def kappa2(u):
    """Angular part of the two-layer ReLU NTK, normalised by ``pi``.

    For the shrunk cosine ``c = 0.99999 * u`` this evaluates

        2 * c / pi * (pi - arccos(c)) + sqrt(1 - c**2) / pi

    element-wise and returns a tensor of the same shape as ``u``. The shrink
    keeps ``arccos`` and ``sqrt`` finite when ``|u|`` reaches 1.
    """
    cosine = .99999 * u
    angular_part = 2 * cosine / torch.pi * (torch.pi - torch.arccos(cosine))
    radial_part = torch.sqrt(1 - cosine ** 2) / torch.pi
    return angular_part + radial_part

def easier_ntk(x,z):
    """Vectorised kernel matrix between the rows of ``x`` and ``z`` using ``kappa``.

    Builds the matrix of pairwise inner products and the matrix of pairwise
    norm products, then hands the cosine matrix (their ratio) and the norm
    products to ``kappa``. Result has one row per row of ``x`` and one column
    per row of ``z``.
    """
    x_norms = x.norm(dim=-1)
    z_norms = z.norm(dim=-1)
    # (n, 1) @ (1, m): outer product of the row norms.
    norm_mat = x_norms.unsqueeze(1) @ z_norms.unsqueeze(0)
    cosine = (x @ z.T) / norm_mat
    return kappa(cosine, norm_mat)

def easier_ntk2(x,z):
    """Vectorised kernel matrix between the rows of ``x`` and ``z`` using ``kappa2``.

    Forms pairwise inner products and pairwise norm products, applies
    ``kappa2`` to the cosine matrix (their ratio) and rescales the result by
    the norm products. Output has one row per row of ``x`` and one column per
    row of ``z``.
    """
    x_norms = x.norm(dim=-1)
    z_norms = z.norm(dim=-1)
    # (n, 1) @ (1, m): outer product of the row norms.
    norm_mat = x_norms.unsqueeze(1) @ z_norms.unsqueeze(0)
    cosine = (x @ z.T) / norm_mat
    return norm_mat * kappa2(cosine)

In [3]:
from sklearn.metrics import accuracy_score
# Sweep over training-set sizes. For each size: fit the kernel interpolant,
# then report its RKHS norm and its test classification error.
training_sizes = [200, 1001, 2000, 5000, 10000]
for ntrain in training_sizes:

    # Fresh train and test draws on every iteration; the second argument (0.1)
    # is forwarded as-is -- presumably a label-noise level, TODO confirm
    # against SyntheticData.
    X_train,y_train=data.generate_synthetic_data_non_separable(ntrain,0.1)
    # NOTE(review): assuming the first argument is the sample count, the test
    # set has only 100 points, so the reported error moves in steps of 0.01.
    X_test,y_test=data.generate_synthetic_data_non_separable(100,0.1)
    # Slow per-pair alternative, kept for reference:
    #Kernel_train=ntk_kernel(X_train,X_train)
    # Train-vs-train Gram matrix.
    Kernel_train=easier_ntk2(X_train,X_train)
    # Solve for alpha = K^-1 y (exact interpolation, no regularisation term)
    alpha_interp = torch.linalg.solve(Kernel_train, y_train)
    # Equivalent explicit-inverse form, kept for reference:
    #alpha_interp=torch.linalg.inv(Kernel_train)@y_train
    # Compute RKHS norm for interpolated solution: sqrt(alpha^T K alpha)
    rkhs_norm_interp = torch.sqrt((alpha_interp @ ( Kernel_train@ alpha_interp)))
    # Convert the 0-dim tensor to a plain Python float for printing.
    rkhs_norm_interp = rkhs_norm_interp.item()


    # Train-vs-test kernel block: rows follow X_train, columns follow X_test,
    # hence the transpose when predicting below.
    K_test_interp = easier_ntk2(X_train, X_test)
    # Predicted label = sign of the kernel expansion evaluated at the test points
    # (assumes labels are encoded as +/-1 -- TODO confirm).
    y_pred_interp = torch.sign(K_test_interp.T @ alpha_interp).squeeze()
    # Misclassification rate = 1 - accuracy.
    error_interp = 1 - accuracy_score(y_test.cpu().numpy(), y_pred_interp.cpu().numpy())

    print("Training size : ", ntrain, " Norm : ",rkhs_norm_interp, " Error test : ",error_interp)

Training size :  200  Norm :  3.206566333770752  Error test :  0.24
Training size :  1001  Norm :  9.406493186950684  Error test :  0.32999999999999996
Training size :  2000  Norm :  14.801255226135254  Error test :  0.29000000000000004
Training size :  5000  Norm :  26.843603134155273  Error test :  0.29000000000000004
Training size :  10000  Norm :  41.13756561279297  Error test :  0.19999999999999996


In [6]:
import eigenpro2
# Train/test draws for the EigenPro experiment; the second generator argument
# is 0 here.
X_train,y_train=data.generate_synthetic_data_non_separable(200,0)
X_test,y_test=data.generate_synthetic_data_non_separable(100,0)

# NOTE(review): n_subsamples is only used to derive top_q on the next line;
# it is never passed to model.fit below.
n_subsamples = min(len(X_train), 5000)
# If the generator returns 200 rows this evaluates to min(160, 199) = 160.
top_q = min(160, n_subsamples - 1)


# Thin wrapper so the model calls the notebook's vectorised kernel.
kernel_fn = lambda x, y: easier_ntk2(x, y)

# X_train is the second positional argument and 1 the third -- presumably the
# kernel centers and the output dimension; TODO confirm against the eigenpro2 API.
model = eigenpro2.KernelModel(kernel_fn, X_train, 1, device=torch.device("cpu"))

# Targets are given a trailing axis via unsqueeze(1) before being passed to fit.
# NOTE(review): the recorded run of this cell ended with
# "IndexError: index 159 is out of bounds for dimension 0 with size 66".
# 159 equals top_q - 1, so it looks like fewer than top_q eigen-directions were
# available inside eigenpro2 -- confirm there and lower top_q accordingly.
results = model.fit(X_train, y_train.unsqueeze(1), X_test, y_test.unsqueeze(1), epochs=20, print_every=2, mem_gb=8,top_q=top_q)

# Learned expansion coefficients, with singleton dimensions squeezed out.
coeff_kernel=model.weight.squeeze() 
kernel_train=model.kernel_matrix(X_train)

# RKHS norm of the fitted function: sqrt(c^T K c).
rkhs_norm_overfit = torch.sqrt(coeff_kernel@(kernel_train@coeff_kernel))

# Predict and calculate classification error for overfitted
y_pred_overfit = model.forward(X_test).sign().squeeze()
error_overfit = 1 - accuracy_score(y_test.cpu().numpy(), y_pred_overfit.cpu().numpy())
print(rkhs_norm_overfit,error_overfit)

IndexError: index 159 is out of bounds for dimension 0 with size 66