In [1]:
import argparse

import torch
import torch.nn as nn
import utils
import numpy as np
import numpy.linalg as linalg

def representations_to_adj(representations, sigma=1):
    rview = representations.view(representations.size(0),-1)
    rview =  torch.nn.functional.normalize(rview, p=2, dim=1)
    adj = torch.mm(rview,torch.t(rview))
#    adj = torch.exp(-distances)
    ind = np.diag_indices(adj.shape[0])
    adj[ind[0], ind[1]] = torch.zeros(adj.shape[0]).cuda()
    degree = torch.pow(adj.sum(dim=1),-0.5)
    degree_matrix = torch.diag(degree)
    return torch.matmul(degree_matrix,torch.matmul(adj,degree_matrix))


device = "cuda"
teacher_file = "checkpoint/WideResNet28-10.pth"
teacher_model = torch.load(teacher_file)["net"].module

#student_file = "checkpoint/HKD_28-10_teaches_28-1_16_4.pth"
student_file = "checkpoint/WideResNet28-1.pth"
#student_file = "checkpoint/GKD_28-10_teaches_28-1_0_0_p1_25.pth"
student_model = torch.load(student_file)["net"].module
student_model.eval()
teacher_model.eval()

def to_one_hot(inp,num_classes):
    y_onehot = torch.cuda.FloatTensor(inp.size(0), num_classes)
    y_onehot.zero_()

    y_onehot.scatter_(1, inp.unsqueeze(1), 1)
    
    return y_onehot

trainloader, testloader = utils.load_data(128)
identity = torch.eye(1000).cuda()
utils.test(teacher_model,testloader, "cuda", "no",show="error")
utils.test(student_model,testloader, "cuda", "no",show="error")


Files already downloaded and verified
Files already downloaded and verified
Test error: 89.93
Test error: 71.69


In [2]:
with torch.no_grad():
    for batch_idx, (inputs, targets) in enumerate(testloader):
        inputs, targets = inputs.to(device), targets.to(device)
        outputs_teacher, layers_teacher = teacher_model(inputs)
        outputs_student, layers_student = student_model(inputs)
        for student_layer,teacher_layer in zip(layers_student,layers_teacher):
            adj_teacher = representations_to_adj(teacher_layer)
            laplacian_teacher = adj_teacher
            laplacian_teacher = laplacian_teacher.cpu().numpy()

            w, v = linalg.eig(laplacian_teacher)
            seen = {}
            unique_eigenvalues = []
            for (x, y) in zip(w, v):
                if x in seen:
                    continue
                seen[x] = 1
                unique_eigenvalues.append((x, y))
            fiedler_vector = sorted(unique_eigenvalues)[1][1].reshape(1000,1)

            adj_student = representations_to_adj(student_layer)
            laplacian_student = identity - adj_student
            laplacian_student = laplacian_student.cpu().numpy()
            smoothness = np.dot(fiedler_vector.T,laplacian_student)
            smoothness = np.dot(smoothness,fiedler_vector)
            print(smoothness.sum())
        break


0.9957203
0.99935067
1.0016841


In [3]:
with torch.no_grad():
    for batch_idx, (inputs, targets) in enumerate(testloader):
        inputs, targets = inputs.to(device), targets.to(device)
        outputs_teacher, layers_teacher = teacher_model(inputs)
        outputs_student, layers_student = student_model(inputs)
        for student_layer,teacher_layer in zip(layers_student,layers_teacher):
            adj_teacher = representations_to_adj(teacher_layer)
            laplacian_teacher = adj_teacher
            laplacian_teacher = laplacian_teacher.cpu().numpy()

            w, v = linalg.eig(laplacian_teacher)
            seen = {}
            unique_eigenvalues = []
            for (x, y) in zip(w, v):
                if x in seen:
                    continue
                seen[x] = 1
                unique_eigenvalues.append((x, y))
            eigenvectors = []
            for x,y in sorted(unique_eigenvalues)[1:]:
                eigenvectors.append(y)
            eigenvectors = np.array(eigenvectors).T
            print(eigenvectors.shape)

            adj_student = representations_to_adj(student_layer)
            laplacian_student = identity - adj_student
            laplacian_student = laplacian_student.cpu().numpy()
            smoothness = np.dot(eigenvectors.T,laplacian_student)
            smoothness = np.dot(smoothness,eigenvectors)
            print(smoothness.sum())
            break
        break


(1000, 999)
998.8728


In [4]:
with torch.no_grad():
    for batch_idx, (inputs, targets) in enumerate(testloader):
        inputs, targets = inputs.to(device), targets.to(device)
        outputs_teacher, layers_teacher = teacher_model(inputs)
        outputs_student, layers_student = student_model(inputs)
        class_signal = to_one_hot(targets,10).cpu().numpy()
        for student_layer,teacher_layer in zip(layers_student,layers_teacher):
            adj_student = representations_to_adj(student_layer)
            laplacian_student = identity - adj_student
            laplacian_student = laplacian_student.cpu().numpy()
            smoothness = np.dot(class_signal.T,laplacian_student)
            smoothness = np.dot(smoothness,class_signal)
            print(smoothness.sum())
        break


0.5094147
1.0608368
2.1382065


In [5]:
rview = student_layer.view(student_layer.size(0),-1)
distances = torch.cdist(rview,rview)/(2*1**2)
distances

tensor([[0.0000, 0.8480, 0.7609,  ..., 0.9352, 0.4785, 0.7277],
        [0.8480, 0.0000, 0.3398,  ..., 0.5396, 1.0513, 0.7288],
        [0.7609, 0.3398, 0.0000,  ..., 0.6055, 0.9256, 0.6054],
        ...,
        [0.9352, 0.5396, 0.6055,  ..., 0.0000, 0.9854, 1.0658],
        [0.4785, 1.0513, 0.9256,  ..., 0.9854, 0.0000, 1.0013],
        [0.7277, 0.7288, 0.6054,  ..., 1.0658, 1.0013, 0.0000]],
       device='cuda:0')