In [32]:
import sys
# NOTE(review): hard-coded absolute path to one user's checkout. The project
# imports in the following cells presumably rely on it — confirm, and prefer a
# configurable project root so the notebook runs on other machines.
sys.path.append('/home/ivan/distribution_connector')
import os

In [33]:
from sklearn import datasets
import numpy as np
import torch
from tqdm import tqdm
from connector_utils import test_models, gather_statistics, test_func
import matplotlib.pyplot as plt

In [34]:
from connector import Connector
# from one_layer_utils import samples, make_dataset, get_model, get_b
from utils import test_model

In [35]:
# Pick the architecture descriptor by name from the project `models` module.
import models
architecture = getattr(models, "Linear3NoBias") # other names noted by the author: LinearOneLayer, LogRegression

# Build the data loaders.
# NOTE(review): the arguments are positional; presumably (dataset, data dir,
# batch size, num workers, transform name, use-test flag) — confirm against
# data.loaders.
import data
loaders, num_classes = data.loaders(
    "CIFAR10",
    "data",
    1024,
    1,
    "VGG",
    True)

# Two independent instances of the same architecture.
# NOTE(review): num_classes is hard-coded to 10 here instead of using the
# `num_classes` value returned by data.loaders above — confirm they agree.
model1 = architecture.base(num_classes=10, **architecture.kwargs)
model2 = architecture.base(num_classes=10, **architecture.kwargs)

# Restore the weights stored under the 'model_state' key of each checkpoint.
# NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
model1.load_state_dict(torch.load('curves/Linear3NoBias/curve1/checkpoint-100.pt')['model_state'])
model2.load_state_dict(torch.load('curves/Linear3NoBias/curve2/checkpoint-100.pt')['model_state'])


Files already downloaded and verified
You are going to run models on the test set. Are you sure?
Files already downloaded and verified


IncompatibleKeys(missing_keys=[], unexpected_keys=[])

In [36]:
# model1.cuda();
# model1.eval();
# test_model(model1, loaders, cuda=True)

In [37]:
# model2.cuda();
# model2.eval();
# test_model(model2, loaders, cuda=True)

In [38]:
def samples(model):
    """Return every parameter of ``model`` as an array, in ``parameters()`` order.

    Args:
        model: object exposing ``parameters()``; every yielded item must
            support ``.data.cpu().numpy()`` (e.g. a ``torch.nn.Module``).

    Returns:
        list: one entry per parameter, each the result of
        ``param.data.cpu().numpy()``.
    """
    # Single pass: parameters() is consumed exactly once instead of being
    # re-materialised into a list for every index.
    return [param.data.cpu().numpy() for param in model.parameters()]

def samples_per_layer(model, bias=True):
    """Return the model parameters grouped per layer.

    Args:
        model: model handed to ``samples``.
        bias: when true, the arrays from ``samples`` are consumed in
            consecutive pairs and the second array of each pair is appended to
            the first as one extra trailing column. An odd number of arrays
            raises ``IndexError`` in that mode. When false, the arrays are
            returned unchanged.

    Returns:
        list of arrays, one per layer.
    """
    arrays = samples(model)
    if not bias:
        return arrays

    merged = []
    for first in range(0, len(arrays), 2):
        weight = arrays[first]
        extra_column = arrays[first + 1][:, None]
        merged.append(np.hstack([weight, extra_column]))
    return merged

def samples_butterfly(model, bias=True):
    """Pack consecutive layers side by side as ``[A, B.T]``.

    Args:
        model: model handed to ``samples_per_layer``.
        bias: when true nothing is packed and ``None`` is returned.

    Returns:
        ``None`` if ``bias`` is true; otherwise a list with one array per pair
        of consecutive layers, ``np.hstack([layer_i, layer_{i+1}.T])``. An odd
        number of layers raises ``IndexError``.
    """
    if bias:
        return None

    layers = samples_per_layer(model, bias=bias)
    packed = []
    for left in range(0, len(layers), 2):
        right_transposed = layers[left + 1].T
        packed.append(np.hstack([layers[left], right_transposed]))
    return packed

def get_model(W, architecture, bias=False, per_layer=True):
    """Instantiate ``architecture`` and overwrite its parameters from ``W``.

    Args:
        W: sequence of NumPy arrays.
            * ``per_layer=True``: one array per model parameter, matched to
              ``parameters()`` in order.
            * ``per_layer=False``: one array per pair of consecutive
              parameters, laid out as ``np.hstack([A, B.T])`` (the layout
              produced by ``samples_butterfly``).
        architecture: object exposing ``base`` (the model constructor) and
            ``kwargs`` (extra constructor keyword arguments).
        bias: unused; kept so existing calls keep working.
        per_layer: selects the layout of ``W`` described above.

    Returns:
        The newly constructed model with its parameters replaced.
    """
    model_sampled = architecture.base(num_classes=10, **architecture.kwargs)

    # W is intentionally not wrapped in np.array(): the result was never used,
    # it copied every weight, and stacking layers of different shapes raises
    # ValueError on NumPy >= 1.24.

    if per_layer:
        for parameter, w in zip(model_sampled.parameters(), W):
            parameter.data.copy_(torch.from_numpy(w))
        return model_sampled

    offset = 0
    for i, parameter in enumerate(model_sampled.parameters()):
        w = W[i // 2]
        first_of_pair = i % 2 == 0
        if first_of_pair:
            # A new packed array starts: read from its first column again.
            offset = 0
        # The first parameter of a pair is stored as-is, so it occupies
        # shape[1] columns. The second one is stored transposed, so it occupies
        # shape[0] columns (using shape[1] here only worked when the second
        # layer was not wider than the first).
        width = parameter.data.shape[1] if first_of_pair else parameter.data.shape[0]
        w_part = w[:, offset:offset + width]
        offset += width
        if first_of_pair:
            parameter.data.copy_(torch.from_numpy(w_part))
        else:
            parameter.data.copy_(torch.from_numpy(w_part.T))

    return model_sampled

In [39]:
def transform(x1, x2, E12, E22_inv, second=False):
    """Return ``(x1 - E12 @ E22_inv @ x2).T``.

    Args:
        x1: block the correction is subtracted from.
        x2: block the correction is computed from.
        E12: matrix multiplied on the left of ``E22_inv``.
        E22_inv: matrix multiplied on the left of ``x2``.
        second: unused.

    Returns:
        The transposed difference.
    """
    gain = E12 @ E22_inv
    residual = x1 - gain @ x2
    return residual.T
   
def inv_transform(y1, y2, E12, E22_inv, second=False):
    """Return ``(y1 + E12 @ E22_inv @ y2).T`` — the additive counterpart of ``transform``.

    Args:
        y1: block the correction is added to.
        y2: block the correction is computed from.
        E12: matrix multiplied on the left of ``E22_inv``.
        E22_inv: matrix multiplied on the left of ``y2``.
        second: unused.

    Returns:
        The transposed sum.
    """
    gain = E12 @ E22_inv
    restored = y1 + gain @ y2
    return restored.T

In [40]:
# Per-layer weight arrays of both checkpoints; with bias=False the arrays are
# returned exactly as samples() produced them (no bias column is appended).
parameters1 = samples_per_layer(model1, bias=False)
parameters2 = samples_per_layer(model2, bias=False)

In [51]:
def connect_simple(W1, W2, lines=True, func='arc_connect'):
    """Connect two weight matrices with the ``Connector`` method named ``func``.

    Args:
        W1, W2: the two matrices to connect.
        lines: when true the matrices are passed to ``Connector`` as given;
            when false both are transposed first and the result is transposed
            back.
        func: name of the ``Connector`` method to call (called with no
            arguments).

    Returns:
        Element ``[1]`` of the method's return value, in the orientation of the
        inputs.
    """
    if lines:
        def orient(matrix):
            return matrix
    else:
        def orient(matrix):
            return matrix.T

    print('W1', orient(W1).shape)
    connector = Connector(orient(W1), orient(W2))
    method = getattr(connector, func)
    return orient(method()[1])

def connect_cov(W1, W2, P1, P2, P_f, lines=True, inverse=False,  func='arc_connect'):
    """Connect ``P1`` and ``P2`` after removing the part explained by ``W1``/``W2``.

    Steps, as implemented below:
      1. Stack ``[P, W.T]`` column-wise for each model and concatenate the rows
         of both models.
      2. Centre the columns and form the (unnormalised) scatter matrix
         ``p.T @ p``; ``E12`` is its cross block between the leading columns
         and the trailing ``len(W1)`` columns, ``E22`` the trailing block.
      3. ``transform`` each ``P.T`` with its ``W`` using ``E12 @ inv(E22)``.
      4. Connect the two transformed matrices with ``connect_simple``.
      5. Apply ``inv_transform`` to the connected result using ``P_f``.

    Args:
        W1, W2: conditioning matrices of the two models.
        P1, P2: matrices to be connected.
            NOTE(review): ``np.hstack([P, W.T])`` requires P and W.T to have
            the same number of rows — confirm expected shapes with the caller.
        P_f: matrix used in place of ``W`` when inverting the transform.
        lines: forwarded to ``connect_simple``.
        inverse: when true, all four of ``W1, W2, P1, P2`` are transposed first.
        func: ``Connector`` method name, forwarded to ``connect_simple``.

    Returns:
        The result of ``inv_transform`` on the connected matrix.

    Raises:
        numpy.linalg.LinAlgError: if ``E22`` is singular.
    """
    if inverse:
        P1, W1 = P1.T, W1.T
        P2, W2 = P2.T, W2.T

    # Width of the trailing (W) block inside the stacked matrix.
    len_x2 = len(W1)
    p = np.concatenate([np.hstack([P1, W1.T]), np.hstack([P2, W2.T])])

    print('p', p.shape)
    print('len', len_x2)

    # Centre once; the previously computed per-block means were never read.
    p = p - p.mean(0)
    cov = p.T @ p

    E12 = cov[:-len_x2, -len_x2:]
    E22 = cov[-len_x2:, -len_x2:]
    E22_inv = np.linalg.inv(E22)

    print(1, P1.T.shape, W1.shape)
    W1 = transform(P1.T, W1, E12, E22_inv)
    W2 = transform(P2.T, W2, E12, E22_inv)
    print('W1', W1.shape)

    p_res = connect_simple(W1, W2, lines=lines, func=func)

    print(2, p_res.T.shape, P_f.shape)
    return inv_transform(p_res.T, P_f, E12, E22_inv)

In [42]:
# parameters_res_tr = []
# parameters_res = []

# print(3)
# p_res = connect_simple(parameters1[0], parameters2[0], lines=False)
# parameters_res_tr.append(p_res)


# print(2)
# p_res = connect_simple(parameters1[-2], parameters2[-2], lines=False)
# parameters_res_tr.append(p_res)


# print(1)
# print('parameters', parameters2[-1].shape)
# # p_res = connect_simple(parameters1[-1], parameters2[-1], lines=True)
# p_res = connect_cov(parameters1[1], parameters2[1], parameters1[-1],  parameters2[-1], parameters_res_tr[-1].T, 
#                     lines=False, inverse=True).T


# parameters_res_tr.append(p_res)
    
# model = get_model(parameters_res_tr, architecture, per_layer=True)
# model.cuda();
# model.eval();
# test_model(model, loaders, cuda=True)

    

In [43]:
# Shape of the first per-layer array of model 1.
parameters1[0].shape

(6144, 3072)

In [44]:
# Centre each layer along its transposed axis: the mean is taken over the rows
# of [p1.T; p2.T], i.e. one mean value per row of the original matrices, shared
# by both models. The means are kept in p_mean so they can be added back later.
# NOTE: this replaces the entries of parameters1/parameters2 in place, so
# re-running the cell centres already-centred data.
p_mean = []
for i, (p1, p2) in enumerate(zip(parameters1, parameters2)):
    p = np.concatenate([p1.T,p2.T])
#     print(p.shape)
    mean = p.mean(0)
    p_mean.append(mean)
#     print(p1.T.shape, mean.shape)
    parameters1[i] = (p1.T - mean).T
    parameters2[i] = (p2.T - mean).T

In [23]:
# Centre each layer along its own axis 0: the mean is taken over the rows of
# [p1; p2], i.e. one mean value per column, shared by both models.
# NOTE(review): this cell resets p_mean and re-centres parameters1/parameters2
# in place, so running it after the transposed centering cell changes what
# p_mean holds. It looks like an alternative to that cell rather than a second
# step — confirm which one is meant to run.
p_mean = []
for i, (p1, p2) in enumerate(zip(parameters1, parameters2)):
    p = np.concatenate([p1,p2])
    print(p.shape)
    mean = p.mean(0)
    p_mean.append(mean)
    parameters1[i] = p1 - mean
    parameters2[i] = p2 - mean

(12288, 3072)
(4000, 6144)
(20, 2000)


In [24]:
# plt.hist(parameters1[0][:, 1], bins=100);

In [93]:
# parameters1[0] -= parameters1[0].mean(0)
# parameters1[0][:, 1].mean(), parameters1[0].T[:, 1].mean()

In [53]:
# Mean of column 1 versus mean of row 1 of the first layer of model 1.
parameters1[0][:, 1].mean(), parameters1[0].T[:, 1].mean()

(0.006478533, -3.176718e-05)

In [48]:
# Shape of the last layers of both models stacked row-wise.
np.vstack([ parameters2[-1],  parameters1[-1]]).shape

(20, 2000)

In [67]:
# Connect the two models layer by layer, from the last layer to the first.
# Each result is inserted at index 0, so both lists end up in layer order.
# Orientation per layer: last -> lines=False, middle -> lines=True,
# first -> lines=False.
parameters_res_tr = []
parameters_res = []
func = 'arc_connect'

print(1)
print('parameters', parameters2[-1].shape)
p_res = connect_simple(parameters1[-1], parameters2[-1], lines=False, func=func)
parameters_res_tr.insert(0, p_res)
parameters_res.insert(0, p_res)

print(2)
print('parameters', parameters2[-2].shape)
p_res = connect_simple(parameters1[-2], parameters2[-2], lines=True, func=func)
# p_res = connect_cov(parameters1[-1], parameters2[-1], parameters1[-2],  parameters2[-2], parameters_res[0], 
#                     lines=False, func=func)
parameters_res_tr.insert(0, p_res)
parameters_res.insert(0, p_res)

print(3)
print('parameters', parameters2[0].shape)
p_res = connect_simple(parameters1[0], parameters2[0], lines=False, func=func)
# p_res = connect_cov(parameters1[1], parameters2[1], parameters1[0],  parameters2[0], parameters_res[0], 
#                     lines=False, func=func)


parameters_res_tr.insert(0, p_res)
parameters_res.insert(0, p_res)

# Add the stored means back. The (p.T + m).T form undoes a centering of the
# form (p.T - mean).T, so p_mean must come from the transposed centering cell.
for i, (p, m) in enumerate(zip(parameters_res_tr, p_mean)):
    parameters_res_tr[i] = (p.T + m).T


# Rebuild a model from the connected layers and evaluate it on the GPU.
model = get_model(parameters_res_tr, architecture, per_layer=True)
model.cuda();
model.eval();
test_model(model, loaders, cuda=True)

    

1
parameters (10, 2000)
W1 (2000, 10)
2
parameters (2000, 6144)
W1 (2000, 6144)
3
parameters (6144, 3072)
W1 (3072, 6144)
train results {'nll': 1.271210627593994, 'loss': 1.271210627593994, 'accuracy': 57.922}
test results {'nll': 1.6456598905563355, 'loss': 1.6456598905563355, 'accuracy': 42.36}


({'nll': 1.271210627593994, 'loss': 1.271210627593994, 'accuracy': 57.922},
 {'nll': 1.6456598905563355, 'loss': 1.6456598905563355, 'accuracy': 42.36})

In [88]:
# Same layer-by-layer connection, last layer to first, but every layer is
# connected with lines=False (transposed), and the stored means are NOT added
# back (that step is commented out below).
parameters_res_tr = []
parameters_res = []
func = 'arc_connect'


print(1)
p_res = connect_simple(parameters1[-1], parameters2[-1], lines=False, func=func)
parameters_res_tr.insert(0, p_res)
parameters_res.insert(0, p_res)

print(2)
p_res = connect_simple(parameters1[-2], parameters2[-2], lines=False, func=func)
# p_res = connect_cov(parameters1[-1], parameters2[-1], parameters1[-2],  parameters2[-2], parameters_res[0], 
#                     lines=False, func=func)
parameters_res_tr.insert(0, p_res)
parameters_res.insert(0, p_res)

print(3)
p_res = connect_simple(parameters1[0], parameters2[0], lines=False, func=func)
# p_res = connect_cov(parameters1[1], parameters2[1], parameters1[0],  parameters2[0], parameters_res[0], 
#                     lines=False, func=func)
print('parameters', parameters2[0].shape)

parameters_res_tr.insert(0, p_res)
parameters_res.insert(0, p_res)

# for i, (p, m) in enumerate(zip(parameters_res_tr, p_mean)):
#     parameters_res_tr[i] = p + m


# Rebuild a model from the connected layers and evaluate it on the GPU.
model = get_model(parameters_res_tr, architecture, per_layer=True)
model.cuda();
model.eval();
test_model(model, loaders, cuda=True)

    

1
2
3
parameters (6144, 3072)
train results {'nll': 1.3164503594589234, 'loss': 1.3164503594589234, 'accuracy': 57.19}
test results {'nll': 1.6831079917907714, 'loss': 1.6831079917907714, 'accuracy': 41.08}


({'nll': 1.3164503594589234, 'loss': 1.3164503594589234, 'accuracy': 57.19},
 {'nll': 1.6831079917907714, 'loss': 1.6831079917907714, 'accuracy': 41.08})

In [15]:
# parameters1 = samples_butterfly(model1, bias=False)
# parameters2 = samples_butterfly(model2, bias=False)
# for func in ['lin_connect', 'arc_connect', 'arc_connect_PCA', 'inverse_connect_PCA', 
#              'third_cumulant_connect']:
#     parameters_res = []
#     print(func)
#     for p1, p2 in zip(parameters1, parameters2):
#         cntr = Connector(p1, p2)
#         f = getattr(cntr, func)
#         if 'PCA' in func:
#             res = f(K=300)[1]
#         elif 'third_cumulant' in func:
#             res = f(K=100)[1] 
#         else:
#             res = f()[1]
#         parameters_res.append(res)

#     model = get_model(parameters_res, architecture, per_layer=False)
#     model.cuda();
#     model.eval();
#     test_model(model, loaders, cuda=True)

In [17]:
# Sweep over Connector methods with every layer passed as-is.
# The parameters are re-extracted first, so any earlier in-place centering of
# parameters1/parameters2 is discarded.
parameters1 = samples_per_layer(model1, bias=False)
parameters2 = samples_per_layer(model2, bias=False)
for func in ['lin_connect', 'arc_connect', 'arc_connect_PCA']:
    parameters_res = []
    print(func)
    for p1, p2 in zip(parameters1, parameters2):
        cntr = Connector(p1, p2)
        f = getattr(cntr, func)
        if 'PCA' in func:
            # Cap K at 300 and at the smaller matrix dimension.
            K = min(300, p1.shape[0], p1.shape[1])
            res = f(K=K)[1]
        elif 'third_cumulant' in func:
            # Not reached with the method names listed above.
            K = min(100, p1.shape[0], p1.shape[1])
            res = f(K=K)[1] 
        else:
            res = f()[1]
        parameters_res.append(res)

    # One model per method: rebuild from the connected layers and evaluate.
    model = get_model(parameters_res, architecture, per_layer=True)
    model.cuda();
    model.eval();
    test_model(model, loaders, cuda=True)

lin_connect
train results {'nll': 1.9955093453598023, 'loss': 1.9955093453598023, 'accuracy': 51.474}
test results {'nll': 2.162593546295166, 'loss': 2.162593546295166, 'accuracy': 29.76}
arc_connect
train results {'nll': 1.3757157013320922, 'loss': 1.3757157013320922, 'accuracy': 50.706}
test results {'nll': 1.6448731899261475, 'loss': 1.6448731899261475, 'accuracy': 42.03}
arc_connect_PCA
train results {'nll': 1.5449913298034668, 'loss': 1.5449913298034668, 'accuracy': 42.81}
test results {'nll': 1.722438893699646, 'loss': 1.722438893699646, 'accuracy': 36.14}


In [18]:
# columns
parameters1 = samples_per_layer(model1, bias=False)
parameters2 = samples_per_layer(model2, bias=False)
for func in ['arc_connect', 'arc_connect_PCA']:
    parameters_res = []
    print(func)
    for p1, p2 in zip(parameters1, parameters2):
        cntr = Connector(p1.T, p2.T)
        f = getattr(cntr, func)
        if 'PCA' in func:
            K = min(300, p1.shape[0], p1.shape[1])
            res = f(K=K)[1]
        elif 'third_cumulant' in func:
            K = min(100, p1.shape[0], p1.shape[1])
            res = f(K=K)[1] 
        else:
            res = f()[1]
        parameters_res.append(res.T)

    model = get_model(parameters_res, architecture, per_layer=True)
    model.cuda();
    model.eval();
    test_model(model, loaders, cuda=True)

arc_connect
train results {'nll': 1.3138795108032226, 'loss': 1.3138795108032226, 'accuracy': 57.49}
test results {'nll': 1.6831079917907714, 'loss': 1.6831079917907714, 'accuracy': 41.08}
arc_connect_PCA
train results {'nll': 1.3796758843994141, 'loss': 1.3796758843994141, 'accuracy': 55.522}
test results {'nll': 1.7231661840438843, 'loss': 1.7231661840438843, 'accuracy': 39.77}


In [19]:
# one last column and lines
parameters1 = samples_per_layer(model1, bias=False)
parameters2 = samples_per_layer(model2, bias=False)
for func in ['arc_connect', 'arc_connect_PCA']:
    parameters_res = []
    print(func)
    for ind, (p1, p2) in enumerate(zip(parameters1, parameters2)):
        if ind==len(parameters1)-1:
            print('last')
            p1, p2 = p1.T, p2.T
        cntr = Connector(p1, p2)
        f = getattr(cntr, func)
        if 'PCA' in func:
            K = min(300, p1.shape[0], p1.shape[1])
            res = f(K=K)[1]
        elif 'third_cumulant' in func:
            K = min(100, p1.shape[0], p1.shape[1])
            res = f(K=K)[1] 
        else:
            res = f()[1]
            
        if ind==len(parameters1)-1:
            res = res.T
        parameters_res.append(res)

    model = get_model(parameters_res, architecture, per_layer=True)
    model.cuda();
    model.eval();
    test_model(model, loaders, cuda=True)

arc_connect
last
train results {'nll': 1.402382646522522, 'loss': 1.402382646522522, 'accuracy': 49.57}
test results {'nll': 1.6654330642700195, 'loss': 1.6654330642700195, 'accuracy': 41.36}
arc_connect_PCA
last
train results {'nll': 1.4344119925308227, 'loss': 1.4344119925308227, 'accuracy': 48.944}
test results {'nll': 1.6590761445999145, 'loss': 1.6590761445999145, 'accuracy': 41.85}


In [20]:
# all columns except first lines
parameters1 = samples_per_layer(model1, bias=False)
parameters2 = samples_per_layer(model2, bias=False)
for func in ['arc_connect', 'arc_connect_PCA']:
    parameters_res = []
    print(func)
    for ind, (p1, p2) in enumerate(zip(parameters1, parameters2)):
        if ind!=0:
            print('last')
            p1, p2 = p1.T, p2.T
        cntr = Connector(p1, p2)
        f = getattr(cntr, func)
        if 'PCA' in func:
            K = min(300, p1.shape[0], p1.shape[1])
            res = f(K=K)[1]
        elif 'third_cumulant' in func:
            K = min(100, p1.shape[0], p1.shape[1])
            res = f(K=K)[1] 
        else:
            res = f()[1]
            
        if ind!=0:
            res = res.T
        parameters_res.append(res)

    model = get_model(parameters_res, architecture, per_layer=True)
    model.cuda();
    model.eval();
    test_model(model, loaders, cuda=True)

arc_connect
last
last
train results {'nll': 1.3678412181472779, 'loss': 1.3678412181472779, 'accuracy': 53.644}
test results {'nll': 1.6921545532226563, 'loss': 1.6921545532226563, 'accuracy': 40.73}
arc_connect_PCA
last
last
train results {'nll': 1.4357195449066162, 'loss': 1.4357195449066162, 'accuracy': 51.714}
test results {'nll': 1.7344378902435302, 'loss': 1.7344378902435302, 'accuracy': 39.71}
