In [1]:
from src.config import RESULTS_FOLDER, get_config
from file_utils import load_most_recent_results, load_most_recent_model
from src.analysis import combine_all_trials, process_results, compute_top_codes
import numpy as np
from src.model import build_model
import os
import pickle as pk
import torch
import torch.nn.functional as F
from src.data import get_mnist_data


In [2]:
# Load saved experiment results via load_most_recent_results
# (presumably the newest run under RESULTS_FOLDER -- verify in file_utils).
result_dict = load_most_recent_results(RESULTS_FOLDER)


In [3]:
# Number of entries in result_dict (the name suggests one entry per trial -- confirm).
# NOTE(review): not referenced again in the cells visible here.
num_trials = len(result_dict.keys())
# Merge all entries of result_dict, then post-process the merged structure.
combined_results = combine_all_trials(result_dict)
processed_result = process_results(combined_results)
# Top-5 codes computed from the entry stored under key 0 only (not from the combined results).
# NOTE(review): `top_codes` is a dict here but is rebound to a list of tuples in a
# later cell; a distinct name would avoid one name meaning two different things.
top_codes = compute_top_codes(result_dict[0], NUM_TOP_CODES=5)
top_full_codes_info = top_codes['post_train_code_histogram']


In [4]:
# Display the 'post_train_code_histogram' summary (top codes, their mass and cumulative mass).
top_full_codes_info

{'top_codes': [(1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0),
  (0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0),
  (0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1),
  (1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1),
  (0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1)],
 'cmass_top_code': [0.024166666666666666,
  0.037983333333333334,
  0.04883333333333333,
  0.059283333333333334,
  0.06866666666666667],
 'mass_top_code': [0.024166666666666666,
  0.013816666666666666,
  0.01085,
  0.01045,
  0.009383333333333334],
 'ratio_1_0': 1.5075094684602324}

In [5]:
# Keep only the list of code tuples. This rebinds `top_codes`, which until now
# held the dict returned by compute_top_codes.
top_codes = top_full_codes_info['top_codes']
top_codes

[(1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0),
 (0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0),
 (0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1),
 (1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1),
 (0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1)]

In [6]:
# Load the saved model via load_most_recent_model and switch it to eval mode.
model = load_most_recent_model(RESULTS_FOLDER)
model.eval()
# NOTE(review): displays every parameter tensor of the model; the output is long.
model.state_dict()

OrderedDict([('first_layer.weight',
              tensor([[-1.6704e-03, -3.1063e-02,  1.9776e-02,  ..., -1.4933e-02,
                        4.6959e-03, -3.4221e-02],
                      [-4.6716e-02, -2.8994e-02, -1.0641e-02,  ..., -6.4256e-06,
                       -3.5220e-02,  7.9628e-03],
                      [-9.1903e-03,  1.7133e-02, -1.3676e-02,  ...,  9.5499e-03,
                        5.9822e-03, -4.8659e-02],
                      ...,
                      [-2.1377e-02,  1.8013e-02, -2.2198e-02,  ..., -3.8224e-02,
                        5.3193e-03,  1.7176e-02],
                      [-1.1148e-02, -1.2302e-02,  2.8995e-02,  ..., -9.5972e-03,
                       -8.3732e-03, -3.3763e-02],
                      [ 1.0330e-03, -3.5355e-02, -1.4247e-02,  ..., -4.8571e-02,
                       -4.3420e-02, -5.4056e-02]])),
             ('first_layer.bias',
              tensor([ 0.0307,  0.0531,  0.0157,  0.0181, -0.0228,  0.0394,  0.0319,  0.0411])),
             ('li

In [7]:
# Splits a single code into a list of code for each layer.
def tuple_code_to_list(full_code):
    """Split a flat activation code into one list per layer.

    The layer width ('hidden_size') and the number of layers ('depth') are read
    from get_config(). Layer ``i`` receives the slice
    ``full_code[i * hidden_size : (i + 1) * hidden_size]``.

    Args:
        full_code: Sliceable sequence (e.g. a tuple of 0/1 ints) holding the
            codes of all layers concatenated, first layer first.

    Returns:
        A list with ``depth`` entries, each a list holding one layer's code.
        Slicing semantics apply: if ``full_code`` is shorter than
        ``depth * hidden_size`` the trailing lists are short or empty, and any
        extra trailing elements are ignored.

    Side effects:
        Prints the input code and the resulting per-layer split.
    """
    # Read the configuration once so both values come from the same config object.
    config = get_config()
    hidden_layer_size = config['hidden_size']
    depth = config['depth']

    list_codes_per_layer = [
        list(full_code[i * hidden_layer_size:(i + 1) * hidden_layer_size])
        for i in range(depth)
    ]
    print(full_code, '->', list_codes_per_layer)
    return list_codes_per_layer



In [8]:
# Split the first top code into one list per layer; `test` is later passed to NN_to_logreg.
test = tuple_code_to_list(top_codes[0])


(1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0) -> [[1, 0, 1, 1, 0, 1, 0, 1], [1, 0, 1, 1, 0, 1, 1, 0]]


In [9]:
class LinearSoftMax(torch.nn.Module):
    """A single affine layer followed by log-softmax, with supplied parameters.

    Args:
        input_size: Number of features per sample; inputs are reshaped to
            ``(-1, input_size)`` in ``forward``.
        weights: NumPy array used as the weight matrix of the affine layer.
        bias: NumPy array used as the bias vector of the affine layer.
        output_size: Output width used when constructing the underlying
            ``torch.nn.Linear``; the layer's parameters are then replaced by
            ``weights`` and ``bias``.

    Attributes:
        input_size: The ``input_size`` argument.
        weights: Tensor created from ``weights`` with ``torch.from_numpy``.
        bias: Tensor created from ``bias`` with ``torch.from_numpy``.
        linear: The ``torch.nn.Linear`` whose parameters wrap the tensors above.
    """

    def __init__(self, input_size, weights, bias, output_size = 10):
        super().__init__()
        self.input_size = input_size
        self.weights = torch.from_numpy(weights)
        self.bias = torch.from_numpy(bias)

        # Build the layer, then swap its freshly initialised parameters for the given ones.
        affine = torch.nn.Linear(input_size, output_size)
        affine.weight = torch.nn.Parameter(self.weights)
        affine.bias = torch.nn.Parameter(self.bias)
        self.linear = affine

    def forward(self, x):
        """Return per-class log-probabilities for ``x`` flattened to ``(-1, input_size)``."""
        flat = x.view(-1, self.input_size)
        return F.log_softmax(self.linear(flat), dim=1)

# Accepts a relu activation code and generates a linear softmax model from it.
def NN_to_logreg(model, list_codes_per_layer):
    """Collapse a ReLU network into a single linear-softmax model for one code.

    For a fixed on/off pattern of the hidden units, every ReLU acts as
    multiplication by a diagonal mask ``D_i``, so the network reduces to one
    affine map. With hidden layers ``(W_i, b_i)`` and output layer ``(W_L, b_L)``:

        W = W_L D_n W_n ... D_1 W_1
        b = b_L + W_L D_n b_n + W_L D_n W_n D_{n-1} b_{n-1} + ...

    The result equals the network only on inputs whose activation code is the
    one given here.

    Args:
        model: Network exposing ``first_layer``, ``list_layers`` (hidden layers
            after the first), ``last_layer`` and ``input_size``; each layer has
            ``weight`` and ``bias`` tensors. The model is switched to eval mode.
        list_codes_per_layer: One sequence per hidden layer, first layer first;
            entry ``r`` scales unit ``r`` of that layer (1 keeps it, 0 removes it).

    Returns:
        A ``LinearSoftMax`` built from the combined float64 weight and bias.

    Raises:
        ValueError: If a layer's code length differs from that layer's number
            of output units.
    """
    model.eval()

    def _as_float64(tensor):
        # .cpu() lets this work for parameters that live on another device.
        # astype always copies, so the result never shares memory with the model.
        return tensor.detach().cpu().numpy().astype(np.float64)

    weights = []
    biases = []
    for i, layer_code in enumerate(list_codes_per_layer):
        # fix this so we can access the layers directly in model.list_layer
        layer = model.first_layer if i == 0 else model.list_layers[i - 1]
        layer_weight = _as_float64(layer.weight)
        layer_bias = _as_float64(layer.bias)

        mask = np.asarray(layer_code, dtype=np.float64)
        if mask.shape != (layer_weight.shape[0],):
            raise ValueError(
                'code for layer %d has shape %s but the layer has %d units'
                % (i, mask.shape, layer_weight.shape[0])
            )

        # Scaling rows by the mask is the same as left-multiplying by diag(mask).
        weights.append(mask[:, None] * layer_weight)
        biases.append(mask * layer_bias)

    # Start from the last layer (the one fed into softmax) and fold the masked
    # hidden layers in from the output side towards the input.
    combined_weight = _as_float64(model.last_layer.weight)
    combined_bias = _as_float64(model.last_layer.bias)
    for layer_weight, layer_bias in zip(reversed(weights), reversed(biases)):
        # The bias must be updated before the weight: it needs the product of
        # all layers *after* this one, which is combined_weight's current value.
        combined_bias = combined_bias + np.matmul(combined_weight, layer_bias)
        combined_weight = np.matmul(combined_weight, layer_weight)
    return LinearSoftMax(model.input_size, combined_weight, combined_bias)


# Build the linear softmax model for the per-layer activation code stored in `test`.
linearized = NN_to_logreg(model, test)

In [30]:
# Evaluate `linearized` (built from top_codes[0]) on the test set.
#
# NN_to_logreg fixes every ReLU to the on/off pattern of one activation code, so
# `linearized` is only guaranteed to reproduce `model` on inputs whose own code
# equals top_codes[0]. The accuracy over the whole test set (printed first, as
# before) is therefore not expected to match the network's accuracy; the
# agreement on matching-code inputs is the check of the linearization itself.
train_loader, test_loader = get_mnist_data(get_config())

target_code = tuple(top_codes[0])

correct_preds_count = 0
matching_code_count = 0        # test inputs whose activation code equals target_code
matching_agreement_count = 0   # ... of which linearized and model predict the same class
with torch.no_grad():  # evaluation only; no gradients are needed
    for X, y in test_loader:
        # The linearized parameters are built from float64 NumPy arrays, hence the cast.
        X_double = X.double()
        output = linearized(X_double)
        model_output, codes = model.forward_get_code(X)

        # assumes codes[k] is a (batch, hidden_size) 0/1 tensor for hidden layer k -- TODO confirm
        # NOTE(review): only the first two layers are used, while tuple_code_to_list
        # splits codes by the configured depth; revisit if depth != 2.
        layer_1 = codes[0].detach().cpu().numpy().astype(int)
        layer_2 = codes[1].detach().cpu().numpy().astype(int)
        combined_codes = [tuple(first) + tuple(second)
                          for first, second in zip(layer_1, layer_2)]
        indices_for_matching_code = [i for i, code in enumerate(combined_codes)
                                     if code == target_code]

        # Predicted class per sample, kept as arrays (avoids int() on 1-element arrays).
        pred = output.max(1)[1].cpu().numpy()
        model_pred = model_output.max(1)[1].cpu().numpy()

        correct_preds = (pred == y.detach().cpu().numpy())
        correct_preds_count += correct_preds.sum()

        if indices_for_matching_code:
            matching_code_count += len(indices_for_matching_code)
            matching_agreement_count += int(
                (pred[indices_for_matching_code] == model_pred[indices_for_matching_code]).sum()
            )


accuracy = 100. * correct_preds_count / len(test_loader.dataset)
print(accuracy)

if matching_code_count:
    matching_agreement = 100. * matching_agreement_count / matching_code_count
    print('agreement with model on %d matching-code inputs: %.2f%%'
          % (matching_code_count, matching_agreement))
else:
    print('no test inputs share the activation code of top_codes[0]')



28.48
