In [33]:
from src.config import RESULTS_FOLDER, get_config
from file_utils import load_most_recent_results, load_most_recent_model
from src.analysis import combine_all_trials, process_results, compute_top_codes
import numpy as np
from src.model import build_model
import os
import pickle as pk
import torch
import torch.nn.functional as F
from src.data import get_mnist_data


In [34]:
# Load saved experiment results from RESULTS_FOLDER (presumably the latest run,
# going by the helper's name). The returned object is used below as a mapping
# keyed by trial index: it is queried with .keys() and result_dict[0].
result_dict = load_most_recent_results(RESULTS_FOLDER)


In [35]:
# Aggregate the per-trial results, then pull the top-code summary for trial 0.
num_trials = len(result_dict)
combined_results = combine_all_trials(result_dict)
processed_result = process_results(combined_results)
# Keep the full compute_top_codes() result under its own name: `top_codes` is
# rebound further down to the plain list of code strings, and re-running this
# cell must not overwrite that list with a dict.
top_codes_summary = compute_top_codes(result_dict[0], NUM_TOP_CODES=5)
top_full_codes_info = top_codes_summary['post_train_code_histogram']


In [36]:
# Display the summary stored under 'post_train_code_histogram'
# (top code strings plus their mass / cumulative-mass figures).
top_full_codes_info

{'top_codes': ['01111111-11111011',
  '11111111-11111011',
  '01110111-11111011',
  '11010111-11110000',
  '01101111-11111011'],
 'cmass_top_code': [0.12426666666666666,
  0.2034,
  0.2744,
  0.31074999999999997,
  0.34401666666666664],
 'mass_top_code': [0.12426666666666666,
  0.07913333333333333,
  0.071,
  0.03635,
  0.03326666666666667],
 'ratio_1_0': 3.0944975923501135}

In [37]:
# Keep only the list of code strings; each string holds one run of 0/1 digits
# per '-'-separated segment (segments are treated as layers further down).
top_codes = top_full_codes_info['top_codes']
top_codes

['01111111-11111011',
 '11111111-11111011',
 '01110111-11111011',
 '11010111-11110000',
 '01101111-11111011']

In [38]:
# Load a saved model from RESULTS_FOLDER (presumably the latest one, per the
# helper's name); it is the network that gets linearised below.
model = load_most_recent_model(RESULTS_FOLDER)

In [39]:
def string_code_to_list(code_string):
    """Split a '-'-separated code string into one list of integer digits per segment.

    Args:
        code_string: Text such as ``'0111-1101'``; every character of every
            segment must be convertible with ``int``.

    Returns:
        A list with one inner list per segment, holding that segment's
        characters converted to ``int``.

    Side effects:
        Prints ``<code_string> -> <result>`` to stdout.
    """
    per_layer_bits = [
        [int(digit) for digit in segment]
        for segment in code_string.split('-')
    ]
    print(code_string, '->', per_layer_bits)
    return per_layer_bits

In [40]:
# Parse the first top code into per-layer lists of 0/1 ints
# (string_code_to_list also prints the conversion, shown below).
test = string_code_to_list(top_codes[0])


01111111-11111011 -> [[0, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 0, 1, 1]]


In [52]:
class LinearSoftMax(torch.nn.Module):
    """A single affine layer followed by log-softmax, with preset parameters.

    Args:
        input_size: Number of input features; inputs are reshaped to
            ``(-1, input_size)`` before the affine map.
        weights: NumPy array used as the layer's weight matrix.
        bias: NumPy array used as the layer's bias vector.
        output_size: Output width used to construct the underlying
            ``torch.nn.Linear`` before its parameters are replaced.

    Attributes:
        input_size: The ``input_size`` argument.
        weights, bias: Tensors created from the NumPy inputs with
            ``torch.from_numpy``.
        linear: ``torch.nn.Linear`` whose weight and bias parameters wrap
            those tensors.
    """

    def __init__(self, input_size, weights, bias, output_size = 10):
        super().__init__()
        self.input_size = input_size
        self.weights = torch.from_numpy(weights)
        self.bias = torch.from_numpy(bias)

        # Create the layer, then swap in the supplied parameters.
        affine = torch.nn.Linear(input_size, output_size)
        affine.weight = torch.nn.Parameter(self.weights)
        affine.bias = torch.nn.Parameter(self.bias)
        self.linear = affine

    def forward(self, x):
        """Return log-probabilities (log-softmax over dim 1) for ``x``."""
        flat = x.view(-1, self.input_size)
        return F.log_softmax(self.linear(flat), dim=1)

def NN_to_logreg(model, list_codes_per_layer):
    """Collapse a ReLU network into one linear-softmax model for a fixed activation code.

    With the on/off state of every ReLU unit fixed, each hidden layer acts as
    the affine map ``x -> D (W x + b)``, where ``D`` is the 0/1 diagonal matrix
    given by that layer's code. Composing these maps with the last layer gives
    a single weight matrix and bias vector, which are wrapped in
    ``LinearSoftMax``.

    Args:
        model: Network exposing ``first_layer``, ``list_layers`` (the hidden
            layers after the first one), ``last_layer`` and ``input_size``.
            It is switched to eval mode as a side effect.
        list_codes_per_layer: One sequence of 0/1 entries per hidden layer, in
            layer order (e.g. the output of ``string_code_to_list``).

    Returns:
        A ``LinearSoftMax`` built from ``model.input_size`` and the combined
        weight and bias.

    Raises:
        ValueError: If a layer code does not have exactly one entry per
            output unit of its layer.
    """
    model.eval()
    weights = []
    biases = []
    for i, layer_code in enumerate(list_codes_per_layer):
        # TODO: expose every hidden layer through model.list_layers so this
        # special case for the first layer can go away.
        layer = model.first_layer if i == 0 else model.list_layers[i - 1]
        # .cpu() so that parameters living on an accelerator can be converted.
        layer_weight = layer.weight.detach().cpu().numpy()
        layer_bias = layer.bias.detach().cpu().numpy()

        # float64 mask: keeps the masked parameters in float64, as a dense
        # np.eye-based cancel matrix would.
        mask = np.asarray(layer_code, dtype=np.float64)
        if mask.shape != (layer_weight.shape[0],):
            raise ValueError(
                "activation code for layer %d has shape %s, expected (%d,)"
                % (i, mask.shape, layer_weight.shape[0])
            )
        # Scaling row r by mask[r] equals multiplying by diag(mask) on the left.
        weights.append(mask[:, None] * layer_weight)
        biases.append(mask * layer_bias)

    # Last layer feeds the softmax directly; no ReLU mask applies to it.
    weights.append(model.last_layer.weight.detach().cpu().numpy())
    biases.append(model.last_layer.bias.detach().cpu().numpy())

    # Fold the layers together from the output side towards the input.
    combined_weight = weights[-1]
    combined_bias = biases[-1]
    for layer_weight, layer_bias in zip(reversed(weights[:-1]), reversed(biases[:-1])):
        # Update the bias first: it needs the product of the layers above
        # this one, i.e. combined_weight before this layer is folded in.
        combined_bias = combined_bias + np.matmul(combined_weight, layer_bias)
        combined_weight = np.matmul(combined_weight, layer_weight)
    return LinearSoftMax(model.input_size, combined_weight, combined_bias)


# Build the linear-softmax model corresponding to `model` under the
# activation code parsed into `test`.
linearized = NN_to_logreg(model, test)

In [53]:
# Sanity check: run the linearised model on the first test batch only.
train_loader, test_loader = get_mnist_data(get_config())

for X, y in test_loader:
    # `linearized` holds float64 parameters (they come from NumPy float64
    # products), so the inputs must be float64 as well.
    X = X.double()
    # Inference only: no need to record an autograd graph.
    with torch.no_grad():
        output = linearized(X)
    # Index of the largest log-probability per sample, as plain Python ints.
    # tolist() avoids calling int() on 1-element ndarrays, which NumPy >= 1.25
    # deprecates.
    pred = output.max(dim=1)[1].tolist()
    print(pred)
    break



KeyboardInterrupt: 

In [11]:
# Inspect the model's parameter tensors by name.
# NOTE(review): this cell's execution count (11) is lower than the cells above
# (33-53), so the output shown may come from an earlier run or a different
# model; re-run after a kernel restart to confirm.
model.state_dict()

OrderedDict([('first_layer.weight',
              tensor([[ 0.0348,  0.0054,  0.0562,  ...,  0.0215,  0.0411,  0.0022],
                      [-0.0271, -0.0093,  0.0090,  ...,  0.0196, -0.0156,  0.0276],
                      [-0.0236,  0.0028, -0.0280,  ..., -0.0048, -0.0084, -0.0630],
                      ...,
                      [-0.0767, -0.0373, -0.0776,  ..., -0.0936, -0.0500, -0.0382],
                      [-0.0531, -0.0543, -0.0130,  ..., -0.0515, -0.0503, -0.0757],
                      [-0.0551, -0.0915, -0.0704,  ..., -0.1047, -0.0995, -0.1102]])),
             ('first_layer.bias',
              tensor([-0.0552,  0.0068,  0.0495,  0.0316, -0.0067,  0.1698,  0.1308,  0.1733])),
             ('last_layer.weight',
              tensor([[ 1.4483,  0.0207, -1.3224, -0.3605, -0.0021,  0.0046,  0.5091, -0.4646],
                      [-1.5897, -0.8683,  0.9067,  1.2290, -0.1305,  0.0967,  0.4637,  0.1305],
                      [ 0.3136, -0.0405, -0.6119,  0.2937, -0.9546,  0.0