# Rewrite of `convexnn_pytorch_stepsize_fig.py` with Lottery
Borrows from https://github.com/rahulvigneswaran/Lottery-Ticket-Hypothesis-in-Pytorch/blob/master/main.py

In [1]:
import copy
from datetime import datetime
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import random
import time
from tqdm.auto import tqdm, trange

import torch
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.init as init

from helperfunctions import *

# Parameters and Args
All settings live in a plain dict `P` instead of `argparse`, which is awkward to use inside a notebook.

In [11]:
# Experiment configuration. Everything is kept in one insertion-ordered dict
# so it can be printed and saved as a whole.
P = {
    'seed': 42,              # seed applied to random, numpy and torch below
    'device': 'cuda',        # or 'cpu'
    'verbose': True,
    'P': 4096,               # number of hyperplane arrangements and number of neurons
}
P['num_neurons'] = P['P']    # the hidden width is tied to the number of arrangements
P.update({
    'num_classes': 10,
    'dim_in': 3*32*32,
    'batch_size': 1000,
    'beta': 1e-3,            # regularization parameter (in loss)
    'dir': os.path.abspath(''),
    'print_freq': 5,
})

# Nonconvex (regular) network settings
P.update({
    'ncvx_solver': 'sgd',          # pick: "sgd", "adam", "adagrad", "adadelta", "LBFGS"
    'ncvx_schedule': 0,            # LR schedule (0: Nothing, 1: ReduceLROnPlateau, 2: ExponentialLR)
    'ncvx_LBFGS_param': (10, 4),   # params for solver LBFGS
    'ncvx_num_epochs': 2,
    'ncvx_learning_rate': 1e-3,
    'ncvx_train_len': 50000,
    'ncvx_test_len': 10000,
    'ncvx_prune_epochs': 25,
    'ncvx_prune_rounds': 5,
    'ncvx_prune_perc': 0.2,
})

# Convex network settings
P.update({
    'cvx_solver': 'sgd',           # pick: "sgd", "adam", "adagrad", "adadelta", "LBFGS"
    'cvx_LBFGS_param': (10, 4),    # params for solver LBFGS
    'cvx_num_epochs': 100,
    'cvx_learning_rate': 5e-7,
    'cvx_rho': 1e-2,
    'cvx_test_len': 10000,
    'cvx_prune_epochs': 25,
    'cvx_prune_rounds': 5,
    'cvx_prune_perc': 0.2,
})

# Seed every RNG used in the notebook with the same value
random.seed(P['seed'])
np.random.seed(P['seed'])
torch.manual_seed(P['seed'])

<torch._C.Generator at 0x2551b91c110>

# Load Data
Downloads CIFAR10 if not already downloaded.  

In [3]:
# Per-channel normalisation applied after ToTensor().
# NOTE(review): presumably the CIFAR-10 channel mean/std — confirm the source of these constants.
normalize = transforms.Normalize(mean=[0.507, 0.487, 0.441], std=[0.267, 0.256, 0.276])
cifar_transform = transforms.Compose([transforms.ToTensor(), normalize,])

# download=True fetches the archive into P['dir'] only when it is not there yet
train_dataset = datasets.CIFAR10(P['dir'], train=True, download=True, transform=cifar_transform)
test_dataset = datasets.CIFAR10(P['dir'], train=False, download=True, transform=cifar_transform)

# Extract the whole training set as a single batch. The batch size follows the
# dataset length instead of a hard-coded 50000, so exactly one batch is produced
# whatever the dataset size.
dummy_loader = torch.utils.data.DataLoader(train_dataset, batch_size=len(train_dataset),
                                           shuffle=False, pin_memory=True, sampler=None)
A, y = next(iter(dummy_loader))
Apatch = A.detach().clone()  # independent copy that keeps the image layout (n, c, h, w)

# Flatten every image into one row: (n, c*h*w)
A = A.view(A.shape[0], -1)
n, dim_in = A.size()

P["cvx_n"] = n  # number of training samples, used by the convex training section

print("Apatch (Detached A) Shape:",Apatch.shape)
print("A shape:", A.shape)

Files already downloaded and verified
Files already downloaded and verified
Apatch (Detached A) Shape: torch.Size([50000, 3, 32, 32])
A shape: torch.Size([50000, 3072])


# Standard Non-Convex Network
Consists of a typical 2-layer network definition, the training and test losses, and the training loop.

In [30]:
class FCNetwork(nn.Module):
    """Two-layer fully connected ReLU network without bias terms.

    layer1 maps the flattened input to ``num_neurons`` hidden units followed by
    a ReLU; layer2 maps the hidden units to ``num_classes`` outputs.
    """

    def __init__(self, num_neurons=4096, num_classes=10, input_dim=3072):
        super().__init__()
        self.num_classes = num_classes
        # layer1 stays an nn.Sequential so the state-dict key remains "layer1.0.weight"
        hidden_linear = nn.Linear(input_dim, num_neurons, bias=False)
        self.layer1 = nn.Sequential(hidden_linear, nn.ReLU())
        self.layer2 = nn.Linear(num_neurons, num_classes, bias=False)

    def forward(self, x):
        """Flatten each sample of ``x`` to a vector and return the network output."""
        flat = x.reshape(x.size(0), -1)
        hidden = self.layer1(flat)
        return self.layer2(hidden)
    
def save_model(model,path):
    """Write the model's state dict (not the pickled module) to ``path``."""
    weights = model.state_dict()
    torch.save(weights, path)
    
def load_fc_model(path,P):
    """Rebuild an FCNetwork from the sizes in ``P`` and load weights saved by save_model.

    path : location of a state-dict checkpoint
    P    : parameter dict; reads "num_neurons", "num_classes" and "dim_in"

    Returns the model, which is left on the CPU.
    """
    model = FCNetwork(P["num_neurons"],P["num_classes"],P["dim_in"])
    # The fresh model lives on the CPU, so map the checkpoint there as well:
    # without map_location a checkpoint saved from CUDA cannot be loaded on a
    # machine that has no GPU.
    state_dict = torch.load(path, map_location="cpu")
    model.load_state_dict(state_dict)
    return model

def loss_func_primal(yhat, y, model, beta):
    """Squared-error loss plus the weight regularisers of the primal (nonconvex) problem.

    yhat  : network outputs
    y     : targets with the same shape as yhat
    model : module whose parameters are regularised, in model.parameters() order
    beta  : regularisation strength

    The first parameter contributes beta/2 times its squared l2 (Frobenius)
    norm. Every later parameter contributes beta/2 times the sum of the squared
    l1 norms of its columns. For FCNetwork these are the first-layer and the
    second-layer weight matrices respectively.
    """
    loss = 0.5 * torch.norm(yhat - y)**2
    for layer, p in enumerate(model.parameters()):
        if layer == 0:
            loss = loss + beta/2 * torch.norm(p)**2
        else:
            # l1 norm of every column in one reduction, instead of a Python
            # loop that launches a separate op per column
            loss = loss + beta/2 * torch.sum(torch.norm(p, p=1, dim=0)**2)
    return loss

def validation_primal(model, testloader, beta, device):
    """Evaluate the primal loss and the number of correct predictions over a loader.

    model      : network called as model(x)
    testloader : iterable of (inputs, labels) batches
    beta       : regularisation strength forwarded to loss_func_primal
    device     : device the batches are moved to

    Returns (summed loss, number of correct predictions) as Python floats; the
    caller divides by the number of samples. Note the regulariser is added once
    per batch, as in the per-batch training loss.
    """
    test_loss = 0.0
    test_correct = 0.0
    # Pure evaluation: no autograd graph is needed (matches validation_cvxproblem)
    with torch.no_grad():
        for _x, _y in testloader:
            _x = _x.float().to(device)
            _y = _y.float().to(device)
            yhat = model(_x).float()
            loss = loss_func_primal(yhat, one_hot(_y).to(device), model, beta)
            test_loss += loss.item()
            hits = torch.eq(torch.argmax(yhat, dim=1), torch.squeeze(_y)).float().sum()
            # accumulate as a float so an empty loader still returns numbers
            test_correct += hits.item()
    return test_loss, test_correct

def ncvx_train_step(model, ds, optimizer, P, d_out, freeze=True):
    """Run one epoch of mini-batch training and log per-batch statistics.

    model     : network being trained
    ds        : iterable of (inputs, labels) training batches
    optimizer : optimizer stepped once per batch
    P         : parameter dict; reads "device" and "beta"
    d_out     : log dict; one entry per batch is appended to "losses", "accs" and "times"
    freeze    : when True, gradients of (numerically) zero weights are zeroed
                so pruned weights stay pruned

    Returns the index of the last batch processed (number of batches minus
    one), or -1 when ``ds`` yields no batches.
    """
    EPS = 1e-6
    device = P["device"]
    ix = -1  # keeps the return value defined for an empty loader
    # Iterate the loader that was passed in, not the notebook-global train_loader
    for ix, (_x, _y) in enumerate(ds):
        optimizer.zero_grad()
        _x = _x.to(device)
        _y = _y.to(device)
        yhat = model(_x).float()

        loss = loss_func_primal(yhat, one_hot(_y).to(device), model, P["beta"])/len(_y)
        correct = torch.eq(torch.argmax(yhat, dim=1), torch.squeeze(_y)).float().sum()/len(_y)

        loss.backward()
        # Freeze pruned weights by zeroing their gradients (if zero, stay zero).
        # The mask is taken on the magnitude: a plain `weight < EPS` test would
        # also freeze every negative weight.
        if freeze:
            for name, p in model.named_parameters():
                if 'weight' in name and p.grad is not None:
                    p.grad.data.masked_fill_(p.data.abs() < EPS, 0)
        optimizer.step()
        d_out["losses"].append(loss.item())
        d_out["accs"].append(correct.item())
        d_out["times"].append(time.time())
    return ix

def ncvx_train(model, ds, ds_test, P, prune=True, re_init=False, init_state_dict=None, mask=None):
    """Train the nonconvex network, optionally in lottery-ticket pruning rounds.

    model           : network to train (moved to P["device"])
    ds              : training loader of (inputs, labels) batches
    ds_test         : test loader of (inputs, labels) batches
    P               : parameter dict (ncvx_* entries, "device", "beta", "verbose", "print_freq")
    prune           : if True run P["ncvx_prune_rounds"] rounds of
                      P["ncvx_prune_epochs"] epochs, pruning before each round;
                      otherwise one round of P["ncvx_num_epochs"] epochs
    re_init         : falsy -> reset surviving weights with og_init(model, mask, init_state_dict);
                      callable -> called as re_init(model, mask);
                      other truthy value -> a module-level helper named ``re_init`` is used
    init_state_dict : initial weights handed to og_init
    mask            : pruning mask handed to the pruning / re-initialisation helpers

    Returns a DataFrame with one row per training iteration: train loss and
    accuracy, the test loss and accuracy of the enclosing epoch, wall time per
    iteration, epoch, round, and (when pruning) the value reported by
    print_nonzeros for the round.
    """
    num_epochs = P["ncvx_prune_epochs"] if prune else P["ncvx_num_epochs"]
    rounds = P["ncvx_prune_rounds"] if prune else 1

    device = torch.device(P["device"])
    model.to(device)
    optimizer = get_optimizer(model,P["ncvx_solver"],P["ncvx_learning_rate"],P["ncvx_LBFGS_param"])
    scheduler = get_scheduler(P["ncvx_schedule"],optimizer,P["verbose"])

    d_out = {"losses":[], "accs":[], "losses_test":[],"accs_test":[], "times":[time.time()], "epoch": [], "round": []}
    if prune:
        d_out["nonzero_perc"] = []

    for p in range(rounds):
        nonzero_perc = None
        if prune:
            prune_by_percentile(model,mask,P["ncvx_prune_perc"])
            if re_init:
                # The parameter shadows any helper of the same name, so a plain
                # True cannot be called; fall back to the module-level helper.
                reinit_fn = re_init if callable(re_init) else globals().get("re_init")
                if not callable(reinit_fn):
                    raise TypeError("re_init must be a callable taking (model, mask), "
                                    "or a re_init helper must exist at module level")
                reinit_fn(model, mask)
            else:
                og_init(model,mask,init_state_dict)
            # fresh optimizer state for the re-initialised weights
            optimizer = get_optimizer(model,P["ncvx_solver"],P["ncvx_learning_rate"],P["ncvx_LBFGS_param"])
            scheduler = get_scheduler(P["ncvx_schedule"],optimizer,P["verbose"])

            print("Pruning Round [{:>2}/{:}]".format(p,rounds))
            nonzero_perc = print_nonzeros(model)

        for i in tqdm(range(num_epochs)):
            model.train()
            train_iters = ncvx_train_step(model, ds, optimizer, P, d_out, freeze=prune)

            model.eval()
            lt,at = validation_primal(model, ds_test, P["beta"], device)
            # train_iters is the last batch index, so the epoch added train_iters + 1 rows
            d_out["losses_test"] += [lt/P["ncvx_test_len"]]*(train_iters + 1)
            d_out["accs_test"] += [at/P["ncvx_test_len"]]*(train_iters + 1)
            d_out["epoch"] += [i]*(train_iters + 1)
            d_out["round"] += [p]*(train_iters + 1)
            if prune:
                # same row count as every other column, otherwise the DataFrame
                # cannot be built
                d_out["nonzero_perc"] += [nonzero_perc]*(train_iters + 1)

            if i % P["print_freq"] == 0 or i == num_epochs - 1:
                print("Epoch [{:>2}/{:}], loss: {:.3f} acc: {:.3f}, TEST loss: {:.3f} test acc: {:.3f}".format(
                       i,num_epochs,d_out["losses"][-1],d_out["accs"][-1],d_out["losses_test"][-1],d_out["accs_test"][-1]))

            # Schedule codes: 1 = ReduceLROnPlateau (needs a metric), 2 = ExponentialLR (does not)
            if P["ncvx_schedule"] == 1:
                scheduler.step(d_out["losses"][-1])
            elif P["ncvx_schedule"] > 0:
                scheduler.step()
    # "times" holds one start stamp plus one stamp per iteration
    d_out["times"] = np.diff(d_out["times"])
    return pd.DataFrame.from_dict(d_out)

# Nonconvex (Regular) Training

In [13]:
# Mini-batch loaders for the nonconvex run; only the training loader shuffles.
_loader_kwargs = dict(batch_size=P["batch_size"], pin_memory=True)
train_loader = DataLoader(train_dataset, shuffle=True, sampler=None, **_loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **_loader_kwargs)
print_params(P, True, True)

Parameter          : Value
seed               : 42
device             : cuda
verbose            : True
P                  : 4096
num_neurons        : 4096
num_classes        : 10
dim_in             : 3072
batch_size         : 1000
beta               : 0.001
dir                : C:\Users\trevo\Documents\repos\spring22\convex_nn
print_freq         : 5
ncvx_solver        : sgd
ncvx_schedule      : 0
ncvx_LBFGS_param   : (10, 4)
ncvx_num_epochs    : 2
ncvx_learning_rate : 0.001
ncvx_train_len     : 50000
ncvx_test_len      : 10000
ncvx_prune_epochs  : 25
ncvx_prune_rounds  : 5
ncvx_prune_perc    : 0.2


In [31]:
# Filename stem for all nonconvex artefacts; named after the nonconvex solver
# (it used to read the convex solver key).
ncvx_save_loc = "models/ncvx_nn{:}_solver{:}_l1e-3".format(P['num_neurons'],P['ncvx_solver'])
os.makedirs(os.path.dirname(ncvx_save_loc), exist_ok=True)  # torch.save does not create folders
model = FCNetwork(P["num_neurons"], P["num_classes"], P["dim_in"])

# Save initial model (as a state dict, the format load_fc_model reads back)
model.apply(weight_init)
initial_state_dict = copy.deepcopy(model.state_dict())
save_model(model,ncvx_save_loc+"_INITIAL.pth")

# Initial training, without pruning
results_ncvx = ncvx_train(model, train_loader, test_loader, P, prune=False)

# Save model after P["ncvx_num_epochs"] epochs
initial_state_dict_post = copy.deepcopy(model.state_dict())
save_model(model,ncvx_save_loc+"_EPOCHS{:}.pth".format(P["ncvx_num_epochs"]))

  0%|          | 0/2 [00:00<?, ?it/s]

Epoch [ 0/2], loss: 0.779 acc: 0.238, TEST loss: 0.784 test acc: 0.240
Epoch [ 1/2], loss: 0.649 acc: 0.288, TEST loss: 0.665 test acc: 0.288


In [32]:
# Pruning


Unnamed: 0,losses,accs,losses_test,accs_test,times,epoch,round
0,4.085722,0.086,0.783738,0.2398,1.044231,0,0
1,2.484919,0.111,0.783738,0.2398,1.006202,0,0
2,2.297691,0.110,0.783738,0.2398,1.089338,0,0
3,2.945541,0.132,0.783738,0.2398,0.999180,0,0
4,2.323293,0.125,0.783738,0.2398,0.999911,0,0
...,...,...,...,...,...,...,...
95,0.662155,0.290,0.664654,0.2882,0.989944,1,0
96,0.649460,0.293,0.664654,0.2882,0.977210,1,0
97,0.668582,0.241,0.664654,0.2882,1.002912,1,0
98,0.664169,0.280,0.664654,0.2882,0.991915,1,0


## Convex Network
These haven't been cleaned up as much, given that I'm not 100% sure how they work.
TODO: Figure out the hyperplane arrangement stuff

In [126]:
def generate_conv_sign_patterns(A2, P, verbose=False, filter_size=3):
    """Sample sign patterns from random, spatially local (convolution-style) filters.

    A2          : data of shape (n, c, p1, p2)
    P           : number of patterns to sample
    verbose     : print how many patterns were generated
    filter_size : side length of the square patch that receives Gaussian weights
                  (default 3, the previously hard-coded value)

    For each pattern a vector is drawn that is zero everywhere except on a
    randomly placed filter_size x filter_size patch (across all channels), and
    the pattern is ``A u >= 0`` with A the flattened data. Patterns are not
    de-duplicated.

    Returns (number of patterns, list of sign patterns of length n, list of
    generating vectors of shape (c*p1*p2, 1)).
    """
    n, c, p1, p2 = A2.shape
    fs = int(filter_size)
    if fs < 1 or fs > min(p1, p2):
        raise ValueError("filter_size must be between 1 and min(p1, p2)")
    d = c*p1*p2
    A = A2.reshape(n, d)
    fsize = fs*fs*c  # number of non-zero entries per generating vector
    unique_sign_pattern_list = []
    u_vector_list = []

    for _ in range(P):
        # top-left corner of the patch, chosen so the patch fits in the image
        ind1 = np.random.randint(0, p1-fs+1)
        ind2 = np.random.randint(0, p2-fs+1)
        u1p = np.zeros((c, p1, p2))
        u1p[:, ind1:ind1+fs, ind2:ind2+fs] = np.random.normal(0, 1, (fsize, 1)).reshape(c, fs, fs)
        u1 = u1p.reshape(d, 1)
        # np.matmul (not @) so that tensor inputs are handled as before
        sampled_sign_pattern = (np.matmul(A, u1) >= 0)[:, 0]
        unique_sign_pattern_list.append(sampled_sign_pattern)
        u_vector_list.append(u1)

    if verbose:
        print("Number of unique sign patterns generated: " + str(len(unique_sign_pattern_list)))
    return len(unique_sign_pattern_list),unique_sign_pattern_list, u_vector_list

def generate_sign_patterns(A, P, verbose=False):
    """Sample P sign patterns of the data matrix A from Gaussian directions.

    A d x P standard-normal matrix is drawn in one call; column j is the j-th
    direction and ``A @ column_j >= 0`` is the j-th sign pattern.

    Returns (number of patterns, list of sign patterns, list of directions).
    """
    _, d = A.shape
    directions = np.random.normal(0, 1, (d, P))
    pattern_matrix = (np.matmul(A, directions) >= 0)

    # split both matrices column by column
    sign_pattern_list = [pattern_matrix[:, col] for col in range(P)]
    u_vector_list = [directions[:, col] for col in range(P)]

    count = len(sign_pattern_list)
    if verbose:
        print("Number of sign patterns generated: " + str(count))
    return count, sign_pattern_list, u_vector_list

In [127]:
class custom_cvx_layer(torch.nn.Module):
    """Layer of the convex reformulation with two weight tensors, ``v`` and ``w``.

    Both tensors have shape (num_neurons P, input_dim d, num_classes C) and
    start at zero.
    """

    def __init__(self, num_neurons=4096, num_classes=10, input_dim=3072):
        super().__init__()
        self.num_classes = num_classes

        weight_shape = (num_neurons, input_dim, num_classes)  # P x d x C
        self.v = torch.nn.Parameter(torch.zeros(weight_shape))
        self.w = torch.nn.Parameter(torch.zeros(weight_shape))

    def forward(self, x, sign_patterns):
        """Return the N x C predictions for inputs ``x`` gated by ``sign_patterns`` (N x P)."""
        flat = x.view(x.shape[0], -1)  # N x d
        # (N x d) @ (P x d x C) broadcasts over the leading P axis -> P x N x C
        per_pattern = torch.matmul(flat, self.v - self.w)
        # bring the sample axis to the front so it lines up with the N x P x 1 gates
        per_sample = per_pattern.permute(1, 0, 2)  # N x P x C
        gates = sign_patterns.unsqueeze(2)  # N x P x 1
        gated = torch.mul(gates, per_sample)
        # sum the contributions of all patterns
        return torch.sum(gated, dim=1, keepdim=False)  # N x C
    
def get_nonconvex_cost(y, model, _x, beta, device):
    """Evaluate the nonconvex (ReLU) objective using the convex model's weights.

    The prediction is the sum over patterns of relu(X v) - relu(X w); the cost
    is half the squared error against ``y`` plus ``beta`` times the
    regulariser (squared l2 norms of ``v`` and squared l1 norms of ``w``, both
    taken along the input dimension).
    """
    flat = _x.view(_x.shape[0], -1)
    zero = torch.Tensor([0]).to(device)

    def relu_response(weights):
        # (N x d) @ (P x d x C) -> P x N x C, clipped at zero
        return torch.max(torch.matmul(flat, weights), zero)

    response_v = relu_response(model.v)
    response_w = relu_response(model.w)
    prediction = torch.sum(response_v - response_w, dim=0, keepdim=False)  # N x C

    fit_term = 0.5 * torch.norm(prediction - y)**2
    v_penalty = torch.sum(torch.norm(model.v, dim=1)**2)
    w_penalty = torch.sum(torch.norm(model.w, p=1, dim=1)**2)
    return fit_term + beta * (v_penalty + w_penalty)

def loss_func_cvxproblem(yhat, y, model, _x, sign_patterns, beta, rho, device):
    """Penalised objective of the convex problem.

    Sum of three terms: half the squared prediction error, ``beta`` times the
    l2 norms of ``v`` and ``w`` taken along the input dimension, and ``rho``
    times a hinge penalty on sign-pattern constraint violations.
    """
    flat = _x.view(_x.shape[0], -1)
    gates = sign_patterns.unsqueeze(2)  # N x P x 1
    zero = torch.Tensor([0]).to(device)

    def violation(weights):
        # Class-summed weights projected on the data: (N x d) @ (P x d x 1) -> P x N x 1,
        # then permuted to N x P x 1 to match the gates.
        projected = torch.matmul(flat, torch.sum(weights, dim=2, keepdim=True)).permute(1, 0, 2)
        gated = torch.mul(gates, projected)
        # -2*D*Xu + Xu is positive exactly where the sign of Xu contradicts the pattern
        return torch.sum(torch.max(-2*gated + projected, zero))

    # term 1: data fit
    loss = 0.5 * torch.norm(yhat - y)**2
    # term 2: group norms
    loss = loss + beta * torch.sum(torch.norm(model.v, dim=1))
    loss = loss + beta * torch.sum(torch.norm(model.w, dim=1))
    # term 3: constraint penalties for v, then w
    loss = loss + rho * violation(model.v)
    loss = loss + rho * violation(model.w)
    return loss

def validation_cvxproblem(model, testloader, u_vectors, beta, rho, device):
    """Evaluate the convex model over a loader.

    model      : convex layer called as model(x, sign_patterns)
    testloader : iterable of (inputs, labels) batches
    u_vectors  : numpy array (input_dim x num_patterns) of the directions that
                 generated the training sign patterns; test patterns are
                 recomputed from it as ``x @ u_vectors >= 0``
    beta, rho  : loss coefficients forwarded to loss_func_cvxproblem
    device     : device the batches are moved to

    Returns (summed convex loss, number of correct predictions, summed
    nonconvex cost) as Python floats; the caller divides by the number of
    samples.
    """
    test_loss = 0.0
    test_correct = 0.0
    test_noncvx_cost = 0.0

    # Convert and upload the directions once instead of once per batch
    u_mat = torch.from_numpy(u_vectors).float().to(device)

    with torch.no_grad():
        for _x, _y in testloader:
            _x = _x.to(device)
            _y = _y.to(device)
            _x = _x.view(_x.shape[0], -1)
            _z = (torch.matmul(_x, u_mat) >= 0)

            # single forward pass (the model used to be evaluated twice per batch)
            yhat = model(_x, _z).float()
            target = one_hot(_y).to(device)

            loss = loss_func_cvxproblem(yhat, target, model, _x, _z, beta, rho, device)

            test_loss += loss.item()
            test_correct += torch.eq(torch.argmax(yhat, dim=1), _y).float().sum().item()
            test_noncvx_cost += get_nonconvex_cost(target, model, _x, beta, device).item()

    return test_loss, test_correct, test_noncvx_cost


def cvxproblem(ds, ds_test, save_path='', num_epochs=100, num_neurons=4096, beta=1e-3, 
                          learning_rate=1e-2, batch_size=1000, rho=1e-2, u_vectors=None, 
                          solver_type='sgd', LBFGS_param=(10,4), verbose=False, num_classes=10,
                          dim_in=3072, n=50000, test_len=10000, device='cuda'):
    """
    Train the convex reformulation and log per-iteration / per-epoch statistics.

    ds            : training DataLoader yielding (inputs, labels, sign patterns)
    ds_test       : test DataLoader yielding (inputs, labels)
    save_path     : str, path to save the model at (doesn't save if '')
    num_epochs    : int
    num_neurons   : int, number of sign patterns / size of the convex layer
    beta          : float, regularization scalar on the norms of the weight tensors
    learning_rate : float
    batch_size    : int, must match the batch size of `ds` (it sizes the log arrays)
    rho           : float, coefficient to penalize the violated constraints
    u_vectors     : numpy array (dim_in x num_neurons) of the directions that generated
                    the sign patterns; required, used to build test-time sign patterns
    solver_type   : any in ['sgd','adam','adagrad','adadelta','LBFGS']
    LBFGS_param   : (int,int) history size and max iterations for solver_type='LBFGS'
    verbose       : bool, forwarded to the ReduceLROnPlateau scheduler
    num_classes   : int
    dim_in        : int, input dimension (32*32*3) for cifar10
    n             : int, number of training samples; the per-iteration logs have
                    num_epochs * ceil(n / batch_size) entries
    test_len      : int, number of test samples; summed test metrics are divided by it
    device        : str, 'cuda' or 'cpu'

    Returns a dict with per-iteration arrays "losses", "accs", "noncvx_losses" and
    "times", per-epoch arrays "losses_test", "accs_test" and "noncvx_losses_test"
    (index 0 is the evaluation before training), and the trained "model".
    """
    if u_vectors is None:
        raise ValueError("u_vectors is required to compute test-time sign patterns")
    if save_path != '':
        # Create the target folder up front so a missing directory cannot make
        # the save fail after all epochs have already run.
        save_dir = os.path.dirname(save_path)
        if save_dir:
            os.makedirs(save_dir, exist_ok=True)

    device = torch.device(device)
    model = custom_cvx_layer(num_neurons=num_neurons, num_classes=num_classes, input_dim=dim_in).to(device)
    
    optimizer = get_optimizer(model,solver_type,learning_rate,LBFGS_param)
    
    # arrays for saving the loss and accuracy 
    losses = np.zeros((int(num_epochs*np.ceil(n / batch_size))))
    accs = np.zeros(losses.shape)
    noncvx_losses = np.zeros(losses.shape)
    losses_test = np.zeros((num_epochs+1))
    accs_test = np.zeros((num_epochs+1))
    noncvx_losses_test = np.zeros((num_epochs+1))
    
    # one start stamp plus one stamp per iteration; differenced on return
    times = np.zeros((losses.shape[0]+1))
    times[0] = time.time()
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, verbose=verbose, factor=0.5, eps=1e-12)
    
    model.eval() # Evaluation mode
    # Compute loss on entire test set before any training
    losses_test[0], accs_test[0], noncvx_losses_test[0] = validation_cvxproblem(model, ds_test, u_vectors, beta, rho, device) 
    
    iter_no = 0
    for i in tqdm(range(num_epochs)):
        model.train() # Training mode
        for ix, (_x, _y, _z) in enumerate(ds):
            _x = _x.to(device)
            _y = _y.to(device)
            _z = _z.to(device)
            #========forward pass=====================================
            yhat = model(_x, _z).float()
            target = one_hot(_y).to(device)
            loss = loss_func_cvxproblem(yhat, target, model, _x,_z, beta, rho, device)/len(_y)
            correct = torch.eq(torch.argmax(yhat, dim=1), _y).float().sum()/len(_y) # accuracy
            #=======backward pass=====================================
            optimizer.zero_grad() # zero the gradients on each pass before the update
            loss.backward() # backpropagate the loss through the model
            optimizer.step() # update the weights using the gradients

            losses[iter_no] = loss.item() # loss on the minibatch
            accs[iter_no] = correct.item()
            # Diagnostic only: no autograd graph is needed for the nonconvex cost
            with torch.no_grad():
                noncvx_losses[iter_no] = get_nonconvex_cost(target, model, _x, beta, device).item()/len(_y)
            iter_no += 1
            times[iter_no] = time.time()
        
        model.eval()
        # get test loss and accuracy
        losses_test[i+1], accs_test[i+1], noncvx_losses_test[i+1] = validation_cvxproblem(model, ds_test, u_vectors, beta, rho, device)
        
        if i % 5 == 0 or i == num_epochs - 1:
            print("Epoch [{:>2}/{:}], noncvx_loss: {:.3f} loss: {:.3f} acc: {:.3f}, TEST noncvx_loss: {:.3f} loss: {:.3f} test acc: {:.3f}".format(i, num_epochs,
                    noncvx_losses[iter_no-1], losses[iter_no-1], accs[iter_no-1], noncvx_losses_test[i+1]/test_len, 
                    losses_test[i+1]/test_len, accs_test[i+1]/test_len))
        # the plateau scheduler watches the last minibatch loss of the epoch
        scheduler.step(losses[iter_no-1])
        
    if save_path != '':
        save_model(model,save_path)
    return {"losses":losses, "accs":accs, "noncvx_losses":noncvx_losses, "losses_test":losses_test/test_len,
            "accs_test":accs_test/test_len, "noncvx_losses_test":noncvx_losses_test/test_len, "times":np.diff(times), "model":model}

# Convex Training

In [129]:
# Print the parameter table for the convex run.
# NOTE(review): judging by the printed tables, the three flags appear to select
# the shared / ncvx_* / cvx_* groups — confirm against helperfunctions.print_params.
print_params(P,True,False,True)

Parameter         : Value
seed              : 42
device            : cuda
verbose           : True
P                 : 4096
num_neurons       : 4096
num_classes       : 10
dim_in            : 3072
batch_size        : 1000
beta              : 0.001
dir               : C:\Users\trevo\Documents\repos\spring22\convex_nn
cvx_solver        : sgd
cvx_LBFGS_param   : (10, 4)
cvx_num_epochs    : 100
cvx_learning_rate : 5e-07
cvx_rho           : 0.01
cvx_test_len      : 10000
cvx_n             : 50000


In [130]:
# Generate sign patterns for convex network
# Sample one sign pattern per neuron from the flattened training data
num_neurons, sign_pattern_list, u_vector_list = generate_sign_patterns(A, P["P"], P["verbose"])

# Stack the patterns as integer rows (num_neurons x n) and the generating
# directions as columns (dim_in x num_neurons)
sign_patterns = np.array([pattern.int().data.numpy() for pattern in sign_pattern_list])
u_vectors = np.asarray(u_vector_list).reshape((num_neurons, A.shape[1])).T

# Training batches carry (inputs, labels, per-sample sign patterns)
ds_train = DataLoader(
    PrepareData3D(X=A, y=y, z=sign_patterns.T),
    batch_size=P["batch_size"],
    shuffle=True,
)

test_loader = DataLoader(test_dataset, batch_size=P["batch_size"], shuffle=False, pin_memory=True)

Number of sign patterns generated: 4096


In [131]:
# Filename stem shared by the convex model file and its CSV log
cvx_save_loc = "models/cvx_nn{:}_solver{:}_lr5e-7".format(P['num_neurons'],P['cvx_solver'])

# Keyword arguments for cvxproblem, taken from the parameter dict
cvx_kwargs = dict(
    save_path=cvx_save_loc,
    num_epochs=P["cvx_num_epochs"],
    num_neurons=P["num_neurons"],
    beta=P["beta"],
    learning_rate=P["cvx_learning_rate"],
    batch_size=P["batch_size"],
    rho=P["cvx_rho"],
    u_vectors=u_vectors,
    solver_type=P["cvx_solver"],
    LBFGS_param=P["cvx_LBFGS_param"],
    verbose=P["verbose"],
    num_classes=P["num_classes"],
    dim_in=P["dim_in"],
    n=n,
    test_len=P['cvx_test_len'],
    device=P["device"],
)
results_cvx = cvxproblem(ds_train, test_loader, **cvx_kwargs)

  0%|          | 0/100 [00:00<?, ?it/s]

Epoch [ 0/100], noncvx_loss: 0.413 loss: 0.373 acc: 0.447, TEST noncvx_loss: 0.411 loss: 0.366 test acc: 0.470
Epoch [ 1/100], noncvx_loss: 0.419 loss: 0.346 acc: 0.522, TEST noncvx_loss: 0.422 loss: 0.355 test acc: 0.491
Epoch [ 2/100], noncvx_loss: 0.427 loss: 0.335 acc: 0.540, TEST noncvx_loss: 0.428 loss: 0.351 test acc: 0.503
Epoch [ 3/100], noncvx_loss: 0.423 loss: 0.314 acc: 0.616, TEST noncvx_loss: 0.431 loss: 0.347 test acc: 0.512
Epoch [ 4/100], noncvx_loss: 0.431 loss: 0.313 acc: 0.602, TEST noncvx_loss: 0.433 loss: 0.344 test acc: 0.518
Epoch [ 5/100], noncvx_loss: 0.430 loss: 0.300 acc: 0.649, TEST noncvx_loss: 0.439 loss: 0.345 test acc: 0.510
Epoch [ 6/100], noncvx_loss: 0.433 loss: 0.291 acc: 0.666, TEST noncvx_loss: 0.437 loss: 0.341 test acc: 0.523
Epoch [ 7/100], noncvx_loss: 0.434 loss: 0.285 acc: 0.686, TEST noncvx_loss: 0.441 loss: 0.340 test acc: 0.523
Epoch [ 8/100], noncvx_loss: 0.433 loss: 0.274 acc: 0.715, TEST noncvx_loss: 0.442 loss: 0.340 test acc: 0.528
E

Epoch [74/100], noncvx_loss: 0.436 loss: 0.105 acc: 0.988, TEST noncvx_loss: 0.454 loss: 0.350 test acc: 0.538
Epoch [75/100], noncvx_loss: 0.436 loss: 0.107 acc: 0.988, TEST noncvx_loss: 0.453 loss: 0.350 test acc: 0.539
Epoch [76/100], noncvx_loss: 0.438 loss: 0.109 acc: 0.986, TEST noncvx_loss: 0.454 loss: 0.351 test acc: 0.539
Epoch [77/100], noncvx_loss: 0.440 loss: 0.104 acc: 0.988, TEST noncvx_loss: 0.456 loss: 0.352 test acc: 0.538
Epoch [78/100], noncvx_loss: 0.442 loss: 0.106 acc: 0.992, TEST noncvx_loss: 0.454 loss: 0.351 test acc: 0.535
Epoch [79/100], noncvx_loss: 0.442 loss: 0.104 acc: 0.989, TEST noncvx_loss: 0.455 loss: 0.352 test acc: 0.539
Epoch [80/100], noncvx_loss: 0.442 loss: 0.100 acc: 0.991, TEST noncvx_loss: 0.455 loss: 0.352 test acc: 0.540
Epoch [81/100], noncvx_loss: 0.442 loss: 0.103 acc: 0.987, TEST noncvx_loss: 0.454 loss: 0.352 test acc: 0.541
Epoch [82/100], noncvx_loss: 0.443 loss: 0.100 acc: 0.992, TEST noncvx_loss: 0.455 loss: 0.353 test acc: 0.536
E

# Save Parameters (USE SAME FILENAME STYLE)

In [None]:
# Store the parameter dict next to the model files, using the same naming scheme
param_save_loc = 'models/nn{:}_solver{:}_lr_PARAMS.json'.format(
    P['num_neurons'],
    P['cvx_solver'],
)
save_params(P, param_save_loc)

# Save Training Data (Uses same filename style as model files)

In [118]:
# NonConvex output to DataFrame
# NonConvex output to DataFrame
# ncvx_train returns a DataFrame that already has one row per training
# iteration, with the test metrics and epoch index repeated per row. It has no
# "model" column: the trained network is the `model` object that was passed in
# and trained in place.
ncvx_model = model

# batches per epoch, kept for reference when reading the log
ncvx_iters = int(np.ceil(P["ncvx_train_len"] / P["batch_size"]))

df_ncvx = results_ncvx

In [None]:
# Write the nonconvex training log to <ncvx_save_loc>.csv, the same stem as the model files
df_ncvx.to_csv(ncvx_save_loc+".csv")

In [None]:
# Convex output to DataFrame
# Take the trained model out of the results so only array-valued entries remain
cvx_model = results_cvx.pop("model")

# Test metrics are logged once per epoch (index 0 is the pre-training
# evaluation); drop that entry and repeat the rest once per iteration so they
# line up with the per-iteration training logs.
cvx_iters = int(np.ceil(P["cvx_n"] / P["batch_size"]))
for _metric in ("losses_test", "accs_test", "noncvx_losses_test"):
    results_cvx[_metric] = np.repeat(results_cvx[_metric][1:], cvx_iters)
results_cvx["epoch"] = np.repeat(np.arange(P["cvx_num_epochs"]), cvx_iters)

df_cvx = pd.DataFrame.from_dict(results_cvx)

In [None]:
# Write the convex training log to <cvx_save_loc>.csv, the same stem as the model file
df_cvx.to_csv(cvx_save_loc+".csv")

# Lottery Ticket 
Borrows from https://github.com/rahulvigneswaran/Lottery-Ticket-Hypothesis-in-Pytorch/blob/master/main.py

# TODO: 
 * Better plotting