In [1]:
from resnet20 import ResNetCIFAR
from lenet import LeNet5, LeNet300
from train_util import train, test, train_gsm_unstructured, train_gsm_structured
from summary import summary
import torch
import numpy as np
from final_pruning import final_unstruct_pruning, final_struct_pruning
import torch.nn as nn
import matplotlib.pyplot as plt

from evaluate_util import compute_conv_flops

# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [2]:
# Epoch count handed (as `epochs=EPOCHS`) to the train / train_gsm_* calls in this
# notebook; those calls are currently commented out in favour of saved checkpoints.
EPOCHS = 50

### Base model LeNet-5 training with SGD

In [3]:
# Fresh LeNet-5 instance, moved onto the selected device.
net = LeNet5().to(device)

# Baseline SGD training is disabled here because the next cell restores a saved
# checkpoint; uncomment the call below to train from scratch instead.
# train(net, epochs=EPOCHS, batch_size=128, lr=0.1, reg=1e-4, net_name = 'lenet_5_base.pt')

In [4]:
# Evaluate the LeNet-5 baseline: restore its saved weights, then report the test
# loss/accuracy, the per-layer parameter summary and the FLOP count.
# map_location keeps a checkpoint saved from a GPU loadable when this kernel
# fell back to the CPU.
net.load_state_dict(torch.load("saved_models/lenet_5_base.pt", map_location=device))
test(net)
summary(net)
# Only request the CUDA path when `device` actually resolved to the GPU.
print('FLOPs: ', compute_conv_flops(net, cuda=(device == 'cuda'), prune=True))

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw
Processing...
Done!






Test Loss=0.0218, Test accuracy=0.9943
Layer id	Type		Parameter	Non-zero parameter	Sparsity(\%)
1		Convolutional_Param	500		500			0.000000
1		Convolutional_Filter	20		20			0.000000
2		Convolutional_Param	25000		25000			0.000000
2		Convolutional_Filter	50		50			0.000000
3		Linear		400000		400000			0.000000
3		tLinear_Filter	800		800			0.000000
4		Linear		5000		5000			0.000000
4		tLinear_Filter	500		500			0.000000
Total nonzero parameters: 430500
Total parameters: 430500
Total sparsity: 0.000000
FLOPs:  2293000.0


### LeNet-5 Model Trained with Unstructured GSM SGD

In [5]:
# Fraction of weights kept non-zero for the unstructured LeNet-5 run (0.15 matches the
# 0.85 total sparsity reported for the pruned model in this section).
# NOTE: this name is reassigned in later sections, so re-run this cell before
# re-running this section out of order.
NON_ZERO_RATIO = 0.15

In [6]:
# Start the unstructured GSM run from the pretrained LeNet-5 baseline.
net = LeNet5()
net = net.to(device)
# map_location keeps a checkpoint saved from a GPU loadable on a CPU-only kernel.
net.load_state_dict(torch.load("saved_models/lenet_5_base.pt", map_location=device))


# GSM training is disabled because a saved checkpoint is used further down;
# uncomment the call below to retrain instead.
# train_gsm_unstructured(net, epochs=EPOCHS, batch_size=256, lr=0.03, nonzero_ratio = NON_ZERO_RATIO,
#                        reg=1e-4, net_name = 'lenet_5_unstruct_gsm_before_pruning.pt')

<All keys matched successfully>

In [7]:
# Optional final-pruning step (disabled): uncomment to reload the GSM-trained
# checkpoint and run final_unstruct_pruning on it. Presumably this writes
# saved_models/lenet_5_unstruct_gsm_after_pruning.pt, which the next cell loads —
# TODO confirm against final_pruning.py.
# net.load_state_dict(torch.load("saved_models/lenet_5_unstruct_gsm_before_pruning.pt"))
# final_unstruct_pruning(net, nonzero_ratio = NON_ZERO_RATIO, 
#                      net_name = "lenet_5_unstruct_gsm_after_pruning.pt")

In [8]:
# Evaluate the unstructured-pruned LeNet-5: restore the pruned weights, then report
# the test loss/accuracy, the per-layer sparsity summary and the FLOP count.
# map_location keeps a checkpoint saved from a GPU loadable on a CPU-only kernel.
net.load_state_dict(
    torch.load("saved_models/lenet_5_unstruct_gsm_after_pruning.pt", map_location=device)
)
test(net)
summary(net)
# Only request the CUDA path when `device` actually resolved to the GPU.
print('FLOPs: ', compute_conv_flops(net, cuda=(device == 'cuda'), prune=True))

Test Loss=0.0271, Test accuracy=0.9924
Layer id	Type		Parameter	Non-zero parameter	Sparsity(\%)
1		Convolutional_Param	500		159			0.682000
1		Convolutional_Filter	20		8			0.600000
2		Convolutional_Param	25000		867			0.965320
2		Convolutional_Filter	50		29			0.420000
3		Linear		400000		60785			0.848037
3		tLinear_Filter	800		459			0.426250
4		Linear		5000		2764			0.447200
4		tLinear_Filter	500		383			0.234000
Total nonzero parameters: 64575
Total parameters: 430500
Total sparsity: 0.850000
FLOPs:  660978.0


### LeNet-5 Model Trained with Structured GSM SGD

In [9]:
# Non-zero ratio passed to the structured GSM training / pruning calls for LeNet-5.
# NOTE(review): the pruned model in this section reports ~0.897 total sparsity, so this
# value is evidently not the overall fraction of surviving weights — confirm how
# train_gsm_structured / final_struct_pruning interpret it.
# This name is reassigned per section; re-run this cell before re-running this section.
NON_ZERO_RATIO = 0.08

In [10]:
# Start the structured GSM run from the pretrained LeNet-5 baseline.
net = LeNet5()
net = net.to(device)
# map_location keeps a checkpoint saved from a GPU loadable on a CPU-only kernel.
net.load_state_dict(torch.load("saved_models/lenet_5_base.pt", map_location=device))

# Uncomment to resume from already GSM-trained weights instead. The path uses the
# saved_models/ directory, like every other checkpoint load in this notebook.
# net.load_state_dict(torch.load("saved_models/lenet_5_base_struct_gsm_before_pruning.pt", map_location=device))

# GSM training is disabled because a saved checkpoint is used further down;
# uncomment the call below to retrain instead.
# train_gsm_structured(net, epochs=EPOCHS, batch_size=256, lr=0.03, nonzero_ratio = NON_ZERO_RATIO,
#                      reg=1e-4, net_name = 'lenet_5_base_struct_gsm_before_pruning.pt')

<All keys matched successfully>

In [11]:
# Optional final-pruning step (disabled): uncomment to reload the structured
# GSM-trained checkpoint and run final_struct_pruning on it. Presumably this writes
# saved_models/lenet_5_struct_gsm_after_pruning.pt, which the next cell loads —
# TODO confirm against final_pruning.py.
# net.load_state_dict(torch.load("saved_models/lenet_5_base_struct_gsm_before_pruning.pt"))
# final_struct_pruning(net, nonzero_ratio = NON_ZERO_RATIO, 
#                      net_name = "lenet_5_struct_gsm_after_pruning.pt")

In [12]:
# Evaluate the structured-pruned LeNet-5: restore the pruned weights, then report
# the test loss/accuracy, the per-layer sparsity summary and the FLOP count.
# map_location keeps a checkpoint saved from a GPU loadable on a CPU-only kernel.
net.load_state_dict(
    torch.load("saved_models/lenet_5_struct_gsm_after_pruning.pt", map_location=device)
)
test(net)
summary(net)
# Only request the CUDA path when `device` actually resolved to the GPU.
print('FLOPs: ', compute_conv_flops(net, cuda=(device == 'cuda'), prune=True))

Test Loss=0.0289, Test accuracy=0.9915
Layer id	Type		Parameter	Non-zero parameter	Sparsity(\%)
1		Convolutional_Param	500		175			0.650000
1		Convolutional_Filter	20		7			0.650000
2		Convolutional_Param	25000		3500			0.860000
2		Convolutional_Filter	50		7			0.860000
3		Linear		400000		40287			0.899282
3		tLinear_Filter	800		91			0.886250
4		Linear		5000		320			0.936000
4		tLinear_Filter	500		32			0.936000
Total nonzero parameters: 44282
Total parameters: 430500
Total sparsity: 0.897138
FLOPs:  365524.0


### Base model LeNet-300 training with SGD

In [13]:
# Fresh LeNet-300 instance, moved onto the selected device.
net = LeNet300().to(device)

# Baseline SGD training is disabled here because the next cell restores a saved
# checkpoint; uncomment the call below to train from scratch instead.
# train(net, epochs=EPOCHS, batch_size=128, lr=0.1, reg=1e-4, net_name = 'lenet_300_base.pt')

In [14]:
# Evaluate the LeNet-300 baseline: restore its saved weights, then report the test
# loss/accuracy, the per-layer parameter summary and the FLOP count.
# map_location keeps a checkpoint saved from a GPU loadable on a CPU-only kernel.
net.load_state_dict(torch.load("saved_models/lenet_300_base.pt", map_location=device))
test(net)
summary(net)
# Only request the CUDA path when `device` actually resolved to the GPU.
print('FLOPs: ', compute_conv_flops(net, cuda=(device == 'cuda'), prune=True))

Test Loss=0.0496, Test accuracy=0.9855
Layer id	Type		Parameter	Non-zero parameter	Sparsity(\%)
1		Linear		235200		235200			0.000000
1		tLinear_Filter	784		784			0.000000
2		Linear		30000		30000			0.000000
2		tLinear_Filter	300		300			0.000000
3		Linear		1000		1000			0.000000
3		tLinear_Filter	100		100			0.000000
Total nonzero parameters: 266200
Total parameters: 266200
Total sparsity: 0.000000
FLOPs:  266200


### LeNet-300 Model Trained with Unstructured GSM SGD

In [15]:
# Fraction of weights kept non-zero for the unstructured LeNet-300 run (0.2 matches the
# 0.80 total sparsity reported for the pruned model in this section).
# This name is reassigned per section; re-run this cell before re-running this section.
NON_ZERO_RATIO = 0.2

In [16]:
# Start the unstructured GSM run from the pretrained LeNet-300 baseline.
net = LeNet300()
net = net.to(device)
# map_location keeps a checkpoint saved from a GPU loadable on a CPU-only kernel.
net.load_state_dict(torch.load("saved_models/lenet_300_base.pt", map_location=device))

# GSM training is disabled because a saved checkpoint is used further down;
# uncomment the call below to retrain instead.
# train_gsm_unstructured(net, epochs=EPOCHS, batch_size=256, lr=0.03, nonzero_ratio = NON_ZERO_RATIO,
#                        reg=1e-4, net_name = 'lenet_300_unstruct_gsm_before_pruning.pt')

<All keys matched successfully>

In [17]:
# Optional final-pruning step (disabled): uncomment to reload the GSM-trained
# checkpoint and run final_unstruct_pruning on it. Presumably this writes
# saved_models/lenet_300_unstruct_gsm_after_pruning.pt, which the next cell loads —
# TODO confirm against final_pruning.py.
# net.load_state_dict(torch.load("saved_models/lenet_300_unstruct_gsm_before_pruning.pt"))
# final_unstruct_pruning(net, nonzero_ratio = NON_ZERO_RATIO, 
#                      net_name = "lenet_300_unstruct_gsm_after_pruning.pt")

In [18]:
# Evaluate the unstructured-pruned LeNet-300: restore the pruned weights, then report
# the test loss/accuracy, the per-layer sparsity summary and the FLOP count.
# map_location keeps a checkpoint saved from a GPU loadable on a CPU-only kernel.
net.load_state_dict(
    torch.load("saved_models/lenet_300_unstruct_gsm_after_pruning.pt", map_location=device)
)
test(net)
summary(net)
# Only request the CUDA path when `device` actually resolved to the GPU.
print('FLOPs: ', compute_conv_flops(net, cuda=(device == 'cuda'), prune=True))

Test Loss=0.0530, Test accuracy=0.9853
Layer id	Type		Parameter	Non-zero parameter	Sparsity(\%)
1		Linear		235200		41712			0.822653
1		tLinear_Filter	784		533			0.320153
2		Linear		30000		10743			0.641900
2		tLinear_Filter	300		255			0.150000
3		Linear		1000		785			0.215000
3		tLinear_Filter	100		95			0.050000
Total nonzero parameters: 53240
Total parameters: 266200
Total sparsity: 0.800000
FLOPs:  160603


### LeNet-300 Model Trained with Structured GSM SGD

In [19]:
# Non-zero ratio passed to the structured GSM training / pruning calls for LeNet-300.
# NOTE(review): the pruned model in this section reports ~0.711 total sparsity (about
# 0.289 non-zero), close to but not exactly 1 - 0.3 — confirm how the structured
# routines interpret this value.
# This name is reassigned per section; re-run this cell before re-running this section.
NON_ZERO_RATIO = 0.3

In [20]:
# Start the structured GSM run from the pretrained LeNet-300 baseline.
net = LeNet300()
net = net.to(device)
# map_location keeps a checkpoint saved from a GPU loadable on a CPU-only kernel.
net.load_state_dict(torch.load("saved_models/lenet_300_base.pt", map_location=device))

# GSM training is disabled because a saved checkpoint is used further down;
# uncomment the call below to retrain instead.
# train_gsm_structured(net, epochs=EPOCHS, batch_size=256, lr=0.03, nonzero_ratio = NON_ZERO_RATIO,
#                      reg=1e-4, net_name = 'lenet_300_base_struct_gsm_before_pruning.pt')

<All keys matched successfully>

In [21]:
# Optional final-pruning step (disabled): uncomment to reload the structured
# GSM-trained checkpoint and run final_struct_pruning on it. Presumably this writes
# saved_models/lenet_300_struct_gsm_after_pruning.pt, which the next cell loads —
# TODO confirm against final_pruning.py.
# net.load_state_dict(torch.load("saved_models/lenet_300_base_struct_gsm_before_pruning.pt"))
# final_struct_pruning(net, nonzero_ratio = NON_ZERO_RATIO, 
#                      net_name = "lenet_300_struct_gsm_after_pruning.pt")

In [22]:
# Evaluate the structured-pruned LeNet-300: restore the pruned weights, then report
# the test loss/accuracy and the per-layer sparsity summary.
# map_location keeps a checkpoint saved from a GPU loadable on a CPU-only kernel.
net.load_state_dict(
    torch.load("saved_models/lenet_300_struct_gsm_after_pruning.pt", map_location=device)
)
test(net)
summary(net)
# The FLOP count is the cell's last expression, so it is shown as the cell output.
# Only request the CUDA path when `device` actually resolved to the GPU.
compute_conv_flops(net, cuda=(device == 'cuda'), prune=True)

Test Loss=0.0755, Test accuracy=0.9803
Layer id	Type		Parameter	Non-zero parameter	Sparsity(\%)
1		Linear		235200		70752			0.699184
1		tLinear_Filter	784		236			0.698980
2		Linear		30000		5500			0.816667
2		tLinear_Filter	300		55			0.816667
3		Linear		1000		640			0.360000
3		tLinear_Filter	100		64			0.360000
Total nonzero parameters: 76892
Total parameters: 266200
Total sparsity: 0.711150


76940