-
Notifications
You must be signed in to change notification settings - Fork 5
/
2.1_five_layers_relu.py
92 lines (73 loc) · 3.47 KB
/
2.1_five_layers_relu.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import torch
from torch.autograd import Variable
from torchvision import datasets, transforms
import pytorchvisu
import numpy as np
# Data pipeline: transforms.ToTensor converts each PIL image into a torch Tensor.
transform = transforms.Compose([transforms.ToTensor()])

def _mnist_loader(train, batch_size):
    # Build a shuffled DataLoader over MNIST, downloading into ../datasets if needed.
    dataset = datasets.MNIST('../datasets', train=train, download=True,
                             transform=transform)
    return torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)

mnist_tr = _mnist_loader(True, 100)    # training batches of 100
mnist_te = _mnist_loader(False, 1000)  # test batches of 1000
# Model definition
def build_model(input_dim, output_dim):
    """Build a 5-layer fully-connected ReLU classifier.

    Layer widths: input_dim -> 200 -> 100 -> 60 -> 30 -> output_dim.

    The model returns raw logits. The trailing ``torch.nn.Softmax`` the
    original version appended was a bug: this model is trained with
    ``torch.nn.CrossEntropyLoss``, which applies log-softmax internally,
    so feeding it softmax probabilities double-normalises the output and
    flattens the gradients. Removing it does not affect callers that take
    an argmax of the output (as the training/eval code in this file does),
    because softmax is monotonic and preserves the argmax.

    Args:
        input_dim: number of input features (784 for flattened 28x28 MNIST).
        output_dim: number of classes (10 for MNIST).

    Returns:
        A ``torch.nn.Sequential`` mapping (batch, input_dim) to
        (batch, output_dim) logits.
    """
    model = torch.nn.Sequential()
    # NOTE: activations were previously misnamed "sigmoidN"; they are ReLUs.
    # ReLU modules hold no parameters, so renaming changes no state_dict keys.
    model.add_module("linear1", torch.nn.Linear(input_dim, 200, bias=True))
    model.add_module("relu1", torch.nn.ReLU())
    model.add_module("linear2", torch.nn.Linear(200, 100, bias=True))
    model.add_module("relu2", torch.nn.ReLU())
    model.add_module("linear3", torch.nn.Linear(100, 60, bias=True))
    model.add_module("relu3", torch.nn.ReLU())
    model.add_module("linear4", torch.nn.Linear(60, 30, bias=True))
    model.add_module("relu4", torch.nn.ReLU())
    model.add_module("linear5", torch.nn.Linear(30, output_dim, bias=True))
    return model
# Instantiate the MNIST model: 784 flattened pixels in, 10 class scores out.
model = build_model(784, 10)
optimizer = torch.optim.Adam(model.parameters(), lr=0.003)
# size_average=False: loss is summed over the batch rather than averaged
# (legacy spelling of reduction='sum').
loss_fn = torch.nn.CrossEntropyLoss(size_average=False)

# Flat numpy views of the last layer's weights and biases for visualisation.
# .numpy() shares storage with the tensors, so these track training updates.
state = model.state_dict()
w = state['linear5.weight'].numpy().reshape(-1)
b = state['linear5.bias'].numpy().reshape(-1)

datavis = pytorchvisu.MnistDataVis()
# Visualisation
def training_step(i, update_test_data, update_train_data):
    """Run one optimisation step; optionally push stats to the visualiser.

    Args:
        i: global step index (x-axis of the visualiser's curves).
        update_train_data: when True, record train accuracy/loss, the
            weight/bias histograms, and a rendered image of the batch.
        update_test_data: when True, also evaluate on one test batch and
            record test accuracy/loss.
    """
    # Each call builds a fresh iterator over the shuffled DataLoader and takes
    # its first batch, i.e. a random batch of 100 training images.
    x, y = next(iter(mnist_tr))

    optimizer.zero_grad()
    y_pred = model(x.view(-1, 28 * 28))
    loss = loss_fn(y_pred, y)
    loss.backward()
    optimizer.step()

    if update_train_data:
        # w and b are flat numpy views of the last layer's parameters (shared
        # storage); sorted copies feed the distribution histograms.
        w_ = np.sort(w)
        b_ = np.sort(b)
        x_np = x.numpy()
        y_pred_np = np.argmax(y_pred.detach().numpy(), axis=1)
        y_np = y.numpy()
        accuracy = np.count_nonzero(y_np == y_pred_np) / (1.0 * y_np.shape[0])
        # Loss is summed over the batch (size_average=False). Dividing the
        # train loss (batch 100) by 2 and the test loss (batch 1000) by 20
        # puts both curves on the same per-50-samples scale.
        # loss.item() replaces the pre-0.5 idiom loss.data[0], which raises
        # "invalid index of a 0-dim tensor" on current PyTorch.
        datavis.append_training_curves_data(i, accuracy, loss.item() / 2)
        datavis.append_data_histograms(i, w_, b_)
        im = pytorchvisu.numpy_format_mnist_images(x_np, y_pred_np, y_np)
        datavis.update_image1(im)
        print(str(i) + ": train accuracy: " + str(accuracy) + " training loss: " + str(loss.item()))

    if update_test_data:
        xt, yt = next(iter(mnist_te))  # one random batch of 1000 test images
        # Evaluation only: no_grad avoids building an autograd graph.
        with torch.no_grad():
            yt_pred = model(xt.view(-1, 28 * 28))
            loss_t = loss_fn(yt_pred, yt)
        xt_np = xt.numpy()
        yt_np = yt.numpy()
        yt_pred_np = np.argmax(yt_pred.numpy(), axis=1)
        accuracy = np.count_nonzero(yt_np == yt_pred_np) / (1.0 * yt_np.shape[0])
        datavis.append_test_curves_data(i, accuracy, loss_t.item() / 20)
        im = pytorchvisu.numpy_format_mnist_images(xt_np, yt_pred_np, yt_np)
        datavis.update_image2(im)
        print(str(i) + ": test accuracy: " + str(accuracy) + " test loss: " + str(loss_t.item()))
# Drive training through the visualiser: presumably it calls training_step
# 10000 times, refreshing train stats every 20 steps and test stats every
# 50 steps (semantics live in pytorchvisu — confirm there).
datavis.animate(training_step, iterations=10000, train_data_update_freq=20, test_data_update_freq=50, more_tests_at_start=True)
print("max test accuracy: " + str(datavis.get_max_test_accuracy()))