In [5]:
import torch

"""
A fully-connected ReLU network with one hidden layer, trained to predict y from x
by minimizing squared Euclidean distance.
This implementation uses the nn package from PyTorch to build the network.
PyTorch autograd makes it easy to define computational graphs and take gradients,
but raw autograd can be a bit too low-level for defining complex neural networks;
this is where the nn package can help. The nn package defines a set of Modules,
which you can think of as a neural network layer that produces output from
input and may have some trainable weights or other state.
"""

'\nA fully-connected ReLU network with one hidden layer, trained to predict y from x\nby minimizing squared Euclidean distance.\nThis implementation uses the nn package from PyTorch to build the network.\nPyTorch autograd makes it easy to define computational graphs and take gradients,\nbut raw autograd can be a bit too low-level for defining complex neural networks;\nthis is where the nn package can help. The nn package defines a set of Modules,\nwhich you can think of as a neural network layer that has produces output from\ninput and may have some trainable weights or other state.\n'

In [18]:
# Directory in which the model checkpoint and its text dump are stored.
# The value is concatenated directly with file names further down, so the
# trailing '/' must be kept.
# NOTE(review): machine-specific absolute path -- adjust before running elsewhere.
ROOT = '/home/ansuini/repos/WellTemperedSGD/'

In [6]:
device = torch.device('cpu')
# device = torch.device('cuda') # Uncomment this to run on GPU

# Seed PyTorch's random number generator so that the random data below (and
# any randomly initialised weights created afterwards) are the same on every
# "Restart & Run All".
SEED = 0
torch.manual_seed(SEED)

# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Create random Tensors to hold inputs and outputs
x = torch.randn(N, D_in, device=device)
y = torch.randn(N, D_out, device=device)

In [7]:
# Use the nn package to define our model as a sequence of layers. nn.Sequential
# is a Module which contains other Modules, and applies them in sequence to
# produce its output. Each Linear Module computes output from input using a
# linear function, and holds internal Tensors for its weight and bias.
# After constructing the model we use the .to() method to move it to the
# desired device.
model = torch.nn.Sequential(
    torch.nn.Linear(D_in, H),
    torch.nn.ReLU(),
    torch.nn.Linear(H, D_out),
).to(device)

# The nn package also contains definitions of popular loss functions; in this
# case we will use Mean Squared Error (MSE) as our loss function.
# reduction='sum' adds up the squared errors instead of averaging them; it is
# the replacement for the deprecated size_average=False argument.
loss_fn = torch.nn.MSELoss(reduction='sum')

# Step size used by the manual gradient-descent update in the training loop.
learning_rate = 1e-4

# Print parameters to text file


Usually you do not want to do this, since there is a method that
saves the current state in a Python dictionary.
But if you do want to, I suggest defining a custom function
that lets you write the parameters out from this dictionary.
In what follows I sketch such a function.

In [13]:
# model.parameters() is a generator over the model's parameters;
# walk through it and show each one in turn.
learnable_tensors = model.parameters()
for tensor in learnable_tensors:
    print(tensor)

Parameter containing:
tensor([[-2.2907e-02, -1.1424e-02, -1.6427e-02,  ...,  2.5289e-02,
         -1.1739e-02,  7.4289e-03],
        [-2.6876e-02,  1.1051e-02,  1.5004e-02,  ...,  2.7995e-02,
          7.2402e-03,  9.9765e-04],
        [ 1.3872e-02,  3.8234e-03, -1.5813e-02,  ..., -1.0161e-03,
         -1.0347e-02, -2.1643e-02],
        ...,
        [-2.5930e-02, -1.7957e-02, -2.8090e-02,  ...,  4.5477e-03,
          1.7450e-02,  2.9173e-02],
        [-6.5370e-03,  3.9465e-03,  2.0835e-02,  ...,  2.0754e-02,
         -2.4976e-02, -3.0415e-03],
        [-1.6758e-02,  2.9056e-02, -8.1800e-03,  ..., -2.5358e-02,
          2.0175e-02,  5.2555e-03]])
Parameter containing:
tensor(1.00000e-02 *
       [-1.2625,  0.8135, -2.2165, -2.3456,  0.0207,  1.6068,  1.1609,
        -3.0683, -1.3745, -2.9605,  0.2081,  2.5556, -1.6344,  0.5483,
        -0.3025, -1.5084, -1.0039, -0.1315,  1.2407, -2.4769,  1.2136,
         0.2585,  1.2187,  0.2941, -2.7129,  1.4293, -1.0003,  2.5029,
         0.4624,  0

In [20]:
# Save the current state: take the model's state dict and write it to disk.
# (Author's note: torch.save_state_dict() did not work in this case, so
# torch.save is called on the state dict instead.)
current_state = model.state_dict()
torch.save(current_state, ROOT + 'my_model.pt')

In [44]:
# Load the previously saved state dict back from disk.
saved_state_path = ROOT + 'my_model.pt'
state = torch.load(saved_state_path)

In [98]:
# Path of the text file that will receive the dump of the state dict.
filename = ROOT + 'my_model.txt'
def printState2Txt(filename, state):
    """Write every tensor of a state dict to a plain-text file.

    For each entry the file receives the key on its own line, then an empty
    line, then the values. Every value is followed by a single space:

    * 0-d tensor: one line holding the single value.
    * 1-d tensor: one line holding all values.
    * n-d tensor (n >= 2): one line per index along the first dimension,
      with the remaining dimensions flattened in row-major order. For a
      2-d tensor this is simply one line per row.

    Args:
        filename: path of the text file to create (overwritten if it exists).
        state: mapping from parameter name (str) to tensor, such as the
            object returned by ``model.state_dict()``.
    """
    with open(filename, 'w') as f:
        for name, tensor in state.items():
            f.write(name + '\n')
            f.write('\n')

            # Convert to nested Python lists once instead of calling .item()
            # on every element; the resulting Python scalars are the same.
            if tensor.dim() == 0:
                rows = [[tensor.item()]]
            elif tensor.dim() == 1:
                rows = [tensor.tolist()]
            else:
                # Collapse everything after the first dimension so tensors of
                # rank 3 and above are written too instead of being skipped.
                rows = tensor.flatten(1).tolist()

            for row in rows:
                f.write(''.join(str(value) + ' ' for value in row))
                f.write('\n')

In [99]:
# Write the loaded state dict to the text file named by `filename`.
printState2Txt(filename,state)

In [4]:
for t in range(500):
    # Forward pass: compute predicted y by passing x to the model. Module objects
    # override the __call__ operator so you can call them like functions. When
    # doing so you pass a Tensor of input data to the Module and it produces
    # a Tensor of output data.
    y_pred = model(x)

    # Compute and print loss. We pass Tensors containing the predicted and true
    # values of y, and the loss function returns a Tensor containing the loss.
    loss = loss_fn(y_pred, y)
    print(t, loss.item())

    # Zero the gradients before running the backward pass.
    model.zero_grad()

    # Backward pass: compute gradient of the loss with respect to all the learnable
    # parameters of the model. Internally, the parameters of each Module are stored
    # in Tensors with requires_grad=True, so this call will compute gradients for
    # all learnable parameters in the model.
    loss.backward()

    # Update the weights using gradient descent. The update runs under
    # torch.no_grad() so it is not recorded by autograd; inside that context the
    # parameter can be modified in place directly, without going through .data.
    with torch.no_grad():
        for param in model.parameters():
            param -= learning_rate * param.grad

0 625.9913330078125
1 579.6355590820312
2 539.7922973632812
3 504.93304443359375
4 473.69317626953125
5 445.5487976074219
6 419.7396240234375
7 395.89495849609375
8 373.8634338378906
9 353.36865234375
10 334.2891845703125
11 316.36187744140625
12 299.6120300292969
13 283.8233642578125
14 268.7804870605469
15 254.46823120117188
16 240.87664794921875
17 227.9961395263672
18 215.75332641601562
19 204.11007690429688
20 193.05709838867188
21 182.54612731933594
22 172.54734802246094
23 163.026611328125
24 153.9678955078125
25 145.37112426757812
26 137.21580505371094
27 129.48358154296875
28 122.16249084472656
29 115.22429656982422
30 108.65581512451172
31 102.43675231933594
32 96.5379638671875
33 90.97488403320312
34 85.6992416381836
35 80.72434997558594
36 76.01497650146484
37 71.57112121582031
38 67.39192962646484
39 63.45368576049805
40 59.75044631958008
41 56.26570510864258
42 52.986900329589844
43 49.90156173706055
44 47.003631591796875
45 44.28755569458008
46 41.732177734375
47 39.3276