In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import torch

### Tensors

#### Basics

In [3]:
# Allocate a 5x4 tensor without initializing it; the values shown are
# whatever happened to be in that memory.
torch.empty(5, 4)

tensor([[ 3.0919e+10,  4.5804e-41, -4.1639e-34,  3.0701e-41],
        [-4.0676e-34,  3.0701e-41, -4.1162e-34,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
        [ 1.4013e-45,  0.0000e+00,  3.3771e-43,  0.0000e+00],
        [-4.1168e-34,  3.0701e-41, -4.1172e-34,  3.0701e-41]])

In [4]:
# 5x4 tensor of random samples drawn uniformly from [0, 1).
torch.rand(5, 4)

tensor([[0.0852, 0.3397, 0.1428, 0.8152],
        [0.4049, 0.6252, 0.8189, 0.8183],
        [0.0893, 0.2852, 0.0370, 0.4465],
        [0.2315, 0.8034, 0.2854, 0.8236],
        [0.8572, 0.0638, 0.7866, 0.3811]])

In [5]:
# 5x4 tensor with every element set to zero.
torch.zeros(5, 4)

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])

In [9]:
# Build a tensor from nested Python lists. The literals are ints, so the
# result is an integer tensor (note the absence of decimal points below).
x = torch.tensor([[1, 2, 3], [4, 5, 6]])
x

tensor([[1, 2, 3],
        [4, 5, 6]])

In [10]:
# size() reports the shape as a torch.Size: 2 rows by 3 columns here.
x.size()

torch.Size([2, 3])

In [11]:
# Keep a random 5x4 float tensor around for the conversion examples below.
y = torch.rand(5, 4)
y

tensor([[0.1635, 0.8862, 0.3673, 0.2578],
        [0.4794, 0.2664, 0.6679, 0.9998],
        [0.5770, 0.4683, 0.5257, 0.0643],
        [0.5916, 0.3146, 0.1784, 0.0361],
        [0.3267, 0.2900, 0.1372, 0.4784]])

In [12]:
# Convert the tensor to nested Python lists and keep only the first row.
y.tolist()[:1]

[[0.1635279655456543,
  0.8862454891204834,
  0.36734676361083984,
  0.2578318119049072]]

In [13]:
# Expose the tensor as a NumPy array; the dtype carries over (float32 here).
y.numpy()

array([[ 0.16352797,  0.88624549,  0.36734676,  0.25783181],
       [ 0.47940516,  0.2664358 ,  0.6679309 ,  0.99979913],
       [ 0.57697415,  0.46834332,  0.52573955,  0.06427157],
       [ 0.59158486,  0.31462371,  0.17836815,  0.03613251],
       [ 0.32673222,  0.28996468,  0.13719589,  0.47843015]], dtype=float32)

#### GPU

In [6]:
# True when PyTorch can see a usable CUDA device on this machine.
torch.cuda.is_available()

True

In [None]:
# Use the GPU when one is present; otherwise stay on the CPU so this cell
# still runs on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# .to() returns a tensor on the target device; the product below is computed
# on that same device.
x = x.to(device)
xx = x * x

In [None]:
# NumPy conversion needs a CPU tensor, so move the result back first.
xx.to('cpu').numpy()

### Autograd

#### Grad setup

In [91]:
# Leaf tensor that autograd will track. The float dtype is required:
# integer tensors cannot have requires_grad=True.
x = torch.tensor([3, 1], requires_grad=True, dtype=torch.float)

In [97]:
# An op on a tracked tensor yields a result carrying a grad_fn that records
# how it was produced.
y = x + 2
y

tensor([5., 3.], grad_fn=<AddBackward>)

In [98]:
# y * y has shape (2,) and the random term has shape (1, 2); broadcasting
# makes z a (1, 2) tensor, as the output shows.
z = y * y + torch.rand(1, 2)
z

tensor([[25.9221,  9.5707]], grad_fn=<ThAddBackward>)

In [80]:
# requires_grad_() sets the flag in place. z was computed from x, which
# already requires grad, so the flag reads True either way.
z.requires_grad_(True)
z.requires_grad

True

In [99]:
# Reduce to a scalar so backward() can be called without an explicit
# gradient argument.
out = z.mean()
out

tensor(17.7464, grad_fn=<MeanBackward1>)

In [100]:
# Backpropagate from the scalar; the result is added into x.grad.
out.backward()

In [101]:
# Gradient of out with respect to x, summed over every backward() call so far.
# NOTE(review): a single pass from a fresh x gives x + 2 = [5., 3.]; the
# recorded [10., 6.] looks like two accumulated passes - confirm by
# re-running from a fresh kernel.
x.grad

tensor([10.,  6.])

In [76]:
# Gradients accumulate across backward() calls, so clear them in place
# before the next backward pass.
x.grad.zero_()

tensor([0., 0.])

#### Join existing gradient

In [84]:
# Rebuild the small graph from a fresh leaf so that x.grad starts out empty.
x = torch.tensor([3.0, 1.0], requires_grad=True)
y = x + 2
z = y * y + torch.rand(1, 2)
out = torch.mean(z)

# Supply the upstream gradient explicitly, as if `out` fed into a larger
# graph; every entry of x.grad is scaled by this factor.
ready_gradient = torch.tensor(10.0)
out.backward(gradient=ready_gradient)

x.grad

tensor([50., 30.])

#### Turning off gradient

In [85]:
# x was created with requires_grad=True, so this prints True.
print(x.requires_grad)
# Results computed from a tracked tensor are tracked as well.
print((x ** 2).requires_grad)

# Inside no_grad() new results are not tracked, so this prints False.
with torch.no_grad():
    print((x ** 2).requires_grad)

True
True
False


### Models

In [None]:
dtype = torch.float
device = torch.device("cpu")

# Batch size, input width, hidden width, output width.
N, D_in, H, D_out = 64, 1000, 100, 10

# Random inputs and regression targets.
x = torch.randn(N, D_in, device=device, dtype=dtype)
y = torch.randn(N, D_out, device=device, dtype=dtype)

# Weights of a two-layer network, tracked by autograd.
w1 = torch.randn(D_in, H, device=device, dtype=dtype, requires_grad=True)
w2 = torch.randn(H, D_out, device=device, dtype=dtype, requires_grad=True)

learning_rate = 1e-6
for t in range(500):
    # Forward pass: linear -> ReLU (clamp at zero) -> linear.
    hidden = torch.clamp(torch.mm(x, w1), min=0)
    y_pred = torch.mm(hidden, w2)

    # Sum of squared errors.
    loss = torch.sum((y_pred - y) ** 2)
    print(t, loss.item())

    loss.backward()

    # Plain gradient-descent step. The update runs under no_grad() because
    # the weights require grad and the step itself must not be recorded.
    # torch.optim.SGD would do the same job.
    with torch.no_grad():
        for weight in (w1, w2):
            weight -= learning_rate * weight.grad
            # Gradients accumulate, so clear them for the next iteration.
            weight.grad.zero_()

In [None]:
# Batch size, input width, hidden width, output width.
N, D_in, H, D_out = 64, 1000, 100, 10
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

# Same two-layer network as the manual version, built from nn modules.
model = torch.nn.Sequential(
    torch.nn.Linear(D_in, H),
    torch.nn.ReLU(),
    torch.nn.Linear(H, D_out),
)
# reduction='sum' gives the summed (not averaged) squared error.
loss_fn = torch.nn.MSELoss(reduction='sum')


learning_rate = 1e-4
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
for t in range(500):
    y_pred = model(x)

    # Compute and print loss.
    loss = loss_fn(y_pred, y)
    print(t, loss.item())

    # Clear the gradients of every parameter the optimizer updates (here:
    # all of the model's parameters) before computing fresh ones.
    optimizer.zero_grad()
    loss.backward()

    optimizer.step()

In [None]:
import random
class DynamicNet(torch.nn.Module):
    """Fully connected network whose depth is re-sampled on every forward pass.

    The middle layer is applied between 0 and 3 times (inclusive) per call,
    reusing the same weights each time.
    """

    def __init__(self, D_in, H, D_out):
        """Create the input, shared middle, and output linear layers.

        Args:
            D_in: number of input features.
            H: width of the hidden representation.
            D_out: number of output features.
        """
        super(DynamicNet, self).__init__()
        self.input_linear = torch.nn.Linear(D_in, H)
        self.middle_linear = torch.nn.Linear(H, H)
        self.output_linear = torch.nn.Linear(H, D_out)

    def forward(self, x):
        """Run x through the network with a randomly chosen number of middle layers."""
        hidden = torch.clamp(self.input_linear(x), min=0)
        # Depth is drawn anew on each call from Python's global `random` state.
        remaining = random.randint(0, 3)
        while remaining > 0:
            hidden = torch.clamp(self.middle_linear(hidden), min=0)
            remaining -= 1
        return self.output_linear(hidden)


# Problem dimensions: batch size, input width, hidden width, output width.
N = 64
D_in = 1000
H = 100
D_out = 10

# Random inputs and regression targets.
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

model = DynamicNet(D_in, H, D_out)

# Summed squared error as the loss. Plain SGD struggles with this
# randomly-deep model, so momentum is enabled.
criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=1e-4, momentum=0.9)
for t in range(500):
    # Forward pass; the network picks a fresh depth on every call.
    y_pred = model(x)

    # Report the loss for this step.
    loss = criterion(y_pred, y)
    print(t, loss.item())

    # Reset gradients, backpropagate, then apply the update.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

In [6]:
# Collect (name, Parameter) pairs for every parameter registered on the model.
# Displaying the whole list prints every weight tensor, hence the long output.
params = list(model.named_parameters())
params

[('input_linear.weight', Parameter containing:
  tensor([[ 0.0061,  0.0238, -0.0026,  ...,  0.0098, -0.0177,  0.0157],
          [-0.0202, -0.0154, -0.0319,  ...,  0.0318,  0.0074,  0.0169],
          [-0.0454,  0.0056,  0.0340,  ..., -0.0133,  0.0121, -0.0135],
          ...,
          [ 0.0253, -0.0386, -0.0294,  ..., -0.0249, -0.0236, -0.0066],
          [ 0.0170,  0.0281, -0.0128,  ..., -0.0159,  0.0252,  0.0091],
          [-0.0163,  0.0239, -0.0185,  ...,  0.0242, -0.0194,  0.0089]],
         requires_grad=True)), ('input_linear.bias', Parameter containing:
  tensor([-0.0179, -0.0024, -0.0087, -0.0024,  0.0281, -0.0200,  0.0011, -0.0042,
          -0.0110,  0.0131, -0.0274,  0.0019, -0.0290,  0.0364, -0.0308, -0.0477,
          -0.0260,  0.0139, -0.0133, -0.0409,  0.0174, -0.0344, -0.0195,  0.0039,
          -0.0317, -0.0213,  0.0136,  0.0107, -0.0066, -0.0178,  0.0187, -0.0356,
          -0.0203, -0.0260,  0.0179, -0.0087, -0.0213, -0.0101,  0.0258, -0.0329,
          -0.0215,  

In [8]:
# First entry only: the ('input_linear.weight', Parameter) pair.
params[0]

('input_linear.weight', Parameter containing:
 tensor([[ 0.0061,  0.0238, -0.0026,  ...,  0.0098, -0.0177,  0.0157],
         [-0.0202, -0.0154, -0.0319,  ...,  0.0318,  0.0074,  0.0169],
         [-0.0454,  0.0056,  0.0340,  ..., -0.0133,  0.0121, -0.0135],
         ...,
         [ 0.0253, -0.0386, -0.0294,  ..., -0.0249, -0.0236, -0.0066],
         [ 0.0170,  0.0281, -0.0128,  ..., -0.0159,  0.0252,  0.0091],
         [-0.0163,  0.0239, -0.0185,  ...,  0.0242, -0.0194,  0.0089]],
        requires_grad=True))

### Data processing

In [19]:
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
import numpy as np

In [45]:
class CustomDataset(Dataset):
    """Toy dataset of random vectors drawn once from a standard normal.

    Each item is a dict with a single key, 'random_vector', holding one row
    of the generated matrix, optionally passed through `transformations`.
    """

    def __init__(self, transformations=None, num_samples=10, vector_size=10):
        """Generate the data and remember the optional transform.

        Args:
            transformations: callable applied to each sample dict before it
                is returned, or None to return samples unchanged.
            num_samples: number of vectors (rows) to generate.
            vector_size: length of each vector.
        """
        super(CustomDataset, self).__init__()
        self.data = np.random.randn(num_samples, vector_size)
        self.transformations = transformations

    def __len__(self):
        """Return the number of samples."""
        return self.data.shape[0]

    def __getitem__(self, index):
        """Return the sample at `index` as a dict, transformed if configured."""
        sample = {'random_vector': self.data[index]}
        # Compare against None explicitly so that a callable which happens to
        # be falsy is still applied.
        if self.transformations is not None:
            sample = self.transformations(sample)

        return sample

In [46]:
class ToTensor(object):
    """Callable transform that turns a sample's ndarray into a torch tensor."""

    def __call__(self, sample):
        """Return a new dict whose 'random_vector' is the tensor form of the input's."""
        # from_numpy wraps the existing array rather than copying it, and
        # keeps its dtype.
        return {'random_vector': torch.from_numpy(sample['random_vector'])}

In [47]:
# Compose chains transforms in order; here the pipeline has a single step.
transformations = transforms.Compose([ToTensor()])

In [48]:
# Dataset instance whose items are passed through the transform pipeline.
cd = CustomDataset(transformations=transformations)

In [49]:
# Index the dataset directly: the sample comes back already converted to a
# tensor (float64, carried over from the NumPy data).
cd[3]

{'random_vector': tensor([ 0.1416, -1.2562, -0.9917, -1.7103,  1.9722, -1.2794, -0.8276,  1.9544,
         -0.1124,  1.5208], dtype=torch.float64)}

In [50]:
# Serve the dataset in shuffled batches of 4, loaded by 4 worker processes.
dataloader = DataLoader(dataset=cd, batch_size=4,
                        shuffle=True, num_workers=4)

In [51]:
# Fetch a single batch: each value in the sample dict is stacked along a new
# leading batch dimension. The builtin next() is used instead of calling the
# __next__ dunder directly.
next(iter(dataloader))

{'random_vector': tensor([[ 1.0592,  0.1721,  0.1390,  0.8516,  0.3061, -0.6238,  0.2344, -1.8680,
          -0.0221,  1.6277],
         [-1.3695,  0.2851, -1.7845,  0.1750,  0.0251,  0.7172,  1.6801,  0.3078,
           1.6139, -1.5179],
         [-0.7740,  0.9719, -0.1336, -0.3983,  0.0529, -1.7813,  1.3427, -0.1581,
           2.3445, -1.7506],
         [-0.8614,  0.8886, -0.4223,  0.1397, -3.1475,  0.7675,  1.4525,  0.1545,
           3.0248, -1.2721]], dtype=torch.float64)}