In [None]:
import torch
# reports whether CUDA is currently available to this PyTorch installation
torch.cuda.is_available()

In [None]:
import torch
import numpy as np

# np.random.randn produces a float64 array
numpy_tensor = np.random.randn(10, 20)

# convert numpy array to pytorch array
# (torch.Tensor copies the values into the default float32 tensor type)
pytorch_tensor = torch.Tensor(numpy_tensor)
# or another way
# (from_numpy shares memory with the array and keeps its float64 dtype)
pytorch_tensor = torch.from_numpy(numpy_tensor)

# convert torch tensor to numpy representation
pytorch_tensor.numpy()

# The GPU round trip is guarded so the cell also runs on a CPU-only machine
# instead of raising as soon as a CUDA tensor is requested.
if torch.cuda.is_available():
    # if we want to use tensor on GPU provide another type
    dtype = torch.cuda.FloatTensor
    gpu_tensor = torch.randn(10, 20).type(dtype)
    # or just call `cuda()` method
    gpu_tensor = pytorch_tensor.cuda()
    # call back to the CPU
    cpu_tensor = gpu_tensor.cpu()
else:
    # no CUDA device: the tensor already lives on the CPU
    cpu_tensor = pytorch_tensor

# define pytorch tensors
x = torch.randn(10, 20)
y = torch.ones(20, 5)
# `@` means matrix multiplication since python3.5, PEP-0465
res = x @ y

# get the shape
res.shape  # torch.Size([10, 5])

In [None]:
%load_ext autoreload
%autoreload 2

from data import data_gen
# data_gen is a project helper imported from data.py.
# NOTE(review): the meaning of the two arguments (20, 1) is defined there and
# is not visible from this notebook.
x = data_gen(20,1)
# show the returned object itself, then advance it twice with next()
print(x)
print(next(x))
print(next(x))

In [None]:
import torch
from torch.autograd import Variable

# build the fixed inputs: raw tensors first, then their non-trainable wrappers
x_tensor, y_tensor = torch.randn(10, 20), torch.randn(10, 5)
x, y = (Variable(t, requires_grad=False) for t in (x_tensor, y_tensor))
# the only trainable quantity is the weight matrix
w = Variable(torch.randn(20, 5), requires_grad=True)

# the tensor wrapped by the variable
print(type(w.data))  # torch.FloatTensor
# no backward pass has run yet, so there is no gradient
print(w.grad)  # None

# mean squared error of the linear prediction x @ w
loss = ((y - x @ w) ** 2).mean()

# backpropagate to fill w.grad
loss.backward()
print(w.grad)  # some gradients
# one manual gradient-descent step with step size 0.01
w.data.sub_(0.01 * w.grad.data)
# clear the accumulated gradient so the next backward starts from zero
w.grad.data.zero_()

In [None]:
from math import floor,ceil

class WarpMatrix(torch.autograd.Function):
    """Custom autograd function mapping a vector ``a`` to a square matrix.

    Column ``i`` of the result carries ``a[i]``, placed in the row given by the
    integer part of the running sum of ``a``. When the running sum moves to a
    new integer at position ``i``, the value is split between that row and the
    row directly above it.

    NOTE(review): the scalar indexing below suggests ``a`` is meant to be a
    vector with entries in [0, 1) - TODO confirm with the author.
    """
    @staticmethod
    def forward(ctx, a):
        """Build the matrix and record what ``backward`` needs.

        Args:
            ctx: autograd context; receives ``a`` via ``save_for_backward`` plus
                the ``grad_indices`` and ``cross_boundary`` attributes.
            a: input tensor; its cumulative sum along dim 0 selects the rows.

        Returns:
            A ``len(a) x len(a)`` tensor created with ``torch.zeros`` and
            filled in column by column.
        """
        # running sum of the input along dim 0
        b = torch.cumsum(a, 0)
        dim1 = int(a.shape[0])
        # NOTE(review): dim0 is computed but never used (see the commented-out
        # t_dim just below)
        dim0 = int(ceil(b[-1]))
        #t_dim = [dim0, dim1]
        # square output; NOTE(review): a row index floor(b[i]) >= dim1 would be
        # out of range - confirm the running sum always stays below len(a)
        t_dim = [dim1, dim1]
        trans_mat = torch.zeros(t_dim)
        # row index chosen for each column; the storage starts uninitialised
        # and every entry is written inside the loop
        grad_indices = torch.FloatTensor(dim1)
        #trans_grad = torch.zeros(t_dim.append(a.shape[0]))
        prev_ind = 0
        # cross_boundary[i] is True when column i was split across two rows
        cross_boundary = []
        for i, x in enumerate(zip(a, b)):
            ai, bi = x
            # integer part of the running sum = target row for this column
            this_ind = floor(bi)
            if this_ind == prev_ind:
                # same row as the previous column: the whole value goes there
                trans_mat[this_ind, i] = ai
                cross_boundary.append(False)
            else:  # we just crossed an integer boundary
                # tmp is the part of the running sum beyond the new integer
                tmp = bi - this_ind
                trans_mat[this_ind, i] = tmp
                # the rest of ai goes to the row above
                # NOTE(review): handles a single crossing per step only - a
                # jump of more than one integer is not spread over the rows
                trans_mat[this_ind - 1, i] = ai - tmp
                cross_boundary.append(True)
            grad_indices[i]=this_ind
            prev_ind = this_ind
        # assert ((a - trans_mat.sum(0)).abs().max() < 1e-6)
        # assert ((torch.ones(trans_mat.shape[0] - 1) - trans_mat.sum(1)[:-1]).abs().max() < 1e-6)
        ctx.save_for_backward(a)
        ctx.grad_indices = grad_indices
        ctx.cross_boundary = cross_boundary
        return trans_mat
    @staticmethod
    def backward(ctx,grad_output):
        """Return the gradient with respect to ``a``.

        For each position ``k`` the gradient starts from the entry of
        ``grad_output`` at ``(grad_indices[k], k)``. Then, for every later
        column ``j`` flagged in ``cross_boundary``, it adds the difference
        between ``grad_output`` at rows ``grad_indices[j]`` and
        ``grad_indices[j] - 1`` of that column.
        """
        #print('grad output:', grad_output)
        # NOTE(review): `saved_variables` is the legacy accessor; newer PyTorch
        # releases expose the saved inputs as `ctx.saved_tensors`
        a, = ctx.saved_variables
        grad_indices = ctx.grad_indices
        my_grad = torch.zeros_like(a)
        for k, ind in enumerate(grad_indices):
            # direct contribution of column k
            my_grad[k] = grad_output[int(ind),k]
            # contributions from later columns that were split across two rows
            for j in range(k+1,int(grad_output.data.shape[1])):
                if ctx.cross_boundary[j]:
                    iofj = int(grad_indices[j])            
                    my_grad[k] = my_grad[k] +\
                                (grad_output[iofj,j] - \
                                grad_output[iofj-1,j] )
                    #print(my_grad[k].view(1,-1))
        return my_grad#torch.ones_like(a)

In [None]:
%load_ext autoreload
%autoreload 2
#from reshape import WarpMatrix
from torch.autograd import Variable
import torch

# Define the input here instead of picking up whichever `w` an earlier cell
# left in the kernel, so the cell behaves the same after Restart & Run All.
w = Variable(torch.FloatTensor(5, 1).uniform_(), requires_grad=True)
# build the warp matrix, then reduce it to a scalar: the sum of column 2
wmat = WarpMatrix.apply(w)
loss = wmat.sum(0)[2]
loss.backward()
print(wmat, loss, w.grad)
# reset the gradient buffer; zero_() returns the zeroed tensor, kept in `a`
a = w.grad.data.zero_()

In [3]:
%load_ext autoreload
%autoreload 2
from models import FittedWarp
from torch.autograd import Variable
import torch

# FittedWarp comes from the project's models module.
# NOTE(review): what the (10, 1) argument configures is not visible here.
warp = FittedWarp((10,1))
x = Variable(torch.randn(20,10))
# NOTE(review): forward() is invoked directly instead of warp(x) - if
# FittedWarp is an nn.Module this bypasses any registered hooks; confirm.
out = warp.forward(x)
# reduce to a scalar so backward() needs no explicit gradient argument
loss = out.sum()
loss.backward()
# warp.w is the attribute whose gradient is inspected after the backward pass
print(out, loss, warp.w.grad)
# a =w.grad.data.zero_()

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Variable containing:
 0.4973  0.4175 -0.2877 -0.0765  0.9019  0.5446  0.3386 -0.2201 -0.2986 -0.8582
-0.1429  0.1432 -0.0785 -0.0853  0.2460 -0.6929 -0.0686  0.3580  0.1541 -0.5233
-0.6284 -1.6492 -0.8562 -0.8518  1.2276  0.1810 -0.3068 -0.0795  1.2501 -0.5421
 0.3221 -0.9090 -2.0184  1.5936 -0.4268 -0.1354  0.6666 -0.2423  0.6903  2.1152
 0.2605  0.7742 -0.0216  0.1031  0.9346  1.2678  0.7366 -1.3916  0.8819  0.8035
-0.5749  0.8674 -0.5634 -0.3252  1.2010  0.3106  0.2800 -1.2129  0.4422  0.5886
-0.2918  0.4893  0.0004  0.3263 -0.3633  0.3901  0.1029 -0.7333  0.0564 -0.1340
 0.1859 -0.2086  0.1170  0.4281 -0.2778  0.0461  0.0140  0.0867 -0.3471 -0.0899
 0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000
 0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000
 0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000
 0.0000  0.

In [None]:
from torch.autograd import gradcheck

# gradcheck takes a tuple of tensors as input, compares the analytical gradient
# evaluated at these tensors against a finite-difference approximation and
# returns True if they agree within the given tolerance.
# WarpMatrix.forward accepts a single tensor, so the tuple holds exactly one
# entry (two entries would be passed as two positional arguments and rejected).
# NOTE(review): WarpMatrix allocates its result with torch.zeros, i.e. not in
# double precision - the tolerances may need loosening; verify.
input = (Variable(torch.FloatTensor(5, 1).uniform_().double(), requires_grad=True),)
test = gradcheck(WarpMatrix.apply, input, eps=1e-6, atol=1e-4)
print(test)

In [None]:
import torch
print(torch.__version__)


class MyFunction(torch.autograd.Function):
    """Sign function whose backward pass hands the incoming gradient straight through."""

    @staticmethod
    def forward(ctx, input):
        # keep the input around so backward can look at it
        ctx.save_for_backward(input)
        return torch.sign(input)

    @staticmethod
    def backward(ctx, grad_output):
        # the saved values come back as a tuple; unpack its single element
        (input,) = ctx.saved_variables
        # clipping of the gradient where |input| >= 1 is currently switched off:
        # grad_output[input.ge(1)] = 0
        # grad_output[input.le(-1)] = 0
        return grad_output


# usage
x = torch.autograd.Variable(torch.randn(10, 20), requires_grad=True)
y = MyFunction.apply(x)
# or
# my_func = MyFunction.apply
# y = my_func(x)
loss = y.sum()
loss.backward()

In [None]:
import torch
from torch.autograd import Variable
import torch.nn.functional as F


# fixed data: 10 samples with 20 features, 3 targets each
x = Variable(torch.randn(10, 20), requires_grad=False)
y = Variable(torch.randn(10, 3), requires_grad=False)
# trainable weights of a two-layer network (20 -> 5 -> 3)
w1 = Variable(torch.randn(20, 5), requires_grad=True)
w2 = Variable(torch.randn(5, 3), requires_grad=True)

learning_rate = 0.1
loss_fn = torch.nn.MSELoss()
optimizer = torch.optim.SGD(params=[w1, w2], lr=learning_rate)
for step in range(5):
    # forward pass: two sigmoid layers applied back to back
    pred = F.sigmoid(F.sigmoid(x @ w1) @ w2)
    loss = loss_fn(pred, y)

    # drop stale gradients, backpropagate, then take the SGD step
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    print(loss)

In [None]:
import torch

# two single-element float tensors used as loop counters
first_counter, second_counter = torch.Tensor([0]), torch.Tensor([10])
# an integer argument is a size: this is a 15-element tensor with
# uninitialised contents; it is not used by the loop below
some_value = torch.Tensor(15)

# the first counter gains 2 per pass, the second only 1, so the first catches up
while True:
    if not (first_counter < second_counter)[0]:
        break
    first_counter += 2
    second_counter += 1

In [None]:
# lists have no .iter() method; the built-in iter() creates the iterator
next(iter([1]))

In [None]:
from collections import OrderedDict

import torch.nn as nn


# Example of using Sequential: layers are passed positionally
model = nn.Sequential(
    nn.Conv2d(in_channels=1, out_channels=20, kernel_size=5),
    nn.ReLU(),
    nn.Conv2d(in_channels=20, out_channels=64, kernel_size=5),
    nn.ReLU(),
)

# Example of using Sequential with OrderedDict: every layer gets an explicit
# name; this second model replaces the one built above
model = nn.Sequential(
    OrderedDict(
        [
            ('conv1', nn.Conv2d(in_channels=1, out_channels=20, kernel_size=5)),
            ('relu1', nn.ReLU()),
            ('conv2', nn.Conv2d(in_channels=20, out_channels=64, kernel_size=5)),
            ('relu2', nn.ReLU()),
        ]
    )
)

#output = model(some_input)

In [None]:
from torch import nn

class Model(nn.Module):
    """Small convolutional stack that applies its last layer twice.

    Every convolution uses a 3x3 kernel with padding 1 and stride 1, so the
    spatial size of the input is preserved throughout.
    """

    def __init__(self):
        super().__init__()
        self.feature_extractor = nn.Sequential(
            nn.Conv2d(3, 12, kernel_size=3, padding=1, stride=1),
            nn.Conv2d(12, 24, kernel_size=3, padding=1, stride=1),
        )
        # This layer is applied twice in forward(), so it must accept its own
        # output: input and output channel counts have to match (24 -> 24).
        # A 24 -> 36 layer fails on the second application because it is then
        # handed 36 channels while expecting 24.
        self.second_extractor = nn.Conv2d(
            24, 24, kernel_size=3, padding=1, stride=1)

    def forward(self, x):
        """Map a batch of shape (N, 3, H, W) to a batch of shape (N, 24, H, W)."""
        x = self.feature_extractor(x)
        x = self.second_extractor(x)
        # note that we may call the same layer twice or more; both calls share
        # the same weights
        x = self.second_extractor(x)
        return x

In [None]:
import torch

class MyFunction(torch.autograd.Function):
    """Sign function with a clipped pass-through gradient.

    Forward returns ``sign(input)``. Backward passes the incoming gradient
    through unchanged where ``-1 < input < 1`` and zeroes it everywhere else.
    """

    @staticmethod
    def forward(ctx, input):
        # keep the input: backward needs it to decide where to clip
        ctx.save_for_backward(input)
        output = torch.sign(input)
        return output

    @staticmethod
    def backward(ctx, grad_output):
        # saved tensors - tuple of tensors, so we need to get the first
        input, = ctx.saved_variables
        # Work on a copy: grad_output belongs to autograd and may be shared
        # with other nodes or be an expanded view (e.g. after .sum()), so
        # writing into it in place can corrupt other gradients or fail outright.
        grad_input = grad_output.clone()
        grad_input[input.ge(1)] = 0
        grad_input[input.le(-1)] = 0
        return grad_input


# usage
x = torch.randn(10, 20)
y = MyFunction.apply(x)
# or
my_func = MyFunction.apply
print(MyFunction)
y = my_func(x)


# and if we want to use inside nn.Module
class MyFunctionModule(torch.nn.Module):
    """Thin nn.Module wrapper so MyFunction can be used like any other layer."""

    def forward(self, x):
        return MyFunction.apply(x)


In [None]:

import torch

### tensor example
x_cpu = torch.randn(10, 20)
w_cpu = torch.randn(20, 10)
# direct transfer to the GPU (needs a CUDA device; .cuda() raises without one)
x_gpu = x_cpu.cuda()
w_gpu = w_cpu.cuda()
# both operands were moved to the GPU before the product is taken
result_gpu = x_gpu @ w_gpu
# get back from GPU to CPU
result_cpu = result_gpu.cpu()

### model example
# NOTE(review): `model`, `inputs` and `Variable` are neither defined nor
# imported in this cell - they must already exist in the kernel.
model = model.cuda()
# train step
inputs = Variable(inputs.cuda())
outputs = model(inputs)
# get back from GPU to CPU
outputs = outputs.cpu()


In [None]:

import torch

# check whether cuda is available
# (the result is discarded here: only a cell's last expression is displayed)
torch.cuda.is_available()

# set required device: GPU 0 becomes the current device
torch.cuda.set_device(0)

# work with some required cuda device
# NOTE(review): the indices used below (1 and 2) need at least three visible
# GPUs; on a smaller machine this block raises.
with torch.cuda.device(1):
    # allocates a tensor on GPU 1
    a = torch.cuda.FloatTensor(1)
    assert a.get_device() == 1

    # but you still can manually assign tensor to required device
    d = torch.randn(2).cuda(2)
    assert d.get_device() == 2


In [None]:

import math

import torch
from torch import nn
from torch.autograd import Variable

# new way with `init` module
# (torch.Tensor(3, 5) allocates a 3x5 tensor without initialising it;
#  init.normal then fills it in place with normally distributed samples)
# NOTE(review): later PyTorch releases name the in-place initialiser
# `torch.nn.init.normal_` and deprecate `normal` - confirm the target version.
w = torch.Tensor(3, 5)
torch.nn.init.normal(w)
# work for Variables also
w2 = Variable(w)
torch.nn.init.normal(w2)
# old styled direct access to tensors data attribute
w2.data.normal_()

# example for some module
def weights_init(m):
    """Re-initialise ``m`` in place according to its class name.

    Classes whose name contains ``Conv`` get weights drawn from a normal
    distribution with mean 0 and std 0.02. Classes whose name contains
    ``BatchNorm`` get weights drawn with mean 1 and std 0.02 plus a zero bias.
    Any other module is left untouched.
    """
    kind = m.__class__.__name__
    if 'Conv' in kind:
        m.weight.data.normal_(0.0, 0.02)
        return
    if 'BatchNorm' in kind:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)

# for loop approach with direct access
class MyModel(nn.Module):
    """Template showing per-layer-type initialisation inside ``__init__``."""

    def __init__(self):
        # nn.Module must be initialised first: self.modules() relies on the
        # bookkeeping it creates and raises AttributeError without it.
        super().__init__()
        # (a real model registers its layers here, before the loop below)
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                # normal init with std sqrt(2 / n), where n is the kernel area
                # times the number of output channels
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                # affine=False leaves weight and bias as None
                if m.weight is not None:
                    m.weight.data.fill_(1)
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                # bias=False leaves the attribute as None
                if m.bias is not None:
                    m.bias.data.zero_()


In [None]:

# scheduler example
from torch.optim import lr_scheduler

# NOTE(review): `torch` and `model` are not defined in this cell; they must
# already exist in the kernel from earlier cells.
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
# StepLR multiplies the learning rate by gamma every step_size epochs
scheduler = lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)

# NOTE(review): train() and validate() are placeholders that are not defined in
# this notebook. Also, PyTorch >= 1.1 expects scheduler.step() after the
# epoch's optimizer steps rather than before - confirm the target version.
for epoch in range(100):
    scheduler.step()
    train()
    validate()

# Train flag can be updated with boolean
# to disable dropout and batch norm learning
model.train(True)
# execute train step
model.train(False)
# run inference step

# NOTE(review): the seeds are set at the very end of the cell, so they only
# affect random numbers drawn afterwards.
# CPU seed
torch.manual_seed(42)
# GPU seed
torch.cuda.manual_seed_all(42)

In [None]:

from collections import OrderedDict

import torch.nn as nn
# relative path: the file is written to the current working directory
# NOTE(review): `torch` is not imported in this cell; it must already be
# available in the kernel.
save_path = 'test.mdl'
model = nn.Sequential(OrderedDict([
    ('conv1', nn.Conv2d(1, 20, 5)),
    ('relu1', nn.ReLU()),
    ('conv2', nn.Conv2d(20, 64, 5)),
    ('relu2', nn.ReLU())
]))

print(model)

# Sequential (
#   (conv1): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
#   (relu1): ReLU ()
#   (conv2): Conv2d(20, 64, kernel_size=(5, 5), stride=(1, 1))
#   (relu2): ReLU ()
# )

# save/load only the model parameters (preferred solution)
torch.save(model.state_dict(), save_path)
model.load_state_dict(torch.load(save_path))

# save whole model (overwrites the state-dict file written above)
# NOTE(review): torch.load unpickles the file, so it must only be used on
# trusted files; recent PyTorch releases may also refuse to load a whole
# pickled model unless weights_only=False is passed - confirm the version.
torch.save(model, save_path)
model = torch.load(save_path)


In [None]:
def to_gpu(x):
    """Return ``x`` moved to the current CUDA device (plain ``x.cuda()``)."""
    return x.cuda()


class ImagesDataset(torch.utils.data.Dataset):
    """Placeholder dataset; the real implementation is still to be written."""
    pass


class Net(nn.Module):
    """Placeholder network; the real implementation is still to be written."""
    pass


model = Net()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
scheduler = lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)
criterion = torch.nn.MSELoss()

# NOTE(review): path_to_fit_images, path_to_valid_images, epochs, get_data and
# save_model are placeholders that are not defined anywhere in this notebook.
fit_dataset = ImagesDataset(path_to_fit_images)
# the loader must reference `fit_dataset`; `fitdataset` was an undefined name
fit_data_loader = torch.utils.data.DataLoader(fit_dataset, batch_size=10)

valid_dataset = ImagesDataset(path_to_valid_images)
valid_data_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=10)

best_valid_loss = float('inf')
for epoch in range(epochs):
    # training
    # step the StepLR instance; `lr_scheduler` is the module and has no step()
    scheduler.step()
    # enable training behaviour (dropout, batch-norm statistics updates)
    model.train(True)
    for inputs, labels in fit_data_loader:
        inputs = Variable(to_gpu(inputs))
        labels = Variable(to_gpu(labels))

        outputs = model(inputs)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # validation:
    # switch to inference behaviour before scoring the held-out data
    model.train(False)
    # TODO: spell this out!
    valid_inputs, valid_labels = get_data()

    inputs = Variable(to_gpu(valid_inputs))
    labels = Variable(to_gpu(valid_labels))

    outputs = model(inputs)
    loss = criterion(outputs, labels)
    # keep the model with the lowest validation loss seen so far
    if loss < best_valid_loss:
        best_valid_loss = loss
        # spell_out:
        save_model(model)


In [None]:
import torch

### tensor example
x_cpu = torch.randn(10, 20)
w_cpu = torch.randn(20, 10)
# direct transfer to the GPU (needs a CUDA device; .cuda() raises without one)
x_gpu = x_cpu.cuda()
w_gpu = w_cpu.cuda()
# both operands were moved to the GPU before the product is taken
result_gpu = x_gpu @ w_gpu
# get back from GPU to CPU
result_cpu = result_gpu.cpu()
print(result_cpu)

### model example
# NOTE(review): `model`, `inputs` and `Variable` are neither defined nor
# imported in this cell - they must already exist in the kernel.
model = model.cuda()
# train step
inputs = Variable(inputs.cuda())
outputs = model(inputs)
# get back from GPU to CPU

In [None]:
import torch
from torch.autograd import Variable
import torch.nn.functional as F


# fixed data: 10 samples with 20 features, 3 targets each
x = Variable(torch.randn(10, 20), requires_grad=False)
y = Variable(torch.randn(10, 3), requires_grad=False)
# trainable weights of a two-layer network (20 -> 5 -> 3)
w1 = Variable(torch.randn(20, 5), requires_grad=True)
w2 = Variable(torch.randn(5, 3), requires_grad=True)

learning_rate = 0.1
loss_fn = torch.nn.MSELoss()
optimizer = torch.optim.SGD(params=[w1, w2], lr=learning_rate)
for step in range(5):
    # forward pass: two sigmoid layers applied back to back
    pred = F.sigmoid(F.sigmoid(x @ w1) @ w2)
    loss = loss_fn(pred, y)

    # drop stale gradients, backpropagate, then take the SGD step
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

In [None]:
import torch
from torch.autograd import Variable
# fixed inputs and targets
x = Variable(torch.randn(3, 4), requires_grad=False)
y = Variable(torch.randn(3, 2), requires_grad=False)
# two weight matrices holding the same values: w2 is built from w1's data
w1 = Variable(torch.randn(4, 2), requires_grad=True)
w2 = Variable(torch.FloatTensor(w1.data.numpy()), requires_grad=True)
for i in range(5):
    # the same mean-squared-error loss, once per weight matrix
    loss1 = ((y - x @ w1) ** 2).mean()
    loss2 = ((y - x @ w2) ** 2).mean()
    # backward() adds to .grad instead of overwriting it
    loss1.backward()
    loss2.backward()
    print("w1 grad(zeroed)", w1.grad)
    print("w2 grad(not zeroed)", w2.grad)
    # only w1's gradient is reset, so w2's keeps growing across iterations
    w1.grad.data.zero_()
    print('-'*10)