In [4]:
import torch

### init

In [5]:
# tensor init

arr = [[1,2,3],[4,5,6]]
tensor = torch.tensor(arr)
print(tensor)

tensor([[1, 2, 3],
        [4, 5, 6]])


In [6]:
type(tensor)

torch.Tensor

In [7]:
val =2.0
tensor = torch.tensor(val)
print(tensor)

tensor(2.)


In [8]:
# Init tensor from numpy array
import numpy as np
arr = np.array([[1,2,3],[4,5,6]])
arr

array([[1, 2, 3],
       [4, 5, 6]])

In [9]:
x = torch.from_numpy(arr)
print(x)

tensor([[1, 2, 3],
        [4, 5, 6]])


In [10]:
# common pytorch api  endpoints

zeros_t = torch.zeros(3,3)
print(zeros_t)

tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])


### attributes

In [11]:
zeros_t.shape

torch.Size([3, 3])

In [12]:
zeros_t.dtype

torch.float32

In [13]:
zeros_t.device

device(type='cpu')

In [14]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cpu')

### tensors operation

In [15]:
c = 10
x = x* c
print(x)

tensor([[10, 20, 30],
        [40, 50, 60]])


In [16]:
x1 = torch.zeros((1,2))
x2 = torch.ones((1,2))
print(x1)

tensor([[0., 0.]])


In [17]:
print(x2)

tensor([[1., 1.]])


In [18]:
x1+x2

tensor([[1., 1.]])

In [19]:
x1.shape

torch.Size([1, 2])

In [20]:
x1_t = torch.tensor([[1,2], [3,4]])
x1_t

tensor([[1, 2],
        [3, 4]])

In [21]:
x2_t = torch.tensor([[1,2,3], [4,5,6]])
x2_t

tensor([[1, 2, 3],
        [4, 5, 6]])

In [22]:
x1_t.shape

torch.Size([2, 2])

In [23]:
x2_t.shape

torch.Size([2, 3])

In [24]:
torch.matmul(x1_t, x2_t)

tensor([[ 9, 12, 15],
        [19, 26, 33]])

In [28]:
# indexing

i,j,k = 1,1,2
x3_t = torch.tensor([[[3,7,9], [2,4,5]],
                     [[8,6,2], [3,9,1]]])
x3_t

tensor([[[3, 7, 9],
         [2, 4, 5]],

        [[8, 6, 2],
         [3, 9, 1]]])

In [29]:
x3_t[i,j,k]

tensor(1)

In [27]:
# slicing large tensors

x3_t[:, :, 1]

tensor([[7, 4],
        [6, 9]])

In [30]:
x_t = torch.rand(2,3,4)
x_t

tensor([[[0.4247, 0.9919, 0.4736, 0.2631],
         [0.0730, 0.0341, 0.3086, 0.7490],
         [0.5629, 0.9324, 0.8899, 0.1352]],

        [[0.0058, 0.3869, 0.4387, 0.0652],
         [0.6078, 0.1466, 0.9773, 0.2264],
         [0.2182, 0.7884, 0.9977, 0.1405]]])

In [31]:
sub = torch.rand(2,4)
sub

tensor([[0.5660, 0.3828, 0.4389, 0.3647],
        [0.7247, 0.7415, 0.5450, 0.8979]])

In [32]:
x_t[0,1:3,:] = sub
x_t

tensor([[[0.4247, 0.9919, 0.4736, 0.2631],
         [0.5660, 0.3828, 0.4389, 0.3647],
         [0.7247, 0.7415, 0.5450, 0.8979]],

        [[0.0058, 0.3869, 0.4387, 0.0652],
         [0.6078, 0.1466, 0.9773, 0.2264],
         [0.2182, 0.7884, 0.9977, 0.1405]]])

## Pytorch nn Module

In [33]:
import torch.nn as nn

In [34]:
# initialize weight matrix for feed forward neural network

in_dim, out_dim = 256, 10
vector = torch.rand(in_dim)
layer = nn.Linear(in_dim, out_dim)
out = layer(vector)
print(out)

tensor([ 0.6067, -0.4077,  0.0831, -0.0106,  0.5423, -0.3523,  0.2946,  0.0436,
        -0.4086,  0.2903], grad_fn=<AddBackward0>)


In [35]:
W = torch.rand(10, 256)
b = torch.zeros(10,1)
out = torch.matmul(W, vector) + b
print(out)

tensor([[61.0322, 63.9795, 64.1855, 63.8692, 64.3802, 62.7955, 60.2315, 67.0167,
         62.7987, 66.0760],
        [61.0322, 63.9795, 64.1855, 63.8692, 64.3802, 62.7955, 60.2315, 67.0167,
         62.7987, 66.0760],
        [61.0322, 63.9795, 64.1855, 63.8692, 64.3802, 62.7955, 60.2315, 67.0167,
         62.7987, 66.0760],
        [61.0322, 63.9795, 64.1855, 63.8692, 64.3802, 62.7955, 60.2315, 67.0167,
         62.7987, 66.0760],
        [61.0322, 63.9795, 64.1855, 63.8692, 64.3802, 62.7955, 60.2315, 67.0167,
         62.7987, 66.0760],
        [61.0322, 63.9795, 64.1855, 63.8692, 64.3802, 62.7955, 60.2315, 67.0167,
         62.7987, 66.0760],
        [61.0322, 63.9795, 64.1855, 63.8692, 64.3802, 62.7955, 60.2315, 67.0167,
         62.7987, 66.0760],
        [61.0322, 63.9795, 64.1855, 63.8692, 64.3802, 62.7955, 60.2315, 67.0167,
         62.7987, 66.0760],
        [61.0322, 63.9795, 64.1855, 63.8692, 64.3802, 62.7955, 60.2315, 67.0167,
         62.7987, 66.0760],
        [61.0322, 6

### feed forward nn

In [37]:
# math : W2(W1x + b1) + b2

in_dim, featurs, out_dim = 784, 256, 10
vec = torch.rand(in_dim)
layer1 = nn.Linear(in_dim, featurs)
layer2 = nn.Linear(featurs, out_dim)
out = layer2(layer1(vec))
print(out)

tensor([ 0.0939, -0.0381, -0.1344,  0.0877, -0.1091,  0.0179, -0.3880, -0.0113,
         0.0845, -0.1620], grad_fn=<AddBackward0>)


In [38]:
# with non linear activation function, applied between layers

relu = nn.ReLU()
out = layer2(relu(layer1(vec)))
print(out)

tensor([-0.0912,  0.0337, -0.0073, -0.0011, -0.0960, -0.0200, -0.0733, -0.0777,
         0.0945, -0.0412], grad_fn=<AddBackward0>)


## Simple NN Model

In [39]:
# simple two layers nn

class BaseClassifier(nn.Module):
    def __init__(self, in_dim, feature_dim, out_dim):
        super(BaseClassifier, self).__init__()
        self.layer1 = nn.Linear(in_dim, feature_dim)
        self.layer2 = nn.Linear(feature_dim, out_dim)
        self.relu = nn.ReLU()
    
    def forward(self, x):
        x = self.layer1(x)
        x = self.relu(x)
        out = self.layer2(x)
        return out

In [41]:
no_examples = 10
in_dim, feature_dim, out_dim = 784, 256, 10
x = torch.rand(no_examples, in_dim)
classifier = BaseClassifier(in_dim, feature_dim, out_dim)
out = classifier(x)

In [42]:
print(out)

tensor([[ 0.0640,  0.2168, -0.0935,  0.1479, -0.0952,  0.0768, -0.0652,  0.0217,
         -0.2808, -0.1004],
        [ 0.0528,  0.1145,  0.0180,  0.2113,  0.0715,  0.1339,  0.1167,  0.0198,
         -0.1473, -0.0429],
        [ 0.1036,  0.1865, -0.0634,  0.1873,  0.0555,  0.0628, -0.0119,  0.0373,
         -0.2629,  0.0021],
        [ 0.0139,  0.1252, -0.0411,  0.1643, -0.0919, -0.0322, -0.0734,  0.0883,
         -0.1834,  0.1305],
        [ 0.0553,  0.1062, -0.1330,  0.1078,  0.0040,  0.0108,  0.1088,  0.0367,
         -0.1612, -0.0886],
        [-0.0597,  0.0675,  0.0409,  0.1127, -0.1379,  0.0242, -0.0319,  0.0894,
         -0.1560, -0.0657],
        [ 0.0264,  0.1349, -0.1171,  0.2642, -0.0478,  0.0708, -0.1352,  0.0789,
         -0.0731, -0.0326],
        [-0.0306,  0.1068, -0.0028,  0.0987, -0.1200,  0.0399, -0.0953, -0.0058,
         -0.2501, -0.0085],
        [ 0.0326,  0.1524,  0.0109,  0.0997, -0.0925,  0.0131, -0.0904,  0.0108,
         -0.2703, -0.0022],
        [-0.0579,  

In [43]:
loss = nn.CrossEntropyLoss()
target = torch.tensor([2,4,0,9,7,1,3,5,8,6])
compute_loss = loss(out, target)
compute_loss.backward()

In [44]:
print(compute_loss)

tensor(2.2776, grad_fn=<NllLossBackward0>)


In [45]:
help(nn.CrossEntropyLoss)

Help on class CrossEntropyLoss in module torch.nn.modules.loss:

class CrossEntropyLoss(_WeightedLoss)
 |  CrossEntropyLoss(weight: Optional[torch.Tensor] = None, size_average=None, ignore_index: int = -100, reduce=None, reduction: str = 'mean', label_smoothing: float = 0.0) -> None
 |  
 |  This criterion computes the cross entropy loss between input logits
 |  and target.
 |  
 |  It is useful when training a classification problem with `C` classes.
 |  If provided, the optional argument :attr:`weight` should be a 1D `Tensor`
 |  assigning weight to each of the classes.
 |  This is particularly useful when you have an unbalanced training set.
 |  
 |  The `input` is expected to contain the unnormalized logits for each class (which do `not` need
 |  to be positive or sum to 1, in general).
 |  `input` has to be a Tensor of size :math:`(C)` for unbatched input,
 |  :math:`(minibatch, C)` or :math:`(minibatch, C, d_1, d_2, ..., d_K)` with :math:`K \geq 1` for the
 |  `K`-dimensional cas

In [48]:
for p in classifier.parameters():
    print(p.shape)

torch.Size([256, 784])
torch.Size([256])
torch.Size([10, 256])
torch.Size([10])


In [49]:
# during gradient descent, we need to update the weights and biases based on the gradients

from torch import optim

In [50]:
lr = 1e-3
optimizer = optim.SGD(classifier.parameters(), lr=lr)