A tensor is a multidimensional array containing elements of a single data type.

## What is a Tensor

### Simple CPU Example

In [1]:
import torch

# Two 2x3 integer tensors built from nested Python lists.
x = torch.tensor([
    [1, 2, 3],
    [4, 5, 6],
])
y = torch.tensor([
    [7, 8, 9],
    [10, 11, 12],
])

# torch.add(x, y) is the functional spelling of the element-wise sum x + y.
z = torch.add(x, y)
print(z)

tensor([[ 8, 10, 12],
        [14, 16, 18]])


In [2]:
# size() reports the extent of each dimension: 2 rows x 3 columns
print(z.size())

torch.Size([2, 3])


### Simple GPU Example

In [3]:
# Pick the best available accelerator: CUDA (NVIDIA) first, then Apple's
# Metal backend (MPS); otherwise fall back to the CPU so the cell still runs.
if torch.cuda.is_available():
    device = 'cuda'
elif torch.backends.mps.is_available():
    device = 'mps'
else:
    device = 'cpu'

# Passing device= creates the tensors directly on the selected device.
x = torch.tensor([[1,2,3], [4,5,6]],
                 device=device)
y = torch.tensor([[7,8,9], [10,11,12]],
                 device=device)

# Both operands live on the same device, so the sum is computed there too.
z = x + y
print(z)

tensor([[ 8, 10, 12],
        [14, 16, 18]], device='mps:0')


In [4]:
# The shape matches the CPU example; .device reports where the tensor's data lives
print(z.size())
print(z.device)

torch.Size([2, 3])
mps:0


### Moving Tensors Between CPUs and GPUs

In [5]:
# Start from tensors that explicitly live on the CPU, so the .to(device)
# calls below perform a real CPU -> accelerator transfer whenever an
# accelerator was selected (instead of being no-ops on tensors already there).
x = torch.tensor([[1,2,3], [4,5,6]], device='cpu')
y = torch.tensor([[7,8,9], [10,11,12]], device='cpu')

# .to(device) returns a tensor on the target device; if the tensor is already
# on that device it is returned unchanged.
x = x.to(device)
y = y.to(device)
z = x + y          # computed on `device`
z = z.to('cpu')    # bring the result back to host memory
z

tensor([[ 8, 10, 12],
        [14, 16, 18]])

## Creating Tensors

In [6]:
import numpy

# --- Build a tensor from existing data (the data is copied) ---
w = torch.tensor([1, 2, 3])                 # Python list
w = torch.tensor((1, 2, 3))                 # Python tuple
w = torch.tensor(numpy.array([1, 2, 3]))    # NumPy array

# --- Build a tensor from a shape ---
w = torch.empty((100, 200))   # uninitialized memory: element values are arbitrary
w = torch.zeros((100, 200))   # every element is 0
w = torch.ones((100, 200))    # every element is 1

In [7]:
# Tensors filled with random values
w = torch.rand((100, 200))     # 100x200 samples from the uniform distribution on [0, 1)
w = torch.randn((100, 200))    # samples from the standard normal distribution (mean 0, variance 1)
w = torch.randint(low=5, high=10, size=(100, 200))   # random integers 5..9: `low` is inclusive, `high` is exclusive

In [8]:
# The shape may have any number of dimensions; here a 3-D tensor
w = torch.rand((100, 200, 200))
w.shape

torch.Size([100, 200, 200])

In [10]:
# Request an explicit element type and target device at creation time
w = torch.empty(100, 200, dtype=torch.float32, device=device)

# empty_like() copies the size, data type and device from an existing
# tensor; the new tensor's element values are left uninitialized
x = torch.empty_like(w)

### Tensor Attributes

In [11]:
# element data type (float32, as requested when w was created)
w.dtype

torch.float32

In [13]:
# device on which the tensor's data is stored
w.device

device(type='mps', index=0)

In [14]:
# extent of each dimension, as a torch.Size
w.shape

torch.Size([100, 200])

In [15]:
# number of dimensions, i.e. len(w.shape)
w.ndim

2

In [16]:
# whether autograd tracks operations on w; False because w was created without requires_grad=True
w.requires_grad

False

In [17]:
# gradient stored by backward(); None here (so nothing is displayed) since no backward pass has involved w
w.grad

In [18]:
# autograd function that produced this tensor; None here (so nothing is displayed) since w was created directly
w.grad_fn

In [19]:
# memory layout; torch.strided is the layout of ordinary dense tensors
w.layout

torch.strided

### Data Types

In [25]:
# Specify the data type at creation using dtype
w = torch.tensor([1,2,3], dtype=torch.float32)

# Casting methods return a new tensor; they do not modify w in place
w.int()     # the result is discarded, so w is still float32
print(w)
w = w.int() # rebinding w to the result makes it int32
print(w)

# use the to() method to cast to a new type
w = w.to(torch.float32)
print(w)
# The keyword form also works, e.g. w = w.to(dtype=torch.float16)

# PyTorch promotes mixed data types during an operation:
# int32 + float32 produces a float32 result
x = torch.tensor([1,2,3], dtype=torch.int32)
y = torch.tensor([1,2,3], dtype=torch.float32)
z = x + y
print(z)

tensor([1., 2., 3.])
tensor([1, 2, 3], dtype=torch.int32)
tensor([1., 2., 3.])
tensor([2., 4., 6.])


## Tensor Operations

### Indexing, Slicing, Combining, and Splitting Tensors

In [26]:
# A 4x2 integer tensor used by the indexing examples that follow
x = torch.tensor([
    [1, 2],
    [3, 4],
    [5, 6],
    [7, 8],
])
print(x)

tensor([[1, 2],
        [3, 4],
        [5, 6],
        [7, 8]])


In [27]:
# Indexing with [row, column] (zero-based) returns a 0-dimensional tensor
print(x[1,1])

tensor(4)


In [28]:
# .item() converts a single-element tensor into a plain Python number
print(x[1,1].item())

4


In [29]:
# slicing: column 1 of the first two rows (rows 0 and 1)
print(x[:2, 1])

tensor([2, 4])


In [35]:
# boolean indexing: the mask x<5 selects the matching elements, returned as a 1-D tensor
print(x[x<5])

tensor([1, 2, 3, 4])


In [36]:
# Transpose a 2-D tensor (swap rows and columns): x.t() or x.T
print(x.t())

tensor([[1, 3, 5, 7],
        [2, 4, 6, 8]])


In [37]:
# change shape: view() never copies -- the result shares memory with x, which
# requires a compatible (e.g. contiguous) layout; reshape() also handles the
# cases where a view is not possible, by copying the data
print(x.view((2,4)))

tensor([[1, 2, 3, 4],
        [5, 6, 7, 8]])


In [38]:
# combining tensors: stack() joins them along a new leading dimension,
# so two (4, 2) tensors become one (2, 4, 2) tensor
y = torch.stack((x, x))
print(y)

tensor([[[1, 2],
         [3, 4],
         [5, 6],
         [7, 8]],

        [[1, 2],
         [3, 4],
         [5, 6],
         [7, 8]]])


### Tensor Operations for Mathematics

#### Automatic Differentiation (Autograd)

We define a function, $f=\text{sum}(x^2)$, where $x$ is a matrix of variables. To compute $df/dx$ for each variable in the matrix, we need to set the `requires_grad=True` flag on the tensor $x$.

In [39]:
# Floating-point tensor flagged for gradient tracking, so autograd records
# the operations applied to it
x = torch.tensor(
    [[1, 2, 3], [4, 5, 6]],
    dtype=torch.float32,    # torch.float is an alias of torch.float32
    requires_grad=True,
)
print(x)

tensor([[1., 2., 3.],
        [4., 5., 6.]], requires_grad=True)


In [40]:
# f = sum of the squared elements of x; the result carries a grad_fn
# because it was computed from a tensor that requires gradients
f = torch.sum(x.pow(2))
print(f)

tensor(91., grad_fn=<SumBackward0>)


In [41]:
# backward() computes df/dx and stores it in x.grad.
# NOTE: run once per freshly computed f -- by default the graph's saved
# buffers are freed after backward(), so re-running this cell alone should fail.
f.backward()
print(x.grad)   # df/dx = 2x

tensor([[ 2.,  4.,  6.],
        [ 8., 10., 12.]])
