In [52]:
import torch as t
torch.__version__

'2.0.1'

In [2]:
s = t.tensor(7) # s for scalar
s

tensor(7)

In [3]:
s.ndim

0

In [4]:
# to get the number with in a tensor, but only works with single element tensor
s.item()

7

In [49]:
v = t.tensor([7,7]) # v for vector
v

tensor([7, 7])

In [7]:
v.ndim

1

In [8]:
v.shape

torch.Size([2])

In [10]:
M = t.tensor([[7,8],
                     [9,10]])
M # M for matrix
# notice convention is lowercase for scalars & vectors
#   uppercase for matrices & tensors

tensor([[ 7,  8],
        [ 9, 10]])

In [11]:
M.ndim

2

In [12]:
M.shape

torch.Size([2, 2])

In [13]:
T = t.tensor([[[1,2,3],
                       [3,6,9],
                       [2,4,5]]])
T # T for tensor

tensor([[[1, 2, 3],
         [3, 6, 9],
         [2, 4, 5]]])

In [14]:
T.ndim

3

In [15]:
# notice dimensions go outer to inner
T.shape
# so the 1 is a dummy dimension since it's not actually "used"

torch.Size([1, 3, 3])

In [17]:
R = t.tensor([[[1,2,3],
                       [3,6,9],
                       [2,4,5]],
                     [[1,2,3],
                       [4,5,6],
                       [7,8,9]],
                     [[1,2,4],
                       [8,16,32],
                       [64,128,256]]])
R # R for rubik's cube since this one actually takes advantage of all dimensions

tensor([[[  1,   2,   3],
         [  3,   6,   9],
         [  2,   4,   5]],

        [[  1,   2,   3],
         [  4,   5,   6],
         [  7,   8,   9]],

        [[  1,   2,   4],
         [  8,  16,  32],
         [ 64, 128, 256]]])

In [18]:
R.ndim

3

In [19]:
R.shape
# so this one actually takes full advantage of each dimension

torch.Size([3, 3, 3])

In [20]:
# random tensors
rand_T = t.rand(size=(3,4))
rand_T, rand_T.dtype

(tensor([[0.2022, 0.5528, 0.2419, 0.6681],
         [0.0777, 0.0615, 0.0688, 0.8312],
         [0.5906, 0.6101, 0.7064, 0.5748]]),
 torch.float32)

In [21]:
# zeros
Z = t.zeros(size=(2,3))
Z, Z.dtype

(tensor([[0., 0., 0.],
         [0., 0., 0.]]),
 torch.float32)

In [22]:
# ones
O = t.ones(size=(4,3))
O, O.dtype

(tensor([[1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.]]),
 torch.float32)

In [23]:
# range
Zto10 = t.arange(0,10,1) # or specify (start=0,end=10,step=1)
Zto10

tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [24]:
# create a tensor of zeros of the same shape as a pre-existing tensor
ten_zeros = t.zeros_like(Zto10)
ten_zeros

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [25]:
f32T = t.tensor([3.,6.,9.],
                         dtype=None,# the default "None" is actually float32
                         device=None,# default tensor type, specify GPU
                         requires_grad=False)# if true, operations performed on the tensor are recorded
f32T.shape, f32T.dtype, f32T.device

(torch.Size([3]), torch.float32, device(type='cpu'))

In [28]:
# putting it on my macbook's neural engine
x = t.ones(5,device="mps")

In [29]:
# now any operation happens on the GPU
y = x**2

In [34]:
x.device, y.device

(device(type='mps', index=0), device(type='mps', index=0))

In [32]:
# how to move a model over to mps
#model = YourFavoriteModel()
#model.to("mps")

In [38]:
# basic operations
test = t.tensor([1,2,3])
test_plus = test+10
test_mult = test*10
test, test_plus, test_mult

(tensor([1, 2, 3]), tensor([11, 12, 13]), tensor([10, 20, 30]))

In [39]:
# you can also use torch functions
test_plus = t.add(test, 10)
test_mult = t.multiply(test, 10)
test_plus, test_mult

(tensor([11, 12, 13]), tensor([10, 20, 30]))

In [40]:
# element-wise multiplication
test*test

tensor([1, 4, 9])

In [41]:
# matrix multiplication (in this case it's just a dot product obvi)
test @ test

tensor(14)

In [59]:
# the torch funciton alternative
tensor = t.rand((3,3))
tensor, t.matmul(tensor,tensor), t.mm(tensor,tensor) # mm is shorthand

(tensor([[0.4138, 0.3141, 0.7638],
         [0.3819, 0.4269, 0.7309],
         [0.1975, 0.7735, 0.0335]]),
 tensor([[0.4420, 0.8549, 0.5712],
         [0.4654, 0.8676, 0.6282],
         [0.3837, 0.4182, 0.7174]]),
 tensor([[0.4420, 0.8549, 0.5712],
         [0.4654, 0.8676, 0.6282],
         [0.3837, 0.4182, 0.7174]]))

In [None]:
tensor

In [45]:
%%time
# If you want to time something this is useful
# purposely slow code
value = 0
for i in range(len(tensor)):
  value += tensor[i] * tensor[i]
value

CPU times: user 606 µs, sys: 1.58 ms, total: 2.18 ms
Wall time: 1.45 ms


tensor([[ 1,  4,  9],
        [ 9, 36, 81],
        [ 4, 16, 25]])

In [47]:
%%time
# faster
t.matmul(tensor,tensor)

CPU times: user 280 µs, sys: 454 µs, total: 734 µs
Wall time: 475 µs


tensor([[[ 13,  26,  36],
         [ 39,  78, 108],
         [ 24,  48,  67]]])

In [57]:
# transpose
skinny = t.rand((2,5))
skinny, skinny.T

(tensor([[0.4765, 0.9615, 0.7560, 0.3669, 0.5726],
         [0.5232, 0.9302, 0.2960, 0.7707, 0.4632]]),
 tensor([[0.4765, 0.5232],
         [0.9615, 0.9302],
         [0.7560, 0.2960],
         [0.3669, 0.7707],
         [0.5726, 0.4632]]))

In [65]:
# to make it reproducible
t.manual_seed(42)

x = t.tensor([[1, 2],
                    [3, 4],
                    [5, 6]], dtype=t.float32)

linear = t.nn.Linear(in_features=2,# set the inner dimension of input so you can multiply
                        out_features=6)# describes outer value

output = linear(x)
print(f"Input shape: {x.shape}\n")
print(f"Output:\n{output}\n\nOutput shape: {output.shape}")

Input shape: torch.Size([3, 2])

Output:
tensor([[ 2.2595,  1.2380, -0.1997,  0.6665, -0.7400,  0.7964,  0.4267,  0.6104],
        [ 4.5145,  2.2058, -0.2241,  0.8086, -0.5308,  2.2903,  1.6631,  1.0926],
        [ 6.7696,  3.1736, -0.2486,  0.9506, -0.3216,  3.7842,  2.8995,  1.5748]],
       grad_fn=<AddmmBackward0>)

Output shape: torch.Size([3, 8])


In [68]:
x = t.arange(0,100,10)
x.min(), x.max(), x.type(t.float32).mean(), x.sum()
# notice mean is kinda weird in that you need to specify type

(tensor(0), tensor(90), tensor(45.), tensor(450))

In [69]:
# different ways to do things
t.min(x), t.max(x), t.mean(x.type(t.float32)), t.sum(x)

(tensor(0), tensor(90), tensor(45.), tensor(450))

In [73]:
# to find the index where the max or min value occurs
x.argmin(), x[x.argmin()], x.argmax(), x[x.argmax()]

(tensor(0), tensor(0), tensor(9), tensor(90))

In [84]:
# reshaping, adding dimension, stacking, etc
x = t.arange(1.,8.)
x, x.shape

(tensor([1., 2., 3., 4., 5., 6., 7.]), torch.Size([7]))

In [78]:
# addign extra dimension
x_reshaped = x.reshape(1,7)
x_reshaped, x_reshaped.shape
# so i suppose either it does this intelligently or the first wraps the second by default

(tensor([[1., 2., 3., 4., 5., 6., 7.]]), torch.Size([1, 7]))

In [85]:
# we can alternatively change the view which keeps same data
# see more https://stackoverflow.com/a/54507446/7900723
z = x.view(1,7)
z, z.shape

(tensor([[1., 2., 3., 4., 5., 6., 7.]]), torch.Size([1, 7]))

In [86]:
# this new view tensor will always *share* its data with the original
x = x+1
z, z.shape
# ok not sure why this isn't working

(tensor([[1., 2., 3., 4., 5., 6., 7.]]), torch.Size([1, 7]))

In [87]:
# Changing z changes x?
z[:, 0] = 5
z, x
# very weird so i can't figure out which direction mutibility is supposed to go in

(tensor([[5., 2., 3., 4., 5., 6., 7.]]), tensor([2., 3., 4., 5., 6., 7., 8.]))

In [91]:
# stack tensors on top of each other
x_stacked_v = t.stack([x,x,x,x], dim=0) # pay attention to dimension
x_stacked_h = t.stack([x,x,x,x], dim=1)
x_stacked_v, x_stacked_h

(tensor([[2., 3., 4., 5., 6., 7., 8.],
         [2., 3., 4., 5., 6., 7., 8.],
         [2., 3., 4., 5., 6., 7., 8.],
         [2., 3., 4., 5., 6., 7., 8.]]),
 tensor([[2., 2., 2., 2.],
         [3., 3., 3., 3.],
         [4., 4., 4., 4.],
         [5., 5., 5., 5.],
         [6., 6., 6., 6.],
         [7., 7., 7., 7.],
         [8., 8., 8., 8.]]))

In [94]:
# to remove any single dummy dimension
a = t.rand((1,3,3))
a, a.shape, a.squeeze(), a.squeeze().shape

(tensor([[[0.5832, 0.3376, 0.8090],
          [0.5779, 0.9040, 0.5547],
          [0.3423, 0.6343, 0.3644]]]),
 torch.Size([1, 3, 3]),
 tensor([[0.5832, 0.3376, 0.8090],
         [0.5779, 0.9040, 0.5547],
         [0.3423, 0.6343, 0.3644]]),
 torch.Size([3, 3]))

In [95]:
# to add a dummy dimension at a specific index
b = a.squeeze()

b, b.unsqueeze(0), b.unsqueeze(1), b.unsqueeze(2)

(tensor([[0.5832, 0.3376, 0.8090],
         [0.5779, 0.9040, 0.5547],
         [0.3423, 0.6343, 0.3644]]),
 tensor([[[0.5832, 0.3376, 0.8090],
          [0.5779, 0.9040, 0.5547],
          [0.3423, 0.6343, 0.3644]]]),
 tensor([[[0.5832, 0.3376, 0.8090]],
 
         [[0.5779, 0.9040, 0.5547]],
 
         [[0.3423, 0.6343, 0.3644]]]),
 tensor([[[0.5832],
          [0.3376],
          [0.8090]],
 
         [[0.5779],
          [0.9040],
          [0.5547]],
 
         [[0.3423],
          [0.6343],
          [0.3644]]]))

In [106]:
# you can rearrange order of axes values
x = t.rand((2,3,4))

# sends 0->1, 1->2, 2->0
x_permuted = x.permute((2,0,1)) 

x.shape, x_permuted.shape

(torch.Size([2, 3, 4]), torch.Size([4, 2, 3]))

In [107]:
x, x_permuted
# .5310 should be in
# first entry of second dimension
# first entry of third dimension
# second entry of first dimension

(tensor([[[0.8879, 0.5310, 0.0187, 0.6942],
          [0.3533, 0.0934, 0.5302, 0.1672],
          [0.6196, 0.1803, 0.6240, 0.0419]],
 
         [[0.7111, 0.9300, 0.5697, 0.6051],
          [0.9739, 0.7306, 0.6755, 0.1768],
          [0.9718, 0.2475, 0.1994, 0.5296]]]),
 tensor([[[0.8879, 0.3533, 0.6196],
          [0.7111, 0.9739, 0.9718]],
 
         [[0.5310, 0.0934, 0.1803],
          [0.9300, 0.7306, 0.2475]],
 
         [[0.0187, 0.5302, 0.6240],
          [0.5697, 0.6755, 0.1994]],
 
         [[0.6942, 0.1672, 0.0419],
          [0.6051, 0.1768, 0.5296]]]))

In [103]:
# let's try and actually understand this
z = t.rand((2,3))
z_permuted = z.permute((1,0))

z.shape, z_permuted.shape, z, z_permuted

(torch.Size([2, 3]),
 torch.Size([3, 2]),
 tensor([[0.4669, 0.1985, 0.4316],
         [0.0238, 0.3256, 0.5471]]),
 tensor([[0.4669, 0.0238],
         [0.1985, 0.3256],
         [0.4316, 0.5471]]))

^ I guess the concept of transpose really only makes sense in matrices, and permutation is the more general phenomenon in higher dimensions.

so the "top left" corner and "bottom right" corner always stay the same

here it is I've got it

In [108]:
x = t.tensor([[[1,2,3,4],
     [5,6,7,8],
     [9,10,11,12]],
    [[13,14,15,16],
    [17,18,19,20],
    [21,22,23,24]]])

In [109]:
x_permuted = x.permute((1,2,0)) 

So take 10 for example.
10 is in 
- the first entry of the 0th dimension
- the third entry of the 1st dimension
- the second entry of the 2nd dimension

and we just mapped
- 0 -> 2
- 1 -> 0
- 2 -> 1

so 10 should be in
- the first entry of the 2nd dimension
- the third entry of the 0th dimension
- the second entry of the 1st dimension

In [110]:
x_permuted

tensor([[[ 1, 13],
         [ 2, 14],
         [ 3, 15],
         [ 4, 16]],

        [[ 5, 17],
         [ 6, 18],
         [ 7, 19],
         [ 8, 20]],

        [[ 9, 21],
         [10, 22],
         [11, 23],
         [12, 24]]])

badabing badaboom

In [111]:
x = t.arange(1,10).reshape(1,3,3)
x, x.shape

(tensor([[[1, 2, 3],
          [4, 5, 6],
          [7, 8, 9]]]),
 torch.Size([1, 3, 3]))

so i presume this reshape thing works off the mapping i just figured out basically indexing everything and using that to figure out where they should go. definitely goes outer dimension -> inner dimension

In [112]:
# indexing
print(f"First square bracket:\n{x[0]}") 
print(f"Second square bracket: {x[0][0]}") 
print(f"Third square bracket: {x[0][0][0]}")

First square bracket:
tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])
Second square bracket: tensor([1, 2, 3])
Third square bracket: 1


In [113]:
# Get all values of 0th dimension and the 0 index of 1st dimension
x[:, 0]

tensor([[1, 2, 3]])

In [114]:
# Get all values of 0th & 1st dimensions but only index 1 of 2nd dimension
x[:, :, 1]

tensor([[2, 5, 8]])

In [115]:
# Get all values of the 0 dimension but only the 1 index value of the 1st and 2nd dimension
x[:, 1, 1]

tensor([5])

In [116]:
# Get index 0 of 0th and 1st dimension and all values of 2nd dimension 
x[0, 0, :] # same as x[0][0]

tensor([1, 2, 3])

In [118]:
# importing from numpy
import numpy as np
array = np.arange(1.,8.)
tensor = t.from_numpy(array)
array, tensor
# numpy arrays turned to float64 by default, 
# but float32 is more common for torch

(array([1., 2., 3., 4., 5., 6., 7.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

In [120]:
# and the reverse direction
tensor = t.ones(7)
numpy_tensor = tensor.numpy()
tensor, numpy_tensor

(tensor([1., 1., 1., 1., 1., 1., 1.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

In [121]:
# taking advantage of seeds for reproducibility
import random as r

seed = 69
t.manual_seed(seed)
A = t.rand(3,4)

# you need to reset the seed EVERY time a new tensor is called
t.manual_seed(seed)
B = t.rand(3,4)

A == B

tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])

In [125]:
# checking for GPU
if torch.backends.mps.is_available():
    mps_device = torch.device("mps")
    x = torch.ones(1, device=mps_device)
    print (x)
else:
    print ("MPS device not found.")
    
# output should show "tensor([1.], device='mps:0')"

tensor([1.], device='mps:0')


In [130]:
# putting tensor on gpu
tensor = t.tensor([1,2,3])
print(tensor.device)

tensor_on_gpu = tensor.to("mps")
print(tensor_on_gpu.device)

cpu
mps:0


if a tensor is on GPU you cant send to numpy so u have to change it back

In [131]:
tensor_back_on_cpu = tensor_on_gpu.cpu().numpy()
tensor_back_on_cpu

array([1, 2, 3])

^ not sure if that's actually a thing for mac given that apple silicon has shared memory but i'm just following the guide